i965/drm: Drop softpin support for now.
src/mesa/drivers/dri/i965/intel_bufmgr_gem.c (mesa.git)
1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include "errno.h"
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "common/gen_debug.h"
60 #include "common/gen_device_info.h"
61 #include "libdrm_macros.h"
62 #include "main/macros.h"
63 #include "util/macros.h"
64 #include "util/list.h"
65 #include "brw_bufmgr.h"
66 #include "string.h"
67
68 #include "i915_drm.h"
69 #include "uthash.h"
70
71 #ifdef HAVE_VALGRIND
72 #include <valgrind.h>
73 #include <memcheck.h>
74 #define VG(x) x
75 #else
76 #define VG(x)
77 #endif
78
79 #define memclear(s) memset(&s, 0, sizeof(s))
80
81 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
82
83 static inline int
84 atomic_add_unless(int *v, int add, int unless)
85 {
86 int c, old;
87 c = p_atomic_read(v);
88 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
89 c = old;
90 return c == unless;
91 }
92
93 /**
94 * upper_32_bits - return bits 32-63 of a number
95 * @n: the number we're accessing
96 *
97 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
98 * the "right shift count >= width of type" warning when that quantity is
99 * 32-bits.
100 */
101 #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
102
103 /**
104 * lower_32_bits - return bits 0-31 of a number
105 * @n: the number we're accessing
106 */
107 #define lower_32_bits(n) ((__u32)(n))
108
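/*
 * Illustrative sketch (not part of the original file): how these helpers are
 * used to split a 64-bit GTT offset for printing, exactly as the DBG() calls
 * later in this file do.
 */
#if 0
	uint64_t offset64 = 0x0000000123456000ull;

	/* Prints "0x00000001 23456000". */
	printf("0x%08x %08x\n",
	       upper_32_bits(offset64), lower_32_bits(offset64));
#endif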
109 struct _drm_bacon_context {
110 unsigned int ctx_id;
111 struct _drm_bacon_bufmgr *bufmgr;
112 };
113
114 typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;
115
116 struct drm_bacon_gem_bo_bucket {
117 struct list_head head;
118 unsigned long size;
119 };
120
121 typedef struct _drm_bacon_bufmgr {
122 int refcount;
123
124 int fd;
125
126 int max_relocs;
127
128 pthread_mutex_t lock;
129
130 struct drm_i915_gem_exec_object2 *exec2_objects;
131 drm_bacon_bo **exec_bos;
132 int exec_size;
133 int exec_count;
134
135 /** Array of lists of cached gem objects of power-of-two sizes */
136 struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
137 int num_buckets;
138 time_t time;
139
140 struct list_head managers;
141
142 drm_bacon_bo_gem *name_table;
143 drm_bacon_bo_gem *handle_table;
144
145 struct list_head vma_cache;
146 int vma_count, vma_open, vma_max;
147
148 uint64_t gtt_size;
149 unsigned int has_llc : 1;
150 unsigned int bo_reuse : 1;
151 unsigned int no_exec : 1;
152 unsigned int has_exec_async : 1;
153 } drm_bacon_bufmgr;
154
155 typedef struct _drm_bacon_reloc_target_info {
156 drm_bacon_bo *bo;
157 } drm_bacon_reloc_target;
158
159 struct _drm_bacon_bo_gem {
160 drm_bacon_bo bo;
161
162 int refcount;
163 uint32_t gem_handle;
164 const char *name;
165
166 /**
167 * Kernel-assigned global name for this object
168 *
169 * List contains both flink named and prime fd'd objects
170 */
171 unsigned int global_name;
172
173 UT_hash_handle handle_hh;
174 UT_hash_handle name_hh;
175
176 /**
177 * Index of the buffer within the validation list while preparing a
178 * batchbuffer execution.
179 */
180 int validate_index;
181
182 /**
183 * Current tiling mode
184 */
185 uint32_t tiling_mode;
186 uint32_t swizzle_mode;
187 unsigned long stride;
188
189 unsigned long kflags;
190
191 time_t free_time;
192
193 /** Array passed to the DRM containing relocation information. */
194 struct drm_i915_gem_relocation_entry *relocs;
195 /**
196 * Array of info structs corresponding to relocs[i].target_handle etc
197 */
198 drm_bacon_reloc_target *reloc_target_info;
199 /** Number of entries in relocs */
200 int reloc_count;
201 /** Mapped address for the buffer, saved across map/unmap cycles */
202 void *mem_virtual;
203 /** GTT virtual address for the buffer, saved across map/unmap cycles */
204 void *gtt_virtual;
205 /** WC CPU address for the buffer, saved across map/unmap cycles */
206 void *wc_virtual;
207 int map_count;
208 struct list_head vma_list;
209
210 /** BO cache list */
211 struct list_head head;
212
213 /**
214 * Boolean of whether this BO and its children have been included in
215 * the current drm_bacon_bufmgr_check_aperture_space() total.
216 */
217 bool included_in_check_aperture;
218
219 /**
220 * Boolean of whether this buffer has been used as a relocation
221 * target and had its size accounted for, and thus can't have any
222 * further relocations added to it.
223 */
224 bool used_as_reloc_target;
225
226 /**
227 * Boolean of whether we have encountered an error whilst building the relocation tree.
228 */
229 bool has_error;
230
231 /**
232 * Boolean of whether this buffer can be re-used
233 */
234 bool reusable;
235
236 /**
237 * Boolean of whether the GPU is definitely not accessing the buffer.
238 *
239 * This is only valid when reusable, since non-reusable
240 * buffers are those that have been shared with other
241 * processes, so we don't know their state.
242 */
243 bool idle;
244
245 /**
246 * Size in bytes of this buffer and its relocation descendants.
247 *
248 * Used to avoid costly tree walking in
249 * drm_bacon_bufmgr_check_aperture in the common case.
250 */
251 int reloc_tree_size;
252
253 /** Flags that we may need to do the SW_FINISH ioctl on unmap. */
254 bool mapped_cpu_write;
255 };
256
257 static unsigned int
258 drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);
259
260 static unsigned int
261 drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);
262
263 static int
264 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
265 uint32_t tiling_mode,
266 uint32_t stride);
267
268 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
269 time_t time);
270
271 static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
272
273 static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
274 {
275 return (drm_bacon_bo_gem *)bo;
276 }
277
278 static unsigned long
279 drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr *bufmgr, unsigned long size,
280 uint32_t *tiling_mode)
281 {
282 if (*tiling_mode == I915_TILING_NONE)
283 return size;
284
285 /* 965+ just need multiples of page size for tiling */
286 return ALIGN(size, 4096);
287 }
288
289 /*
290 * Round a given pitch up to the minimum required for X tiling on a
291 * given chip. We use 512 as the minimum to allow for a later tiling
292 * change.
293 */
294 static unsigned long
295 drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr *bufmgr,
296 unsigned long pitch, uint32_t *tiling_mode)
297 {
298 unsigned long tile_width;
299
300 /* If untiled, then just align it so that we can do rendering
301 * to it with the 3D engine.
302 */
303 if (*tiling_mode == I915_TILING_NONE)
304 return ALIGN(pitch, 64);
305
306 if (*tiling_mode == I915_TILING_X)
307 tile_width = 512;
308 else
309 tile_width = 128;
310
311 /* 965 is flexible */
312 return ALIGN(pitch, tile_width);
313 }
314
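/*
 * Illustrative sketch (not part of the original file): pitch values produced
 * by drm_bacon_gem_bo_tile_pitch() for a requested 700-byte pitch, assuming a
 * 'bufmgr' is in scope.
 */
#if 0
	uint32_t none = I915_TILING_NONE;   /* aligned to 64  -> 704  */
	uint32_t tile_x = I915_TILING_X;    /* aligned to 512 -> 1024 */
	uint32_t tile_y = I915_TILING_Y;    /* aligned to 128 -> 768  */

	drm_bacon_gem_bo_tile_pitch(bufmgr, 700, &none);
	drm_bacon_gem_bo_tile_pitch(bufmgr, 700, &tile_x);
	drm_bacon_gem_bo_tile_pitch(bufmgr, 700, &tile_y);
#endif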
315 static struct drm_bacon_gem_bo_bucket *
316 drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr *bufmgr,
317 unsigned long size)
318 {
319 int i;
320
321 for (i = 0; i < bufmgr->num_buckets; i++) {
322 struct drm_bacon_gem_bo_bucket *bucket =
323 &bufmgr->cache_bucket[i];
324 if (bucket->size >= size) {
325 return bucket;
326 }
327 }
328
329 return NULL;
330 }
331
332 static void
333 drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr *bufmgr)
334 {
335 int i, j;
336
337 for (i = 0; i < bufmgr->exec_count; i++) {
338 drm_bacon_bo *bo = bufmgr->exec_bos[i];
339 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
340
341 if (bo_gem->relocs == NULL) {
342 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
343 bo_gem->name);
344 continue;
345 }
346
347 for (j = 0; j < bo_gem->reloc_count; j++) {
348 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[j].bo;
349 drm_bacon_bo_gem *target_gem =
350 (drm_bacon_bo_gem *) target_bo;
351
352 DBG("%2d: %d (%s)@0x%08x %08x -> "
353 "%d (%s)@0x%08x %08x + 0x%08x\n",
354 i,
355 bo_gem->gem_handle,
356 bo_gem->name,
357 upper_32_bits(bo_gem->relocs[j].offset),
358 lower_32_bits(bo_gem->relocs[j].offset),
359 target_gem->gem_handle,
360 target_gem->name,
361 upper_32_bits(target_bo->offset64),
362 lower_32_bits(target_bo->offset64),
363 bo_gem->relocs[j].delta);
364 }
365 }
366 }
367
368 inline void
369 drm_bacon_bo_reference(drm_bacon_bo *bo)
370 {
371 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
372
373 p_atomic_inc(&bo_gem->refcount);
374 }
375
376 static void
377 drm_bacon_add_validate_buffer2(drm_bacon_bo *bo)
378 {
379 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
380 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
381 int index;
382
383 if (bo_gem->validate_index != -1)
384 return;
385
386 /* Extend the array of validation entries as necessary. */
387 if (bufmgr->exec_count == bufmgr->exec_size) {
388 int new_size = bufmgr->exec_size * 2;
389
390 if (new_size == 0)
391 new_size = 5;
392
393 bufmgr->exec2_objects =
394 realloc(bufmgr->exec2_objects,
395 sizeof(*bufmgr->exec2_objects) * new_size);
396 bufmgr->exec_bos =
397 realloc(bufmgr->exec_bos,
398 sizeof(*bufmgr->exec_bos) * new_size);
399 bufmgr->exec_size = new_size;
400 }
401
402 index = bufmgr->exec_count;
403 bo_gem->validate_index = index;
404 /* Fill in array entry */
405 bufmgr->exec2_objects[index].handle = bo_gem->gem_handle;
406 bufmgr->exec2_objects[index].relocation_count = bo_gem->reloc_count;
407 bufmgr->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
408 bufmgr->exec2_objects[index].alignment = bo->align;
409 bufmgr->exec2_objects[index].offset = bo->offset64;
410 bufmgr->exec2_objects[index].flags = bo_gem->kflags;
411 bufmgr->exec2_objects[index].rsvd1 = 0;
412 bufmgr->exec2_objects[index].rsvd2 = 0;
413 bufmgr->exec_bos[index] = bo;
414 bufmgr->exec_count++;
415 }
416
417 static void
418 drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr *bufmgr,
419 drm_bacon_bo_gem *bo_gem,
420 unsigned int alignment)
421 {
422 unsigned int size;
423
424 assert(!bo_gem->used_as_reloc_target);
425
426 /* The older chipsets are far less flexible in terms of tiling,
427 * and require tiled buffers to be size-aligned in the aperture.
428 * This means that in the worst possible case we will need a hole
429 * twice as large as the object in order for it to fit into the
430 * aperture. Optimal packing is for wimps.
431 */
432 size = bo_gem->bo.size;
433
434 bo_gem->reloc_tree_size = size + alignment;
435 }
436
437 static int
438 drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
439 {
440 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
441 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
442 unsigned int max_relocs = bufmgr->max_relocs;
443
444 if (bo->size / 4 < max_relocs)
445 max_relocs = bo->size / 4;
446
447 bo_gem->relocs = malloc(max_relocs *
448 sizeof(struct drm_i915_gem_relocation_entry));
449 bo_gem->reloc_target_info = malloc(max_relocs *
450 sizeof(drm_bacon_reloc_target));
451 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
452 bo_gem->has_error = true;
453
454 free (bo_gem->relocs);
455 bo_gem->relocs = NULL;
456
457 free (bo_gem->reloc_target_info);
458 bo_gem->reloc_target_info = NULL;
459
460 return 1;
461 }
462
463 return 0;
464 }
465
466 int
467 drm_bacon_bo_busy(drm_bacon_bo *bo)
468 {
469 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
470 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
471 struct drm_i915_gem_busy busy;
472 int ret;
473
474 if (bo_gem->reusable && bo_gem->idle)
475 return false;
476
477 memclear(busy);
478 busy.handle = bo_gem->gem_handle;
479
480 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
481 if (ret == 0) {
482 bo_gem->idle = !busy.busy;
483 return busy.busy;
484 } else {
485 return false;
486 }
488 }
489
490 static int
491 drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr *bufmgr,
492 drm_bacon_bo_gem *bo_gem, int state)
493 {
494 struct drm_i915_gem_madvise madv;
495
496 memclear(madv);
497 madv.handle = bo_gem->gem_handle;
498 madv.madv = state;
499 madv.retained = 1;
500 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
501
502 return madv.retained;
503 }
504
505 int
506 drm_bacon_bo_madvise(drm_bacon_bo *bo, int madv)
507 {
508 return drm_bacon_gem_bo_madvise_internal(bo->bufmgr,
509 (drm_bacon_bo_gem *) bo,
510 madv);
511 }
512
513 /* drop the oldest entries that have been purged by the kernel */
514 static void
515 drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr *bufmgr,
516 struct drm_bacon_gem_bo_bucket *bucket)
517 {
518 while (!list_empty(&bucket->head)) {
519 drm_bacon_bo_gem *bo_gem;
520
521 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
522 bucket->head.next, head);
523 if (drm_bacon_gem_bo_madvise_internal
524 (bufmgr, bo_gem, I915_MADV_DONTNEED))
525 break;
526
527 list_del(&bo_gem->head);
528 drm_bacon_gem_bo_free(&bo_gem->bo);
529 }
530 }
531
532 static drm_bacon_bo *
533 drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
534 const char *name,
535 unsigned long size,
536 unsigned long flags,
537 uint32_t tiling_mode,
538 unsigned long stride,
539 unsigned int alignment)
540 {
541 drm_bacon_bo_gem *bo_gem;
542 unsigned int page_size = getpagesize();
543 int ret;
544 struct drm_bacon_gem_bo_bucket *bucket;
545 bool alloc_from_cache;
546 unsigned long bo_size;
547 bool for_render = false;
548
549 if (flags & BO_ALLOC_FOR_RENDER)
550 for_render = true;
551
552 /* Round the allocated size up to a power of two number of pages. */
553 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, size);
554
555 /* If we don't have caching at this size, don't actually round the
556 * allocation up.
557 */
558 if (bucket == NULL) {
559 bo_size = size;
560 if (bo_size < page_size)
561 bo_size = page_size;
562 } else {
563 bo_size = bucket->size;
564 }
565
566 pthread_mutex_lock(&bufmgr->lock);
567 /* Get a buffer out of the cache if available */
568 retry:
569 alloc_from_cache = false;
570 if (bucket != NULL && !list_empty(&bucket->head)) {
571 if (for_render) {
572 /* Allocate new render-target BOs from the tail (MRU)
573 * of the list, as it will likely be hot in the GPU
574 * cache and in the aperture for us.
575 */
576 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
577 bucket->head.prev, head);
578 list_del(&bo_gem->head);
579 alloc_from_cache = true;
580 bo_gem->bo.align = alignment;
581 } else {
582 assert(alignment == 0);
583 /* For non-render-target BOs (where we're probably
584 * going to map it first thing in order to fill it
585 * with data), check if the last BO in the cache is
586 * unbusy, and only reuse in that case. Otherwise,
587 * allocating a new buffer is probably faster than
588 * waiting for the GPU to finish.
589 */
590 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
591 bucket->head.next, head);
592 if (!drm_bacon_bo_busy(&bo_gem->bo)) {
593 alloc_from_cache = true;
594 list_del(&bo_gem->head);
595 }
596 }
597
598 if (alloc_from_cache) {
599 if (!drm_bacon_gem_bo_madvise_internal
600 (bufmgr, bo_gem, I915_MADV_WILLNEED)) {
601 drm_bacon_gem_bo_free(&bo_gem->bo);
602 drm_bacon_gem_bo_cache_purge_bucket(bufmgr,
603 bucket);
604 goto retry;
605 }
606
607 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
608 tiling_mode,
609 stride)) {
610 drm_bacon_gem_bo_free(&bo_gem->bo);
611 goto retry;
612 }
613 }
614 }
615
616 if (!alloc_from_cache) {
617 struct drm_i915_gem_create create;
618
619 bo_gem = calloc(1, sizeof(*bo_gem));
620 if (!bo_gem)
621 goto err;
622
623 /* drm_bacon_gem_bo_free calls list_del() on the vma_list even when it
624 was never initialized, so initialize the list head here first. */
625 list_inithead(&bo_gem->vma_list);
626
627 bo_gem->bo.size = bo_size;
628
629 memclear(create);
630 create.size = bo_size;
631
632 ret = drmIoctl(bufmgr->fd,
633 DRM_IOCTL_I915_GEM_CREATE,
634 &create);
635 if (ret != 0) {
636 free(bo_gem);
637 goto err;
638 }
639
640 bo_gem->gem_handle = create.handle;
641 HASH_ADD(handle_hh, bufmgr->handle_table,
642 gem_handle, sizeof(bo_gem->gem_handle),
643 bo_gem);
644
645 bo_gem->bo.handle = bo_gem->gem_handle;
646 bo_gem->bo.bufmgr = bufmgr;
647 bo_gem->bo.align = alignment;
648
649 bo_gem->tiling_mode = I915_TILING_NONE;
650 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
651 bo_gem->stride = 0;
652
653 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
654 tiling_mode,
655 stride))
656 goto err_free;
657 }
658
659 bo_gem->name = name;
660 p_atomic_set(&bo_gem->refcount, 1);
661 bo_gem->validate_index = -1;
662 bo_gem->used_as_reloc_target = false;
663 bo_gem->has_error = false;
664 bo_gem->reusable = true;
665
666 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, alignment);
667 pthread_mutex_unlock(&bufmgr->lock);
668
669 DBG("bo_create: buf %d (%s) %ldb\n",
670 bo_gem->gem_handle, bo_gem->name, size);
671
672 return &bo_gem->bo;
673
674 err_free:
675 drm_bacon_gem_bo_free(&bo_gem->bo);
676 err:
677 pthread_mutex_unlock(&bufmgr->lock);
678 return NULL;
679 }
680
681 drm_bacon_bo *
682 drm_bacon_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
683 const char *name,
684 unsigned long size,
685 unsigned int alignment)
686 {
687 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
688 BO_ALLOC_FOR_RENDER,
689 I915_TILING_NONE, 0,
690 alignment);
691 }
692
693 drm_bacon_bo *
694 drm_bacon_bo_alloc(drm_bacon_bufmgr *bufmgr,
695 const char *name,
696 unsigned long size,
697 unsigned int alignment)
698 {
699 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
700 I915_TILING_NONE, 0, 0);
701 }
702
703 drm_bacon_bo *
704 drm_bacon_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
705 int x, int y, int cpp, uint32_t *tiling_mode,
706 unsigned long *pitch, unsigned long flags)
707 {
708 unsigned long size, stride;
709 uint32_t tiling;
710
711 do {
712 unsigned long aligned_y, height_alignment;
713
714 tiling = *tiling_mode;
715
716 /* If we're tiled, our allocations are in 8 or 32-row blocks,
717 * so failure to align our height means that we won't allocate
718 * enough pages.
719 *
720 * If we're untiled, we still have to align to 2 rows high
721 * because the data port accesses 2x2 blocks even if the
722 * bottom row isn't to be rendered, so failure to align means
723 * we could walk off the end of the GTT and fault. This is
724 * documented on 965, and may be the case on older chipsets
725 * too so we try to be careful.
726 */
727 aligned_y = y;
728 height_alignment = 2;
729
730 if (tiling == I915_TILING_X)
731 height_alignment = 8;
732 else if (tiling == I915_TILING_Y)
733 height_alignment = 32;
734 aligned_y = ALIGN(y, height_alignment);
735
736 stride = x * cpp;
737 stride = drm_bacon_gem_bo_tile_pitch(bufmgr, stride, tiling_mode);
738 size = stride * aligned_y;
739 size = drm_bacon_gem_bo_tile_size(bufmgr, size, tiling_mode);
740 } while (*tiling_mode != tiling);
741 *pitch = stride;
742
743 if (tiling == I915_TILING_NONE)
744 stride = 0;
745
746 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
747 tiling, stride, 0);
748 }
749
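/*
 * Illustrative sketch (not part of the original file): allocating an X-tiled
 * buffer with hypothetical dimensions. The rounded stride comes back through
 * *pitch; the tiling actually applied is whatever the set-tiling ioctl
 * accepted.
 */
#if 0
	uint32_t tiling = I915_TILING_X;
	unsigned long pitch;

	drm_bacon_bo *bo =
		drm_bacon_bo_alloc_tiled(bufmgr, "tiled surface",
					 1920, 1080, 4 /* cpp */,
					 &tiling, &pitch, BO_ALLOC_FOR_RENDER);
#endif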
750 /**
751 * Returns a drm_bacon_bo wrapping the given buffer object handle.
752 *
753 * This can be used when one application needs to pass a buffer object
754 * to another.
755 */
756 drm_bacon_bo *
757 drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
758 const char *name,
759 unsigned int handle)
760 {
761 drm_bacon_bo_gem *bo_gem;
762 int ret;
763 struct drm_gem_open open_arg;
764 struct drm_i915_gem_get_tiling get_tiling;
765
766 /* At the moment most applications only have a few named BOs.
767 * For instance, in a DRI client only the render buffers passed
768 * between X and the client are named. And since X returns the
769 * alternating names for the front/back buffer a linear search
770 * provides a sufficiently fast match.
771 */
772 pthread_mutex_lock(&bufmgr->lock);
773 HASH_FIND(name_hh, bufmgr->name_table,
774 &handle, sizeof(handle), bo_gem);
775 if (bo_gem) {
776 drm_bacon_bo_reference(&bo_gem->bo);
777 goto out;
778 }
779
780 memclear(open_arg);
781 open_arg.name = handle;
782 ret = drmIoctl(bufmgr->fd,
783 DRM_IOCTL_GEM_OPEN,
784 &open_arg);
785 if (ret != 0) {
786 DBG("Couldn't reference %s handle 0x%08x: %s\n",
787 name, handle, strerror(errno));
788 bo_gem = NULL;
789 goto out;
790 }
791 /* Now see if someone has used a prime handle to get this
792 * object from the kernel before by looking through the list
793 * again for a matching gem_handle
794 */
795 HASH_FIND(handle_hh, bufmgr->handle_table,
796 &open_arg.handle, sizeof(open_arg.handle), bo_gem);
797 if (bo_gem) {
798 drm_bacon_bo_reference(&bo_gem->bo);
799 goto out;
800 }
801
802 bo_gem = calloc(1, sizeof(*bo_gem));
803 if (!bo_gem)
804 goto out;
805
806 p_atomic_set(&bo_gem->refcount, 1);
807 list_inithead(&bo_gem->vma_list);
808
809 bo_gem->bo.size = open_arg.size;
810 bo_gem->bo.offset64 = 0;
811 bo_gem->bo.virtual = NULL;
812 bo_gem->bo.bufmgr = bufmgr;
813 bo_gem->name = name;
814 bo_gem->validate_index = -1;
815 bo_gem->gem_handle = open_arg.handle;
816 bo_gem->bo.handle = open_arg.handle;
817 bo_gem->global_name = handle;
818 bo_gem->reusable = false;
819
820 HASH_ADD(handle_hh, bufmgr->handle_table,
821 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
822 HASH_ADD(name_hh, bufmgr->name_table,
823 global_name, sizeof(bo_gem->global_name), bo_gem);
824
825 memclear(get_tiling);
826 get_tiling.handle = bo_gem->gem_handle;
827 ret = drmIoctl(bufmgr->fd,
828 DRM_IOCTL_I915_GEM_GET_TILING,
829 &get_tiling);
830 if (ret != 0)
831 goto err_unref;
832
833 bo_gem->tiling_mode = get_tiling.tiling_mode;
834 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
835 /* XXX stride is unknown */
836 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
837 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
838
839 out:
840 pthread_mutex_unlock(&bufmgr->lock);
841 return &bo_gem->bo;
842
843 err_unref:
844 drm_bacon_gem_bo_free(&bo_gem->bo);
845 pthread_mutex_unlock(&bufmgr->lock);
846 return NULL;
847 }
848
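/*
 * Illustrative sketch (not part of the original file): sharing a BO between
 * two processes with a flink name. The exporter obtains a global name with
 * drm_bacon_bo_flink() and passes it out of band; the importer wraps it with
 * drm_bacon_bo_gem_create_from_name().
 */
#if 0
	/* Exporting process */
	uint32_t name;
	drm_bacon_bo_flink(bo, &name);
	/* ... hand 'name' to the other process ... */

	/* Importing process (with its own bufmgr) */
	drm_bacon_bo *shared =
		drm_bacon_bo_gem_create_from_name(bufmgr, "shared", name);
#endif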
849 static void
850 drm_bacon_gem_bo_free(drm_bacon_bo *bo)
851 {
852 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
853 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
854 struct drm_gem_close close;
855 int ret;
856
857 list_del(&bo_gem->vma_list);
858 if (bo_gem->mem_virtual) {
859 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
860 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
861 bufmgr->vma_count--;
862 }
863 if (bo_gem->wc_virtual) {
864 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
865 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
866 bufmgr->vma_count--;
867 }
868 if (bo_gem->gtt_virtual) {
869 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
870 bufmgr->vma_count--;
871 }
872
873 if (bo_gem->global_name)
874 HASH_DELETE(name_hh, bufmgr->name_table, bo_gem);
875 HASH_DELETE(handle_hh, bufmgr->handle_table, bo_gem);
876
877 /* Close this object */
878 memclear(close);
879 close.handle = bo_gem->gem_handle;
880 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
881 if (ret != 0) {
882 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
883 bo_gem->gem_handle, bo_gem->name, strerror(errno));
884 }
885 free(bo);
886 }
887
888 static void
889 drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
890 {
891 #if HAVE_VALGRIND
892 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
893
894 if (bo_gem->mem_virtual)
895 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
896
897 if (bo_gem->wc_virtual)
898 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
899
900 if (bo_gem->gtt_virtual)
901 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
902 #endif
903 }
904
905 /** Frees all cached buffers significantly older than @time. */
906 static void
907 drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr *bufmgr, time_t time)
908 {
909 int i;
910
911 if (bufmgr->time == time)
912 return;
913
914 for (i = 0; i < bufmgr->num_buckets; i++) {
915 struct drm_bacon_gem_bo_bucket *bucket =
916 &bufmgr->cache_bucket[i];
917
918 while (!list_empty(&bucket->head)) {
919 drm_bacon_bo_gem *bo_gem;
920
921 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
922 bucket->head.next, head);
923 if (time - bo_gem->free_time <= 1)
924 break;
925
926 list_del(&bo_gem->head);
927
928 drm_bacon_gem_bo_free(&bo_gem->bo);
929 }
930 }
931
932 bufmgr->time = time;
933 }
934
935 static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr *bufmgr)
936 {
937 int limit;
938
939 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
940 bufmgr->vma_count, bufmgr->vma_open, bufmgr->vma_max);
941
942 if (bufmgr->vma_max < 0)
943 return;
944
945 /* We may need to evict a few entries in order to create new mmaps */
946 limit = bufmgr->vma_max - 2*bufmgr->vma_open;
947 if (limit < 0)
948 limit = 0;
949
950 while (bufmgr->vma_count > limit) {
951 drm_bacon_bo_gem *bo_gem;
952
953 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
954 bufmgr->vma_cache.next,
955 vma_list);
956 assert(bo_gem->map_count == 0);
957 list_delinit(&bo_gem->vma_list);
958
959 if (bo_gem->mem_virtual) {
960 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
961 bo_gem->mem_virtual = NULL;
962 bufmgr->vma_count--;
963 }
964 if (bo_gem->wc_virtual) {
965 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
966 bo_gem->wc_virtual = NULL;
967 bufmgr->vma_count--;
968 }
969 if (bo_gem->gtt_virtual) {
970 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
971 bo_gem->gtt_virtual = NULL;
972 bufmgr->vma_count--;
973 }
974 }
975 }
976
977 static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr *bufmgr,
978 drm_bacon_bo_gem *bo_gem)
979 {
980 bufmgr->vma_open--;
981 list_addtail(&bo_gem->vma_list, &bufmgr->vma_cache);
982 if (bo_gem->mem_virtual)
983 bufmgr->vma_count++;
984 if (bo_gem->wc_virtual)
985 bufmgr->vma_count++;
986 if (bo_gem->gtt_virtual)
987 bufmgr->vma_count++;
988 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
989 }
990
991 static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr *bufmgr,
992 drm_bacon_bo_gem *bo_gem)
993 {
994 bufmgr->vma_open++;
995 list_del(&bo_gem->vma_list);
996 if (bo_gem->mem_virtual)
997 bufmgr->vma_count--;
998 if (bo_gem->wc_virtual)
999 bufmgr->vma_count--;
1000 if (bo_gem->gtt_virtual)
1001 bufmgr->vma_count--;
1002 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
1003 }
1004
1005 static void
1006 drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
1007 {
1008 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1009 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1010 struct drm_bacon_gem_bo_bucket *bucket;
1011 int i;
1012
1013 /* Unreference all the target buffers */
1014 for (i = 0; i < bo_gem->reloc_count; i++) {
1015 if (bo_gem->reloc_target_info[i].bo != bo) {
1016 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
1017 reloc_target_info[i].bo,
1018 time);
1019 }
1020 }
1021 bo_gem->kflags = 0;
1022 bo_gem->reloc_count = 0;
1023 bo_gem->used_as_reloc_target = false;
1024
1025 DBG("bo_unreference final: %d (%s)\n",
1026 bo_gem->gem_handle, bo_gem->name);
1027
1028 /* release memory associated with this object */
1029 if (bo_gem->reloc_target_info) {
1030 free(bo_gem->reloc_target_info);
1031 bo_gem->reloc_target_info = NULL;
1032 }
1033 if (bo_gem->relocs) {
1034 free(bo_gem->relocs);
1035 bo_gem->relocs = NULL;
1036 }
1037
1038 /* Clear any left-over mappings */
1039 if (bo_gem->map_count) {
1040 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1041 bo_gem->map_count = 0;
1042 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1043 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1044 }
1045
1046 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, bo->size);
1047 /* Put the buffer into our internal cache for reuse if we can. */
1048 if (bufmgr->bo_reuse && bo_gem->reusable && bucket != NULL &&
1049 drm_bacon_gem_bo_madvise_internal(bufmgr, bo_gem,
1050 I915_MADV_DONTNEED)) {
1051 bo_gem->free_time = time;
1052
1053 bo_gem->name = NULL;
1054 bo_gem->validate_index = -1;
1055
1056 list_addtail(&bo_gem->head, &bucket->head);
1057 } else {
1058 drm_bacon_gem_bo_free(bo);
1059 }
1060 }
1061
1062 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
1063 time_t time)
1064 {
1065 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1066
1067 assert(p_atomic_read(&bo_gem->refcount) > 0);
1068 if (p_atomic_dec_zero(&bo_gem->refcount))
1069 drm_bacon_gem_bo_unreference_final(bo, time);
1070 }
1071
1072 void
1073 drm_bacon_bo_unreference(drm_bacon_bo *bo)
1074 {
1075 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1076
1077 if (bo == NULL)
1078 return;
1079
1080 assert(p_atomic_read(&bo_gem->refcount) > 0);
1081
1082 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1083 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1084 struct timespec time;
1085
1086 clock_gettime(CLOCK_MONOTONIC, &time);
1087
1088 pthread_mutex_lock(&bufmgr->lock);
1089
1090 if (p_atomic_dec_zero(&bo_gem->refcount)) {
1091 drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
1092 drm_bacon_gem_cleanup_bo_cache(bufmgr, time.tv_sec);
1093 }
1094
1095 pthread_mutex_unlock(&bufmgr->lock);
1096 }
1097 }
1098
1099 int
1100 drm_bacon_bo_map(drm_bacon_bo *bo, int write_enable)
1101 {
1102 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1103 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1104 struct drm_i915_gem_set_domain set_domain;
1105 int ret;
1106
1107 pthread_mutex_lock(&bufmgr->lock);
1108
1109 if (bo_gem->map_count++ == 0)
1110 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1111
1112 if (!bo_gem->mem_virtual) {
1113 struct drm_i915_gem_mmap mmap_arg;
1114
1115 DBG("bo_map: %d (%s), map_count=%d\n",
1116 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1117
1118 memclear(mmap_arg);
1119 mmap_arg.handle = bo_gem->gem_handle;
1120 mmap_arg.size = bo->size;
1121 ret = drmIoctl(bufmgr->fd,
1122 DRM_IOCTL_I915_GEM_MMAP,
1123 &mmap_arg);
1124 if (ret != 0) {
1125 ret = -errno;
1126 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1127 __FILE__, __LINE__, bo_gem->gem_handle,
1128 bo_gem->name, strerror(errno));
1129 if (--bo_gem->map_count == 0)
1130 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1131 pthread_mutex_unlock(&bufmgr->lock);
1132 return ret;
1133 }
1134 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1135 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1136 }
1137 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1138 bo_gem->mem_virtual);
1139 bo->virtual = bo_gem->mem_virtual;
1140
1141 memclear(set_domain);
1142 set_domain.handle = bo_gem->gem_handle;
1143 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1144 if (write_enable)
1145 set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1146 else
1147 set_domain.write_domain = 0;
1148 ret = drmIoctl(bufmgr->fd,
1149 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1150 &set_domain);
1151 if (ret != 0) {
1152 DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1153 __FILE__, __LINE__, bo_gem->gem_handle,
1154 strerror(errno));
1155 }
1156
1157 if (write_enable)
1158 bo_gem->mapped_cpu_write = true;
1159
1160 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1161 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1162 pthread_mutex_unlock(&bufmgr->lock);
1163
1164 return 0;
1165 }
1166
1167 static int
1168 map_gtt(drm_bacon_bo *bo)
1169 {
1170 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1171 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1172 int ret;
1173
1174 if (bo_gem->map_count++ == 0)
1175 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1176
1177 /* Get a mapping of the buffer if we haven't before. */
1178 if (bo_gem->gtt_virtual == NULL) {
1179 struct drm_i915_gem_mmap_gtt mmap_arg;
1180
1181 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1182 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1183
1184 memclear(mmap_arg);
1185 mmap_arg.handle = bo_gem->gem_handle;
1186
1187 /* Get the fake offset back... */
1188 ret = drmIoctl(bufmgr->fd,
1189 DRM_IOCTL_I915_GEM_MMAP_GTT,
1190 &mmap_arg);
1191 if (ret != 0) {
1192 ret = -errno;
1193 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1194 __FILE__, __LINE__,
1195 bo_gem->gem_handle, bo_gem->name,
1196 strerror(errno));
1197 if (--bo_gem->map_count == 0)
1198 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1199 return ret;
1200 }
1201
1202 /* and mmap it */
1203 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1204 MAP_SHARED, bufmgr->fd,
1205 mmap_arg.offset);
1206 if (bo_gem->gtt_virtual == MAP_FAILED) {
1207 bo_gem->gtt_virtual = NULL;
1208 ret = -errno;
1209 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1210 __FILE__, __LINE__,
1211 bo_gem->gem_handle, bo_gem->name,
1212 strerror(errno));
1213 if (--bo_gem->map_count == 0)
1214 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1215 return ret;
1216 }
1217 }
1218
1219 bo->virtual = bo_gem->gtt_virtual;
1220
1221 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1222 bo_gem->gtt_virtual);
1223
1224 return 0;
1225 }
1226
1227 int
1228 drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
1229 {
1230 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1231 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1232 struct drm_i915_gem_set_domain set_domain;
1233 int ret;
1234
1235 pthread_mutex_lock(&bufmgr->lock);
1236
1237 ret = map_gtt(bo);
1238 if (ret) {
1239 pthread_mutex_unlock(&bufmgr->lock);
1240 return ret;
1241 }
1242
1243 /* Now move it to the GTT domain so that the GPU and CPU
1244 * caches are flushed and the GPU isn't actively using the
1245 * buffer.
1246 *
1247 * The pagefault handler does this domain change for us when
1248 * it has unbound the BO from the GTT, but it's up to us to
1249 * tell it when we're about to use things if we had done
1250 * rendering and it still happens to be bound to the GTT.
1251 */
1252 memclear(set_domain);
1253 set_domain.handle = bo_gem->gem_handle;
1254 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1255 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1256 ret = drmIoctl(bufmgr->fd,
1257 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1258 &set_domain);
1259 if (ret != 0) {
1260 DBG("%s:%d: Error setting domain %d: %s\n",
1261 __FILE__, __LINE__, bo_gem->gem_handle,
1262 strerror(errno));
1263 }
1264
1265 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1266 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1267 pthread_mutex_unlock(&bufmgr->lock);
1268
1269 return 0;
1270 }
1271
1272 /**
1273 * Performs a mapping of the buffer object like the normal GTT
1274 * mapping, but avoids waiting for the GPU to be done reading from or
1275 * rendering to the buffer.
1276 *
1277 * This is used in the implementation of GL_ARB_map_buffer_range: The
1278 * user asks to create a buffer, then does a mapping, fills some
1279 * space, runs a drawing command, then asks to map it again without
1280 * synchronizing because it guarantees that it won't write over the
1281 * data that the GPU is busy using (or, more specifically, that if it
1282 * does write over the data, it acknowledges that rendering is
1283 * undefined).
1284 */
1285
1286 int
1287 drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
1288 {
1289 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1290 #ifdef HAVE_VALGRIND
1291 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1292 #endif
1293 int ret;
1294
1295 /* If the CPU cache isn't coherent with the GTT, then use a
1296 * regular synchronized mapping. The problem is that we don't
1297 * track where the buffer was last used on the CPU side in
1298 * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
1299 * we would potentially corrupt the buffer even when the user
1300 * does reasonable things.
1301 */
1302 if (!bufmgr->has_llc)
1303 return drm_bacon_gem_bo_map_gtt(bo);
1304
1305 pthread_mutex_lock(&bufmgr->lock);
1306
1307 ret = map_gtt(bo);
1308 if (ret == 0) {
1309 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1310 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1311 }
1312
1313 pthread_mutex_unlock(&bufmgr->lock);
1314
1315 return ret;
1316 }
1317
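/*
 * Illustrative sketch (not part of the original file) of the
 * GL_ARB_map_buffer_range-style pattern described above; 'data', 'size',
 * 'batch_bo', 'batch_used', 'unused_offset', 'more_data' and 'more_size' are
 * hypothetical. The second map does not stall, so the caller must only touch
 * ranges the GPU is not using.
 */
#if 0
	drm_bacon_bo_map(bo, 1);
	memcpy(bo->virtual, data, size);               /* initial fill */
	drm_bacon_bo_unmap(bo);

	drm_bacon_bo_exec(batch_bo, batch_used);       /* GPU now reads 'bo' */

	drm_bacon_gem_bo_map_unsynchronized(bo);       /* no wait for the GPU */
	memcpy((char *)bo->virtual + unused_offset, more_data, more_size);
	drm_bacon_bo_unmap(bo);
#endif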
1318 int
1319 drm_bacon_bo_unmap(drm_bacon_bo *bo)
1320 {
1321 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1322 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1323 int ret = 0;
1324
1325 if (bo == NULL)
1326 return 0;
1327
1328 pthread_mutex_lock(&bufmgr->lock);
1329
1330 if (bo_gem->map_count <= 0) {
1331 DBG("attempted to unmap an unmapped bo\n");
1332 pthread_mutex_unlock(&bufmgr->lock);
1333 /* Preserve the old behaviour of just treating this as a
1334 * no-op rather than reporting the error.
1335 */
1336 return 0;
1337 }
1338
1339 if (bo_gem->mapped_cpu_write) {
1340 struct drm_i915_gem_sw_finish sw_finish;
1341
1342 /* Cause a flush to happen if the buffer's pinned for
1343 * scanout, so the results show up in a timely manner.
1344 * Unlike GTT set domains, this only does work if the
1345 * buffer should be scanout-related.
1346 */
1347 memclear(sw_finish);
1348 sw_finish.handle = bo_gem->gem_handle;
1349 ret = drmIoctl(bufmgr->fd,
1350 DRM_IOCTL_I915_GEM_SW_FINISH,
1351 &sw_finish);
1352 ret = ret == -1 ? -errno : 0;
1353
1354 bo_gem->mapped_cpu_write = false;
1355 }
1356
1357 /* We need to unmap after every invocation, as we cannot track
1358 * an open VMA for every BO; that would exhaust the system
1359 * limits and cause later failures.
1360 */
1361 if (--bo_gem->map_count == 0) {
1362 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1363 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1364 bo->virtual = NULL;
1365 }
1366 pthread_mutex_unlock(&bufmgr->lock);
1367
1368 return ret;
1369 }
1370
1371 int
1372 drm_bacon_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
1373 unsigned long size, const void *data)
1374 {
1375 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1376 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1377 struct drm_i915_gem_pwrite pwrite;
1378 int ret;
1379
1380 memclear(pwrite);
1381 pwrite.handle = bo_gem->gem_handle;
1382 pwrite.offset = offset;
1383 pwrite.size = size;
1384 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1385 ret = drmIoctl(bufmgr->fd,
1386 DRM_IOCTL_I915_GEM_PWRITE,
1387 &pwrite);
1388 if (ret != 0) {
1389 ret = -errno;
1390 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1391 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1392 (int)size, strerror(errno));
1393 }
1394
1395 return ret;
1396 }
1397
1398 int
1399 drm_bacon_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
1400 unsigned long size, void *data)
1401 {
1402 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1403 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1404 struct drm_i915_gem_pread pread;
1405 int ret;
1406
1407 memclear(pread);
1408 pread.handle = bo_gem->gem_handle;
1409 pread.offset = offset;
1410 pread.size = size;
1411 pread.data_ptr = (uint64_t) (uintptr_t) data;
1412 ret = drmIoctl(bufmgr->fd,
1413 DRM_IOCTL_I915_GEM_PREAD,
1414 &pread);
1415 if (ret != 0) {
1416 ret = -errno;
1417 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1418 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1419 (int)size, strerror(errno));
1420 }
1421
1422 return ret;
1423 }
1424
1425 /** Waits for all GPU rendering with the object to have completed. */
1426 void
1427 drm_bacon_bo_wait_rendering(drm_bacon_bo *bo)
1428 {
1429 drm_bacon_gem_bo_start_gtt_access(bo, 1);
1430 }
1431
1432 /**
1433 * Waits on a BO for the given amount of time.
1434 *
1435 * @bo: buffer object to wait for
1436 * @timeout_ns: amount of time to wait in nanoseconds.
1437 * If value is less than 0, an infinite wait will occur.
1438 *
1439 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1440 * object completed within the allotted time. Otherwise a negative return
1441 * value describes the error; of particular interest is -ETIME, returned when
1442 * the wait timed out before the object became idle.
1443 *
1444 * Similar to drm_bacon_bo_wait_rendering(), except that a timeout parameter
1445 * allows the operation to give up after a certain amount of time. Another
1446 * subtle difference is the locking semantics: this variant does not hold the
1447 * bufmgr lock for the duration of the wait, which makes the wait subject
1448 * to a larger userspace race window.
1449 *
1450 * The implementation waits until the object is no longer actively referenced
1451 * by any batch buffer submitted at the time of the call. The wait does not
1452 * guard against the buffer being re-submitted by another thread or via a
1453 * flinked handle; userspace must prevent that race itself if such precision
1454 * is important.
1455 *
1456 * Note that some kernels have broken the promise of an infinite wait for
1457 * negative timeout values; upgrade to the latest stable kernel if so.
1458 */
1459 int
1460 drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
1461 {
1462 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1463 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1464 struct drm_i915_gem_wait wait;
1465 int ret;
1466
1467 memclear(wait);
1468 wait.bo_handle = bo_gem->gem_handle;
1469 wait.timeout_ns = timeout_ns;
1470 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1471 if (ret == -1)
1472 return -errno;
1473
1474 return ret;
1475 }
1476
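/*
 * Illustrative sketch (not part of the original file): a bounded wait that
 * falls back to the infinite wait (negative timeout) described above.
 */
#if 0
	/* Give the last batch referencing 'bo' up to 1 ms to complete. */
	if (drm_bacon_gem_bo_wait(bo, 1000 * 1000) == -ETIME)
		drm_bacon_gem_bo_wait(bo, -1);
#endif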
1477 /**
1478 * Sets the object to the GTT read and possibly write domain, used by the X
1479 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
1480 *
1481 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
1482 * can do tiled pixmaps this way.
1483 */
1484 void
1485 drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
1486 {
1487 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1488 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1489 struct drm_i915_gem_set_domain set_domain;
1490 int ret;
1491
1492 memclear(set_domain);
1493 set_domain.handle = bo_gem->gem_handle;
1494 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1495 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1496 ret = drmIoctl(bufmgr->fd,
1497 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1498 &set_domain);
1499 if (ret != 0) {
1500 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1501 __FILE__, __LINE__, bo_gem->gem_handle,
1502 set_domain.read_domains, set_domain.write_domain,
1503 strerror(errno));
1504 }
1505 }
1506
1507 static void
1508 drm_bacon_bufmgr_gem_destroy(drm_bacon_bufmgr *bufmgr)
1509 {
1510 free(bufmgr->exec2_objects);
1511 free(bufmgr->exec_bos);
1512
1513 pthread_mutex_destroy(&bufmgr->lock);
1514
1515 /* Free any cached buffer objects we were going to reuse */
1516 for (int i = 0; i < bufmgr->num_buckets; i++) {
1517 struct drm_bacon_gem_bo_bucket *bucket =
1518 &bufmgr->cache_bucket[i];
1519 drm_bacon_bo_gem *bo_gem;
1520
1521 while (!list_empty(&bucket->head)) {
1522 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1523 bucket->head.next, head);
1524 list_del(&bo_gem->head);
1525
1526 drm_bacon_gem_bo_free(&bo_gem->bo);
1527 }
1528 }
1529
1530 free(bufmgr);
1531 }
1532
1533 /**
1534 * Adds the target buffer to the validation list and adds the relocation
1535 * to the reloc_buffer's relocation list.
1536 *
1537 * The relocation entry at the given offset must already contain the
1538 * precomputed relocation value, because the kernel will optimize out
1539 * the relocation entry write when the buffer hasn't moved from the
1540 * last known offset in target_bo.
1541 */
1542 int
1543 drm_bacon_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1544 drm_bacon_bo *target_bo, uint32_t target_offset,
1545 uint32_t read_domains, uint32_t write_domain)
1546 {
1547 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1548 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1549 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1550
1551 if (bo_gem->has_error)
1552 return -ENOMEM;
1553
1554 if (target_bo_gem->has_error) {
1555 bo_gem->has_error = true;
1556 return -ENOMEM;
1557 }
1558
1559 /* Create a new relocation list if needed */
1560 if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
1561 return -ENOMEM;
1562
1563 /* Check overflow */
1564 assert(bo_gem->reloc_count < bufmgr->max_relocs);
1565
1566 /* Check args */
1567 assert(offset <= bo->size - 4);
1568 assert((write_domain & (write_domain - 1)) == 0);
1569
1570 /* Make sure that we're not adding a reloc to something whose size has
1571 * already been accounted for.
1572 */
1573 assert(!bo_gem->used_as_reloc_target);
1574 if (target_bo_gem != bo_gem) {
1575 target_bo_gem->used_as_reloc_target = true;
1576 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1577 }
1578
1579 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1580 if (target_bo != bo)
1581 drm_bacon_bo_reference(target_bo);
1582
1583 bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1584 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1585 bo_gem->relocs[bo_gem->reloc_count].target_handle =
1586 target_bo_gem->gem_handle;
1587 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1588 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1589 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
1590 bo_gem->reloc_count++;
1591
1592 return 0;
1593 }
1594
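/*
 * Illustrative sketch (not part of the original file): recording a relocation
 * for a buffer address written into a batch. 'batch_bo', 'batch_map', 'n' and
 * 'target_delta' are hypothetical. The presumed address is written into the
 * batch; the kernel only rewrites it if the target has moved.
 */
#if 0
	uint32_t dword_offset = 4 * n;   /* byte offset of the pointer in the batch */

	((uint32_t *) batch_map)[n] =
		lower_32_bits(target_bo->offset64 + target_delta);
	drm_bacon_bo_emit_reloc(batch_bo, dword_offset,
				target_bo, target_delta,
				I915_GEM_DOMAIN_RENDER, 0);
#endif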
1595 int
1596 drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
1597 {
1598 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1599
1600 return bo_gem->reloc_count;
1601 }
1602
1603 /**
1604 * Removes existing relocation entries in the BO after "start".
1605 *
1606 * This allows a user to avoid a two-step process for state setup with
1607 * counting up all the buffer objects and doing a
1608 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
1609 * relocations for the state setup. Instead, save the state of the
1610 * batchbuffer including drm_bacon_gem_get_reloc_count(), emit all the
1611 * state, and then check if it still fits in the aperture.
1612 *
1613 * Any further drm_bacon_bufmgr_check_aperture_space() queries
1614 * involving this buffer in the tree are undefined after this call.
1615 */
1616 void
1617 drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
1618 {
1619 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1620 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1621 int i;
1622 struct timespec time;
1623
1624 clock_gettime(CLOCK_MONOTONIC, &time);
1625
1626 assert(bo_gem->reloc_count >= start);
1627
1628 /* Unreference the cleared target buffers */
1629 pthread_mutex_lock(&bufmgr->lock);
1630
1631 for (i = start; i < bo_gem->reloc_count; i++) {
1632 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_target_info[i].bo;
1633 if (&target_bo_gem->bo != bo) {
1634 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
1635 time.tv_sec);
1636 }
1637 }
1638 bo_gem->reloc_count = start;
1639
1640 pthread_mutex_unlock(&bufmgr->lock);
1641
1642 }
1643
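/*
 * Illustrative sketch (not part of the original file) of the save/emit/check
 * pattern described above. 'emit_state_atoms' is hypothetical, and
 * drm_bacon_bufmgr_check_aperture_space() is only referenced by comments in
 * this file, so its exact signature is an assumption.
 */
#if 0
	int saved = drm_bacon_gem_bo_get_reloc_count(batch_bo);

	emit_state_atoms(batch_bo);      /* emits relocations into batch_bo */

	if (drm_bacon_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
		/* Doesn't fit: drop the new relocations and flush first. */
		drm_bacon_gem_bo_clear_relocs(batch_bo, saved);
	}
#endif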
1644 static void
1645 drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
1646 {
1647 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1648 int i;
1649
1650 if (bo_gem->relocs == NULL)
1651 return;
1652
1653 for (i = 0; i < bo_gem->reloc_count; i++) {
1654 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1655
1656 if (target_bo == bo)
1657 continue;
1658
1659 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1660
1661 /* Continue walking the tree depth-first. */
1662 drm_bacon_gem_bo_process_reloc2(target_bo);
1663
1664 /* Add the target to the validate list */
1665 drm_bacon_add_validate_buffer2(target_bo);
1666 }
1667 }
1668
1669 static void
1670 drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr *bufmgr)
1671 {
1672 int i;
1673
1674 for (i = 0; i < bufmgr->exec_count; i++) {
1675 drm_bacon_bo *bo = bufmgr->exec_bos[i];
1676 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1677
1678 /* Update the buffer offset */
1679 if (bufmgr->exec2_objects[i].offset != bo->offset64) {
1680 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
1681 bo_gem->gem_handle, bo_gem->name,
1682 upper_32_bits(bo->offset64),
1683 lower_32_bits(bo->offset64),
1684 upper_32_bits(bufmgr->exec2_objects[i].offset),
1685 lower_32_bits(bufmgr->exec2_objects[i].offset));
1686 bo->offset64 = bufmgr->exec2_objects[i].offset;
1687 }
1688 }
1689 }
1690
1691 static int
1692 do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
1693 int in_fence, int *out_fence,
1694 unsigned int flags)
1695 {
1696 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1697 struct drm_i915_gem_execbuffer2 execbuf;
1698 int ret = 0;
1699 int i;
1700
1701 if (to_bo_gem(bo)->has_error)
1702 return -ENOMEM;
1703
1704 pthread_mutex_lock(&bufmgr->lock);
1705 /* Update indices and set up the validate list. */
1706 drm_bacon_gem_bo_process_reloc2(bo);
1707
1708 /* Add the batch buffer to the validation list. There are no relocations
1709 * pointing to it.
1710 */
1711 drm_bacon_add_validate_buffer2(bo);
1712
1713 memclear(execbuf);
1714 execbuf.buffers_ptr = (uintptr_t)bufmgr->exec2_objects;
1715 execbuf.buffer_count = bufmgr->exec_count;
1716 execbuf.batch_start_offset = 0;
1717 execbuf.batch_len = used;
1718 execbuf.cliprects_ptr = 0;
1719 execbuf.num_cliprects = 0;
1720 execbuf.DR1 = 0;
1721 execbuf.DR4 = 0;
1722 execbuf.flags = flags;
1723 if (ctx == NULL)
1724 i915_execbuffer2_set_context_id(execbuf, 0);
1725 else
1726 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
1727 execbuf.rsvd2 = 0;
1728 if (in_fence != -1) {
1729 execbuf.rsvd2 = in_fence;
1730 execbuf.flags |= I915_EXEC_FENCE_IN;
1731 }
1732 if (out_fence != NULL) {
1733 *out_fence = -1;
1734 execbuf.flags |= I915_EXEC_FENCE_OUT;
1735 }
1736
1737 if (bufmgr->no_exec)
1738 goto skip_execution;
1739
1740 ret = drmIoctl(bufmgr->fd,
1741 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
1742 &execbuf);
1743 if (ret != 0) {
1744 ret = -errno;
1745 if (ret == -ENOSPC) {
1746 DBG("Execbuffer fails to pin. "
1747 "Estimate: %u. Actual: %u. Available: %u\n",
1748 drm_bacon_gem_estimate_batch_space(bufmgr->exec_bos,
1749 bufmgr->exec_count),
1750 drm_bacon_gem_compute_batch_space(bufmgr->exec_bos,
1751 bufmgr->exec_count),
1752 (unsigned int) bufmgr->gtt_size);
1753 }
1754 }
1755 drm_bacon_update_buffer_offsets2(bufmgr);
1756
1757 if (ret == 0 && out_fence != NULL)
1758 *out_fence = execbuf.rsvd2 >> 32;
1759
1760 skip_execution:
1761 if (INTEL_DEBUG & DEBUG_BUFMGR)
1762 drm_bacon_gem_dump_validation_list(bufmgr);
1763
1764 for (i = 0; i < bufmgr->exec_count; i++) {
1765 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr->exec_bos[i]);
1766
1767 bo_gem->idle = false;
1768
1769 /* Disconnect the buffer from the validate list */
1770 bo_gem->validate_index = -1;
1771 bufmgr->exec_bos[i] = NULL;
1772 }
1773 bufmgr->exec_count = 0;
1774 pthread_mutex_unlock(&bufmgr->lock);
1775
1776 return ret;
1777 }
1778
1779 int
1780 drm_bacon_bo_exec(drm_bacon_bo *bo, int used)
1781 {
1782 return do_exec2(bo, used, NULL, -1, NULL, I915_EXEC_RENDER);
1783 }
1784
1785 int
1786 drm_bacon_bo_mrb_exec(drm_bacon_bo *bo, int used, unsigned int flags)
1787 {
1788 return do_exec2(bo, used, NULL, -1, NULL, flags);
1789 }
1790
1791 int
1792 drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
1793 int used, unsigned int flags)
1794 {
1795 return do_exec2(bo, used, ctx, -1, NULL, flags);
1796 }
1797
1798 int
1799 drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
1800 drm_bacon_context *ctx,
1801 int used,
1802 int in_fence,
1803 int *out_fence,
1804 unsigned int flags)
1805 {
1806 return do_exec2(bo, used, ctx, in_fence, out_fence, flags);
1807 }
1808
1809 static int
1810 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
1811 uint32_t tiling_mode,
1812 uint32_t stride)
1813 {
1814 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1815 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1816 struct drm_i915_gem_set_tiling set_tiling;
1817 int ret;
1818
1819 if (bo_gem->global_name == 0 &&
1820 tiling_mode == bo_gem->tiling_mode &&
1821 stride == bo_gem->stride)
1822 return 0;
1823
1824 memset(&set_tiling, 0, sizeof(set_tiling));
1825 do {
1826 /* set_tiling is slightly broken and overwrites the
1827 * input on the error path, so we have to open code
1828 * rmIoctl.
1829 */
1830 set_tiling.handle = bo_gem->gem_handle;
1831 set_tiling.tiling_mode = tiling_mode;
1832 set_tiling.stride = stride;
1833
1834 ret = ioctl(bufmgr->fd,
1835 DRM_IOCTL_I915_GEM_SET_TILING,
1836 &set_tiling);
1837 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
1838 if (ret == -1)
1839 return -errno;
1840
1841 bo_gem->tiling_mode = set_tiling.tiling_mode;
1842 bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1843 bo_gem->stride = set_tiling.stride;
1844 return 0;
1845 }
1846
1847 int
1848 drm_bacon_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
1849 uint32_t stride)
1850 {
1851 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1852 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1853 int ret;
1854
1855 /* Linear buffers have no stride. By ensuring that we only ever use
1856 * stride 0 with linear buffers, we simplify our code.
1857 */
1858 if (*tiling_mode == I915_TILING_NONE)
1859 stride = 0;
1860
1861 ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
1862 if (ret == 0)
1863 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
1864
1865 *tiling_mode = bo_gem->tiling_mode;
1866 return ret;
1867 }
1868
1869 int
1870 drm_bacon_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
1871 uint32_t *swizzle_mode)
1872 {
1873 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1874
1875 *tiling_mode = bo_gem->tiling_mode;
1876 *swizzle_mode = bo_gem->swizzle_mode;
1877 return 0;
1878 }
1879
1880 drm_bacon_bo *
1881 drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
1882 {
1883 int ret;
1884 uint32_t handle;
1885 drm_bacon_bo_gem *bo_gem;
1886 struct drm_i915_gem_get_tiling get_tiling;
1887
1888 pthread_mutex_lock(&bufmgr->lock);
1889 ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1890 if (ret) {
1891 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
1892 pthread_mutex_unlock(&bufmgr->lock);
1893 return NULL;
1894 }
1895
1896 /*
1897 * See if the kernel has already returned this buffer to us. Just as
1898 * for named buffers, we must not create two BOs pointing at the same
1899 * kernel object
1900 */
1901 HASH_FIND(handle_hh, bufmgr->handle_table,
1902 &handle, sizeof(handle), bo_gem);
1903 if (bo_gem) {
1904 drm_bacon_bo_reference(&bo_gem->bo);
1905 goto out;
1906 }
1907
1908 bo_gem = calloc(1, sizeof(*bo_gem));
1909 if (!bo_gem)
1910 goto out;
1911
1912 p_atomic_set(&bo_gem->refcount, 1);
1913 list_inithead(&bo_gem->vma_list);
1914
1915 /* Determine size of bo. The fd-to-handle ioctl really should
1916 * return the size, but it doesn't. If we have kernel 3.12 or
1917 * later, we can lseek on the prime fd to get the size. Older
1918 * kernels will just fail, in which case we fall back to the
1919 * provided (estimated or guessed) size. */
1920 ret = lseek(prime_fd, 0, SEEK_END);
1921 if (ret != -1)
1922 bo_gem->bo.size = ret;
1923 else
1924 bo_gem->bo.size = size;
1925
1926 bo_gem->bo.handle = handle;
1927 bo_gem->bo.bufmgr = bufmgr;
1928
1929 bo_gem->gem_handle = handle;
1930 HASH_ADD(handle_hh, bufmgr->handle_table,
1931 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1932
1933 bo_gem->name = "prime";
1934 bo_gem->validate_index = -1;
1935 bo_gem->used_as_reloc_target = false;
1936 bo_gem->has_error = false;
1937 bo_gem->reusable = false;
1938
1939 memclear(get_tiling);
1940 get_tiling.handle = bo_gem->gem_handle;
1941 if (drmIoctl(bufmgr->fd,
1942 DRM_IOCTL_I915_GEM_GET_TILING,
1943 &get_tiling))
1944 goto err;
1945
1946 bo_gem->tiling_mode = get_tiling.tiling_mode;
1947 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1948 /* XXX stride is unknown */
1949 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
1950
1951 out:
1952 pthread_mutex_unlock(&bufmgr->lock);
1953 return &bo_gem->bo;
1954
1955 err:
1956 drm_bacon_gem_bo_free(&bo_gem->bo);
1957 pthread_mutex_unlock(&bufmgr->lock);
1958 return NULL;
1959 }
1960
1961 int
1962 drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
1963 {
1964 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1965 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1966
1967 if (drmPrimeHandleToFD(bufmgr->fd, bo_gem->gem_handle,
1968 DRM_CLOEXEC, prime_fd) != 0)
1969 return -errno;
1970
1971 bo_gem->reusable = false;
1972
1973 return 0;
1974 }
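
/* Illustrative sketch of sharing a BO between processes via PRIME
 * (assuming "bo" and "bufmgr" already exist): export a dma-buf fd and
 * re-import it. As noted above, the import path prefers lseek() on the
 * fd to find the real size, so the size argument is only a fallback for
 * old kernels.
 *
 *   int fd;
 *   if (drm_bacon_bo_gem_export_to_prime(bo, &fd) == 0) {
 *      drm_bacon_bo *imported =
 *         drm_bacon_bo_gem_create_from_prime(bufmgr, fd, bo->size);
 *      ...
 *   }
 */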
1975
1976 int
1977 drm_bacon_bo_flink(drm_bacon_bo *bo, uint32_t *name)
1978 {
1979 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1980 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1981
1982 if (!bo_gem->global_name) {
1983 struct drm_gem_flink flink;
1984
1985 memclear(flink);
1986 flink.handle = bo_gem->gem_handle;
1987 if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1988 return -errno;
1989
1990 pthread_mutex_lock(&bufmgr->lock);
1991 if (!bo_gem->global_name) {
1992 bo_gem->global_name = flink.name;
1993 bo_gem->reusable = false;
1994
1995 HASH_ADD(name_hh, bufmgr->name_table,
1996 global_name, sizeof(bo_gem->global_name),
1997 bo_gem);
1998 }
1999 pthread_mutex_unlock(&bufmgr->lock);
2000 }
2001
2002 *name = bo_gem->global_name;
2003 return 0;
2004 }
2005
2006 /**
2007 * Enables unlimited caching of buffer objects for reuse.
2008 *
2009 * This is potentially very memory expensive, as the cache at each bucket
2010 * size is only bounded by how many buffers of that size we've managed to have
2011 * in flight at once.
2012 */
2013 void
2014 drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
2015 {
2016 bufmgr->bo_reuse = true;
2017 }
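
/* Illustrative sketch: reuse is opt-in, so callers typically enable it
 * once, right after creating the buffer manager and before allocating
 * any BOs; freed buffers then land in the size buckets set up below
 * instead of being closed immediately.
 *
 *   drm_bacon_bufmgr_gem_enable_reuse(bufmgr);
 */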
2018
2019 /**
2020 * Disables implicit synchronisation before executing the bo
2021 *
2022 * This will cause rendering corruption unless you correctly manage explicit
2023 * fences for all rendering involving this buffer - including use by others.
2024 * Disabling the implicit serialisation is only required if that serialisation
2025 * is too coarse (for example, you have split the buffer into many
2026 * non-overlapping regions and are sharing the whole buffer between concurrent
2027 * independent command streams).
2028 *
2029 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2030  * which can be checked using drm_bacon_bufmgr_gem_can_disable_implicit_sync,
2031 * or subsequent execbufs involving the bo will generate EINVAL.
2032 */
2033 void
2034 drm_bacon_gem_bo_disable_implicit_sync(drm_bacon_bo *bo)
2035 {
2036 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2037
2038 bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2039 }
2040
2041 /**
2042 * Enables implicit synchronisation before executing the bo
2043 *
2044 * This is the default behaviour of the kernel, to wait upon prior writes
2045 * completing on the object before rendering with it, or to wait for prior
2046 * reads to complete before writing into the object.
2047 * drm_bacon_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2048 * the kernel never to insert a stall before using the object. Then this
2049 * function can be used to restore the implicit sync before subsequent
2050 * rendering.
2051 */
2052 void
2053 drm_bacon_gem_bo_enable_implicit_sync(drm_bacon_bo *bo)
2054 {
2055 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2056
2057 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2058 }
2059
2060 /**
2061 * Query whether the kernel supports disabling of its implicit synchronisation
2062 * before execbuf. See drm_bacon_gem_bo_disable_implicit_sync()
2063 */
2064 int
2065 drm_bacon_bufmgr_gem_can_disable_implicit_sync(drm_bacon_bufmgr *bufmgr)
2066 {
2067 return bufmgr->has_exec_async;
2068 }
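
/* Illustrative sketch tying the three entry points above together
 * (assuming the caller provides its own explicit fencing for "bo"):
 *
 *   if (drm_bacon_bufmgr_gem_can_disable_implicit_sync(bufmgr))
 *      drm_bacon_gem_bo_disable_implicit_sync(bo);
 *   ...submit work, synchronising with explicit fences...
 *   drm_bacon_gem_bo_enable_implicit_sync(bo);
 */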
2069
2070 /**
2071 * Return the additional aperture space required by the tree of buffer objects
2072 * rooted at bo.
2073 */
2074 static int
2075 drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
2076 {
2077 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2078 int i;
2079 int total = 0;
2080
2081 if (bo == NULL || bo_gem->included_in_check_aperture)
2082 return 0;
2083
2084 total += bo->size;
2085 bo_gem->included_in_check_aperture = true;
2086
2087 for (i = 0; i < bo_gem->reloc_count; i++)
2088 total +=
2089 drm_bacon_gem_bo_get_aperture_space(bo_gem->
2090 reloc_target_info[i].bo);
2091
2092 return total;
2093 }
2094
2095 /**
2096 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
2097 * for the next drm_bacon_bufmgr_check_aperture_space() call.
2098 */
2099 static void
2100 drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
2101 {
2102 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2103 int i;
2104
2105 if (bo == NULL || !bo_gem->included_in_check_aperture)
2106 return;
2107
2108 bo_gem->included_in_check_aperture = false;
2109
2110 for (i = 0; i < bo_gem->reloc_count; i++)
2111 drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->
2112 reloc_target_info[i].bo);
2113 }
2114
2115 /**
2116 * Return a conservative estimate for the amount of aperture required
2117 * for a collection of buffers. This may double-count some buffers.
2118 */
2119 static unsigned int
2120 drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
2121 {
2122 int i;
2123 unsigned int total = 0;
2124
2125 for (i = 0; i < count; i++) {
2126 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];
2127 if (bo_gem != NULL)
2128 total += bo_gem->reloc_tree_size;
2129 }
2130 return total;
2131 }
2132
2133 /**
2134 * Return the amount of aperture needed for a collection of buffers.
2135 * This avoids double counting any buffers, at the cost of looking
2136 * at every buffer in the set.
2137 */
2138 static unsigned int
2139 drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
2140 {
2141 int i;
2142 unsigned int total = 0;
2143
2144 for (i = 0; i < count; i++) {
2145 total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
2146 /* For the first buffer object in the array, we get an
2147 * accurate count back for its reloc_tree size (since nothing
2148 * had been flagged as being counted yet). We can save that
2149 * value out as a more conservative reloc_tree_size that
2150 * avoids double-counting target buffers. Since the first
2151 * buffer happens to usually be the batch buffer in our
2152 * callers, this can pull us back from doing the tree
2153 * walk on every new batch emit.
2154 */
2155 if (i == 0) {
2156 drm_bacon_bo_gem *bo_gem =
2157 (drm_bacon_bo_gem *) bo_array[i];
2158 bo_gem->reloc_tree_size = total;
2159 }
2160 }
2161
2162 for (i = 0; i < count; i++)
2163 drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
2164 return total;
2165 }
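
/* Worked example of the difference between the two passes: if two
 * buffers in bo_array both carry relocations to the same 1MB texture,
 * drm_bacon_gem_estimate_batch_space() may count that texture once per
 * referencing buffer (its size is folded into each reloc_tree_size),
 * whereas drm_bacon_gem_compute_batch_space() walks the trees with the
 * included_in_check_aperture flag set and counts it exactly once.
 */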
2166
2167 /**
2168 * Return -1 if the batchbuffer should be flushed before attempting to
2169 * emit rendering referencing the buffers pointed to by bo_array.
2170 *
2171 * This is required because if we try to emit a batchbuffer with relocations
2172 * to a tree of buffers that won't simultaneously fit in the aperture,
2173 * the rendering will return an error at a point where the software is not
2174 * prepared to recover from it.
2175 *
2176 * However, we also want to emit the batchbuffer significantly before we reach
2177 * the limit, as a series of batchbuffers each of which references buffers
2178 * covering almost all of the aperture means that at each emit we end up
2179 * waiting to evict a buffer from the last rendering, and we get synchronous
2180  * waiting to evict a buffer from the last rendering, and performance
2181  * degrades to being fully synchronous. By emitting smaller batchbuffers, we
2182  * accept some CPU overhead in exchange for better parallelism.
2183 int
2184 drm_bacon_bufmgr_check_aperture_space(drm_bacon_bo **bo_array, int count)
2185 {
2186 drm_bacon_bufmgr *bufmgr = bo_array[0]->bufmgr;
2187 unsigned int total = 0;
2188 unsigned int threshold = bufmgr->gtt_size * 3 / 4;
2189
2190 total = drm_bacon_gem_estimate_batch_space(bo_array, count);
2191
2192 if (total > threshold)
2193 total = drm_bacon_gem_compute_batch_space(bo_array, count);
2194
2195 if (total > threshold) {
2196 DBG("check_space: overflowed available aperture, "
2197 "%dkb vs %dkb\n",
2198 total / 1024, (int)bufmgr->gtt_size / 1024);
2199 return -ENOSPC;
2200 } else {
2201 		DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
2202 (int)bufmgr->gtt_size / 1024);
2203 return 0;
2204 }
2205 }
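
/* Illustrative calling-pattern sketch (the "bos" array and names here are
 * hypothetical; callers usually pass the batch buffer first): flush when
 * the referenced buffers would not fit, then re-emit into a fresh batch.
 *
 *   drm_bacon_bo *bos[2] = { batch_bo, target_bo };
 *   if (drm_bacon_bufmgr_check_aperture_space(bos, 2) == -ENOSPC) {
 *      ...flush the current batch and retry...
 *   }
 */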
2206
2207 /*
2208 * Disable buffer reuse for objects which are shared with the kernel
2209 * as scanout buffers
2210 */
2211 int
2212 drm_bacon_bo_disable_reuse(drm_bacon_bo *bo)
2213 {
2214 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2215
2216 bo_gem->reusable = false;
2217 return 0;
2218 }
2219
2220 int
2221 drm_bacon_bo_is_reusable(drm_bacon_bo *bo)
2222 {
2223 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2224
2225 return bo_gem->reusable;
2226 }
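
/* Illustrative sketch: a buffer handed off as a scanout surface is taken
 * out of the reuse pool so the cache never recycles it.
 *
 *   if (drm_bacon_bo_is_reusable(scanout_bo))
 *      drm_bacon_bo_disable_reuse(scanout_bo);
 */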
2227
2228 static int
2229 _drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2230 {
2231 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2232 int i;
2233
2234 for (i = 0; i < bo_gem->reloc_count; i++) {
2235 if (bo_gem->reloc_target_info[i].bo == target_bo)
2236 return 1;
2237 if (bo == bo_gem->reloc_target_info[i].bo)
2238 continue;
2239 if (_drm_bacon_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2240 target_bo))
2241 return 1;
2242 }
2243
2244 return 0;
2245 }
2246
2247 /** Return true if target_bo is referenced by bo's relocation tree. */
2248 int
2249 drm_bacon_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2250 {
2251 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
2252
2253 if (bo == NULL || target_bo == NULL)
2254 return 0;
2255 if (target_bo_gem->used_as_reloc_target)
2256 return _drm_bacon_gem_bo_references(bo, target_bo);
2257 return 0;
2258 }
2259
2260 static void
2261 add_bucket(drm_bacon_bufmgr *bufmgr, int size)
2262 {
2263 unsigned int i = bufmgr->num_buckets;
2264
2265 assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
2266
2267 list_inithead(&bufmgr->cache_bucket[i].head);
2268 bufmgr->cache_bucket[i].size = size;
2269 bufmgr->num_buckets++;
2270 }
2271
2272 static void
2273 init_cache_buckets(drm_bacon_bufmgr *bufmgr)
2274 {
2275 unsigned long size, cache_max_size = 64 * 1024 * 1024;
2276
2277 	/* OK, so power-of-two buckets were too wasteful of memory.
2278 * Give 3 other sizes between each power of two, to hopefully
2279 * cover things accurately enough. (The alternative is
2280 * probably to just go for exact matching of sizes, and assume
2281 * that for things like composited window resize the tiled
2282 * width/height alignment and rounding of sizes to pages will
2283 * get us useful cache hit rates anyway)
2284 */
2285 add_bucket(bufmgr, 4096);
2286 add_bucket(bufmgr, 4096 * 2);
2287 add_bucket(bufmgr, 4096 * 3);
2288
2289 /* Initialize the linked lists for BO reuse cache. */
2290 for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2291 add_bucket(bufmgr, size);
2292
2293 add_bucket(bufmgr, size + size * 1 / 4);
2294 add_bucket(bufmgr, size + size * 2 / 4);
2295 add_bucket(bufmgr, size + size * 3 / 4);
2296 }
2297 }
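
/* For reference, the scheme above produces bucket sizes of 1, 2 and 3
 * pages, then every power of two from 4 pages upward plus the three
 * intermediate sizes 1/4, 2/4 and 3/4 of the way to the next power of
 * two: 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, ... pages.
 */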
2298
2299 void
2300 drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
2301 {
2302 bufmgr->vma_max = limit;
2303
2304 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
2305 }
2306
2307 drm_bacon_context *
2308 drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
2309 {
2310 struct drm_i915_gem_context_create create;
2311 drm_bacon_context *context = NULL;
2312 int ret;
2313
2314 context = calloc(1, sizeof(*context));
2315 if (!context)
2316 return NULL;
2317
2318 memclear(create);
2319 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
2320 if (ret != 0) {
2321 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
2322 strerror(errno));
2323 free(context);
2324 return NULL;
2325 }
2326
2327 context->ctx_id = create.ctx_id;
2328 context->bufmgr = bufmgr;
2329
2330 return context;
2331 }
2332
2333 int
2334 drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
2335 {
2336 if (ctx == NULL)
2337 return -EINVAL;
2338
2339 *ctx_id = ctx->ctx_id;
2340
2341 return 0;
2342 }
2343
2344 void
2345 drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
2346 {
2347 struct drm_i915_gem_context_destroy destroy;
2348 int ret;
2349
2350 if (ctx == NULL)
2351 return;
2352
2353 memclear(destroy);
2354
2355 destroy.ctx_id = ctx->ctx_id;
2356 ret = drmIoctl(ctx->bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
2357 &destroy);
2358 if (ret != 0)
2359 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
2360 strerror(errno));
2361
2362 free(ctx);
2363 }
2364
2365 int
2366 drm_bacon_get_reset_stats(drm_bacon_context *ctx,
2367 uint32_t *reset_count,
2368 uint32_t *active,
2369 uint32_t *pending)
2370 {
2371 struct drm_i915_reset_stats stats;
2372 int ret;
2373
2374 if (ctx == NULL)
2375 return -EINVAL;
2376
2377 memclear(stats);
2378
2379 stats.ctx_id = ctx->ctx_id;
2380 ret = drmIoctl(ctx->bufmgr->fd,
2381 DRM_IOCTL_I915_GET_RESET_STATS,
2382 &stats);
2383 if (ret == 0) {
2384 if (reset_count != NULL)
2385 *reset_count = stats.reset_count;
2386
2387 if (active != NULL)
2388 *active = stats.batch_active;
2389
2390 if (pending != NULL)
2391 *pending = stats.batch_pending;
2392 }
2393
2394 return ret;
2395 }
2396
2397 int
2398 drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
2399 uint32_t offset,
2400 uint64_t *result)
2401 {
2402 struct drm_i915_reg_read reg_read;
2403 int ret;
2404
2405 memclear(reg_read);
2406 reg_read.offset = offset;
2407
2408 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
2409
2410 *result = reg_read.val;
2411 return ret;
2412 }
2413
2414 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
2415 static struct list_head bufmgr_list = { &bufmgr_list, &bufmgr_list };
2416
2417 static drm_bacon_bufmgr *
2418 drm_bacon_bufmgr_gem_find(int fd)
2419 {
2420 list_for_each_entry(drm_bacon_bufmgr,
2421 bufmgr, &bufmgr_list, managers) {
2422 if (bufmgr->fd == fd) {
2423 p_atomic_inc(&bufmgr->refcount);
2424 return bufmgr;
2425 }
2426 }
2427
2428 return NULL;
2429 }
2430
2431 void
2432 drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr)
2433 {
2434 if (atomic_add_unless(&bufmgr->refcount, -1, 1)) {
2435 pthread_mutex_lock(&bufmgr_list_mutex);
2436
2437 if (p_atomic_dec_zero(&bufmgr->refcount)) {
2438 list_del(&bufmgr->managers);
2439 drm_bacon_bufmgr_gem_destroy(bufmgr);
2440 }
2441
2442 pthread_mutex_unlock(&bufmgr_list_mutex);
2443 }
2444 }
2445
2446 void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
2447 {
2448 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2449 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2450
2451 if (bo_gem->gtt_virtual)
2452 return bo_gem->gtt_virtual;
2453
2454 pthread_mutex_lock(&bufmgr->lock);
2455 if (bo_gem->gtt_virtual == NULL) {
2456 struct drm_i915_gem_mmap_gtt mmap_arg;
2457 void *ptr;
2458
2459 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
2460 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2461
2462 if (bo_gem->map_count++ == 0)
2463 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2464
2465 memclear(mmap_arg);
2466 mmap_arg.handle = bo_gem->gem_handle;
2467
2468 /* Get the fake offset back... */
2469 ptr = MAP_FAILED;
2470 if (drmIoctl(bufmgr->fd,
2471 DRM_IOCTL_I915_GEM_MMAP_GTT,
2472 &mmap_arg) == 0) {
2473 /* and mmap it */
2474 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
2475 MAP_SHARED, bufmgr->fd,
2476 mmap_arg.offset);
2477 }
2478 if (ptr == MAP_FAILED) {
2479 if (--bo_gem->map_count == 0)
2480 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2481 ptr = NULL;
2482 }
2483
2484 bo_gem->gtt_virtual = ptr;
2485 }
2486 pthread_mutex_unlock(&bufmgr->lock);
2487
2488 return bo_gem->gtt_virtual;
2489 }
2490
2491 void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
2492 {
2493 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2494 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2495
2496 if (bo_gem->mem_virtual)
2497 return bo_gem->mem_virtual;
2498
2499 pthread_mutex_lock(&bufmgr->lock);
2500 if (!bo_gem->mem_virtual) {
2501 struct drm_i915_gem_mmap mmap_arg;
2502
2503 if (bo_gem->map_count++ == 0)
2504 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2505
2506 DBG("bo_map: %d (%s), map_count=%d\n",
2507 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2508
2509 memclear(mmap_arg);
2510 mmap_arg.handle = bo_gem->gem_handle;
2511 mmap_arg.size = bo->size;
2512 if (drmIoctl(bufmgr->fd,
2513 DRM_IOCTL_I915_GEM_MMAP,
2514 &mmap_arg)) {
2515 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2516 __FILE__, __LINE__, bo_gem->gem_handle,
2517 bo_gem->name, strerror(errno));
2518 if (--bo_gem->map_count == 0)
2519 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2520 } else {
2521 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2522 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2523 }
2524 }
2525 pthread_mutex_unlock(&bufmgr->lock);
2526
2527 return bo_gem->mem_virtual;
2528 }
2529
2530 void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
2531 {
2532 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2533 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2534
2535 if (bo_gem->wc_virtual)
2536 return bo_gem->wc_virtual;
2537
2538 pthread_mutex_lock(&bufmgr->lock);
2539 if (!bo_gem->wc_virtual) {
2540 struct drm_i915_gem_mmap mmap_arg;
2541
2542 if (bo_gem->map_count++ == 0)
2543 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2544
2545 DBG("bo_map: %d (%s), map_count=%d\n",
2546 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2547
2548 memclear(mmap_arg);
2549 mmap_arg.handle = bo_gem->gem_handle;
2550 mmap_arg.size = bo->size;
2551 mmap_arg.flags = I915_MMAP_WC;
2552 if (drmIoctl(bufmgr->fd,
2553 DRM_IOCTL_I915_GEM_MMAP,
2554 &mmap_arg)) {
2555 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2556 __FILE__, __LINE__, bo_gem->gem_handle,
2557 bo_gem->name, strerror(errno));
2558 if (--bo_gem->map_count == 0)
2559 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2560 } else {
2561 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2562 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2563 }
2564 }
2565 pthread_mutex_unlock(&bufmgr->lock);
2566
2567 return bo_gem->wc_virtual;
2568 }
2569
2570 /**
2571 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2572  * and manage buffer objects.
2573 *
2574 * \param fd File descriptor of the opened DRM device.
2575 */
2576 drm_bacon_bufmgr *
2577 drm_bacon_bufmgr_gem_init(struct gen_device_info *devinfo,
2578 int fd, int batch_size)
2579 {
2580 drm_bacon_bufmgr *bufmgr;
2581 struct drm_i915_gem_get_aperture aperture;
2582 drm_i915_getparam_t gp;
2583 int ret, tmp;
2584
2585 pthread_mutex_lock(&bufmgr_list_mutex);
2586
2587 bufmgr = drm_bacon_bufmgr_gem_find(fd);
2588 if (bufmgr)
2589 goto exit;
2590
2591 bufmgr = calloc(1, sizeof(*bufmgr));
2592 if (bufmgr == NULL)
2593 goto exit;
2594
2595 bufmgr->fd = fd;
2596 p_atomic_set(&bufmgr->refcount, 1);
2597
2598 if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
2599 free(bufmgr);
2600 bufmgr = NULL;
2601 goto exit;
2602 }
2603
2604 memclear(aperture);
2605 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
2606 bufmgr->gtt_size = aperture.aper_available_size;
2607
2608 memclear(gp);
2609 gp.value = &tmp;
2610
2611 gp.param = I915_PARAM_HAS_EXEC_ASYNC;
2612 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2613 bufmgr->has_exec_async = ret == 0;
2614
2615 bufmgr->has_llc = devinfo->has_llc;
2616
2617 	/* Let's go with one relocation for every 2 dwords (but round down a bit
2618 * since a power of two will mean an extra page allocation for the reloc
2619 * buffer).
2620 *
2621 * Every 4 was too few for the blender benchmark.
2622 */
2623 bufmgr->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
2624
2625 init_cache_buckets(bufmgr);
2626
2627 list_inithead(&bufmgr->vma_cache);
2628 bufmgr->vma_max = -1; /* unlimited by default */
2629
2630 list_add(&bufmgr->managers, &bufmgr_list);
2631
2632 exit:
2633 pthread_mutex_unlock(&bufmgr_list_mutex);
2634
2635 return bufmgr;
2636 }
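
/* Illustrative lifecycle sketch (assuming an already-opened DRM fd, a
 * populated gen_device_info and a batch size in bytes; the 8192 here is
 * just a placeholder value):
 *
 *   drm_bacon_bufmgr *bufmgr =
 *      drm_bacon_bufmgr_gem_init(devinfo, fd, 8192);
 *   if (bufmgr) {
 *      drm_bacon_bufmgr_gem_enable_reuse(bufmgr);
 *      ...allocate BOs, build and submit batches...
 *      drm_bacon_bufmgr_destroy(bufmgr);
 *   }
 */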