[mesa.git] / src/mesa/drivers/dri/i965/intel_bufmgr_gem.c (commit dc8129e0b25e039d61889132e83ab385151c2375)
1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include <errno.h>
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "common/gen_debug.h"
60 #include "common/gen_device_info.h"
61 #include "libdrm_macros.h"
62 #include "main/macros.h"
63 #include "util/macros.h"
64 #include "util/hash_table.h"
65 #include "util/list.h"
66 #include "brw_bufmgr.h"
68
69 #include "i915_drm.h"
70
71 #ifdef HAVE_VALGRIND
72 #include <valgrind.h>
73 #include <memcheck.h>
74 #define VG(x) x
75 #else
76 #define VG(x)
77 #endif
78
79 #define memclear(s) memset(&s, 0, sizeof(s))
80
81 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
82
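/**
 * Atomically adds @add to @v unless the current value equals @unless.
 *
 * Returns nonzero if *v already equaled @unless (in which case no
 * addition was performed), zero otherwise.
 */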
83 static inline int
84 atomic_add_unless(int *v, int add, int unless)
85 {
86 int c, old;
87 c = p_atomic_read(v);
88 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
89 c = old;
90 return c == unless;
91 }
92
93 /**
94 * upper_32_bits - return bits 32-63 of a number
95 * @n: the number we're accessing
96 *
97 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
98 * the "right shift count >= width of type" warning when that quantity is
99 * 32-bits.
100 */
101 #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
102
103 /**
104 * lower_32_bits - return bits 0-31 of a number
105 * @n: the number we're accessing
106 */
107 #define lower_32_bits(n) ((__u32)(n))
108
109 struct _drm_bacon_context {
110 unsigned int ctx_id;
111 struct _drm_bacon_bufmgr *bufmgr;
112 };
113
114 typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;
115
116 struct drm_bacon_gem_bo_bucket {
117 struct list_head head;
118 unsigned long size;
119 };
120
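/**
 * Per-instance buffer manager state: the DRM fd it talks to, the
 * execbuf validation list currently being built, the bucketed cache of
 * free buffer objects, and the tables used to look up BOs by GEM
 * handle or flink name.
 */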
121 typedef struct _drm_bacon_bufmgr {
122 int fd;
123
124 int max_relocs;
125
126 pthread_mutex_t lock;
127
128 struct drm_i915_gem_exec_object2 *exec2_objects;
129 drm_bacon_bo **exec_bos;
130 int exec_size;
131 int exec_count;
132
133 /** Array of lists of cached gem objects of power-of-two sizes */
134 struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
135 int num_buckets;
136 time_t time;
137
138 struct hash_table *name_table;
139 struct hash_table *handle_table;
140
141 struct list_head vma_cache;
142 int vma_count, vma_open, vma_max;
143
144 uint64_t gtt_size;
145 unsigned int has_llc : 1;
146 unsigned int bo_reuse : 1;
147 unsigned int no_exec : 1;
148 } drm_bacon_bufmgr;
149
150 struct _drm_bacon_bo_gem {
151 drm_bacon_bo bo;
152
153 int refcount;
154 uint32_t gem_handle;
155 const char *name;
156
157 /**
158 * Kernel-assigned global name for this object
159 *
160 * List contains both flink named and prime fd'd objects
161 */
162 unsigned int global_name;
163
164 /**
165 * Index of the buffer within the validation list while preparing a
166 * batchbuffer execution.
167 */
168 int validate_index;
169
170 /**
171 * Current tiling mode
172 */
173 uint32_t tiling_mode;
174 uint32_t swizzle_mode;
175 unsigned long stride;
176
177 time_t free_time;
178
179 /** Array passed to the DRM containing relocation information. */
180 struct drm_i915_gem_relocation_entry *relocs;
181 /**
182 * Array of BOs corresponding to relocs[i].target_handle, etc.
183 */
184 drm_bacon_bo **reloc_bos;
185 /** Number of entries in relocs */
186 int reloc_count;
187 /** Mapped address for the buffer, saved across map/unmap cycles */
188 void *mem_virtual;
189 /** GTT virtual address for the buffer, saved across map/unmap cycles */
190 void *gtt_virtual;
191 /** WC CPU address for the buffer, saved across map/unmap cycles */
192 void *wc_virtual;
193 int map_count;
194 struct list_head vma_list;
195
196 /** BO cache list */
197 struct list_head head;
198
199 /**
200 * Boolean of whether this BO and its children have been included in
201 * the current drm_bacon_bufmgr_check_aperture_space() total.
202 */
203 bool included_in_check_aperture;
204
205 /**
206 * Boolean of whether this buffer has been used as a relocation
207 * target and had its size accounted for, and thus can't have any
208 * further relocations added to it.
209 */
210 bool used_as_reloc_target;
211
212 /**
213 * Boolean of whether we have encountered an error whilst building the relocation tree.
214 */
215 bool has_error;
216
217 /**
218 * Boolean of whether this buffer can be re-used
219 */
220 bool reusable;
221
222 /**
223 * Boolean of whether the GPU is definitely not accessing the buffer.
224 *
225 * This is only valid when reusable, since non-reusable
226 * buffers are those that have been shared with other
227 * processes, so we don't know their state.
228 */
229 bool idle;
230
231 /**
232 * Size in bytes of this buffer and its relocation descendants.
233 *
234 * Used to avoid costly tree walking in
235 * drm_bacon_bufmgr_check_aperture in the common case.
236 */
237 int reloc_tree_size;
238
239 /** Flags that we may need to do the SW_FINISH ioctl on unmap. */
240 bool mapped_cpu_write;
241 };
242
243 static unsigned int
244 drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);
245
246 static unsigned int
247 drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);
248
249 static int
250 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
251 uint32_t tiling_mode,
252 uint32_t stride);
253
254 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
255 time_t time);
256
257 static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
258
259 static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
260 {
261 return (drm_bacon_bo_gem *)bo;
262 }
263
264 static uint32_t
265 key_hash_uint(const void *key)
266 {
267 return _mesa_hash_data(key, 4);
268 }
269
270 static bool
271 key_uint_equal(const void *a, const void *b)
272 {
273 return *((unsigned *) a) == *((unsigned *) b);
274 }
275
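/** Looks up a BO in @ht (handle_table or name_table) by its 32-bit key. */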
276 static drm_bacon_bo_gem *
277 hash_find_bo(struct hash_table *ht, unsigned int key)
278 {
279 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
280 return entry ? (drm_bacon_bo_gem *) entry->data : NULL;
281 }
282
283 static unsigned long
284 drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr *bufmgr, unsigned long size,
285 uint32_t *tiling_mode)
286 {
287 if (*tiling_mode == I915_TILING_NONE)
288 return size;
289
290 /* 965+ just need multiples of page size for tiling */
291 return ALIGN(size, 4096);
292 }
293
294 /*
295 * Round a given pitch up to the minimum required for X tiling on a
296 * given chip. We use 512 as the minimum to allow for a later tiling
297 * change.
298 */
299 static unsigned long
300 drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr *bufmgr,
301 unsigned long pitch, uint32_t *tiling_mode)
302 {
303 unsigned long tile_width;
304
305 /* If untiled, then just align it so that we can do rendering
306 * to it with the 3D engine.
307 */
308 if (*tiling_mode == I915_TILING_NONE)
309 return ALIGN(pitch, 64);
310
311 if (*tiling_mode == I915_TILING_X)
312 tile_width = 512;
313 else
314 tile_width = 128;
315
316 /* 965 is flexible */
317 return ALIGN(pitch, tile_width);
318 }
319
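/**
 * Returns the first (smallest) cache bucket whose size is at least
 * @size, or NULL if the request is larger than any bucket, in which
 * case the allocation is not cached.
 */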
320 static struct drm_bacon_gem_bo_bucket *
321 drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr *bufmgr,
322 unsigned long size)
323 {
324 int i;
325
326 for (i = 0; i < bufmgr->num_buckets; i++) {
327 struct drm_bacon_gem_bo_bucket *bucket =
328 &bufmgr->cache_bucket[i];
329 if (bucket->size >= size) {
330 return bucket;
331 }
332 }
333
334 return NULL;
335 }
336
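/** Debug helper: dumps each BO on the validation list along with its relocations. */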
337 static void
338 drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr *bufmgr)
339 {
340 int i, j;
341
342 for (i = 0; i < bufmgr->exec_count; i++) {
343 drm_bacon_bo *bo = bufmgr->exec_bos[i];
344 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
345
346 if (bo_gem->relocs == NULL) {
347 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
348 bo_gem->name);
349 continue;
350 }
351
352 for (j = 0; j < bo_gem->reloc_count; j++) {
353 drm_bacon_bo *target_bo = bo_gem->reloc_bos[j];
354 drm_bacon_bo_gem *target_gem =
355 (drm_bacon_bo_gem *) target_bo;
356
357 DBG("%2d: %d (%s)@0x%08x %08x -> "
358 "%d (%s)@0x%08x %08x + 0x%08x\n",
359 i,
360 bo_gem->gem_handle,
361 bo_gem->name,
362 upper_32_bits(bo_gem->relocs[j].offset),
363 lower_32_bits(bo_gem->relocs[j].offset),
364 target_gem->gem_handle,
365 target_gem->name,
366 upper_32_bits(target_bo->offset64),
367 lower_32_bits(target_bo->offset64),
368 bo_gem->relocs[j].delta);
369 }
370 }
371 }
372
373 inline void
374 drm_bacon_bo_reference(drm_bacon_bo *bo)
375 {
376 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
377
378 p_atomic_inc(&bo_gem->refcount);
379 }
380
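/**
 * Adds @bo to the execbuf validation list, growing the exec2_objects
 * and exec_bos arrays as needed.  A BO already on the list
 * (validate_index != -1) is skipped.
 */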
381 static void
382 drm_bacon_add_validate_buffer2(drm_bacon_bo *bo)
383 {
384 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
385 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
386 int index;
387
388 if (bo_gem->validate_index != -1)
389 return;
390
391 /* Extend the array of validation entries as necessary. */
392 if (bufmgr->exec_count == bufmgr->exec_size) {
393 int new_size = bufmgr->exec_size * 2;
394
395 if (new_size == 0)
396 new_size = 5;
397
398 bufmgr->exec2_objects =
399 realloc(bufmgr->exec2_objects,
400 sizeof(*bufmgr->exec2_objects) * new_size);
401 bufmgr->exec_bos =
402 realloc(bufmgr->exec_bos,
403 sizeof(*bufmgr->exec_bos) * new_size);
404 bufmgr->exec_size = new_size;
405 }
406
407 index = bufmgr->exec_count;
408 bo_gem->validate_index = index;
409 /* Fill in array entry */
410 bufmgr->exec2_objects[index].handle = bo_gem->gem_handle;
411 bufmgr->exec2_objects[index].relocation_count = bo_gem->reloc_count;
412 bufmgr->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
413 bufmgr->exec2_objects[index].alignment = bo->align;
414 bufmgr->exec2_objects[index].offset = bo->offset64;
415 bufmgr->exec2_objects[index].flags = 0;
416 bufmgr->exec2_objects[index].rsvd1 = 0;
417 bufmgr->exec2_objects[index].rsvd2 = 0;
418 bufmgr->exec_bos[index] = bo;
419 bufmgr->exec_count++;
420 }
421
422 static void
423 drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr *bufmgr,
424 drm_bacon_bo_gem *bo_gem,
425 unsigned int alignment)
426 {
427 unsigned int size;
428
429 assert(!bo_gem->used_as_reloc_target);
430
431 /* The older chipsets are far less flexible in terms of tiling,
432 * and require tiled buffers to be size-aligned in the aperture.
433 * This means that in the worst possible case we will need a hole
434 * twice as large as the object in order for it to fit into the
435 * aperture. Optimal packing is for wimps.
436 */
437 size = bo_gem->bo.size;
438
439 bo_gem->reloc_tree_size = size + alignment;
440 }
441
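/**
 * Allocates the relocation arrays for @bo, capping the entry count at
 * bufmgr->max_relocs (or bo->size / 4 for small buffers).  Returns
 * nonzero and sets has_error on allocation failure.
 */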
442 static int
443 drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
444 {
445 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
446 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
447 unsigned int max_relocs = bufmgr->max_relocs;
448
449 if (bo->size / 4 < max_relocs)
450 max_relocs = bo->size / 4;
451
452 bo_gem->relocs = malloc(max_relocs *
453 sizeof(struct drm_i915_gem_relocation_entry));
454 bo_gem->reloc_bos = malloc(max_relocs * sizeof(drm_bacon_bo *));
455 if (bo_gem->relocs == NULL || bo_gem->reloc_bos == NULL) {
456 bo_gem->has_error = true;
457
458 free (bo_gem->relocs);
459 bo_gem->relocs = NULL;
460
461 free (bo_gem->reloc_bos);
462 bo_gem->reloc_bos = NULL;
463
464 return 1;
465 }
466
467 return 0;
468 }
469
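/**
 * Returns true if the GPU may still be using @bo, querying the kernel
 * with DRM_IOCTL_I915_GEM_BUSY unless the BO is already known idle.
 */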
470 int
471 drm_bacon_bo_busy(drm_bacon_bo *bo)
472 {
473 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
474 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
475 struct drm_i915_gem_busy busy;
476 int ret;
477
478 if (bo_gem->reusable && bo_gem->idle)
479 return false;
480
481 memclear(busy);
482 busy.handle = bo_gem->gem_handle;
483
484 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
485 if (ret == 0) {
486 bo_gem->idle = !busy.busy;
487 return busy.busy;
488 } else {
489 return false;
490 }
492 }
493
494 static int
495 drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr *bufmgr,
496 drm_bacon_bo_gem *bo_gem, int state)
497 {
498 struct drm_i915_gem_madvise madv;
499
500 memclear(madv);
501 madv.handle = bo_gem->gem_handle;
502 madv.madv = state;
503 madv.retained = 1;
504 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
505
506 return madv.retained;
507 }
508
509 int
510 drm_bacon_bo_madvise(drm_bacon_bo *bo, int madv)
511 {
512 return drm_bacon_gem_bo_madvise_internal(bo->bufmgr,
513 (drm_bacon_bo_gem *) bo,
514 madv);
515 }
516
517 /* drop the oldest entries that have been purged by the kernel */
518 static void
519 drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr *bufmgr,
520 struct drm_bacon_gem_bo_bucket *bucket)
521 {
522 while (!list_empty(&bucket->head)) {
523 drm_bacon_bo_gem *bo_gem;
524
525 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
526 bucket->head.next, head);
527 if (drm_bacon_gem_bo_madvise_internal
528 (bufmgr, bo_gem, I915_MADV_DONTNEED))
529 break;
530
531 list_del(&bo_gem->head);
532 drm_bacon_gem_bo_free(&bo_gem->bo);
533 }
534 }
535
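/**
 * Common allocation path: tries to reuse a cached BO of the bucketed
 * size (preferring the most-recently-used entry for render targets,
 * and only idle BOs otherwise), falling back to
 * DRM_IOCTL_I915_GEM_CREATE when the cache has nothing suitable.
 */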
536 static drm_bacon_bo *
537 drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
538 const char *name,
539 unsigned long size,
540 unsigned long flags,
541 uint32_t tiling_mode,
542 unsigned long stride,
543 unsigned int alignment)
544 {
545 drm_bacon_bo_gem *bo_gem;
546 unsigned int page_size = getpagesize();
547 int ret;
548 struct drm_bacon_gem_bo_bucket *bucket;
549 bool alloc_from_cache;
550 unsigned long bo_size;
551 bool for_render = false;
552
553 if (flags & BO_ALLOC_FOR_RENDER)
554 for_render = true;
555
556 /* Round the allocated size up to a power of two number of pages. */
557 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, size);
558
559 /* If we don't have caching at this size, don't actually round the
560 * allocation up.
561 */
562 if (bucket == NULL) {
563 bo_size = size;
564 if (bo_size < page_size)
565 bo_size = page_size;
566 } else {
567 bo_size = bucket->size;
568 }
569
570 pthread_mutex_lock(&bufmgr->lock);
571 /* Get a buffer out of the cache if available */
572 retry:
573 alloc_from_cache = false;
574 if (bucket != NULL && !list_empty(&bucket->head)) {
575 if (for_render) {
576 /* Allocate new render-target BOs from the tail (MRU)
577 * of the list, as it will likely be hot in the GPU
578 * cache and in the aperture for us.
579 */
580 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
581 bucket->head.prev, head);
582 list_del(&bo_gem->head);
583 alloc_from_cache = true;
584 bo_gem->bo.align = alignment;
585 } else {
586 assert(alignment == 0);
587 /* For non-render-target BOs (where we're probably
588 * going to map it first thing in order to fill it
589 * with data), check if the last BO in the cache is
590 * unbusy, and only reuse in that case. Otherwise,
591 * allocating a new buffer is probably faster than
592 * waiting for the GPU to finish.
593 */
594 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
595 bucket->head.next, head);
596 if (!drm_bacon_bo_busy(&bo_gem->bo)) {
597 alloc_from_cache = true;
598 list_del(&bo_gem->head);
599 }
600 }
601
602 if (alloc_from_cache) {
603 if (!drm_bacon_gem_bo_madvise_internal
604 (bufmgr, bo_gem, I915_MADV_WILLNEED)) {
605 drm_bacon_gem_bo_free(&bo_gem->bo);
606 drm_bacon_gem_bo_cache_purge_bucket(bufmgr,
607 bucket);
608 goto retry;
609 }
610
611 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
612 tiling_mode,
613 stride)) {
614 drm_bacon_gem_bo_free(&bo_gem->bo);
615 goto retry;
616 }
617 }
618 }
619
620 if (!alloc_from_cache) {
621 struct drm_i915_gem_create create;
622
623 bo_gem = calloc(1, sizeof(*bo_gem));
624 if (!bo_gem)
625 goto err;
626
627 /* drm_bacon_gem_bo_free calls list_del() for an uninitialized
628 * list (vma_list), so make sure the list head is initialized here. */
629 list_inithead(&bo_gem->vma_list);
630
631 bo_gem->bo.size = bo_size;
632
633 memclear(create);
634 create.size = bo_size;
635
636 ret = drmIoctl(bufmgr->fd,
637 DRM_IOCTL_I915_GEM_CREATE,
638 &create);
639 if (ret != 0) {
640 free(bo_gem);
641 goto err;
642 }
643
644 bo_gem->gem_handle = create.handle;
645 _mesa_hash_table_insert(bufmgr->handle_table,
646 &bo_gem->gem_handle, bo_gem);
647
648 bo_gem->bo.handle = bo_gem->gem_handle;
649 bo_gem->bo.bufmgr = bufmgr;
650 bo_gem->bo.align = alignment;
651
652 bo_gem->tiling_mode = I915_TILING_NONE;
653 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
654 bo_gem->stride = 0;
655
656 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
657 tiling_mode,
658 stride))
659 goto err_free;
660 }
661
662 bo_gem->name = name;
663 p_atomic_set(&bo_gem->refcount, 1);
664 bo_gem->validate_index = -1;
665 bo_gem->used_as_reloc_target = false;
666 bo_gem->has_error = false;
667 bo_gem->reusable = true;
668
669 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, alignment);
670 pthread_mutex_unlock(&bufmgr->lock);
671
672 DBG("bo_create: buf %d (%s) %ldb\n",
673 bo_gem->gem_handle, bo_gem->name, size);
674
675 return &bo_gem->bo;
676
677 err_free:
678 drm_bacon_gem_bo_free(&bo_gem->bo);
679 err:
680 pthread_mutex_unlock(&bufmgr->lock);
681 return NULL;
682 }
683
684 drm_bacon_bo *
685 drm_bacon_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
686 const char *name,
687 unsigned long size,
688 unsigned int alignment)
689 {
690 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
691 BO_ALLOC_FOR_RENDER,
692 I915_TILING_NONE, 0,
693 alignment);
694 }
695
696 drm_bacon_bo *
697 drm_bacon_bo_alloc(drm_bacon_bufmgr *bufmgr,
698 const char *name,
699 unsigned long size,
700 unsigned int alignment)
701 {
702 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
703 I915_TILING_NONE, 0, 0);
704 }
705
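/**
 * Allocates a BO sized for an @x by @y image of @cpp bytes per pixel,
 * rounding the pitch and height to the requirements of *tiling_mode
 * and returning the chosen pitch in *pitch.
 */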
706 drm_bacon_bo *
707 drm_bacon_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
708 int x, int y, int cpp, uint32_t *tiling_mode,
709 unsigned long *pitch, unsigned long flags)
710 {
711 unsigned long size, stride;
712 uint32_t tiling;
713
714 do {
715 unsigned long aligned_y, height_alignment;
716
717 tiling = *tiling_mode;
718
719 /* If we're tiled, our allocations are in 8 or 32-row blocks,
720 * so failure to align our height means that we won't allocate
721 * enough pages.
722 *
723 * If we're untiled, we still have to align to 2 rows high
724 * because the data port accesses 2x2 blocks even if the
725 * bottom row isn't to be rendered, so failure to align means
726 * we could walk off the end of the GTT and fault. This is
727 * documented on 965, and may be the case on older chipsets
728 * too so we try to be careful.
729 */
730 aligned_y = y;
731 height_alignment = 2;
732
733 if (tiling == I915_TILING_X)
734 height_alignment = 8;
735 else if (tiling == I915_TILING_Y)
736 height_alignment = 32;
737 aligned_y = ALIGN(y, height_alignment);
738
739 stride = x * cpp;
740 stride = drm_bacon_gem_bo_tile_pitch(bufmgr, stride, tiling_mode);
741 size = stride * aligned_y;
742 size = drm_bacon_gem_bo_tile_size(bufmgr, size, tiling_mode);
743 } while (*tiling_mode != tiling);
744 *pitch = stride;
745
746 if (tiling == I915_TILING_NONE)
747 stride = 0;
748
749 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
750 tiling, stride, 0);
751 }
752
753 /**
754 * Returns a drm_bacon_bo wrapping the given buffer object handle.
755 *
756 * This can be used when one application needs to pass a buffer object
757 * to another.
758 */
759 drm_bacon_bo *
760 drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
761 const char *name,
762 unsigned int handle)
763 {
764 drm_bacon_bo_gem *bo_gem;
765 int ret;
766 struct drm_gem_open open_arg;
767 struct drm_i915_gem_get_tiling get_tiling;
768
769 /* At the moment most applications only have a few named bo.
770 * For instance, in a DRI client only the render buffers passed
771 * between X and the client are named. And since X returns the
772 * alternating names for the front/back buffer a linear search
773 * provides a sufficiently fast match.
774 */
775 pthread_mutex_lock(&bufmgr->lock);
776 bo_gem = hash_find_bo(bufmgr->name_table, handle);
777 if (bo_gem) {
778 drm_bacon_bo_reference(&bo_gem->bo);
779 goto out;
780 }
781
782 memclear(open_arg);
783 open_arg.name = handle;
784 ret = drmIoctl(bufmgr->fd,
785 DRM_IOCTL_GEM_OPEN,
786 &open_arg);
787 if (ret != 0) {
788 DBG("Couldn't reference %s handle 0x%08x: %s\n",
789 name, handle, strerror(errno));
790 bo_gem = NULL;
791 goto out;
792 }
793 /* Now see if someone has used a prime handle to get this
794 * object from the kernel before by looking through the list
795 * again for a matching gem_handle
796 */
797 bo_gem = hash_find_bo(bufmgr->handle_table, open_arg.handle);
798 if (bo_gem) {
799 drm_bacon_bo_reference(&bo_gem->bo);
800 goto out;
801 }
802
803 bo_gem = calloc(1, sizeof(*bo_gem));
804 if (!bo_gem)
805 goto out;
806
807 p_atomic_set(&bo_gem->refcount, 1);
808 list_inithead(&bo_gem->vma_list);
809
810 bo_gem->bo.size = open_arg.size;
811 bo_gem->bo.offset64 = 0;
812 bo_gem->bo.virtual = NULL;
813 bo_gem->bo.bufmgr = bufmgr;
814 bo_gem->name = name;
815 bo_gem->validate_index = -1;
816 bo_gem->gem_handle = open_arg.handle;
817 bo_gem->bo.handle = open_arg.handle;
818 bo_gem->global_name = handle;
819 bo_gem->reusable = false;
820
821 _mesa_hash_table_insert(bufmgr->handle_table,
822 &bo_gem->gem_handle, bo_gem);
823 _mesa_hash_table_insert(bufmgr->name_table,
824 &bo_gem->global_name, bo_gem);
825
826 memclear(get_tiling);
827 get_tiling.handle = bo_gem->gem_handle;
828 ret = drmIoctl(bufmgr->fd,
829 DRM_IOCTL_I915_GEM_GET_TILING,
830 &get_tiling);
831 if (ret != 0)
832 goto err_unref;
833
834 bo_gem->tiling_mode = get_tiling.tiling_mode;
835 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
836 /* XXX stride is unknown */
837 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
838 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
839
840 out:
841 pthread_mutex_unlock(&bufmgr->lock);
842 return &bo_gem->bo;
843
844 err_unref:
845 drm_bacon_gem_bo_free(&bo_gem->bo);
846 pthread_mutex_unlock(&bufmgr->lock);
847 return NULL;
848 }
849
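/**
 * Unmaps any cached mmaps of @bo, drops it from the handle/name lookup
 * tables, and closes the GEM handle.
 */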
850 static void
851 drm_bacon_gem_bo_free(drm_bacon_bo *bo)
852 {
853 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
854 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
855 struct drm_gem_close close;
856 struct hash_entry *entry;
857 int ret;
858
859 list_del(&bo_gem->vma_list);
860 if (bo_gem->mem_virtual) {
861 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
862 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
863 bufmgr->vma_count--;
864 }
865 if (bo_gem->wc_virtual) {
866 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
867 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
868 bufmgr->vma_count--;
869 }
870 if (bo_gem->gtt_virtual) {
871 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
872 bufmgr->vma_count--;
873 }
874
875 if (bo_gem->global_name) {
876 entry = _mesa_hash_table_search(bufmgr->name_table,
877 &bo_gem->global_name);
878 _mesa_hash_table_remove(bufmgr->name_table, entry);
879 }
880 entry = _mesa_hash_table_search(bufmgr->handle_table,
881 &bo_gem->gem_handle);
882 _mesa_hash_table_remove(bufmgr->handle_table, entry);
883
884 /* Close this object */
885 memclear(close);
886 close.handle = bo_gem->gem_handle;
887 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
888 if (ret != 0) {
889 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
890 bo_gem->gem_handle, bo_gem->name, strerror(errno));
891 }
892 free(bo);
893 }
894
895 static void
896 drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
897 {
898 #if HAVE_VALGRIND
899 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
900
901 if (bo_gem->mem_virtual)
902 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
903
904 if (bo_gem->wc_virtual)
905 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
906
907 if (bo_gem->gtt_virtual)
908 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
909 #endif
910 }
911
912 /** Frees all cached buffers significantly older than @time. */
913 static void
914 drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr *bufmgr, time_t time)
915 {
916 int i;
917
918 if (bufmgr->time == time)
919 return;
920
921 for (i = 0; i < bufmgr->num_buckets; i++) {
922 struct drm_bacon_gem_bo_bucket *bucket =
923 &bufmgr->cache_bucket[i];
924
925 while (!list_empty(&bucket->head)) {
926 drm_bacon_bo_gem *bo_gem;
927
928 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
929 bucket->head.next, head);
930 if (time - bo_gem->free_time <= 1)
931 break;
932
933 list_del(&bo_gem->head);
934
935 drm_bacon_gem_bo_free(&bo_gem->bo);
936 }
937 }
938
939 bufmgr->time = time;
940 }
941
942 static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr *bufmgr)
943 {
944 int limit;
945
946 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
947 bufmgr->vma_count, bufmgr->vma_open, bufmgr->vma_max);
948
949 if (bufmgr->vma_max < 0)
950 return;
951
952 /* We may need to evict a few entries in order to create new mmaps */
953 limit = bufmgr->vma_max - 2*bufmgr->vma_open;
954 if (limit < 0)
955 limit = 0;
956
957 while (bufmgr->vma_count > limit) {
958 drm_bacon_bo_gem *bo_gem;
959
960 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
961 bufmgr->vma_cache.next,
962 vma_list);
963 assert(bo_gem->map_count == 0);
964 list_delinit(&bo_gem->vma_list);
965
966 if (bo_gem->mem_virtual) {
967 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
968 bo_gem->mem_virtual = NULL;
969 bufmgr->vma_count--;
970 }
971 if (bo_gem->wc_virtual) {
972 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
973 bo_gem->wc_virtual = NULL;
974 bufmgr->vma_count--;
975 }
976 if (bo_gem->gtt_virtual) {
977 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
978 bo_gem->gtt_virtual = NULL;
979 bufmgr->vma_count--;
980 }
981 }
982 }
983
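/*
 * close_vma/open_vma track how many BOs currently have live mappings:
 * when the last map of a BO is dropped, its cached mmaps are counted
 * on the bufmgr-wide vma_cache, which
 * drm_bacon_gem_bo_purge_vma_cache() trims to stay under vma_max.
 */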
984 static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr *bufmgr,
985 drm_bacon_bo_gem *bo_gem)
986 {
987 bufmgr->vma_open--;
988 list_addtail(&bo_gem->vma_list, &bufmgr->vma_cache);
989 if (bo_gem->mem_virtual)
990 bufmgr->vma_count++;
991 if (bo_gem->wc_virtual)
992 bufmgr->vma_count++;
993 if (bo_gem->gtt_virtual)
994 bufmgr->vma_count++;
995 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
996 }
997
998 static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr *bufmgr,
999 drm_bacon_bo_gem *bo_gem)
1000 {
1001 bufmgr->vma_open++;
1002 list_del(&bo_gem->vma_list);
1003 if (bo_gem->mem_virtual)
1004 bufmgr->vma_count--;
1005 if (bo_gem->wc_virtual)
1006 bufmgr->vma_count--;
1007 if (bo_gem->gtt_virtual)
1008 bufmgr->vma_count--;
1009 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
1010 }
1011
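/**
 * Final teardown once the last reference is gone: drops references on
 * relocation targets, frees the relocation arrays, and either parks
 * the BO in its cache bucket (when reuse is enabled and the kernel
 * retains the pages) or frees it outright.
 */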
1012 static void
1013 drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
1014 {
1015 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1016 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1017 struct drm_bacon_gem_bo_bucket *bucket;
1018 int i;
1019
1020 /* Unreference all the target buffers */
1021 for (i = 0; i < bo_gem->reloc_count; i++) {
1022 if (bo_gem->reloc_bos[i] != bo) {
1023 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
1024 reloc_bos[i],
1025 time);
1026 }
1027 }
1028 bo_gem->reloc_count = 0;
1029 bo_gem->used_as_reloc_target = false;
1030
1031 DBG("bo_unreference final: %d (%s)\n",
1032 bo_gem->gem_handle, bo_gem->name);
1033
1034 /* release memory associated with this object */
1035 if (bo_gem->reloc_bos) {
1036 free(bo_gem->reloc_bos);
1037 bo_gem->reloc_bos = NULL;
1038 }
1039 if (bo_gem->relocs) {
1040 free(bo_gem->relocs);
1041 bo_gem->relocs = NULL;
1042 }
1043
1044 /* Clear any left-over mappings */
1045 if (bo_gem->map_count) {
1046 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1047 bo_gem->map_count = 0;
1048 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1049 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1050 }
1051
1052 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, bo->size);
1053 /* Put the buffer into our internal cache for reuse if we can. */
1054 if (bufmgr->bo_reuse && bo_gem->reusable && bucket != NULL &&
1055 drm_bacon_gem_bo_madvise_internal(bufmgr, bo_gem,
1056 I915_MADV_DONTNEED)) {
1057 bo_gem->free_time = time;
1058
1059 bo_gem->name = NULL;
1060 bo_gem->validate_index = -1;
1061
1062 list_addtail(&bo_gem->head, &bucket->head);
1063 } else {
1064 drm_bacon_gem_bo_free(bo);
1065 }
1066 }
1067
1068 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
1069 time_t time)
1070 {
1071 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1072
1073 assert(p_atomic_read(&bo_gem->refcount) > 0);
1074 if (p_atomic_dec_zero(&bo_gem->refcount))
1075 drm_bacon_gem_bo_unreference_final(bo, time);
1076 }
1077
1078 void
1079 drm_bacon_bo_unreference(drm_bacon_bo *bo)
1080 {
1081 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1082
1083 if (bo == NULL)
1084 return;
1085
1086 assert(p_atomic_read(&bo_gem->refcount) > 0);
1087
1088 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1089 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1090 struct timespec time;
1091
1092 clock_gettime(CLOCK_MONOTONIC, &time);
1093
1094 pthread_mutex_lock(&bufmgr->lock);
1095
1096 if (p_atomic_dec_zero(&bo_gem->refcount)) {
1097 drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
1098 drm_bacon_gem_cleanup_bo_cache(bufmgr, time.tv_sec);
1099 }
1100
1101 pthread_mutex_unlock(&bufmgr->lock);
1102 }
1103 }
1104
1105 int
1106 drm_bacon_bo_map(drm_bacon_bo *bo, int write_enable)
1107 {
1108 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1109 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1110 struct drm_i915_gem_set_domain set_domain;
1111 int ret;
1112
1113 pthread_mutex_lock(&bufmgr->lock);
1114
1115 if (bo_gem->map_count++ == 0)
1116 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1117
1118 if (!bo_gem->mem_virtual) {
1119 struct drm_i915_gem_mmap mmap_arg;
1120
1121 DBG("bo_map: %d (%s), map_count=%d\n",
1122 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1123
1124 memclear(mmap_arg);
1125 mmap_arg.handle = bo_gem->gem_handle;
1126 mmap_arg.size = bo->size;
1127 ret = drmIoctl(bufmgr->fd,
1128 DRM_IOCTL_I915_GEM_MMAP,
1129 &mmap_arg);
1130 if (ret != 0) {
1131 ret = -errno;
1132 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1133 __FILE__, __LINE__, bo_gem->gem_handle,
1134 bo_gem->name, strerror(errno));
1135 if (--bo_gem->map_count == 0)
1136 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1137 pthread_mutex_unlock(&bufmgr->lock);
1138 return ret;
1139 }
1140 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1141 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1142 }
1143 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1144 bo_gem->mem_virtual);
1145 bo->virtual = bo_gem->mem_virtual;
1146
1147 memclear(set_domain);
1148 set_domain.handle = bo_gem->gem_handle;
1149 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1150 if (write_enable)
1151 set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1152 else
1153 set_domain.write_domain = 0;
1154 ret = drmIoctl(bufmgr->fd,
1155 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1156 &set_domain);
1157 if (ret != 0) {
1158 DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1159 __FILE__, __LINE__, bo_gem->gem_handle,
1160 strerror(errno));
1161 }
1162
1163 if (write_enable)
1164 bo_gem->mapped_cpu_write = true;
1165
1166 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1167 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1168 pthread_mutex_unlock(&bufmgr->lock);
1169
1170 return 0;
1171 }
1172
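/**
 * Maps @bo through the GTT (aperture), creating the mmap on first use
 * via DRM_IOCTL_I915_GEM_MMAP_GTT.  Callers are responsible for any
 * set_domain call; the bufmgr lock must already be held.
 */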
1173 static int
1174 map_gtt(drm_bacon_bo *bo)
1175 {
1176 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1177 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1178 int ret;
1179
1180 if (bo_gem->map_count++ == 0)
1181 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1182
1183 /* Get a mapping of the buffer if we haven't before. */
1184 if (bo_gem->gtt_virtual == NULL) {
1185 struct drm_i915_gem_mmap_gtt mmap_arg;
1186
1187 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1188 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1189
1190 memclear(mmap_arg);
1191 mmap_arg.handle = bo_gem->gem_handle;
1192
1193 /* Get the fake offset back... */
1194 ret = drmIoctl(bufmgr->fd,
1195 DRM_IOCTL_I915_GEM_MMAP_GTT,
1196 &mmap_arg);
1197 if (ret != 0) {
1198 ret = -errno;
1199 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1200 __FILE__, __LINE__,
1201 bo_gem->gem_handle, bo_gem->name,
1202 strerror(errno));
1203 if (--bo_gem->map_count == 0)
1204 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1205 return ret;
1206 }
1207
1208 /* and mmap it */
1209 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1210 MAP_SHARED, bufmgr->fd,
1211 mmap_arg.offset);
1212 if (bo_gem->gtt_virtual == MAP_FAILED) {
1213 bo_gem->gtt_virtual = NULL;
1214 ret = -errno;
1215 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1216 __FILE__, __LINE__,
1217 bo_gem->gem_handle, bo_gem->name,
1218 strerror(errno));
1219 if (--bo_gem->map_count == 0)
1220 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1221 return ret;
1222 }
1223 }
1224
1225 bo->virtual = bo_gem->gtt_virtual;
1226
1227 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1228 bo_gem->gtt_virtual);
1229
1230 return 0;
1231 }
1232
1233 int
1234 drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
1235 {
1236 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1237 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1238 struct drm_i915_gem_set_domain set_domain;
1239 int ret;
1240
1241 pthread_mutex_lock(&bufmgr->lock);
1242
1243 ret = map_gtt(bo);
1244 if (ret) {
1245 pthread_mutex_unlock(&bufmgr->lock);
1246 return ret;
1247 }
1248
1249 /* Now move it to the GTT domain so that the GPU and CPU
1250 * caches are flushed and the GPU isn't actively using the
1251 * buffer.
1252 *
1253 * The pagefault handler does this domain change for us when
1254 * it has unbound the BO from the GTT, but it's up to us to
1255 * tell it when we're about to use things if we had done
1256 * rendering and it still happens to be bound to the GTT.
1257 */
1258 memclear(set_domain);
1259 set_domain.handle = bo_gem->gem_handle;
1260 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1261 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1262 ret = drmIoctl(bufmgr->fd,
1263 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1264 &set_domain);
1265 if (ret != 0) {
1266 DBG("%s:%d: Error setting domain %d: %s\n",
1267 __FILE__, __LINE__, bo_gem->gem_handle,
1268 strerror(errno));
1269 }
1270
1271 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1272 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1273 pthread_mutex_unlock(&bufmgr->lock);
1274
1275 return 0;
1276 }
1277
1278 /**
1279 * Performs a mapping of the buffer object like the normal GTT
1280 * mapping, but avoids waiting for the GPU to be done reading from or
1281 * rendering to the buffer.
1282 *
1283 * This is used in the implementation of GL_ARB_map_buffer_range: The
1284 * user asks to create a buffer, then does a mapping, fills some
1285 * space, runs a drawing command, then asks to map it again without
1286 * synchronizing because it guarantees that it won't write over the
1287 * data that the GPU is busy using (or, more specifically, that if it
1288 * does write over the data, it acknowledges that rendering is
1289 * undefined).
1290 */
1291
1292 int
1293 drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
1294 {
1295 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1296 #ifdef HAVE_VALGRIND
1297 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1298 #endif
1299 int ret;
1300
1301 /* If the CPU cache isn't coherent with the GTT, then use a
1302 * regular synchronized mapping. The problem is that we don't
1303 * track where the buffer was last used on the CPU side in
1304 * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
1305 * we would potentially corrupt the buffer even when the user
1306 * does reasonable things.
1307 */
1308 if (!bufmgr->has_llc)
1309 return drm_bacon_gem_bo_map_gtt(bo);
1310
1311 pthread_mutex_lock(&bufmgr->lock);
1312
1313 ret = map_gtt(bo);
1314 if (ret == 0) {
1315 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1316 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1317 }
1318
1319 pthread_mutex_unlock(&bufmgr->lock);
1320
1321 return ret;
1322 }
1323
1324 int
1325 drm_bacon_bo_unmap(drm_bacon_bo *bo)
1326 {
1327 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1328 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1329 int ret = 0;
1330
1331 if (bo == NULL)
1332 return 0;
1333
1334 pthread_mutex_lock(&bufmgr->lock);
1335
1336 if (bo_gem->map_count <= 0) {
1337 DBG("attempted to unmap an unmapped bo\n");
1338 pthread_mutex_unlock(&bufmgr->lock);
1339 /* Preserve the old behaviour of just treating this as a
1340 * no-op rather than reporting the error.
1341 */
1342 return 0;
1343 }
1344
1345 if (bo_gem->mapped_cpu_write) {
1346 struct drm_i915_gem_sw_finish sw_finish;
1347
1348 /* Cause a flush to happen if the buffer's pinned for
1349 * scanout, so the results show up in a timely manner.
1350 * Unlike GTT set domains, this only does work if the
1351 * buffer should be scanout-related.
1352 */
1353 memclear(sw_finish);
1354 sw_finish.handle = bo_gem->gem_handle;
1355 ret = drmIoctl(bufmgr->fd,
1356 DRM_IOCTL_I915_GEM_SW_FINISH,
1357 &sw_finish);
1358 ret = ret == -1 ? -errno : 0;
1359
1360 bo_gem->mapped_cpu_write = false;
1361 }
1362
1363 /* We need to unmap after every invocation as we cannot track
1364 * an open vma for every bo as that will exhaust the system
1365 * limits and cause later failures.
1366 */
1367 if (--bo_gem->map_count == 0) {
1368 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1369 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1370 bo->virtual = NULL;
1371 }
1372 pthread_mutex_unlock(&bufmgr->lock);
1373
1374 return ret;
1375 }
1376
1377 int
1378 drm_bacon_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
1379 unsigned long size, const void *data)
1380 {
1381 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1382 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1383 struct drm_i915_gem_pwrite pwrite;
1384 int ret;
1385
1386 memclear(pwrite);
1387 pwrite.handle = bo_gem->gem_handle;
1388 pwrite.offset = offset;
1389 pwrite.size = size;
1390 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1391 ret = drmIoctl(bufmgr->fd,
1392 DRM_IOCTL_I915_GEM_PWRITE,
1393 &pwrite);
1394 if (ret != 0) {
1395 ret = -errno;
1396 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1397 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1398 (int)size, strerror(errno));
1399 }
1400
1401 return ret;
1402 }
1403
1404 int
1405 drm_bacon_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
1406 unsigned long size, void *data)
1407 {
1408 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1409 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1410 struct drm_i915_gem_pread pread;
1411 int ret;
1412
1413 memclear(pread);
1414 pread.handle = bo_gem->gem_handle;
1415 pread.offset = offset;
1416 pread.size = size;
1417 pread.data_ptr = (uint64_t) (uintptr_t) data;
1418 ret = drmIoctl(bufmgr->fd,
1419 DRM_IOCTL_I915_GEM_PREAD,
1420 &pread);
1421 if (ret != 0) {
1422 ret = -errno;
1423 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1424 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1425 (int)size, strerror(errno));
1426 }
1427
1428 return ret;
1429 }
1430
1431 /** Waits for all GPU rendering with the object to have completed. */
1432 void
1433 drm_bacon_bo_wait_rendering(drm_bacon_bo *bo)
1434 {
1435 drm_bacon_gem_bo_start_gtt_access(bo, 1);
1436 }
1437
1438 /**
1439 * Waits on a BO for the given amount of time.
1440 *
1441 * @bo: buffer object to wait for
1442 * @timeout_ns: amount of time to wait in nanoseconds.
1443 * If value is less than 0, an infinite wait will occur.
1444 *
1445 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1446 * object has completed within the allotted time. Otherwise some negative return
1447 * value describes the error. Of particular interest is -ETIME when the wait
1448 * fails to yield the desired result.
1449 *
1450 * Similar to drm_bacon_bo_wait_rendering except a timeout parameter allows
1451 * the operation to give up after a certain amount of time. Another subtle
1452 * difference is that the internal locking semantics differ (this variant does
1453 * not hold the lock for the duration of the wait). This makes the wait subject
1454 * to a larger userspace race window.
1455 *
1456 * The implementation shall wait until the object is no longer actively
1457 * referenced within a batch buffer at the time of the call. The wait does
1458 * not guarantee that the buffer will not be re-issued via another thread or a
1459 * flinked handle. Userspace must make sure this race does not occur if such
1460 * precision is important.
1461 *
1462 * Note that some kernels have broken the infinite-wait-for-negative-values
1463 * promise; upgrade to the latest stable kernel if this is the case.
1464 */
1465 int
1466 drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
1467 {
1468 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1469 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1470 struct drm_i915_gem_wait wait;
1471 int ret;
1472
1473 memclear(wait);
1474 wait.bo_handle = bo_gem->gem_handle;
1475 wait.timeout_ns = timeout_ns;
1476 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1477 if (ret == -1)
1478 return -errno;
1479
1480 return ret;
1481 }
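/*
 * Example (illustrative sketch only, not part of the original code): a
 * caller that wants to bound how long it blocks on a batch might do
 *
 *     if (drm_bacon_gem_bo_wait(bo, 1000 * 1000 * 1000) == -ETIME)
 *         handle_gpu_still_busy(bo);
 *
 * where handle_gpu_still_busy() is a made-up caller-side fallback for
 * the case that the GPU has not finished within one second.
 */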
1482
1483 /**
1484 * Sets the object to the GTT read and possibly write domain, used by the X
1485 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
1486 *
1487 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
1488 * can do tiled pixmaps this way.
1489 */
1490 void
1491 drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
1492 {
1493 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1494 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1495 struct drm_i915_gem_set_domain set_domain;
1496 int ret;
1497
1498 memclear(set_domain);
1499 set_domain.handle = bo_gem->gem_handle;
1500 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1501 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1502 ret = drmIoctl(bufmgr->fd,
1503 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1504 &set_domain);
1505 if (ret != 0) {
1506 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1507 __FILE__, __LINE__, bo_gem->gem_handle,
1508 set_domain.read_domains, set_domain.write_domain,
1509 strerror(errno));
1510 }
1511 }
1512
1513 void
1514 drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr)
1515 {
1516 free(bufmgr->exec2_objects);
1517 free(bufmgr->exec_bos);
1518
1519 pthread_mutex_destroy(&bufmgr->lock);
1520
1521 /* Free any cached buffer objects we were going to reuse */
1522 for (int i = 0; i < bufmgr->num_buckets; i++) {
1523 struct drm_bacon_gem_bo_bucket *bucket =
1524 &bufmgr->cache_bucket[i];
1525 drm_bacon_bo_gem *bo_gem;
1526
1527 while (!list_empty(&bucket->head)) {
1528 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1529 bucket->head.next, head);
1530 list_del(&bo_gem->head);
1531
1532 drm_bacon_gem_bo_free(&bo_gem->bo);
1533 }
1534 }
1535
1536 _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1537 _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1538
1539 free(bufmgr);
1540 }
1541
1542 /**
1543 * Adds the target buffer to the validation list and adds the relocation
1544 * to the reloc_buffer's relocation list.
1545 *
1546 * The relocation entry at the given offset must already contain the
1547 * precomputed relocation value, because the kernel will optimize out
1548 * the relocation entry write when the buffer hasn't moved from the
1549 * last known offset in target_bo.
1550 */
1551 int
1552 drm_bacon_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1553 drm_bacon_bo *target_bo, uint32_t target_offset,
1554 uint32_t read_domains, uint32_t write_domain)
1555 {
1556 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1557 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1558 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1559
1560 if (bo_gem->has_error)
1561 return -ENOMEM;
1562
1563 if (target_bo_gem->has_error) {
1564 bo_gem->has_error = true;
1565 return -ENOMEM;
1566 }
1567
1568 /* Create a new relocation list if needed */
1569 if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
1570 return -ENOMEM;
1571
1572 /* Check overflow */
1573 assert(bo_gem->reloc_count < bufmgr->max_relocs);
1574
1575 /* Check args */
1576 assert(offset <= bo->size - 4);
1577 assert((write_domain & (write_domain - 1)) == 0);
1578
1579 /* Make sure that we're not adding a reloc to something whose size has
1580 * already been accounted for.
1581 */
1582 assert(!bo_gem->used_as_reloc_target);
1583 if (target_bo_gem != bo_gem) {
1584 target_bo_gem->used_as_reloc_target = true;
1585 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1586 }
1587
1588 bo_gem->reloc_bos[bo_gem->reloc_count] = target_bo;
1589 if (target_bo != bo)
1590 drm_bacon_bo_reference(target_bo);
1591
1592 bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1593 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1594 bo_gem->relocs[bo_gem->reloc_count].target_handle =
1595 target_bo_gem->gem_handle;
1596 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1597 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1598 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
1599 bo_gem->reloc_count++;
1600
1601 return 0;
1602 }
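/*
 * Example (illustrative sketch only, names are hypothetical): after a
 * caller writes the presumed address of target_bo into its batch at
 * byte `offset`, the relocation is recorded with
 *
 *     drm_bacon_bo_emit_reloc(batch_bo, offset,
 *                             target_bo, 0,
 *                             I915_GEM_DOMAIN_RENDER, 0);
 *
 * so the kernel can patch the batch if target_bo has moved since the
 * presumed offset was written.
 */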
1603
1604 int
1605 drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
1606 {
1607 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1608
1609 return bo_gem->reloc_count;
1610 }
1611
1612 /**
1613 * Removes existing relocation entries in the BO after "start".
1614 *
1615 * This allows a user to avoid a two-step process for state setup with
1616 * counting up all the buffer objects and doing a
1617 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
1618 * relocations for the state setup. Instead, save the state of the
1619 * batchbuffer including drm_bacon_gem_get_reloc_count(), emit all the
1620 * state, and then check if it still fits in the aperture.
1621 *
1622 * Any further drm_bacon_bufmgr_check_aperture_space() queries
1623 * involving this buffer in the tree are undefined after this call.
1624 */
1625 void
1626 drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
1627 {
1628 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1629 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1630 int i;
1631 struct timespec time;
1632
1633 clock_gettime(CLOCK_MONOTONIC, &time);
1634
1635 assert(bo_gem->reloc_count >= start);
1636
1637 /* Unreference the cleared target buffers */
1638 pthread_mutex_lock(&bufmgr->lock);
1639
1640 for (i = start; i < bo_gem->reloc_count; i++) {
1641 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_bos[i];
1642 if (&target_bo_gem->bo != bo) {
1643 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
1644 time.tv_sec);
1645 }
1646 }
1647 bo_gem->reloc_count = start;
1648
1649 pthread_mutex_unlock(&bufmgr->lock);
1650
1651 }
1652
1653 static void
1654 drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
1655 {
1656 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1657 int i;
1658
1659 if (bo_gem->relocs == NULL)
1660 return;
1661
1662 for (i = 0; i < bo_gem->reloc_count; i++) {
1663 drm_bacon_bo *target_bo = bo_gem->reloc_bos[i];
1664
1665 if (target_bo == bo)
1666 continue;
1667
1668 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1669
1670 /* Continue walking the tree depth-first. */
1671 drm_bacon_gem_bo_process_reloc2(target_bo);
1672
1673 /* Add the target to the validate list */
1674 drm_bacon_add_validate_buffer2(target_bo);
1675 }
1676 }
1677
1678 static void
1679 drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr *bufmgr)
1680 {
1681 int i;
1682
1683 for (i = 0; i < bufmgr->exec_count; i++) {
1684 drm_bacon_bo *bo = bufmgr->exec_bos[i];
1685 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1686
1687 /* Update the buffer offset */
1688 if (bufmgr->exec2_objects[i].offset != bo->offset64) {
1689 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
1690 bo_gem->gem_handle, bo_gem->name,
1691 upper_32_bits(bo->offset64),
1692 lower_32_bits(bo->offset64),
1693 upper_32_bits(bufmgr->exec2_objects[i].offset),
1694 lower_32_bits(bufmgr->exec2_objects[i].offset));
1695 bo->offset64 = bufmgr->exec2_objects[i].offset;
1696 }
1697 }
1698 }
1699
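/**
 * Core execbuf path: flattens the relocation tree rooted at the batch
 * @bo into the validation list, fills in a drm_i915_gem_execbuffer2
 * request (optionally with in/out fences), submits it, and then reads
 * back the kernel-assigned buffer offsets.
 */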
1700 static int
1701 do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
1702 int in_fence, int *out_fence,
1703 unsigned int flags)
1704 {
1705 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1706 struct drm_i915_gem_execbuffer2 execbuf;
1707 int ret = 0;
1708 int i;
1709
1710 if (to_bo_gem(bo)->has_error)
1711 return -ENOMEM;
1712
1713 pthread_mutex_lock(&bufmgr->lock);
1714 /* Update indices and set up the validate list. */
1715 drm_bacon_gem_bo_process_reloc2(bo);
1716
1717 /* Add the batch buffer to the validation list. There are no relocations
1718 * pointing to it.
1719 */
1720 drm_bacon_add_validate_buffer2(bo);
1721
1722 memclear(execbuf);
1723 execbuf.buffers_ptr = (uintptr_t)bufmgr->exec2_objects;
1724 execbuf.buffer_count = bufmgr->exec_count;
1725 execbuf.batch_start_offset = 0;
1726 execbuf.batch_len = used;
1727 execbuf.cliprects_ptr = 0;
1728 execbuf.num_cliprects = 0;
1729 execbuf.DR1 = 0;
1730 execbuf.DR4 = 0;
1731 execbuf.flags = flags;
1732 if (ctx == NULL)
1733 i915_execbuffer2_set_context_id(execbuf, 0);
1734 else
1735 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
1736 execbuf.rsvd2 = 0;
1737 if (in_fence != -1) {
1738 execbuf.rsvd2 = in_fence;
1739 execbuf.flags |= I915_EXEC_FENCE_IN;
1740 }
1741 if (out_fence != NULL) {
1742 *out_fence = -1;
1743 execbuf.flags |= I915_EXEC_FENCE_OUT;
1744 }
1745
1746 if (bufmgr->no_exec)
1747 goto skip_execution;
1748
1749 ret = drmIoctl(bufmgr->fd,
1750 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
1751 &execbuf);
1752 if (ret != 0) {
1753 ret = -errno;
1754 if (ret == -ENOSPC) {
1755 DBG("Execbuffer fails to pin. "
1756 "Estimate: %u. Actual: %u. Available: %u\n",
1757 drm_bacon_gem_estimate_batch_space(bufmgr->exec_bos,
1758 bufmgr->exec_count),
1759 drm_bacon_gem_compute_batch_space(bufmgr->exec_bos,
1760 bufmgr->exec_count),
1761 (unsigned int) bufmgr->gtt_size);
1762 }
1763 }
1764 drm_bacon_update_buffer_offsets2(bufmgr);
1765
1766 if (ret == 0 && out_fence != NULL)
1767 *out_fence = execbuf.rsvd2 >> 32;
1768
1769 skip_execution:
1770 if (INTEL_DEBUG & DEBUG_BUFMGR)
1771 drm_bacon_gem_dump_validation_list(bufmgr);
1772
1773 for (i = 0; i < bufmgr->exec_count; i++) {
1774 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr->exec_bos[i]);
1775
1776 bo_gem->idle = false;
1777
1778 /* Disconnect the buffer from the validate list */
1779 bo_gem->validate_index = -1;
1780 bufmgr->exec_bos[i] = NULL;
1781 }
1782 bufmgr->exec_count = 0;
1783 pthread_mutex_unlock(&bufmgr->lock);
1784
1785 return ret;
1786 }
1787
1788 int
1789 drm_bacon_bo_exec(drm_bacon_bo *bo, int used)
1790 {
1791 return do_exec2(bo, used, NULL, -1, NULL, I915_EXEC_RENDER);
1792 }
1793
1794 int
1795 drm_bacon_bo_mrb_exec(drm_bacon_bo *bo, int used, unsigned int flags)
1796 {
1797 return do_exec2(bo, used, NULL, -1, NULL, flags);
1798 }
1799
1800 int
1801 drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
1802 int used, unsigned int flags)
1803 {
1804 return do_exec2(bo, used, ctx, -1, NULL, flags);
1805 }
1806
1807 int
1808 drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
1809 drm_bacon_context *ctx,
1810 int used,
1811 int in_fence,
1812 int *out_fence,
1813 unsigned int flags)
1814 {
1815 return do_exec2(bo, used, ctx, in_fence, out_fence, flags);
1816 }
1817
1818 static int
1819 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
1820 uint32_t tiling_mode,
1821 uint32_t stride)
1822 {
1823 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1824 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1825 struct drm_i915_gem_set_tiling set_tiling;
1826 int ret;
1827
1828 if (bo_gem->global_name == 0 &&
1829 tiling_mode == bo_gem->tiling_mode &&
1830 stride == bo_gem->stride)
1831 return 0;
1832
1833 memset(&set_tiling, 0, sizeof(set_tiling));
1834 do {
1835 /* set_tiling is slightly broken and overwrites the
1836 * input on the error path, so we have to open code
1837 * drmIoctl.
1838 */
1839 set_tiling.handle = bo_gem->gem_handle;
1840 set_tiling.tiling_mode = tiling_mode;
1841 set_tiling.stride = stride;
1842
1843 ret = ioctl(bufmgr->fd,
1844 DRM_IOCTL_I915_GEM_SET_TILING,
1845 &set_tiling);
1846 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
1847 if (ret == -1)
1848 return -errno;
1849
1850 bo_gem->tiling_mode = set_tiling.tiling_mode;
1851 bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1852 bo_gem->stride = set_tiling.stride;
1853 return 0;
1854 }
1855
1856 int
1857 drm_bacon_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
1858 uint32_t stride)
1859 {
1860 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1861 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1862 int ret;
1863
1864 /* Linear buffers have no stride. By ensuring that we only ever use
1865 * stride 0 with linear buffers, we simplify our code.
1866 */
1867 if (*tiling_mode == I915_TILING_NONE)
1868 stride = 0;
1869
1870 ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
1871 if (ret == 0)
1872 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
1873
1874 *tiling_mode = bo_gem->tiling_mode;
1875 return ret;
1876 }
1877
1878 int
1879 drm_bacon_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
1880 uint32_t *swizzle_mode)
1881 {
1882 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1883
1884 *tiling_mode = bo_gem->tiling_mode;
1885 *swizzle_mode = bo_gem->swizzle_mode;
1886 return 0;
1887 }
1888
1889 drm_bacon_bo *
1890 drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
1891 {
1892 int ret;
1893 uint32_t handle;
1894 drm_bacon_bo_gem *bo_gem;
1895 struct drm_i915_gem_get_tiling get_tiling;
1896
1897 pthread_mutex_lock(&bufmgr->lock);
1898 ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1899 if (ret) {
1900 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
1901 pthread_mutex_unlock(&bufmgr->lock);
1902 return NULL;
1903 }
1904
1905 /*
1906 * See if the kernel has already returned this buffer to us. Just as
1907 * for named buffers, we must not create two bo's pointing at the same
1908 * kernel object
1909 */
1910 bo_gem = hash_find_bo(bufmgr->handle_table, handle);
1911 if (bo_gem) {
1912 drm_bacon_bo_reference(&bo_gem->bo);
1913 goto out;
1914 }
1915
1916 bo_gem = calloc(1, sizeof(*bo_gem));
1917 if (!bo_gem)
1918 goto out;
1919
1920 p_atomic_set(&bo_gem->refcount, 1);
1921 list_inithead(&bo_gem->vma_list);
1922
1923 /* Determine size of bo. The fd-to-handle ioctl really should
1924 * return the size, but it doesn't. If we have kernel 3.12 or
1925 * later, we can lseek on the prime fd to get the size. Older
1926 * kernels will just fail, in which case we fall back to the
1927 * provided (estimated or guessed) size. */
1928 ret = lseek(prime_fd, 0, SEEK_END);
1929 if (ret != -1)
1930 bo_gem->bo.size = ret;
1931 else
1932 bo_gem->bo.size = size;
1933
1934 bo_gem->bo.handle = handle;
1935 bo_gem->bo.bufmgr = bufmgr;
1936
1937 bo_gem->gem_handle = handle;
1938 _mesa_hash_table_insert(bufmgr->handle_table,
1939 &bo_gem->gem_handle, bo_gem);
1940
1941 bo_gem->name = "prime";
1942 bo_gem->validate_index = -1;
1943 bo_gem->used_as_reloc_target = false;
1944 bo_gem->has_error = false;
1945 bo_gem->reusable = false;
1946
1947 memclear(get_tiling);
1948 get_tiling.handle = bo_gem->gem_handle;
1949 if (drmIoctl(bufmgr->fd,
1950 DRM_IOCTL_I915_GEM_GET_TILING,
1951 &get_tiling))
1952 goto err;
1953
1954 bo_gem->tiling_mode = get_tiling.tiling_mode;
1955 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1956 /* XXX stride is unknown */
1957 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
1958
1959 out:
1960 pthread_mutex_unlock(&bufmgr->lock);
1961 return &bo_gem->bo;
1962
1963 err:
1964 drm_bacon_gem_bo_free(&bo_gem->bo);
1965 pthread_mutex_unlock(&bufmgr->lock);
1966 return NULL;
1967 }
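
/* Illustrative import sketch (assumptions: "dmabuf_fd" was received from
 * another process or driver, and "est_size" is the caller's size estimate,
 * used only if the lseek(SEEK_END) probe above is unsupported):
 *
 *    drm_bacon_bo *bo =
 *       drm_bacon_bo_gem_create_from_prime(bufmgr, dmabuf_fd, est_size);
 *
 *    if (bo) {
 *       uint32_t tiling, swizzle;
 *       drm_bacon_bo_get_tiling(bo, &tiling, &swizzle);
 *    }
 *
 * Imported buffers are never returned to the reuse cache (reusable is
 * cleared above), since the underlying pages are shared with the exporter.
 */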
1968
1969 int
1970 drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
1971 {
1972 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1973 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1974
1975 if (drmPrimeHandleToFD(bufmgr->fd, bo_gem->gem_handle,
1976 DRM_CLOEXEC, prime_fd) != 0)
1977 return -errno;
1978
1979 bo_gem->reusable = false;
1980
1981 return 0;
1982 }
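
/* Illustrative export sketch: the resulting fd can be handed to another
 * process or API and should be closed by the caller once passed on.
 * Exporting also drops the buffer from the reuse cache, as seen above.
 * "send_fd_to_consumer" stands in for whatever IPC the caller uses and is
 * an assumption of the example.
 *
 *    int fd = -1;
 *
 *    if (drm_bacon_bo_gem_export_to_prime(bo, &fd) == 0) {
 *       send_fd_to_consumer(fd);
 *       close(fd);
 *    }
 */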
1983
1984 int
1985 drm_bacon_bo_flink(drm_bacon_bo *bo, uint32_t *name)
1986 {
1987 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1988 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1989
1990 if (!bo_gem->global_name) {
1991 struct drm_gem_flink flink;
1992
1993 memclear(flink);
1994 flink.handle = bo_gem->gem_handle;
1995 if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1996 return -errno;
1997
1998 pthread_mutex_lock(&bufmgr->lock);
1999 if (!bo_gem->global_name) {
2000 bo_gem->global_name = flink.name;
2001 bo_gem->reusable = false;
2002
2003 _mesa_hash_table_insert(bufmgr->name_table,
2004 &bo_gem->global_name, bo_gem);
2005 }
2006 pthread_mutex_unlock(&bufmgr->lock);
2007 }
2008
2009 *name = bo_gem->global_name;
2010 return 0;
2011 }
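
/* Illustrative flink sketch: the returned name is a global GEM name that
 * any process with the device open can use, so treat it as a shared (and
 * guessable) identifier; prime fds are generally preferred for new code.
 * "share_name_with_compositor" is a placeholder for the caller's IPC.
 *
 *    uint32_t name;
 *
 *    if (drm_bacon_bo_flink(bo, &name) == 0)
 *       share_name_with_compositor(name);
 *
 * Flinked buffers are also dropped from the reuse cache (above), since
 * another process may still be using them after we release our reference.
 */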
2012
2013 /**
2014 * Enables unlimited caching of buffer objects for reuse.
2015 *
2016 * This is potentially very memory expensive, as the cache at each bucket
2017 * size is only bounded by how many buffers of that size we've managed to have
2018 * in flight at once.
2019 */
2020 void
2021 drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
2022 {
2023 bufmgr->bo_reuse = true;
2024 }
2025
2026 /**
2027 * Return the additional aperture space required by the tree of buffer objects
2028 * rooted at bo.
2029 */
2030 static int
2031 drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
2032 {
2033 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2034 int i;
2035 int total = 0;
2036
2037 if (bo == NULL || bo_gem->included_in_check_aperture)
2038 return 0;
2039
2040 total += bo->size;
2041 bo_gem->included_in_check_aperture = true;
2042
2043 for (i = 0; i < bo_gem->reloc_count; i++)
2044 total +=
2045 drm_bacon_gem_bo_get_aperture_space(bo_gem->reloc_bos[i]);
2046
2047 return total;
2048 }
2049
2050 /**
2051 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
2052 * for the next drm_bacon_bufmgr_check_aperture_space() call.
2053 */
2054 static void
2055 drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
2056 {
2057 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2058 int i;
2059
2060 if (bo == NULL || !bo_gem->included_in_check_aperture)
2061 return;
2062
2063 bo_gem->included_in_check_aperture = false;
2064
2065 for (i = 0; i < bo_gem->reloc_count; i++)
2066 drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->reloc_bos[i]);
2067 }
2068
2069 /**
2070 * Return a conservative estimate for the amount of aperture required
2071 * for a collection of buffers. This may double-count some buffers.
2072 */
2073 static unsigned int
2074 drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
2075 {
2076 int i;
2077 unsigned int total = 0;
2078
2079 for (i = 0; i < count; i++) {
2080 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];
2081 if (bo_gem != NULL)
2082 total += bo_gem->reloc_tree_size;
2083 }
2084 return total;
2085 }
2086
2087 /**
2088 * Return the amount of aperture needed for a collection of buffers.
2089 * This avoids double counting any buffers, at the cost of looking
2090 * at every buffer in the set.
2091 */
2092 static unsigned int
2093 drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
2094 {
2095 int i;
2096 unsigned int total = 0;
2097
2098 for (i = 0; i < count; i++) {
2099 total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
2100 /* For the first buffer object in the array, we get an
2101 * accurate count back for its reloc_tree size (since nothing
2102 * had been flagged as being counted yet). We can save that
2103 * value out as a more conservative reloc_tree_size that
2104 * avoids double-counting target buffers. Since the first
2105 * buffer happens to usually be the batch buffer in our
2106 * callers, this can pull us back from doing the tree
2107 * walk on every new batch emit.
2108 */
2109 if (i == 0) {
2110 drm_bacon_bo_gem *bo_gem =
2111 (drm_bacon_bo_gem *) bo_array[i];
2112 bo_gem->reloc_tree_size = total;
2113 }
2114 }
2115
2116 for (i = 0; i < count; i++)
2117 drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
2118 return total;
2119 }
2120
2121 /**
2122 * Return -1 if the batchbuffer should be flushed before attempting to
2123 * emit rendering referencing the buffers pointed to by bo_array.
2124 *
2125 * This is required because if we try to emit a batchbuffer with relocations
2126 * to a tree of buffers that won't simultaneously fit in the aperture,
2127 * the rendering will return an error at a point where the software is not
2128 * prepared to recover from it.
2129 *
2130  * However, we also want to emit the batchbuffer well before we reach the
2131  * limit: if a series of batchbuffers each references buffers covering
2132  * almost all of the aperture, then at each emit we end up waiting to
2133  * evict a buffer from the previous rendering, and performance becomes
2134  * effectively synchronous.  By emitting smaller batchbuffers, we eat some
2135  * CPU overhead to get better parallelism.
2136 */
2137 int
2138 drm_bacon_bufmgr_check_aperture_space(drm_bacon_bo **bo_array, int count)
2139 {
2140 drm_bacon_bufmgr *bufmgr = bo_array[0]->bufmgr;
2141 unsigned int total = 0;
2142 unsigned int threshold = bufmgr->gtt_size * 3 / 4;
2143
2144 total = drm_bacon_gem_estimate_batch_space(bo_array, count);
2145
2146 if (total > threshold)
2147 total = drm_bacon_gem_compute_batch_space(bo_array, count);
2148
2149 if (total > threshold) {
2150 DBG("check_space: overflowed available aperture, "
2151 "%dkb vs %dkb\n",
2152 total / 1024, (int)bufmgr->gtt_size / 1024);
2153 return -ENOSPC;
2154 } else {
2155                 DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
2156 (int)bufmgr->gtt_size / 1024);
2157 return 0;
2158 }
2159 }
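
/* Illustrative usage sketch (the bo names and "flush_batch" are
 * placeholders): callers typically pass the batch buffer first, so that its
 * reloc_tree_size gets cached by drm_bacon_gem_compute_batch_space() above.
 *
 *    drm_bacon_bo *bos[] = { batch_bo, texture_bo, render_target_bo };
 *
 *    if (drm_bacon_bufmgr_check_aperture_space(bos, ARRAY_SIZE(bos)) != 0) {
 *       flush_batch();
 *       ... then retry the rendering with a fresh batchbuffer ...
 *    }
 */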
2160
2161 /*
2162 * Disable buffer reuse for objects which are shared with the kernel
2163 * as scanout buffers
2164  * as scanout buffers.
2165 int
2166 drm_bacon_bo_disable_reuse(drm_bacon_bo *bo)
2167 {
2168 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2169
2170 bo_gem->reusable = false;
2171 return 0;
2172 }
2173
2174 int
2175 drm_bacon_bo_is_reusable(drm_bacon_bo *bo)
2176 {
2177 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2178
2179 return bo_gem->reusable;
2180 }
2181
2182 static int
2183 _drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2184 {
2185 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2186 int i;
2187
2188 for (i = 0; i < bo_gem->reloc_count; i++) {
2189 if (bo_gem->reloc_bos[i] == target_bo)
2190 return 1;
2191 if (bo == bo_gem->reloc_bos[i])
2192 continue;
2193 if (_drm_bacon_gem_bo_references(bo_gem->reloc_bos[i],
2194 target_bo))
2195 return 1;
2196 }
2197
2198 return 0;
2199 }
2200
2201 /** Return true if target_bo is referenced by bo's relocation tree. */
2202 int
2203 drm_bacon_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2204 {
2205 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
2206
2207 if (bo == NULL || target_bo == NULL)
2208 return 0;
2209 if (target_bo_gem->used_as_reloc_target)
2210 return _drm_bacon_gem_bo_references(bo, target_bo);
2211 return 0;
2212 }
2213
2214 static void
2215 add_bucket(drm_bacon_bufmgr *bufmgr, int size)
2216 {
2217 unsigned int i = bufmgr->num_buckets;
2218
2219 assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
2220
2221 list_inithead(&bufmgr->cache_bucket[i].head);
2222 bufmgr->cache_bucket[i].size = size;
2223 bufmgr->num_buckets++;
2224 }
2225
2226 static void
2227 init_cache_buckets(drm_bacon_bufmgr *bufmgr)
2228 {
2229 unsigned long size, cache_max_size = 64 * 1024 * 1024;
2230
2231         /* OK, so power-of-two buckets were too wasteful of memory.
2232          * Give three other sizes between each power of two, to hopefully
2233          * cover things accurately enough.  (The alternative is
2234          * probably to just go for exact matching of sizes, and assume
2235          * that for things like composited window resize the tiled
2236          * width/height alignment and rounding of sizes to pages will
2237          * get us useful cache hit rates anyway.)
2238 */
2239 add_bucket(bufmgr, 4096);
2240 add_bucket(bufmgr, 4096 * 2);
2241 add_bucket(bufmgr, 4096 * 3);
2242
2243 /* Initialize the linked lists for BO reuse cache. */
2244 for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2245 add_bucket(bufmgr, size);
2246
2247 add_bucket(bufmgr, size + size * 1 / 4);
2248 add_bucket(bufmgr, size + size * 2 / 4);
2249 add_bucket(bufmgr, size + size * 3 / 4);
2250 }
2251 }
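
/* For reference, the bucket sizes this produces start out as:
 * 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128 KiB, ...
 * i.e. each power-of-two step plus three evenly spaced intermediate sizes,
 * with the last power-of-two step at 64 MiB (largest bucket 112 MiB).
 */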
2252
2253 void
2254 drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
2255 {
2256 bufmgr->vma_max = limit;
2257
2258 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
2259 }
2260
2261 drm_bacon_context *
2262 drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
2263 {
2264 struct drm_i915_gem_context_create create;
2265 drm_bacon_context *context = NULL;
2266 int ret;
2267
2268 context = calloc(1, sizeof(*context));
2269 if (!context)
2270 return NULL;
2271
2272 memclear(create);
2273 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
2274 if (ret != 0) {
2275 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
2276 strerror(errno));
2277 free(context);
2278 return NULL;
2279 }
2280
2281 context->ctx_id = create.ctx_id;
2282 context->bufmgr = bufmgr;
2283
2284 return context;
2285 }
2286
2287 int
2288 drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
2289 {
2290 if (ctx == NULL)
2291 return -EINVAL;
2292
2293 *ctx_id = ctx->ctx_id;
2294
2295 return 0;
2296 }
2297
2298 void
2299 drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
2300 {
2301 struct drm_i915_gem_context_destroy destroy;
2302 int ret;
2303
2304 if (ctx == NULL)
2305 return;
2306
2307 memclear(destroy);
2308
2309 destroy.ctx_id = ctx->ctx_id;
2310 ret = drmIoctl(ctx->bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
2311 &destroy);
2312 if (ret != 0)
2313 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
2314 strerror(errno));
2315
2316 free(ctx);
2317 }
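
/* Illustrative context lifecycle sketch: the ctx_id obtained here is what
 * gets passed to the kernel with each execbuffer (hand-waved below).
 * drm_bacon_gem_context_destroy() tolerates a NULL context.
 *
 *    drm_bacon_context *ctx = drm_bacon_gem_context_create(bufmgr);
 *    uint32_t ctx_id;
 *
 *    if (ctx && drm_bacon_gem_context_get_id(ctx, &ctx_id) == 0) {
 *       ... submit batches against ctx_id ...
 *    }
 *    drm_bacon_gem_context_destroy(ctx);
 */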
2318
2319 int
2320 drm_bacon_get_reset_stats(drm_bacon_context *ctx,
2321 uint32_t *reset_count,
2322 uint32_t *active,
2323 uint32_t *pending)
2324 {
2325 struct drm_i915_reset_stats stats;
2326 int ret;
2327
2328 if (ctx == NULL)
2329 return -EINVAL;
2330
2331 memclear(stats);
2332
2333 stats.ctx_id = ctx->ctx_id;
2334 ret = drmIoctl(ctx->bufmgr->fd,
2335 DRM_IOCTL_I915_GET_RESET_STATS,
2336 &stats);
2337 if (ret == 0) {
2338 if (reset_count != NULL)
2339 *reset_count = stats.reset_count;
2340
2341 if (active != NULL)
2342 *active = stats.batch_active;
2343
2344 if (pending != NULL)
2345 *pending = stats.batch_pending;
2346 }
2347
2348 return ret;
2349 }
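
/* Illustrative robustness-polling sketch (e.g. for GL_ARB_robustness style
 * reset notification; "last_reset_count" is caller-maintained state):
 *
 *    uint32_t reset_count, active, pending;
 *
 *    if (drm_bacon_get_reset_stats(ctx, &reset_count, &active, &pending) == 0 &&
 *        reset_count != last_reset_count) {
 *       ... a GPU reset occurred; "active" counts this context's batches
 *           executing at reset time, "pending" those still queued ...
 *       last_reset_count = reset_count;
 *    }
 */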
2350
2351 int
2352 drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
2353 uint32_t offset,
2354 uint64_t *result)
2355 {
2356 struct drm_i915_reg_read reg_read;
2357 int ret;
2358
2359 memclear(reg_read);
2360 reg_read.offset = offset;
2361
2362 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
2363
2364 *result = reg_read.val;
2365 return ret;
2366 }
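
/* Illustrative sketch: the kernel only allows a small whitelist of
 * registers through this ioctl.  The 0x2358 offset below is believed to be
 * the render command streamer TIMESTAMP register on these GPUs; treat the
 * specific offset as an assumption of the example.
 *
 *    uint64_t timestamp;
 *
 *    if (drm_bacon_reg_read(bufmgr, 0x2358, &timestamp) == 0)
 *       ... use timestamp, e.g. for GPU/CPU clock correlation ...
 */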
2367
2368 void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
2369 {
2370 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2371 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2372
2373 if (bo_gem->gtt_virtual)
2374 return bo_gem->gtt_virtual;
2375
2376 pthread_mutex_lock(&bufmgr->lock);
2377 if (bo_gem->gtt_virtual == NULL) {
2378 struct drm_i915_gem_mmap_gtt mmap_arg;
2379 void *ptr;
2380
2381 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
2382 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2383
2384 if (bo_gem->map_count++ == 0)
2385 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2386
2387 memclear(mmap_arg);
2388 mmap_arg.handle = bo_gem->gem_handle;
2389
2390 /* Get the fake offset back... */
2391 ptr = MAP_FAILED;
2392 if (drmIoctl(bufmgr->fd,
2393 DRM_IOCTL_I915_GEM_MMAP_GTT,
2394 &mmap_arg) == 0) {
2395 /* and mmap it */
2396 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
2397 MAP_SHARED, bufmgr->fd,
2398 mmap_arg.offset);
2399 }
2400 if (ptr == MAP_FAILED) {
2401 if (--bo_gem->map_count == 0)
2402 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2403 ptr = NULL;
2404 }
2405
2406 bo_gem->gtt_virtual = ptr;
2407 }
2408 pthread_mutex_unlock(&bufmgr->lock);
2409
2410 return bo_gem->gtt_virtual;
2411 }
2412
2413 void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
2414 {
2415 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2416 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2417
2418 if (bo_gem->mem_virtual)
2419 return bo_gem->mem_virtual;
2420
2421 pthread_mutex_lock(&bufmgr->lock);
2422 if (!bo_gem->mem_virtual) {
2423 struct drm_i915_gem_mmap mmap_arg;
2424
2425 if (bo_gem->map_count++ == 0)
2426 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2427
2428 DBG("bo_map: %d (%s), map_count=%d\n",
2429 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2430
2431 memclear(mmap_arg);
2432 mmap_arg.handle = bo_gem->gem_handle;
2433 mmap_arg.size = bo->size;
2434 if (drmIoctl(bufmgr->fd,
2435 DRM_IOCTL_I915_GEM_MMAP,
2436 &mmap_arg)) {
2437 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2438 __FILE__, __LINE__, bo_gem->gem_handle,
2439 bo_gem->name, strerror(errno));
2440 if (--bo_gem->map_count == 0)
2441 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2442 } else {
2443 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2444 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2445 }
2446 }
2447 pthread_mutex_unlock(&bufmgr->lock);
2448
2449 return bo_gem->mem_virtual;
2450 }
2451
2452 void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
2453 {
2454 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2455 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2456
2457 if (bo_gem->wc_virtual)
2458 return bo_gem->wc_virtual;
2459
2460 pthread_mutex_lock(&bufmgr->lock);
2461 if (!bo_gem->wc_virtual) {
2462 struct drm_i915_gem_mmap mmap_arg;
2463
2464 if (bo_gem->map_count++ == 0)
2465 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2466
2467 DBG("bo_map: %d (%s), map_count=%d\n",
2468 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2469
2470 memclear(mmap_arg);
2471 mmap_arg.handle = bo_gem->gem_handle;
2472 mmap_arg.size = bo->size;
2473 mmap_arg.flags = I915_MMAP_WC;
2474 if (drmIoctl(bufmgr->fd,
2475 DRM_IOCTL_I915_GEM_MMAP,
2476 &mmap_arg)) {
2477 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2478 __FILE__, __LINE__, bo_gem->gem_handle,
2479 bo_gem->name, strerror(errno));
2480 if (--bo_gem->map_count == 0)
2481 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2482 } else {
2483 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2484 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2485 }
2486 }
2487 pthread_mutex_unlock(&bufmgr->lock);
2488
2489 return bo_gem->wc_virtual;
2490 }
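
/* Illustrative mapping-choice sketch: map__cpu returns a cached CPU mapping
 * (fast on LLC platforms), map__wc a write-combined CPU mapping (better for
 * streaming writes on non-LLC parts), and map__gtt a mapping through the
 * aperture that handles tiling on access.  None of these perform domain
 * synchronization; the caller must order its accesses against the GPU.
 * "devinfo", "vertex_data" and "size" are assumptions of the example.
 *
 *    void *ptr = devinfo->has_llc ? drm_bacon_gem_bo_map__cpu(bo)
 *                                 : drm_bacon_gem_bo_map__wc(bo);
 *
 *    if (ptr)
 *       memcpy(ptr, vertex_data, size);
 */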
2491
2492 /**
2493 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2494  * and manage buffer objects.
2495 *
2496 * \param fd File descriptor of the opened DRM device.
2497 */
2498 drm_bacon_bufmgr *
2499 drm_bacon_bufmgr_gem_init(struct gen_device_info *devinfo,
2500 int fd, int batch_size)
2501 {
2502 drm_bacon_bufmgr *bufmgr;
2503 struct drm_i915_gem_get_aperture aperture;
2504
2505 bufmgr = calloc(1, sizeof(*bufmgr));
2506 if (bufmgr == NULL)
2507 return NULL;
2508
2509 /* Handles to buffer objects belong to the device fd and are not
2510 * reference counted by the kernel. If the same fd is used by
2511 * multiple parties (threads sharing the same screen bufmgr, or
2512 * even worse the same device fd passed to multiple libraries)
2513 * ownership of those handles is shared by those independent parties.
2514 *
2515 * Don't do this! Ensure that each library/bufmgr has its own device
2516 * fd so that its namespace does not clash with another.
2517 */
2518 bufmgr->fd = fd;
2519
2520 if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
2521 free(bufmgr);
2522 return NULL;
2523 }
2524
2525 memclear(aperture);
2526 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
2527 bufmgr->gtt_size = aperture.aper_available_size;
2528
2529 bufmgr->has_llc = devinfo->has_llc;
2530
2531         /* Let's go with one relocation for every 2 dwords (but round down a bit
2532 * since a power of two will mean an extra page allocation for the reloc
2533 * buffer).
2534 *
2535 * Every 4 was too few for the blender benchmark.
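          *
          * (Illustrative arithmetic: a 32 KiB batch gives
          * 32768 / 4 / 2 - 2 = 4094 relocations.)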
2536 */
2537 bufmgr->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
2538
2539 init_cache_buckets(bufmgr);
2540
2541 list_inithead(&bufmgr->vma_cache);
2542 bufmgr->vma_max = -1; /* unlimited by default */
2543
2544 bufmgr->name_table =
2545 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
2546 bufmgr->handle_table =
2547 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
2548
2549 return bufmgr;
2550 }
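
/* Illustrative bring-up sketch ("devinfo", "screen_fd" and "BATCH_SZ" are
 * assumptions supplied by the screen-creation code):
 *
 *    drm_bacon_bufmgr *bufmgr =
 *       drm_bacon_bufmgr_gem_init(devinfo, screen_fd, BATCH_SZ);
 *
 *    if (bufmgr)
 *       drm_bacon_bufmgr_gem_enable_reuse(bufmgr);
 */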