/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */
#include <util/u_atomic.h>
#include <sys/ioctl.h>
#include <sys/types.h>

#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_macros.h"
#include "libdrm_lists.h"
#include "brw_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#define memclear(s) memset(&s, 0, sizeof(s))

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define MAX2(A, B) ((A) > (B) ? (A) : (B))
static inline int
atomic_add_unless(int *v, int add, int unless)
{
	int c, old;
	c = p_atomic_read(v);
	while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
		c = old;
	return c == unless;
}
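/* Note: atomic_add_unless() returns nonzero when the add was *skipped*,
 * i.e. when the counter already held @unless.  drm_bacon_gem_bo_unreference()
 * below relies on this to detect "refcount was exactly 1" without holding
 * the bufmgr lock.
 */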
/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32 bits.
 */
#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))

/**
 * lower_32_bits - return bits 0-31 of a number
 * @n: the number we're accessing
 */
#define lower_32_bits(n) ((__u32)(n))
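/* The debug code in this file prints 64-bit GTT offsets as two 32-bit
 * halves with these helpers, e.g. (illustrative):
 *
 *	DBG("0x%08x %08x\n", upper_32_bits(bo->offset64),
 *	    lower_32_bits(bo->offset64));
 */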
typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;

struct drm_bacon_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_bacon_bufmgr_gem {
	drm_bacon_bufmgr bufmgr;

	int fd;
	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_bacon_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;
	time_t time;

	drmMMListHead managers;

	drm_bacon_bo_gem *name_table;
	drm_bacon_bo_gem *handle_table;

	drmMMListHead vma_cache;
	int vma_count, vma_open, vma_max;

	int available_fences;
	int pci_device;
	int gen;
	unsigned int has_bsd : 1;
	unsigned int has_blt : 1;
	unsigned int has_relaxed_fencing : 1;
	unsigned int has_llc : 1;
	unsigned int has_wait_timeout : 1;
	unsigned int bo_reuse : 1;
	unsigned int no_exec : 1;
	unsigned int has_vebox : 1;
	unsigned int has_exec_async : 1;
	bool fenced_relocs;

	struct {
		void *ptr;
		uint32_t handle;
	} userptr_active;
} drm_bacon_bufmgr_gem;
#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_bacon_reloc_target_info {
	drm_bacon_bo *bo;
	int flags;
} drm_bacon_reloc_target;
struct _drm_bacon_bo_gem {
	drm_bacon_bo bo;

	int refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 *
	 * List contains both flink named and prime fd'd objects
	 */
	unsigned int global_name;

	UT_hash_handle handle_hh;
	UT_hash_handle name_hh;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	unsigned long kflags;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc.
	 */
	drm_bacon_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Array of BOs that are referenced by this buffer and will be softpinned */
	drm_bacon_bo **softpin_target;
	/** Number of softpinned BOs that are referenced by this buffer */
	int softpin_target_count;
	/** Maximum number of softpinned BOs that are referenced by this buffer */
	int softpin_target_size;

	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;
	/** WC CPU address for the buffer, saved across map/unmap cycles */
	void *wc_virtual;
	/**
	 * Virtual address of the buffer allocated by user, used for userptr
	 * objects only.
	 */
	void *user_virtual;
	int map_count;
	drmMMListHead vma_list;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_bacon_bufmgr_check_aperture_space() total.
	 */
	bool included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	bool used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building
	 * the relocation tree.
	 */
	bool has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	bool reusable;

	/**
	 * Boolean of whether the GPU is definitely not accessing the buffer.
	 *
	 * This is only valid when reusable, since non-reusable
	 * buffers are those that have been shared with other
	 * processes, so we don't know their state.
	 */
	bool idle;

	/**
	 * Boolean of whether this buffer was allocated with userptr
	 */
	bool is_userptr;

	/**
	 * Size in bytes of this buffer and its relocation descendents.
	 *
	 * Used to avoid costly tree walking in
	 * drm_bacon_bufmgr_check_aperture in the common case.
	 */
	unsigned long reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;

	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
	bool mapped_cpu_write;
};
static unsigned int
drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);

static unsigned int
drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);

static int
drm_bacon_gem_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride);

static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
						      time_t time);

static void drm_bacon_gem_bo_unreference(drm_bacon_bo *bo);

static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
{
	return (drm_bacon_bo_gem *)bo;
}
static unsigned long
drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr_gem *bufmgr_gem, unsigned long size,
			   uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just need multiples of page size for tiling */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (bufmgr_gem->gen == 3) {
		min_size = 1024*1024;
		max_size = 128*1024*1024;
	} else {
		min_size = 512*1024;
		max_size = 64*1024*1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	/* Do we need to allocate every page for the fence? */
	if (bufmgr_gem->has_relaxed_fencing)
		return ROUND_UP_TO(size, 4096);

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}
/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr_gem *bufmgr_gem,
			    unsigned long pitch, uint32_t *tiling_mode)
{
	unsigned long tile_width;
	unsigned long i;

	/* If untiled, then just align it so that we can do rendering
	 * to it with the 3D engine.
	 */
	if (*tiling_mode == I915_TILING_NONE)
		return ALIGN(pitch, 64);

	if (*tiling_mode == I915_TILING_X
	    || (IS_915(bufmgr_gem->pci_device)
		&& *tiling_mode == I915_TILING_Y))
		tile_width = 512;
	else
		tile_width = 128;

	/* 965 is flexible */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(pitch, tile_width);

	/* The older hardware has a maximum pitch of 8192 with tiled
	 * surfaces, so fall back to untiled if it's too large.
	 */
	if (pitch > 8192) {
		*tiling_mode = I915_TILING_NONE;
		return ALIGN(pitch, 64);
	}

	/* Pre-965 needs power of two tile width */
	for (i = tile_width; i < pitch; i <<= 1)
		;

	return i;
}
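/* Map an allocation size to the smallest cache bucket that can hold it;
 * NULL means the request is larger than any bucket and will not be cached.
 */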
static struct drm_bacon_gem_bo_bucket *
drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr_gem *bufmgr_gem,
				 unsigned long size)
{
	int i;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_bacon_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		if (bucket->size >= size) {
			return bucket;
		}
	}

	return NULL;
}
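/* Debug helper: dumps the current validation list; a "*" marks entries that
 * carry EXEC_OBJECT_PINNED (softpinned) in their kernel flags.
 */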
static void
drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr_gem *bufmgr_gem)
{
	int i, j;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
		drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
			    bo_gem->name);
			continue;
		}

		for (j = 0; j < bo_gem->reloc_count; j++) {
			drm_bacon_bo *target_bo = bo_gem->reloc_target_info[j].bo;
			drm_bacon_bo_gem *target_gem =
			    (drm_bacon_bo_gem *) target_bo;

			DBG("%2d: %d %s(%s)@0x%08x %08x -> "
			    "%d (%s)@0x%08x %08x + 0x%08x\n",
			    i,
			    bo_gem->gem_handle,
			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
			    bo_gem->name,
			    upper_32_bits(bo_gem->relocs[j].offset),
			    lower_32_bits(bo_gem->relocs[j].offset),
			    target_gem->gem_handle,
			    target_gem->name,
			    upper_32_bits(target_bo->offset64),
			    lower_32_bits(target_bo->offset64),
			    bo_gem->relocs[j].delta);
		}

		for (j = 0; j < bo_gem->softpin_target_count; j++) {
			drm_bacon_bo *target_bo = bo_gem->softpin_target[j];
			drm_bacon_bo_gem *target_gem =
			    (drm_bacon_bo_gem *) target_bo;
			DBG("%2d: %d %s(%s) -> "
			    "%d *(%s)@0x%08x %08x\n",
			    i,
			    bo_gem->gem_handle,
			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
			    bo_gem->name,
			    target_gem->gem_handle,
			    target_gem->name,
			    upper_32_bits(target_bo->offset64),
			    lower_32_bits(target_bo->offset64));
		}
	}
}
static void
drm_bacon_gem_bo_reference(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	p_atomic_inc(&bo_gem->refcount);
}
/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_bacon_add_validate_buffer(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int index;

	if (bo_gem->validate_index != -1)
		return;

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = bo->align;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}
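/* drm_bacon_add_validate_buffer2() below is the execbuffer2 variant: in
 * addition to the entry fields above it carries per-object kernel flags
 * (fence requirements, softpin state from bo_gem->kflags) and the presumed
 * offset of the buffer.
 */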
static void
drm_bacon_add_validate_buffer2(drm_bacon_bo *bo, int need_fence)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
	int index;
	unsigned long flags;

	flags = 0;
	if (need_fence)
		flags |= EXEC_OBJECT_NEEDS_FENCE;

	if (bo_gem->validate_index != -1) {
		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
		return;
	}

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec2_objects =
			realloc(bufmgr_gem->exec2_objects,
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
		bufmgr_gem->exec_bos =
			realloc(bufmgr_gem->exec_bos,
				sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = bo->align;
	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
	bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}
#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

static void
drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr_gem *bufmgr_gem,
				      drm_bacon_bo_gem *bo_gem,
				      unsigned int alignment)
{
	unsigned int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far less flexible in terms of tiling,
	 * and require tiled buffers to be size-aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture. Optimal packing is for wimps.
	 */
	size = bo_gem->bo.size;
	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
		unsigned int min_size;

		if (bufmgr_gem->has_relaxed_fencing) {
			if (bufmgr_gem->gen == 3)
				min_size = 1024*1024;
			else
				min_size = 512*1024;

			while (min_size < size)
				min_size *= 2;
		} else
			min_size = size;

		/* Account for worst-case alignment. */
		alignment = MAX2(alignment, min_size);
	}

	bo_gem->reloc_tree_size = size + alignment;
}
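/* Worked example: a 1300KB X-tiled buffer on gen3 with relaxed fencing
 * doubles min_size from 1MB to 2MB (the first power of two >= the size),
 * so reloc_tree_size charges 1300KB + 2MB of aperture for the worst case.
 */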
static int
drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	unsigned int max_relocs = bufmgr_gem->max_relocs;

	if (bo->size / 4 < max_relocs)
		max_relocs = bo->size / 4;

	bo_gem->relocs = malloc(max_relocs *
				sizeof(struct drm_i915_gem_relocation_entry));
	bo_gem->reloc_target_info = malloc(max_relocs *
					   sizeof(drm_bacon_reloc_target));
	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
		bo_gem->has_error = true;

		free (bo_gem->relocs);
		bo_gem->relocs = NULL;

		free (bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;

		return 1;
	}

	return 0;
}
static int
drm_bacon_gem_bo_busy(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_busy busy;
	int ret;

	if (bo_gem->reusable && bo_gem->idle)
		return false;

	memclear(busy);
	busy.handle = bo_gem->gem_handle;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
	if (ret == 0)
		bo_gem->idle = !busy.busy;

	return (ret == 0 && busy.busy);
}
static int
drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr_gem *bufmgr_gem,
				  drm_bacon_bo_gem *bo_gem, int state)
{
	struct drm_i915_gem_madvise madv;

	memclear(madv);
	madv.handle = bo_gem->gem_handle;
	madv.madv = state;
	madv.retained = 1;
	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

	return madv.retained;
}

static int
drm_bacon_gem_bo_madvise(drm_bacon_bo *bo, int madv)
{
	return drm_bacon_gem_bo_madvise_internal
		((drm_bacon_bufmgr_gem *) bo->bufmgr,
		 (drm_bacon_bo_gem *) bo,
		 madv);
}
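/* Cached buffers are marked I915_MADV_DONTNEED so the kernel may reclaim
 * their pages under memory pressure; madv.retained reports whether the
 * backing storage survived, and I915_MADV_WILLNEED re-arms a buffer before
 * it is handed back out of the cache.
 */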
/* drop the oldest entries that have been purged by the kernel */
static void
drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr_gem *bufmgr_gem,
				    struct drm_bacon_gem_bo_bucket *bucket)
{
	while (!DRMLISTEMPTY(&bucket->head)) {
		drm_bacon_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
				      bucket->head.next, head);
		if (drm_bacon_gem_bo_madvise_internal
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
			break;

		DRMLISTDEL(&bo_gem->head);
		drm_bacon_gem_bo_free(&bo_gem->bo);
	}
}
static drm_bacon_bo *
drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
				const char *name,
				unsigned long size,
				unsigned long flags,
				uint32_t tiling_mode,
				unsigned long stride,
				unsigned int alignment)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
	drm_bacon_bo_gem *bo_gem;
	unsigned int page_size = getpagesize();
	int ret;
	struct drm_bacon_gem_bo_bucket *bucket;
	bool alloc_from_cache;
	unsigned long bo_size;
	bool for_render = false;

	if (flags & BO_ALLOC_FOR_RENDER)
		for_render = true;

	/* Round the allocated size up to a power of two number of pages. */
	bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr_gem, size);

	/* If we don't have caching at this size, don't actually round the
	 * allocation up.
	 */
	if (bucket == NULL) {
		bo_size = size;
		if (bo_size < page_size)
			bo_size = page_size;
	} else {
		bo_size = bucket->size;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Get a buffer out of the cache if available */
retry:
	alloc_from_cache = false;
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
		if (for_render) {
			/* Allocate new render-target BOs from the tail (MRU)
			 * of the list, as it will likely be hot in the GPU
			 * cache and in the aperture for us.
			 */
			bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
					      bucket->head.prev, head);
			DRMLISTDEL(&bo_gem->head);
			alloc_from_cache = true;
			bo_gem->bo.align = alignment;
		} else {
			assert(alignment == 0);
			/* For non-render-target BOs (where we're probably
			 * going to map it first thing in order to fill it
			 * with data), check if the last BO in the cache is
			 * unbusy, and only reuse in that case. Otherwise,
			 * allocating a new buffer is probably faster than
			 * waiting for the GPU to finish.
			 */
			bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
					      bucket->head.next, head);
			if (!drm_bacon_gem_bo_busy(&bo_gem->bo)) {
				alloc_from_cache = true;
				DRMLISTDEL(&bo_gem->head);
			}
		}

		if (alloc_from_cache) {
			if (!drm_bacon_gem_bo_madvise_internal
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
				drm_bacon_gem_bo_free(&bo_gem->bo);
				drm_bacon_gem_bo_cache_purge_bucket(bufmgr_gem,
								    bucket);
				goto retry;
			}

			if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
								 tiling_mode,
								 stride)) {
				drm_bacon_gem_bo_free(&bo_gem->bo);
				goto retry;
			}
		}
	}

	if (!alloc_from_cache) {
		struct drm_i915_gem_create create;

		bo_gem = calloc(1, sizeof(*bo_gem));
		if (!bo_gem)
			goto err;

		/* drm_bacon_gem_bo_free calls DRMLISTDEL() for an uninitialized
		   list (vma_list), so better set the list head here */
		DRMINITLISTHEAD(&bo_gem->vma_list);

		bo_gem->bo.size = bo_size;

		memclear(create);
		create.size = bo_size;

		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_CREATE,
			       &create);
		if (ret != 0) {
			free(bo_gem);
			goto err;
		}

		bo_gem->gem_handle = create.handle;
		HASH_ADD(handle_hh, bufmgr_gem->handle_table,
			 gem_handle, sizeof(bo_gem->gem_handle),
			 bo_gem);

		bo_gem->bo.handle = bo_gem->gem_handle;
		bo_gem->bo.bufmgr = bufmgr;
		bo_gem->bo.align = alignment;

		bo_gem->tiling_mode = I915_TILING_NONE;
		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		bo_gem->stride = 0;

		if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
							 tiling_mode,
							 stride))
			goto err_free;
	}

	bo_gem->name = name;
	p_atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->has_error = false;
	bo_gem->reusable = true;

	drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
	pthread_mutex_unlock(&bufmgr_gem->lock);

	DBG("bo_create: buf %d (%s) %ldb\n",
	    bo_gem->gem_handle, bo_gem->name, size);

	return &bo_gem->bo;

err_free:
	drm_bacon_gem_bo_free(&bo_gem->bo);
err:
	pthread_mutex_unlock(&bufmgr_gem->lock);
	return NULL;
}
static drm_bacon_bo *
drm_bacon_gem_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
				  const char *name,
				  unsigned long size,
				  unsigned int alignment)
{
	return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
					       BO_ALLOC_FOR_RENDER,
					       I915_TILING_NONE, 0,
					       alignment);
}

static drm_bacon_bo *
drm_bacon_gem_bo_alloc(drm_bacon_bufmgr *bufmgr,
		       const char *name,
		       unsigned long size,
		       unsigned int alignment)
{
	return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
					       I915_TILING_NONE, 0, 0);
}
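/* Typical use (illustrative, assuming the public drm_bacon_bo_alloc_for_render()
 * dispatch wrapper): render targets pass BO_ALLOC_FOR_RENDER so the cache
 * hands back the most-recently-used entry without a busy check:
 *
 *	bo = drm_bacon_bo_alloc_for_render(bufmgr, "renderbuffer",
 *					   size, 4096);
 */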
static drm_bacon_bo *
drm_bacon_gem_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
			     int x, int y, int cpp, uint32_t *tiling_mode,
			     unsigned long *pitch, unsigned long flags)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
	unsigned long size, stride;
	uint32_t tiling;

	do {
		unsigned long aligned_y, height_alignment;

		tiling = *tiling_mode;

		/* If we're tiled, our allocations are in 8 or 32-row blocks,
		 * so failure to align our height means that we won't allocate
		 * enough pages.
		 *
		 * If we're untiled, we still have to align to 2 rows high
		 * because the data port accesses 2x2 blocks even if the
		 * bottom row isn't to be rendered, so failure to align means
		 * we could walk off the end of the GTT and fault.  This is
		 * documented on 965, and may be the case on older chipsets
		 * too so we try to be careful.
		 */
		height_alignment = 2;

		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
			height_alignment = 16;
		else if (tiling == I915_TILING_X
			|| (IS_915(bufmgr_gem->pci_device)
			    && tiling == I915_TILING_Y))
			height_alignment = 8;
		else if (tiling == I915_TILING_Y)
			height_alignment = 32;
		aligned_y = ALIGN(y, height_alignment);

		stride = x * cpp;
		stride = drm_bacon_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
		size = stride * aligned_y;
		size = drm_bacon_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
	} while (*tiling_mode != tiling);
	*pitch = stride;

	if (tiling == I915_TILING_NONE)
		stride = 0;

	return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
					       tiling, stride, 0);
}
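/* Note the do/while above: tile_size()/tile_pitch() may reset *tiling_mode
 * to I915_TILING_NONE (e.g. pitch over 8192 on pre-965), in which case the
 * pitch and size are recomputed once more for the untiled case.
 */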
static drm_bacon_bo *
drm_bacon_gem_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
			       const char *name,
			       void *addr,
			       uint32_t tiling_mode,
			       uint32_t stride,
			       unsigned long size,
			       unsigned long flags)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
	drm_bacon_bo_gem *bo_gem;
	int ret;
	struct drm_i915_gem_userptr userptr;

	/* Tiling with userptr surfaces is not supported
	 * on all hardware so refuse it for time being.
	 */
	if (tiling_mode != I915_TILING_NONE)
		return NULL;

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		return NULL;

	p_atomic_set(&bo_gem->refcount, 1);
	DRMINITLISTHEAD(&bo_gem->vma_list);

	bo_gem->bo.size = size;

	memclear(userptr);
	userptr.user_ptr = (__u64)((unsigned long)addr);
	userptr.user_size = size;
	userptr.flags = flags;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_USERPTR,
		       &userptr);
	if (ret != 0) {
		DBG("bo_create_userptr: "
		    "ioctl failed with user ptr %p size 0x%lx, "
		    "user flags 0x%lx\n", addr, size, flags);
		free(bo_gem);
		return NULL;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);

	bo_gem->gem_handle = userptr.handle;
	bo_gem->bo.handle = bo_gem->gem_handle;
	bo_gem->bo.bufmgr = bufmgr;
	bo_gem->is_userptr = true;
	bo_gem->bo.virtual = addr;
	/* Save the address provided by user */
	bo_gem->user_virtual = addr;
	bo_gem->tiling_mode = I915_TILING_NONE;
	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
	bo_gem->stride = 0;

	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
		 gem_handle, sizeof(bo_gem->gem_handle),
		 bo_gem);

	bo_gem->name = name;
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->has_error = false;
	bo_gem->reusable = false;

	drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
	pthread_mutex_unlock(&bufmgr_gem->lock);

	DBG("bo_create_userptr: "
	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
	    addr, bo_gem->gem_handle, bo_gem->name,
	    size, stride, tiling_mode);

	return &bo_gem->bo;
}
static bool
has_userptr(drm_bacon_bufmgr_gem *bufmgr_gem)
{
	int ret;
	void *ptr;
	long pgsz;
	struct drm_i915_gem_userptr userptr;

	pgsz = sysconf(_SC_PAGESIZE);
	assert(pgsz > 0);

	ret = posix_memalign(&ptr, pgsz, pgsz);
	if (ret) {
		DBG("Failed to get a page (%ld) for userptr detection!\n",
			pgsz);
		return false;
	}

	memclear(userptr);
	userptr.user_ptr = (__u64)(unsigned long)ptr;
	userptr.user_size = pgsz;

retry:
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
	if (ret) {
		if (errno == ENODEV && userptr.flags == 0) {
			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
			goto retry;
		}
		free(ptr);
		return false;
	}

	/* We don't release the userptr bo here as we want to keep the
	 * kernel mm tracking alive for our lifetime. The first time we
	 * create a userptr object the kernel has to install a mmu_notifier
	 * which is a heavyweight operation (e.g. it requires taking all
	 * mm_locks and stop_machine()).
	 */
	bufmgr_gem->userptr_active.ptr = ptr;
	bufmgr_gem->userptr_active.handle = userptr.handle;

	return true;
}
static drm_bacon_bo *
check_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
		       const char *name,
		       void *addr,
		       uint32_t tiling_mode,
		       uint32_t stride,
		       unsigned long size,
		       unsigned long flags)
{
	if (has_userptr((drm_bacon_bufmgr_gem *)bufmgr))
		bufmgr->bo_alloc_userptr = drm_bacon_gem_bo_alloc_userptr;
	else
		bufmgr->bo_alloc_userptr = NULL;

	return drm_bacon_bo_alloc_userptr(bufmgr, name, addr,
					  tiling_mode, stride, size, flags);
}
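/* check_bo_alloc_userptr() is presumably installed as the initial
 * bo_alloc_userptr hook when the bufmgr is created (not shown in this
 * excerpt): the first call probes the kernel once, replaces the hook with
 * either the real implementation or NULL, and then re-dispatches.
 */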
/**
 * Returns a drm_bacon_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_bacon_bo *
drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
				  const char *name,
				  unsigned int handle)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
	drm_bacon_bo_gem *bo_gem;
	int ret;
	struct drm_gem_open open_arg;
	struct drm_i915_gem_get_tiling get_tiling;

	/* At the moment most applications only have a few named bo.
	 * For instance, in a DRI client only the render buffers passed
	 * between X and the client are named. And since X returns the
	 * alternating names for the front/back buffer a linear search
	 * provides a sufficiently fast match.
	 */
	pthread_mutex_lock(&bufmgr_gem->lock);
	HASH_FIND(name_hh, bufmgr_gem->name_table,
		  &handle, sizeof(handle), bo_gem);
	if (bo_gem) {
		drm_bacon_gem_bo_reference(&bo_gem->bo);
		goto out;
	}

	memclear(open_arg);
	open_arg.name = handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_GEM_OPEN,
		       &open_arg);
	if (ret != 0) {
		DBG("Couldn't reference %s handle 0x%08x: %s\n",
		    name, handle, strerror(errno));
		bo_gem = NULL;
		goto out;
	}
	/* Now see if someone has used a prime handle to get this
	 * object from the kernel before by looking through the list
	 * again for a matching gem_handle
	 */
	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
		  &open_arg.handle, sizeof(open_arg.handle), bo_gem);
	if (bo_gem) {
		drm_bacon_gem_bo_reference(&bo_gem->bo);
		goto out;
	}

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		goto out;

	p_atomic_set(&bo_gem->refcount, 1);
	DRMINITLISTHEAD(&bo_gem->vma_list);

	bo_gem->bo.size = open_arg.size;
	bo_gem->bo.offset = 0;
	bo_gem->bo.offset64 = 0;
	bo_gem->bo.virtual = NULL;
	bo_gem->bo.bufmgr = bufmgr;
	bo_gem->name = name;
	bo_gem->validate_index = -1;
	bo_gem->gem_handle = open_arg.handle;
	bo_gem->bo.handle = open_arg.handle;
	bo_gem->global_name = handle;
	bo_gem->reusable = false;

	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
	HASH_ADD(name_hh, bufmgr_gem->name_table,
		 global_name, sizeof(bo_gem->global_name), bo_gem);

	memclear(get_tiling);
	get_tiling.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_TILING,
		       &get_tiling);
	if (ret != 0)
		goto err_unref;

	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	/* XXX stride is unknown */
	drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

out:
	pthread_mutex_unlock(&bufmgr_gem->lock);
	return &bo_gem->bo;

err_unref:
	drm_bacon_gem_bo_free(&bo_gem->bo);
	pthread_mutex_unlock(&bufmgr_gem->lock);
	return NULL;
}
static void
drm_bacon_gem_bo_free(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_gem_close close;
	int ret;

	DRMLISTDEL(&bo_gem->vma_list);
	if (bo_gem->mem_virtual) {
		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
		bufmgr_gem->vma_count--;
	}
	if (bo_gem->wc_virtual) {
		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
		drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
		bufmgr_gem->vma_count--;
	}
	if (bo_gem->gtt_virtual) {
		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
		bufmgr_gem->vma_count--;
	}

	if (bo_gem->global_name)
		HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
	HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);

	/* Close this object */
	memclear(close);
	close.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
	if (ret != 0) {
		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
	}
	free(bo);
}
static void
drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
{
#ifdef HAVE_VALGRIND
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	if (bo_gem->mem_virtual)
		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);

	if (bo_gem->wc_virtual)
		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);

	if (bo_gem->gtt_virtual)
		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
#endif
}
/** Frees all cached buffers significantly older than @time. */
static void
drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr_gem *bufmgr_gem, time_t time)
{
	int i;

	if (bufmgr_gem->time == time)
		return;

	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_bacon_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];

		while (!DRMLISTEMPTY(&bucket->head)) {
			drm_bacon_bo_gem *bo_gem;

			bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
					      bucket->head.next, head);
			if (time - bo_gem->free_time <= 1)
				break;

			DRMLISTDEL(&bo_gem->head);

			drm_bacon_gem_bo_free(&bo_gem->bo);
		}
	}

	bufmgr_gem->time = time;
}
static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr_gem *bufmgr_gem)
{
	int limit;

	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);

	if (bufmgr_gem->vma_max < 0)
		return;

	/* We may need to evict a few entries in order to create new mmaps */
	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
	if (limit < 0)
		limit = 0;

	while (bufmgr_gem->vma_count > limit) {
		drm_bacon_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
				      bufmgr_gem->vma_cache.next,
				      vma_list);
		assert(bo_gem->map_count == 0);
		DRMLISTDELINIT(&bo_gem->vma_list);

		if (bo_gem->mem_virtual) {
			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
			bo_gem->mem_virtual = NULL;
			bufmgr_gem->vma_count--;
		}
		if (bo_gem->wc_virtual) {
			drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
			bo_gem->wc_virtual = NULL;
			bufmgr_gem->vma_count--;
		}
		if (bo_gem->gtt_virtual) {
			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
			bo_gem->gtt_virtual = NULL;
			bufmgr_gem->vma_count--;
		}
	}
}
static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr_gem *bufmgr_gem,
				       drm_bacon_bo_gem *bo_gem)
{
	bufmgr_gem->vma_open--;
	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
	if (bo_gem->mem_virtual)
		bufmgr_gem->vma_count++;
	if (bo_gem->wc_virtual)
		bufmgr_gem->vma_count++;
	if (bo_gem->gtt_virtual)
		bufmgr_gem->vma_count++;
	drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
}
static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr_gem *bufmgr_gem,
				      drm_bacon_bo_gem *bo_gem)
{
	bufmgr_gem->vma_open++;
	DRMLISTDEL(&bo_gem->vma_list);
	if (bo_gem->mem_virtual)
		bufmgr_gem->vma_count--;
	if (bo_gem->wc_virtual)
		bufmgr_gem->vma_count--;
	if (bo_gem->gtt_virtual)
		bufmgr_gem->vma_count--;
	drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
}
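/* vma_count tracks cached mmaps of *unmapped* BOs only: close_vma moves a
 * BO's mappings into the purgeable total when its last map goes away, and
 * open_vma takes them back out, so the purge loop above never unmaps a
 * buffer that is currently in use (it asserts map_count == 0).
 */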
static void
drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_bacon_gem_bo_bucket *bucket;
	int i;

	/* Unreference all the target buffers */
	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo != bo) {
			drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
								  reloc_target_info[i].bo,
								  time);
		}
	}
	for (i = 0; i < bo_gem->softpin_target_count; i++)
		drm_bacon_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
							  time);
	bo_gem->reloc_count = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->softpin_target_count = 0;

	DBG("bo_unreference final: %d (%s)\n",
	    bo_gem->gem_handle, bo_gem->name);

	/* release memory associated with this object */
	if (bo_gem->reloc_target_info) {
		free(bo_gem->reloc_target_info);
		bo_gem->reloc_target_info = NULL;
	}
	if (bo_gem->relocs) {
		free(bo_gem->relocs);
		bo_gem->relocs = NULL;
	}
	if (bo_gem->softpin_target) {
		free(bo_gem->softpin_target);
		bo_gem->softpin_target = NULL;
		bo_gem->softpin_target_size = 0;
	}

	/* Clear any left-over mappings */
	if (bo_gem->map_count) {
		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
		bo_gem->map_count = 0;
		drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
		drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
	}

	bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
	/* Put the buffer into our internal cache for reuse if we can. */
	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
	    drm_bacon_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
					      I915_MADV_DONTNEED)) {
		bo_gem->free_time = time;

		bo_gem->name = NULL;
		bo_gem->validate_index = -1;

		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
	} else {
		drm_bacon_gem_bo_free(bo);
	}
}
static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
						      time_t time)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	assert(p_atomic_read(&bo_gem->refcount) > 0);
	if (p_atomic_dec_zero(&bo_gem->refcount))
		drm_bacon_gem_bo_unreference_final(bo, time);
}

static void drm_bacon_gem_bo_unreference(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	assert(p_atomic_read(&bo_gem->refcount) > 0);

	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
		drm_bacon_bufmgr_gem *bufmgr_gem =
		    (drm_bacon_bufmgr_gem *) bo->bufmgr;
		struct timespec time;

		clock_gettime(CLOCK_MONOTONIC, &time);

		pthread_mutex_lock(&bufmgr_gem->lock);

		if (p_atomic_dec_zero(&bo_gem->refcount)) {
			drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
			drm_bacon_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
		}

		pthread_mutex_unlock(&bufmgr_gem->lock);
	}
}
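/* Fast path: atomic_add_unless() drops a reference without the lock as long
 * as the count stays above zero; only the final unreference (count was 1)
 * takes bufmgr_gem->lock, re-checks the count, and tears down under the
 * lock.
 */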
static int drm_bacon_gem_bo_map(drm_bacon_bo *bo, int write_enable)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	if (bo_gem->is_userptr) {
		/* Return the same user ptr */
		bo->virtual = bo_gem->user_virtual;
		return 0;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);

	if (bo_gem->map_count++ == 0)
		drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);

	if (!bo_gem->mem_virtual) {
		struct drm_i915_gem_mmap mmap_arg;

		DBG("bo_map: %d (%s), map_count=%d\n",
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

		memclear(mmap_arg);
		mmap_arg.handle = bo_gem->gem_handle;
		mmap_arg.size = bo->size;
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_MMAP,
			       &mmap_arg);
		if (ret != 0) {
			ret = -errno;
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
			    __FILE__, __LINE__, bo_gem->gem_handle,
			    bo_gem->name, strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
	}
	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->mem_virtual);
	bo->virtual = bo_gem->mem_virtual;

	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
	if (write_enable)
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
	else
		set_domain.write_domain = 0;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
		    __FILE__, __LINE__, bo_gem->gem_handle,
		    strerror(errno));
	}

	if (write_enable)
		bo_gem->mapped_cpu_write = true;

	drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}
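/* Typical CPU upload path (illustrative, assuming the drm_bacon_bo_map()/
 * drm_bacon_bo_unmap() dispatch wrappers from the public API):
 *
 *	drm_bacon_bo_map(bo, 1);		// write_enable
 *	memcpy(bo->virtual, data, size);
 *	drm_bacon_bo_unmap(bo);			// may trigger SW_FINISH
 */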
static int
map_gtt(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int ret;

	if (bo_gem->is_userptr)
		return -EINVAL;

	if (bo_gem->map_count++ == 0)
		drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);

	/* Get a mapping of the buffer if we haven't before. */
	if (bo_gem->gtt_virtual == NULL) {
		struct drm_i915_gem_mmap_gtt mmap_arg;

		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

		memclear(mmap_arg);
		mmap_arg.handle = bo_gem->gem_handle;

		/* Get the fake offset back... */
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_MMAP_GTT,
			       &mmap_arg);
		if (ret != 0) {
			ret = -errno;
			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
			    __FILE__, __LINE__,
			    bo_gem->gem_handle, bo_gem->name,
			    strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
			return ret;
		}

		/* and mmap it */
		bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
					       MAP_SHARED, bufmgr_gem->fd,
					       mmap_arg.offset);
		if (bo_gem->gtt_virtual == MAP_FAILED) {
			bo_gem->gtt_virtual = NULL;
			ret = -errno;
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
			    __FILE__, __LINE__,
			    bo_gem->gem_handle, bo_gem->name,
			    strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
			return ret;
		}
	}

	bo->virtual = bo_gem->gtt_virtual;

	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->gtt_virtual);

	return 0;
}
int
drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	ret = map_gtt(bo);
	if (ret != 0) {
		pthread_mutex_unlock(&bufmgr_gem->lock);
		return ret;
	}

	/* Now move it to the GTT domain so that the GPU and CPU
	 * caches are flushed and the GPU isn't actively using the
	 * buffer.
	 *
	 * The pagefault handler does this domain change for us when
	 * it has unbound the BO from the GTT, but it's up to us to
	 * tell it when we're about to use things if we had done
	 * rendering and it still happens to be bound to the GTT.
	 */
	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		DBG("%s:%d: Error setting domain %d: %s\n",
		    __FILE__, __LINE__, bo_gem->gem_handle,
		    strerror(errno));
	}

	drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}
/**
 * Performs a mapping of the buffer object like the normal GTT
 * mapping, but avoids waiting for the GPU to be done reading from or
 * rendering to the buffer.
 *
 * This is used in the implementation of GL_ARB_map_buffer_range: The
 * user asks to create a buffer, then does a mapping, fills some
 * space, runs a drawing command, then asks to map it again without
 * synchronizing because it guarantees that it won't write over the
 * data that the GPU is busy using (or, more specifically, that if it
 * does write over the data, it acknowledges that rendering is
 * undefined).
 */
int
drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
#ifdef HAVE_VALGRIND
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
#endif
	int ret;

	/* If the CPU cache isn't coherent with the GTT, then use a
	 * regular synchronized mapping.  The problem is that we don't
	 * track where the buffer was last used on the CPU side in
	 * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
	 * we would potentially corrupt the buffer even when the user
	 * does reasonable things.
	 */
	if (!bufmgr_gem->has_llc)
		return drm_bacon_gem_bo_map_gtt(bo);

	pthread_mutex_lock(&bufmgr_gem->lock);

	ret = map_gtt(bo);
	if (ret == 0) {
		drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}
static int drm_bacon_gem_bo_unmap(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int ret = 0;

	if (bo_gem->is_userptr)
		return 0;

	bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;

	pthread_mutex_lock(&bufmgr_gem->lock);

	if (bo_gem->map_count <= 0) {
		DBG("attempted to unmap an unmapped bo\n");
		pthread_mutex_unlock(&bufmgr_gem->lock);
		/* Preserve the old behaviour of just treating this as a
		 * no-op rather than reporting the error.
		 */
		return 0;
	}

	if (bo_gem->mapped_cpu_write) {
		struct drm_i915_gem_sw_finish sw_finish;

		/* Cause a flush to happen if the buffer's pinned for
		 * scanout, so the results show up in a timely manner.
		 * Unlike GTT set domains, this only does work if the
		 * buffer should be scanout-related.
		 */
		memclear(sw_finish);
		sw_finish.handle = bo_gem->gem_handle;
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_SW_FINISH,
			       &sw_finish);
		ret = ret == -1 ? -errno : 0;

		bo_gem->mapped_cpu_write = false;
	}

	/* We need to unmap after every invocation as we cannot track
	 * an open vma for every bo as that will exhaust the system
	 * limits and cause later failures.
	 */
	if (--bo_gem->map_count == 0) {
		drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
		drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

int
drm_bacon_gem_bo_unmap_gtt(drm_bacon_bo *bo)
{
	return drm_bacon_gem_bo_unmap(bo);
}
static int
drm_bacon_gem_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
			 unsigned long size, const void *data)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_pwrite pwrite;
	int ret;

	if (bo_gem->is_userptr)
		return -EINVAL;

	memclear(pwrite);
	pwrite.handle = bo_gem->gem_handle;
	pwrite.offset = offset;
	pwrite.size = size;
	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_PWRITE,
		       &pwrite);
	if (ret != 0) {
		ret = -errno;
		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
		    (int)size, strerror(errno));
	}

	return ret;
}
static int
drm_bacon_gem_get_pipe_from_crtc_id(drm_bacon_bufmgr *bufmgr, int crtc_id)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
	int ret;

	memclear(get_pipe_from_crtc_id);
	get_pipe_from_crtc_id.crtc_id = crtc_id;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
		       &get_pipe_from_crtc_id);
	if (ret != 0) {
		/* We return -1 here to signal that we don't
		 * know which pipe is associated with this crtc.
		 * This lets the caller know that this information
		 * isn't available; using the wrong pipe for
		 * vblank waiting can cause the chipset to lock up.
		 */
		return -1;
	}

	return get_pipe_from_crtc_id.pipe;
}
static int
drm_bacon_gem_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
			     unsigned long size, void *data)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_pread pread;
	int ret;

	if (bo_gem->is_userptr)
		return -EINVAL;

	memclear(pread);
	pread.handle = bo_gem->gem_handle;
	pread.offset = offset;
	pread.size = size;
	pread.data_ptr = (uint64_t) (uintptr_t) data;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_PREAD,
		       &pread);
	if (ret != 0) {
		ret = -errno;
		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
		    (int)size, strerror(errno));
	}

	return ret;
}
/** Waits for all GPU rendering with the object to have completed. */
static void
drm_bacon_gem_bo_wait_rendering(drm_bacon_bo *bo)
{
	drm_bacon_gem_bo_start_gtt_access(bo, 1);
}
/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time. Otherwise some negative
 * return value describes the error. Of particular interest is -ETIME when
 * the wait has failed to yield the desired result.
 *
 * Similar to drm_bacon_gem_bo_wait_rendering except a timeout parameter
 * allows the operation to give up after a certain amount of time. Another
 * subtle difference is that the internal locking semantics differ (this
 * variant does not hold the lock for the duration of the wait). This makes
 * the wait subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait cannot
 * guarantee that the buffer is not re-issued via another thread, or a
 * flinked handle. Userspace must make sure this race does not occur if such
 * precision is important.
 *
 * Note that some kernels have broken the infinite-wait-for-negative-values
 * promise; upgrade to the latest stable kernels if this is the case.
 */
int
drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_wait wait;
	int ret;

	if (!bufmgr_gem->has_wait_timeout) {
		DBG("%s:%d: Timed wait is not supported. Falling back to "
		    "infinite wait\n", __FILE__, __LINE__);
		if (timeout_ns) {
			drm_bacon_gem_bo_wait_rendering(bo);
			return 0;
		} else {
			return drm_bacon_gem_bo_busy(bo) ? -ETIME : 0;
		}
	}

	memclear(wait);
	wait.bo_handle = bo_gem->gem_handle;
	wait.timeout_ns = timeout_ns;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	if (ret == -1)
		return -errno;

	return ret;
}
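/* Example (illustrative): a bounded 1ms wait that tolerates a still-busy BO:
 *
 *	ret = drm_bacon_gem_bo_wait(bo, 1000000);
 *	if (ret == -ETIME)
 *		... the last batch referencing bo has not finished ...
 */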
/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
 *
 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
		    __FILE__, __LINE__, bo_gem->gem_handle,
		    set_domain.read_domains, set_domain.write_domain,
		    strerror(errno));
	}
}
static void
drm_bacon_bufmgr_gem_destroy(drm_bacon_bufmgr *bufmgr)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
	struct drm_gem_close close_bo;
	int i, ret;

	free(bufmgr_gem->exec2_objects);
	free(bufmgr_gem->exec_objects);
	free(bufmgr_gem->exec_bos);

	pthread_mutex_destroy(&bufmgr_gem->lock);

	/* Free any cached buffer objects we were going to reuse */
	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
		struct drm_bacon_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		drm_bacon_bo_gem *bo_gem;

		while (!DRMLISTEMPTY(&bucket->head)) {
			bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
					      bucket->head.next, head);
			DRMLISTDEL(&bo_gem->head);

			drm_bacon_gem_bo_free(&bo_gem->bo);
		}
	}

	/* Release userptr bo kept hanging around for optimisation. */
	if (bufmgr_gem->userptr_active.ptr) {
		memclear(close_bo);
		close_bo.handle = bufmgr_gem->userptr_active.handle;
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
		free(bufmgr_gem->userptr_active.ptr);
		if (ret)
			fprintf(stderr,
				"Failed to release test userptr object! (%d) "
				"i915 kernel driver may not be sane!\n", errno);
	}

	free(bufmgr);
}
/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
static int
do_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
		 drm_bacon_bo *target_bo, uint32_t target_offset,
		 uint32_t read_domains, uint32_t write_domain,
		 bool need_fence)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
	bool fenced_command;

	if (bo_gem->has_error)
		return -ENOMEM;

	if (target_bo_gem->has_error) {
		bo_gem->has_error = true;
		return -ENOMEM;
	}

	/* We never use HW fences for rendering on 965+ */
	if (bufmgr_gem->gen >= 4)
		need_fence = false;

	fenced_command = need_fence;
	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
		need_fence = false;

	/* Create a new relocation list if needed */
	if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
		return -ENOMEM;

	/* Check overflow */
	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);

	assert(offset <= bo->size - 4);
	assert((write_domain & (write_domain - 1)) == 0);

	/* An object needing a fence is a tiled buffer, so it won't have
	 * relocs to other buffers.
	 */
	if (need_fence) {
		assert(target_bo_gem->reloc_count == 0);
		target_bo_gem->reloc_tree_fences = 1;
	}

	/* Make sure that we're not adding a reloc to something whose size has
	 * already been accounted for.
	 */
	assert(!bo_gem->used_as_reloc_target);
	if (target_bo_gem != bo_gem) {
		target_bo_gem->used_as_reloc_target = true;
		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
	}

	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
	if (target_bo != bo)
		drm_bacon_gem_bo_reference(target_bo);
	if (fenced_command)
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
			DRM_INTEL_RELOC_FENCE;
	else
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;

	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
	bo_gem->relocs[bo_gem->reloc_count].target_handle =
	    target_bo_gem->gem_handle;
	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
	bo_gem->reloc_count++;

	return 0;
}
static void
drm_bacon_gem_bo_use_48b_address_range(drm_bacon_bo *bo, uint32_t enable)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	if (enable)
		bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
	else
		bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
}
static int
drm_bacon_gem_bo_add_softpin_target(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;

	if (bo_gem->has_error)
		return -ENOMEM;

	if (target_bo_gem->has_error) {
		bo_gem->has_error = true;
		return -ENOMEM;
	}

	if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
		return -EINVAL;
	if (target_bo_gem == bo_gem)
		return -EINVAL;

	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
		int new_size = bo_gem->softpin_target_size * 2;
		if (new_size == 0)
			new_size = bufmgr_gem->max_relocs;

		bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
				sizeof(drm_bacon_bo *));
		if (!bo_gem->softpin_target)
			return -ENOMEM;

		bo_gem->softpin_target_size = new_size;
	}
	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
	drm_bacon_gem_bo_reference(target_bo);
	bo_gem->softpin_target_count++;

	return 0;
}
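/* Softpinned targets need no relocation entries: add_validate_buffer2()
 * reports the BO's fixed address through exec2_objects[i].offset and the
 * EXEC_OBJECT_PINNED flag in kflags, so the kernel leaves it in place.
 */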
static int
drm_bacon_gem_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
			    drm_bacon_bo *target_bo, uint32_t target_offset,
			    uint32_t read_domains, uint32_t write_domain)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
	drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *)target_bo;

	if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
		return drm_bacon_gem_bo_add_softpin_target(bo, target_bo);

	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
				read_domains, write_domain,
				!bufmgr_gem->fenced_relocs);
}
static int
drm_bacon_gem_bo_emit_reloc_fence(drm_bacon_bo *bo, uint32_t offset,
				  drm_bacon_bo *target_bo,
				  uint32_t target_offset,
				  uint32_t read_domains, uint32_t write_domain)
{
	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
				read_domains, write_domain, true);
}
static int
drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	return bo_gem->reloc_count;
}
/**
 * Removes existing relocation entries in the BO after "start".
 *
 * This allows a user to avoid a two-step process for state setup with
 * counting up all the buffer objects and doing a
 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
 * relocations for the state setup.  Instead, save the state of the
 * batchbuffer including drm_bacon_gem_get_reloc_count(), emit all the
 * state, and then check if it still fits in the aperture.
 *
 * Any further drm_bacon_bufmgr_check_aperture_space() queries
 * involving this buffer in the tree are undefined after this call.
 *
 * This also removes all softpinned targets being referenced by the BO.
 */
static void
drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int i;
	struct timespec time;

	clock_gettime(CLOCK_MONOTONIC, &time);

	assert(bo_gem->reloc_count >= start);

	/* Unreference the cleared target buffers */
	pthread_mutex_lock(&bufmgr_gem->lock);

	for (i = start; i < bo_gem->reloc_count; i++) {
		drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_target_info[i].bo;
		if (&target_bo_gem->bo != bo) {
			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
			drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
								  time.tv_sec);
		}
	}
	bo_gem->reloc_count = start;

	for (i = 0; i < bo_gem->softpin_target_count; i++) {
		drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->softpin_target[i];
		drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
	}
	bo_gem->softpin_target_count = 0;

	pthread_mutex_unlock(&bufmgr_gem->lock);
}
/**
 * Walk the tree of relocations rooted at BO and accumulate the list of
 * validations to be performed and update the relocation buffers with
 * index values into the validation list.
 */
static void
drm_bacon_gem_bo_process_reloc(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int i;

	if (bo_gem->relocs == NULL)
		return;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;

		if (target_bo == bo)
			continue;

		drm_bacon_gem_bo_mark_mmaps_incoherent(bo);

		/* Continue walking the tree depth-first. */
		drm_bacon_gem_bo_process_reloc(target_bo);

		/* Add the target to the validate list */
		drm_bacon_add_validate_buffer(target_bo);
	}
}
static void
drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
	int i;

	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
		return;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
		int need_fence;

		if (target_bo == bo)
			continue;

		drm_bacon_gem_bo_mark_mmaps_incoherent(bo);

		/* Continue walking the tree depth-first. */
		drm_bacon_gem_bo_process_reloc2(target_bo);

		need_fence = (bo_gem->reloc_target_info[i].flags &
			      DRM_INTEL_RELOC_FENCE);

		/* Add the target to the validate list */
		drm_bacon_add_validate_buffer2(target_bo, need_fence);
	}

	for (i = 0; i < bo_gem->softpin_target_count; i++) {
		drm_bacon_bo *target_bo = bo_gem->softpin_target[i];

		if (target_bo == bo)
			continue;

		drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
		drm_bacon_gem_bo_process_reloc2(target_bo);
		drm_bacon_add_validate_buffer2(target_bo, false);
	}
}
static void
drm_bacon_update_buffer_offsets(drm_bacon_bufmgr_gem *bufmgr_gem)
{
	int i;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
		drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

		/* Update the buffer offset */
		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
			    bo_gem->gem_handle, bo_gem->name,
			    upper_32_bits(bo->offset64),
			    lower_32_bits(bo->offset64),
			    upper_32_bits(bufmgr_gem->exec_objects[i].offset),
			    lower_32_bits(bufmgr_gem->exec_objects[i].offset));
			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
			bo->offset = bufmgr_gem->exec_objects[i].offset;
		}
	}
}
static void
drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr_gem *bufmgr_gem)
{
	int i;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
		drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;

		/* Update the buffer offset */
		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
			/* If we're seeing a softpinned object here it means
			 * that the kernel has relocated our object,
			 * indicating a programming error.
			 */
			assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
			    bo_gem->gem_handle, bo_gem->name,
			    upper_32_bits(bo->offset64),
			    lower_32_bits(bo->offset64),
			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
			bo->offset = bufmgr_gem->exec2_objects[i].offset;
		}
	}
}
static int
drm_bacon_gem_bo_exec(drm_bacon_bo *bo, int used,
		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	struct drm_i915_gem_execbuffer execbuf;
	int ret, i;

	if (to_bo_gem(bo)->has_error)
		return -ENOMEM;

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_bacon_gem_bo_process_reloc(bo);

	/* Add the batch buffer to the validation list.  There are no
	 * relocations pointing to it.
	 */
	drm_bacon_add_validate_buffer(bo);

	memclear(execbuf);
	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t) cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_EXECBUFFER,
		       &execbuf);
	if (ret != 0) {
		ret = -errno;
		if (errno == ENOSPC) {
			DBG("Execbuffer fails to pin. "
			    "Estimate: %u. Actual: %u. Available: %u\n",
			    drm_bacon_gem_estimate_batch_space(bufmgr_gem->exec_bos,
							       bufmgr_gem->exec_count),
			    drm_bacon_gem_compute_batch_space(bufmgr_gem->exec_bos,
							      bufmgr_gem->exec_count),
			    (unsigned int)bufmgr_gem->gtt_size);
		}
	}
	drm_bacon_update_buffer_offsets(bufmgr_gem);

	if (bufmgr_gem->bufmgr.debug)
		drm_bacon_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);

		bo_gem->idle = false;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}
static int
do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
	 int in_fence, int *out_fence,
	 unsigned int flags)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
	struct drm_i915_gem_execbuffer2 execbuf;
	int ret = 0;
	int i;

	if (to_bo_gem(bo)->has_error)
		return -ENOMEM;

	switch (flags & 0x7) {
	default:
		return -EINVAL;
	case I915_EXEC_BLT:
		if (!bufmgr_gem->has_blt)
			return -EINVAL;
		break;
	case I915_EXEC_BSD:
		if (!bufmgr_gem->has_bsd)
			return -EINVAL;
		break;
	case I915_EXEC_VEBOX:
		if (!bufmgr_gem->has_vebox)
			return -EINVAL;
		break;
	case I915_EXEC_RENDER:
	case I915_EXEC_DEFAULT:
		break;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_bacon_gem_bo_process_reloc2(bo);

	/* Add the batch buffer to the validation list.  There are no relocations
	 * pointing to it.
	 */
	drm_bacon_add_validate_buffer2(bo, 0);

	memclear(execbuf);
	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t)cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;
	execbuf.flags = flags;
	if (ctx == NULL)
		i915_execbuffer2_set_context_id(execbuf, 0);
	else
		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
	execbuf.rsvd2 = 0;
	if (in_fence != -1) {
		execbuf.rsvd2 = in_fence;
		execbuf.flags |= I915_EXEC_FENCE_IN;
	}
	if (out_fence != NULL) {
		*out_fence = -1;
		execbuf.flags |= I915_EXEC_FENCE_OUT;
	}

	if (bufmgr_gem->no_exec)
		goto skip_execution;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
		       &execbuf);
	if (ret != 0) {
		ret = -errno;
		if (ret == -ENOSPC) {
			DBG("Execbuffer fails to pin. "
			    "Estimate: %u. Actual: %u. Available: %u\n",
			    drm_bacon_gem_estimate_batch_space(bufmgr_gem->exec_bos,
							       bufmgr_gem->exec_count),
			    drm_bacon_gem_compute_batch_space(bufmgr_gem->exec_bos,
							      bufmgr_gem->exec_count),
			    (unsigned int) bufmgr_gem->gtt_size);
		}
	}
	drm_bacon_update_buffer_offsets2(bufmgr_gem);

	if (ret == 0 && out_fence != NULL)
		*out_fence = execbuf.rsvd2 >> 32;

skip_execution:
	if (bufmgr_gem->bufmgr.debug)
		drm_bacon_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);

		bo_gem->idle = false;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}
static int
drm_bacon_gem_bo_exec2(drm_bacon_bo *bo, int used,
		       drm_clip_rect_t *cliprects, int num_cliprects,
		       int DR4)
{
	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
			-1, NULL, I915_EXEC_RENDER);
}

static int
drm_bacon_gem_bo_mrb_exec2(drm_bacon_bo *bo, int used,
			   drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
			   unsigned int flags)
{
	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
			-1, NULL, flags);
}

int
drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
			      int used, unsigned int flags)
{
	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
}

int
drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
			    drm_bacon_context *ctx,
			    int used,
			    int in_fence,
			    int *out_fence,
			    unsigned int flags)
{
	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
}
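
/*
 * Usage sketch (illustrative, not part of the original source): submitting a
 * batch with an explicit fence in and out via the fence_exec wrapper.  The
 * in_fence_fd would typically come from a prior submission or another driver;
 * -1 means "no input fence".
 *
 *	int out_fence = -1;
 *	ret = drm_bacon_gem_bo_fence_exec(batch_bo, ctx, batch_used,
 *					  in_fence_fd, &out_fence,
 *					  I915_EXEC_RENDER);
 *	... wait on or pass along out_fence, then close(out_fence) ...
 */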
static int
drm_bacon_gem_bo_pin(drm_bacon_bo *bo, uint32_t alignment)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_pin pin;
	int ret;

	memclear(pin);
	pin.handle = bo_gem->gem_handle;
	pin.alignment = alignment;

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_PIN,
		       &pin);
	if (ret != 0)
		return -errno;

	bo->offset64 = pin.offset;
	bo->offset = pin.offset;
	return 0;
}
static int
drm_bacon_gem_bo_unpin(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_unpin unpin;
	int ret;

	memclear(unpin);
	unpin.handle = bo_gem->gem_handle;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
	if (ret != 0)
		return -errno;

	return 0;
}
static int
drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	struct drm_i915_gem_set_tiling set_tiling;
	int ret;

	if (bo_gem->global_name == 0 &&
	    tiling_mode == bo_gem->tiling_mode &&
	    stride == bo_gem->stride)
		return 0;

	memset(&set_tiling, 0, sizeof(set_tiling));
	do {
		/* set_tiling is slightly broken and overwrites the
		 * input on the error path, so we have to open code
		 * drmIoctl.
		 */
		set_tiling.handle = bo_gem->gem_handle;
		set_tiling.tiling_mode = tiling_mode;
		set_tiling.stride = stride;

		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_TILING,
			    &set_tiling);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
	if (ret == -1)
		return -errno;

	bo_gem->tiling_mode = set_tiling.tiling_mode;
	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
	bo_gem->stride = set_tiling.stride;
	return 0;
}
static int
drm_bacon_gem_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
			    uint32_t stride)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int ret;

	/* Tiling with userptr surfaces is not supported
	 * on all hardware, so refuse it for the time being.
	 */
	if (bo_gem->is_userptr)
		return -EINVAL;

	/* Linear buffers have no stride. By ensuring that we only ever use
	 * stride 0 with linear buffers, we simplify our code.
	 */
	if (*tiling_mode == I915_TILING_NONE)
		stride = 0;

	ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
	if (ret == 0)
		drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);

	*tiling_mode = bo_gem->tiling_mode;
	return ret;
}
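
/*
 * Usage sketch (illustrative, not part of the original source): requesting
 * X-tiling with a caller-computed stride.  Note that *tiling_mode is written
 * back with the bo's resulting tiling, so callers should re-check it.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	ret = drm_bacon_gem_bo_set_tiling(bo, &tiling, pitch_in_bytes);
 *	if (ret != 0 || tiling != I915_TILING_X)
 *		... fall back to a linear layout ...
 */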
static int
drm_bacon_gem_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	*tiling_mode = bo_gem->tiling_mode;
	*swizzle_mode = bo_gem->swizzle_mode;
	return 0;
}
static int
drm_bacon_gem_bo_set_softpin_offset(drm_bacon_bo *bo, uint64_t offset)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	bo->offset64 = offset;
	bo->offset = offset;
	bo_gem->kflags |= EXEC_OBJECT_PINNED;

	return 0;
}
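
/*
 * Usage sketch (illustrative, not part of the original source): pinning a
 * buffer at a fixed GPU address chosen by a caller-side allocator
 * (my_vma_alloc() is hypothetical).  Once pinned, emit_reloc() on this
 * target only records it for validation; the kernel is expected to keep it
 * at the given offset.
 *
 *	uint64_t gpu_addr = my_vma_alloc(bo->size);
 *	drm_bacon_gem_bo_set_softpin_offset(bo, gpu_addr);
 */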
drm_bacon_bo *
drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
	int ret;
	uint32_t handle;
	drm_bacon_bo_gem *bo_gem;
	struct drm_i915_gem_get_tiling get_tiling;

	pthread_mutex_lock(&bufmgr_gem->lock);
	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
	if (ret) {
		DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
		pthread_mutex_unlock(&bufmgr_gem->lock);
		return NULL;
	}

	/*
	 * See if the kernel has already returned this buffer to us. Just as
	 * for named buffers, we must not create two bo's pointing at the same
	 * kernel object.
	 */
	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
		  &handle, sizeof(handle), bo_gem);
	if (bo_gem) {
		drm_bacon_gem_bo_reference(&bo_gem->bo);
		goto out;
	}

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		goto out;

	p_atomic_set(&bo_gem->refcount, 1);
	DRMINITLISTHEAD(&bo_gem->vma_list);

	/* Determine size of bo.  The fd-to-handle ioctl really should
	 * return the size, but it doesn't.  If we have kernel 3.12 or
	 * later, we can lseek on the prime fd to get the size.  Older
	 * kernels will just fail, in which case we fall back to the
	 * provided (estimated or guessed) size. */
	ret = lseek(prime_fd, 0, SEEK_END);
	if (ret != -1)
		bo_gem->bo.size = ret;
	else
		bo_gem->bo.size = size;

	bo_gem->bo.handle = handle;
	bo_gem->bo.bufmgr = bufmgr;

	bo_gem->gem_handle = handle;
	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);

	bo_gem->name = "prime";
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->has_error = false;
	bo_gem->reusable = false;

	memclear(get_tiling);
	get_tiling.handle = bo_gem->gem_handle;
	if (drmIoctl(bufmgr_gem->fd,
		     DRM_IOCTL_I915_GEM_GET_TILING,
		     &get_tiling))
		goto err;

	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	/* XXX stride is unknown */
	drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);

out:
	pthread_mutex_unlock(&bufmgr_gem->lock);
	return &bo_gem->bo;

err:
	drm_bacon_gem_bo_free(&bo_gem->bo);
	pthread_mutex_unlock(&bufmgr_gem->lock);
	return NULL;
}
int
drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
			       DRM_CLOEXEC, prime_fd) != 0)
		return -errno;

	bo_gem->reusable = false;

	return 0;
}
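
/*
 * Usage sketch (illustrative, not part of the original source): sharing a
 * buffer with another process or device via PRIME.  The importer passes the
 * dma-buf fd plus a size estimate, which is only used on pre-3.12 kernels
 * where lseek() on the fd fails.
 *
 *	int fd;
 *	if (drm_bacon_bo_gem_export_to_prime(bo, &fd) == 0) {
 *		... send fd over a socket ...
 *	}
 *	... on the importing side ...
 *	drm_bacon_bo *imported =
 *		drm_bacon_bo_gem_create_from_prime(bufmgr, fd, estimated_size);
 */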
static int
drm_bacon_gem_bo_flink(drm_bacon_bo *bo, uint32_t * name)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	if (!bo_gem->global_name) {
		struct drm_gem_flink flink;

		memclear(flink);
		flink.handle = bo_gem->gem_handle;
		if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
			return -errno;

		pthread_mutex_lock(&bufmgr_gem->lock);
		if (!bo_gem->global_name) {
			bo_gem->global_name = flink.name;
			bo_gem->reusable = false;

			HASH_ADD(name_hh, bufmgr_gem->name_table,
				 global_name, sizeof(bo_gem->global_name),
				 bo_gem);
		}
		pthread_mutex_unlock(&bufmgr_gem->lock);
	}

	*name = bo_gem->global_name;
	return 0;
}
/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;

	bufmgr_gem->bo_reuse = true;
}
/**
 * Disables implicit synchronisation before executing the bo
 *
 * This will cause rendering corruption unless you correctly manage explicit
 * fences for all rendering involving this buffer - including use by others.
 * Disabling the implicit serialisation is only required if that serialisation
 * is too coarse (for example, you have split the buffer into many
 * non-overlapping regions and are sharing the whole buffer between concurrent
 * independent command streams).
 *
 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
 * which can be checked using drm_bacon_bufmgr_can_disable_implicit_sync,
 * or subsequent execbufs involving the bo will generate EINVAL.
 */
void
drm_bacon_gem_bo_disable_implicit_sync(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
}
/**
 * Enables implicit synchronisation before executing the bo
 *
 * This is the default behaviour of the kernel, to wait upon prior writes
 * completing on the object before rendering with it, or to wait for prior
 * reads to complete before writing into the object.
 * drm_bacon_gem_bo_disable_implicit_sync() can stop this behaviour, telling
 * the kernel never to insert a stall before using the object. Then this
 * function can be used to restore the implicit sync before subsequent
 * rendering.
 */
void
drm_bacon_gem_bo_enable_implicit_sync(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
}
/**
 * Query whether the kernel supports disabling of its implicit synchronisation
 * before execbuf. See drm_bacon_gem_bo_disable_implicit_sync()
 */
int
drm_bacon_bufmgr_gem_can_disable_implicit_sync(drm_bacon_bufmgr *bufmgr)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;

	return bufmgr_gem->has_exec_async;
}
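
/*
 * Usage sketch (illustrative, not part of the original source): opting a
 * shared buffer out of the kernel's implicit serialisation, guarded by the
 * capability query above.  All ordering then has to come from explicit
 * fences (see drm_bacon_gem_bo_fence_exec()).
 *
 *	if (drm_bacon_bufmgr_gem_can_disable_implicit_sync(bufmgr))
 *		drm_bacon_gem_bo_disable_implicit_sync(bo);
 *	... render using explicit fences only ...
 *	drm_bacon_gem_bo_enable_implicit_sync(bo);
 */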
/**
 * Enable use of fenced reloc type.
 *
 * New code should enable this to avoid unnecessary fence register
 * allocation.  If this option is not enabled, all relocs will have a fence
 * register allocated.
 */
void
drm_bacon_bufmgr_gem_enable_fenced_relocs(drm_bacon_bufmgr *bufmgr)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;

	if (bufmgr_gem->bufmgr.bo_exec == drm_bacon_gem_bo_exec2)
		bufmgr_gem->fenced_relocs = true;
}
/**
 * Return the additional aperture space required by the tree of buffer objects
 * rooted at bo.
 */
static int
drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int i;
	int total = 0;

	if (bo == NULL || bo_gem->included_in_check_aperture)
		return 0;

	total += bo->size;
	bo_gem->included_in_check_aperture = true;

	for (i = 0; i < bo_gem->reloc_count; i++)
		total +=
		    drm_bacon_gem_bo_get_aperture_space(bo_gem->
							reloc_target_info[i].bo);

	return total;
}
/**
 * Count the number of buffers in this list that need a fence reg
 *
 * If the count is greater than the number of available regs, we'll have
 * to ask the caller to resubmit a batch with fewer tiled buffers.
 *
 * This function over-counts if the same buffer is used multiple times.
 */
static unsigned int
drm_bacon_gem_total_fences(drm_bacon_bo ** bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];

		if (bo_gem == NULL)
			continue;

		total += bo_gem->reloc_tree_fences;
	}
	return total;
}
/**
 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
 * for the next drm_bacon_bufmgr_check_aperture_space() call.
 */
static void
drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int i;

	if (bo == NULL || !bo_gem->included_in_check_aperture)
		return;

	bo_gem->included_in_check_aperture = false;

	for (i = 0; i < bo_gem->reloc_count; i++)
		drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->
							   reloc_target_info[i].bo);
}
/**
 * Return a conservative estimate for the amount of aperture required
 * for a collection of buffers. This may double-count some buffers.
 */
static unsigned int
drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];

		if (bo_gem != NULL)
			total += bo_gem->reloc_tree_size;
	}
	return total;
}
/**
 * Return the amount of aperture needed for a collection of buffers.
 * This avoids double counting any buffers, at the cost of looking
 * at every buffer in the set.
 */
static unsigned int
drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
		/* For the first buffer object in the array, we get an
		 * accurate count back for its reloc_tree size (since nothing
		 * had been flagged as being counted yet).  We can save that
		 * value out as a more conservative reloc_tree_size that
		 * avoids double-counting target buffers.  Since the first
		 * buffer happens to usually be the batch buffer in our
		 * callers, this can pull us back from doing the tree
		 * walk on every new batch emit.
		 */
		if (i == 0) {
			drm_bacon_bo_gem *bo_gem =
			    (drm_bacon_bo_gem *) bo_array[i];
			bo_gem->reloc_tree_size = total;
		}
	}

	for (i = 0; i < count; i++)
		drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
	return total;
}
/**
 * Return -1 if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
static int
drm_bacon_gem_check_aperture_space(drm_bacon_bo **bo_array, int count)
{
	drm_bacon_bufmgr_gem *bufmgr_gem =
	    (drm_bacon_bufmgr_gem *) bo_array[0]->bufmgr;
	unsigned int total = 0;
	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
	int total_fences;

	/* Check for fence reg constraints if necessary */
	if (bufmgr_gem->available_fences) {
		total_fences = drm_bacon_gem_total_fences(bo_array, count);
		if (total_fences > bufmgr_gem->available_fences)
			return -ENOSPC;
	}

	total = drm_bacon_gem_estimate_batch_space(bo_array, count);

	if (total > threshold)
		total = drm_bacon_gem_compute_batch_space(bo_array, count);

	if (total > threshold) {
		DBG("check_space: overflowed available aperture, "
		    "%dkb vs %dkb\n",
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
		return -ENOSPC;
	} else {
		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
		    (int)bufmgr_gem->gtt_size / 1024);
		return 0;
	}
}
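
/*
 * Usage sketch (illustrative, not part of the original source): the flush
 * heuristic this function enables in a caller's batch emission loop.
 * flush_batch() is an assumption for the example.
 *
 *	if (drm_bacon_bufmgr_check_aperture_space(bos, n_bos) != 0) {
 *		flush_batch();
 *		... start a new batch and re-emit the state ...
 *	}
 */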
/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
static int
drm_bacon_gem_bo_disable_reuse(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	bo_gem->reusable = false;
	return 0;
}

static int
drm_bacon_gem_bo_is_reusable(drm_bacon_bo *bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	return bo_gem->reusable;
}
static int
_drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
{
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
	int i;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo == target_bo)
			return 1;
		if (bo == bo_gem->reloc_target_info[i].bo)
			continue;
		if (_drm_bacon_gem_bo_references(bo_gem->reloc_target_info[i].bo,
						 target_bo))
			return 1;
	}

	for (i = 0; i < bo_gem->softpin_target_count; i++) {
		if (bo_gem->softpin_target[i] == target_bo)
			return 1;
		if (_drm_bacon_gem_bo_references(bo_gem->softpin_target[i], target_bo))
			return 1;
	}

	return 0;
}

/** Return true if target_bo is referenced by bo's relocation tree. */
static int
drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
{
	drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;

	if (bo == NULL || target_bo == NULL)
		return 0;
	if (target_bo_gem->used_as_reloc_target)
		return _drm_bacon_gem_bo_references(bo, target_bo);
	return 0;
}
static void
add_bucket(drm_bacon_bufmgr_gem *bufmgr_gem, int size)
{
	unsigned int i = bufmgr_gem->num_buckets;

	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));

	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
	bufmgr_gem->cache_bucket[i].size = size;
	bufmgr_gem->num_buckets++;
}
static void
init_cache_buckets(drm_bacon_bufmgr_gem *bufmgr_gem)
{
	unsigned long size, cache_max_size = 64 * 1024 * 1024;

	/* OK, so power of two buckets was too wasteful of memory.
	 * Give 3 other sizes between each power of two, to hopefully
	 * cover things accurately enough.  (The alternative is
	 * probably to just go for exact matching of sizes, and assume
	 * that for things like composited window resize the tiled
	 * width/height alignment and rounding of sizes to pages will
	 * get us useful cache hit rates anyway)
	 */
	add_bucket(bufmgr_gem, 4096);
	add_bucket(bufmgr_gem, 4096 * 2);
	add_bucket(bufmgr_gem, 4096 * 3);

	/* Initialize the linked lists for BO reuse cache. */
	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
		add_bucket(bufmgr_gem, size);

		add_bucket(bufmgr_gem, size + size * 1 / 4);
		add_bucket(bufmgr_gem, size + size * 2 / 4);
		add_bucket(bufmgr_gem, size + size * 3 / 4);
	}
}
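
/*
 * Worked example (derived from the loop above): the bucket sizes start at
 * 4KB, 8KB, 12KB, and from 16KB upward each power of two is followed by
 * three intermediate steps:
 *
 *	16384, 20480, 24576, 28672,
 *	32768, 40960, 49152, 57344,
 *	65536, ...
 *
 * continuing up to the 64MB cache_max_size cap.
 */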
void
drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;

	bufmgr_gem->vma_max = limit;

	drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
}
static int
parse_devid_override(const char *devid_override)
{
	static const struct {
		const char *name;
		int pci_id;
	} name_map[] = {
		{ "brw", PCI_CHIP_I965_GM },
		{ "g4x", PCI_CHIP_GM45_GM },
		{ "ilk", PCI_CHIP_ILD_G },
		{ "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
		{ "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
		{ "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
		{ "byt", PCI_CHIP_VALLEYVIEW_3 },
		{ "bdw", 0x1620 | BDW_ULX },
		{ "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
		{ "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(name_map); i++) {
		if (!strcmp(name_map[i].name, devid_override))
			return name_map[i].pci_id;
	}

	return strtod(devid_override, NULL);
}
/**
 * Get the PCI ID for the device.  This can be overridden by setting the
 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
 */
static int
get_pci_device_id(drm_bacon_bufmgr_gem *bufmgr_gem)
{
	char *devid_override;
	int devid = 0;
	int ret;
	drm_i915_getparam_t gp;

	if (geteuid() == getuid()) {
		devid_override = getenv("INTEL_DEVID_OVERRIDE");
		if (devid_override) {
			bufmgr_gem->no_exec = true;
			return parse_devid_override(devid_override);
		}
	}

	memclear(gp);
	gp.param = I915_PARAM_CHIPSET_ID;
	gp.value = &devid;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret) {
		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
	}
	return devid;
}
int
drm_bacon_bufmgr_gem_get_devid(drm_bacon_bufmgr *bufmgr)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;

	return bufmgr_gem->pci_device;
}
drm_bacon_context *
drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
	struct drm_i915_gem_context_create create;
	drm_bacon_context *context = NULL;
	int ret;

	context = calloc(1, sizeof(*context));
	if (!context)
		return NULL;

	memclear(create);
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
	if (ret != 0) {
		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
		    strerror(errno));
		free(context);
		return NULL;
	}

	context->ctx_id = create.ctx_id;
	context->bufmgr = bufmgr;

	return context;
}
int
drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
{
	if (ctx == NULL)
		return -EINVAL;

	*ctx_id = ctx->ctx_id;

	return 0;
}
void
drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
{
	drm_bacon_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_context_destroy destroy;
	int ret;

	if (ctx == NULL)
		return;

	memclear(destroy);

	bufmgr_gem = (drm_bacon_bufmgr_gem *)ctx->bufmgr;
	destroy.ctx_id = ctx->ctx_id;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
		       &destroy);
	if (ret != 0)
		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
			strerror(errno));

	free(ctx);
}
int
drm_bacon_get_reset_stats(drm_bacon_context *ctx,
			  uint32_t *reset_count,
			  uint32_t *active,
			  uint32_t *pending)
{
	drm_bacon_bufmgr_gem *bufmgr_gem;
	struct drm_i915_reset_stats stats;
	int ret;

	if (ctx == NULL)
		return -EINVAL;

	memclear(stats);

	bufmgr_gem = (drm_bacon_bufmgr_gem *)ctx->bufmgr;
	stats.ctx_id = ctx->ctx_id;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GET_RESET_STATS,
		       &stats);
	if (ret == 0) {
		if (reset_count != NULL)
			*reset_count = stats.reset_count;

		if (active != NULL)
			*active = stats.batch_active;

		if (pending != NULL)
			*pending = stats.batch_pending;
	}

	return ret;
}
int
drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
		   uint32_t offset,
		   uint64_t *result)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
	struct drm_i915_reg_read reg_read;
	int ret;

	memclear(reg_read);
	reg_read.offset = offset;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

	*result = reg_read.val;
	return ret;
}
int
drm_bacon_get_subslice_total(int fd, unsigned int *subslice_total)
{
	drm_i915_getparam_t gp;
	int ret;

	memclear(gp);
	gp.value = (int*)subslice_total;
	gp.param = I915_PARAM_SUBSLICE_TOTAL;
	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret)
		return -errno;

	return 0;
}
int
drm_bacon_get_eu_total(int fd, unsigned int *eu_total)
{
	drm_i915_getparam_t gp;
	int ret;

	memclear(gp);
	gp.value = (int*)eu_total;
	gp.param = I915_PARAM_EU_TOTAL;
	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret)
		return -errno;

	return 0;
}
int
drm_bacon_get_pooled_eu(int fd)
{
	drm_i915_getparam_t gp;
	int ret = -1;

	memclear(gp);
	gp.param = I915_PARAM_HAS_POOLED_EU;
	gp.value = &ret;
	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -errno;

	return ret;
}
int
drm_bacon_get_min_eu_in_pool(int fd)
{
	drm_i915_getparam_t gp;
	int ret = -1;

	memclear(gp);
	gp.param = I915_PARAM_MIN_EU_IN_POOL;
	gp.value = &ret;
	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -errno;

	return ret;
}
static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };

static drm_bacon_bufmgr_gem *
drm_bacon_bufmgr_gem_find(int fd)
{
	drm_bacon_bufmgr_gem *bufmgr_gem;

	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
		if (bufmgr_gem->fd == fd) {
			p_atomic_inc(&bufmgr_gem->refcount);
			return bufmgr_gem;
		}
	}

	return NULL;
}
void
drm_bacon_bufmgr_gem_unref(drm_bacon_bufmgr *bufmgr)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;

	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
		pthread_mutex_lock(&bufmgr_list_mutex);

		if (p_atomic_dec_zero(&bufmgr_gem->refcount)) {
			DRMLISTDEL(&bufmgr_gem->managers);
			drm_bacon_bufmgr_gem_destroy(bufmgr);
		}

		pthread_mutex_unlock(&bufmgr_list_mutex);
	}
}
void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	if (bo_gem->gtt_virtual)
		return bo_gem->gtt_virtual;

	if (bo_gem->is_userptr)
		return NULL;

	pthread_mutex_lock(&bufmgr_gem->lock);
	if (bo_gem->gtt_virtual == NULL) {
		struct drm_i915_gem_mmap_gtt mmap_arg;
		void *ptr;

		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

		if (bo_gem->map_count++ == 0)
			drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);

		memclear(mmap_arg);
		mmap_arg.handle = bo_gem->gem_handle;

		/* Get the fake offset back... */
		ptr = MAP_FAILED;
		if (drmIoctl(bufmgr_gem->fd,
			     DRM_IOCTL_I915_GEM_MMAP_GTT,
			     &mmap_arg) == 0) {
			/* and mmap it */
			ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
				       MAP_SHARED, bufmgr_gem->fd,
				       mmap_arg.offset);
		}
		if (ptr == MAP_FAILED) {
			if (--bo_gem->map_count == 0)
				drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
			ptr = NULL;
		}

		bo_gem->gtt_virtual = ptr;
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return bo_gem->gtt_virtual;
}
void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	if (bo_gem->mem_virtual)
		return bo_gem->mem_virtual;

	if (bo_gem->is_userptr) {
		/* Return the same user ptr */
		return bo_gem->user_virtual;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	if (!bo_gem->mem_virtual) {
		struct drm_i915_gem_mmap mmap_arg;

		if (bo_gem->map_count++ == 0)
			drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);

		DBG("bo_map: %d (%s), map_count=%d\n",
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

		memclear(mmap_arg);
		mmap_arg.handle = bo_gem->gem_handle;
		mmap_arg.size = bo->size;
		if (drmIoctl(bufmgr_gem->fd,
			     DRM_IOCTL_I915_GEM_MMAP,
			     &mmap_arg)) {
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
			    __FILE__, __LINE__, bo_gem->gem_handle,
			    bo_gem->name, strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
		} else {
			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
			bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return bo_gem->mem_virtual;
}
void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
{
	drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
	drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

	if (bo_gem->wc_virtual)
		return bo_gem->wc_virtual;

	if (bo_gem->is_userptr)
		return NULL;

	pthread_mutex_lock(&bufmgr_gem->lock);
	if (!bo_gem->wc_virtual) {
		struct drm_i915_gem_mmap mmap_arg;

		if (bo_gem->map_count++ == 0)
			drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);

		DBG("bo_map: %d (%s), map_count=%d\n",
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

		memclear(mmap_arg);
		mmap_arg.handle = bo_gem->gem_handle;
		mmap_arg.size = bo->size;
		mmap_arg.flags = I915_MMAP_WC;
		if (drmIoctl(bufmgr_gem->fd,
			     DRM_IOCTL_I915_GEM_MMAP,
			     &mmap_arg)) {
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
			    __FILE__, __LINE__, bo_gem->gem_handle,
			    bo_gem->name, strerror(errno));
			if (--bo_gem->map_count == 0)
				drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
		} else {
			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
			bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return bo_gem->wc_virtual;
}
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_bacon_bufmgr *
drm_bacon_bufmgr_gem_init(int fd, int batch_size)
{
	drm_bacon_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret, tmp;
	bool exec2 = false;

	pthread_mutex_lock(&bufmgr_list_mutex);

	bufmgr_gem = drm_bacon_bufmgr_gem_find(fd);
	if (bufmgr_gem)
		goto exit;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	if (bufmgr_gem == NULL)
		goto exit;

	bufmgr_gem->fd = fd;
	p_atomic_set(&bufmgr_gem->refcount, 1);

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		bufmgr_gem = NULL;
		goto exit;
	}

	memclear(aperture);
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_APERTURE,
		       &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);

	if (IS_GEN2(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 2;
	else if (IS_GEN3(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 3;
	else if (IS_GEN4(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 4;
	else if (IS_GEN5(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 5;
	else if (IS_GEN6(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 6;
	else if (IS_GEN7(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 7;
	else if (IS_GEN8(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 8;
	else if (IS_GEN9(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 9;
	else {
		free(bufmgr_gem);
		bufmgr_gem = NULL;
		goto exit;
	}

	if (IS_GEN3(bufmgr_gem->pci_device) &&
	    bufmgr_gem->gtt_size > 256*1024*1024) {
		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
		 * be used for tiled blits. To simplify the accounting, just
		 * subtract the unmappable part (fixed to 256MB on all known
		 * gen3 devices) if the kernel advertises it. */
		bufmgr_gem->gtt_size -= 256*1024*1024;
	}

	memclear(gp);
	gp.value = &tmp;

	gp.param = I915_PARAM_HAS_EXECBUF2;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = true;

	gp.param = I915_PARAM_HAS_BSD;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_bsd = ret == 0;

	gp.param = I915_PARAM_HAS_BLT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_blt = ret == 0;

	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_relaxed_fencing = ret == 0;

	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_exec_async = ret == 0;

	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;

	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_wait_timeout = ret == 0;

	gp.param = I915_PARAM_HAS_LLC;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret != 0) {
		/* Kernel does not support the HAS_LLC query; fall back to
		 * GPU generation detection and assume that we have LLC on
		 * GEN6/7.
		 */
		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
				IS_GEN7(bufmgr_gem->pci_device));
	} else
		bufmgr_gem->has_llc = *gp.value;

	gp.param = I915_PARAM_HAS_VEBOX;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);

	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret == 0 && *gp.value > 0)
		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_bacon_gem_bo_set_softpin_offset;

	if (bufmgr_gem->gen < 4) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		} else {
			/* XXX The kernel reports the total number of fences,
			 * including any that may be pinned.
			 *
			 * We presume that there will be at least one pinned
			 * fence for the scanout buffer, but there may be more
			 * than one scanout and the user may be manually
			 * pinning buffers. Let's move to execbuffer2 and
			 * thereby forget the insanity of using fences...
			 */
			bufmgr_gem->available_fences -= 2;
			if (bufmgr_gem->available_fences < 0)
				bufmgr_gem->available_fences = 0;
		}
	}

	if (bufmgr_gem->gen >= 8) {
		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret == 0 && *gp.value == 3)
			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_bacon_gem_bo_use_48b_address_range;
	}

	/* Let's go with one relocation per every 2 dwords (but round down a bit
	 * since a power of two will mean an extra page allocation for the reloc
	 * buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	bufmgr_gem->bufmgr.bo_alloc = drm_bacon_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_bacon_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_bacon_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_bacon_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_bacon_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_bacon_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_bacon_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_bacon_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_bacon_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_bacon_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_bacon_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_bacon_gem_bo_emit_reloc_fence;
	bufmgr_gem->bufmgr.bo_pin = drm_bacon_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_bacon_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_bacon_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_bacon_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_bacon_gem_bo_flink;
	/* Use the new one if available */
	if (exec2) {
		bufmgr_gem->bufmgr.bo_exec = drm_bacon_gem_bo_exec2;
		bufmgr_gem->bufmgr.bo_mrb_exec = drm_bacon_gem_bo_mrb_exec2;
	} else
		bufmgr_gem->bufmgr.bo_exec = drm_bacon_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_bacon_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_bacon_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_bacon_bufmgr_gem_unref;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_bacon_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_bacon_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.bo_is_reusable = drm_bacon_gem_bo_is_reusable;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_bacon_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_bacon_gem_bo_references;

	init_cache_buckets(bufmgr_gem);

	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
	bufmgr_gem->vma_max = -1; /* unlimited by default */

	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);

exit:
	pthread_mutex_unlock(&bufmgr_list_mutex);

	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
}
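
/*
 * Usage sketch (illustrative, not part of the original source): typical
 * bring-up of the GEM buffer manager.  The 4096-byte batch size and the
 * device path are assumptions for the example.
 *
 *	int fd = open("/dev/dri/renderD128", O_RDWR);
 *	drm_bacon_bufmgr *bufmgr = drm_bacon_bufmgr_gem_init(fd, 4096);
 *	drm_bacon_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_bacon_bufmgr_gem_enable_fenced_relocs(bufmgr);
 *	... allocate bos, build and exec batches ...
 *	drm_bacon_bufmgr_gem_unref(bufmgr);
 */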