/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */
#include <xf86drm.h>
#include <util/u_atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdbool.h>
#include <time.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "common/gen_debug.h"
#include "common/gen_device_info.h"
#include "libdrm_macros.h"
#include "main/macros.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "brw_bufmgr.h"

#include "i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif
#define memclear(s) memset(&s, 0, sizeof(s))

#define FILE_DEBUG_FLAG DEBUG_BUFMGR
static inline int
atomic_add_unless(int *v, int add, int unless)
{
   int c, old;
   c = p_atomic_read(v);
   while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
      c = old;
   return c == unless;
}
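
/* Illustrative sketch (added commentary, not from the original source):
 * atomic_add_unless() returns nonzero only when the counter already held
 * 'unless', i.e. the add was NOT applied.  The unreference paths below rely
 * on this to take the locked slow path only for the final reference.
 * Assumes util/macros.h provides the UNUSED attribute macro.
 */
static UNUSED bool
example_release_reference(int *refcount)
{
   /* Fast path: drop one reference unless we hold the last one. */
   if (!atomic_add_unless(refcount, -1, 1))
      return false;  /* decremented; object still live */

   /* Count was exactly 1: the caller must lock and free the object. */
   return true;
}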
/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32 bits.
 */
#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
/**
 * lower_32_bits - return bits 0-31 of a number
 * @n: the number we're accessing
 */
#define lower_32_bits(n) ((__u32)(n))
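
/* Illustrative sketch (added commentary, not from the original source):
 * this is how the two macros above split a 64-bit GTT offset for the
 * "%08x %08x" style DBG() output used throughout this file.
 */
static UNUSED void
example_log_offset64(uint64_t offset64)
{
   DBG("offset 0x%08x %08x\n",
       upper_32_bits(offset64), lower_32_bits(offset64));
}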
struct _drm_bacon_context {
   unsigned int ctx_id;
   struct _drm_bacon_bufmgr *bufmgr;
};

typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;
struct drm_bacon_gem_bo_bucket {
   struct list_head head;
   unsigned long size;
};
typedef struct _drm_bacon_bufmgr {
   int fd;

   int max_relocs;

   pthread_mutex_t lock;

   struct drm_i915_gem_exec_object2 *exec2_objects;
   drm_bacon_bo **exec_bos;
   int exec_size;
   int exec_count;

   /** Array of lists of cached gem objects of power-of-two sizes */
   struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
   int num_buckets;
   time_t time;

   struct list_head managers;

   struct hash_table *name_table;
   struct hash_table *handle_table;

   struct list_head vma_cache;
   int vma_count, vma_open, vma_max;

   uint64_t gtt_size;

   unsigned int has_llc : 1;
   unsigned int bo_reuse : 1;
   unsigned int no_exec : 1;

   int refcount;
} drm_bacon_bufmgr;
struct _drm_bacon_bo_gem {
   drm_bacon_bo bo;

   int refcount;
   uint32_t gem_handle;
   const char *name;

   /**
    * Kernel-assigned global name for this object
    *
    * List contains both flink named and prime fd'd objects
    */
   unsigned int global_name;

   /**
    * Index of the buffer within the validation list while preparing a
    * batchbuffer execution.
    */
   int validate_index;

   /**
    * Current tiling mode
    */
   uint32_t tiling_mode;
   uint32_t swizzle_mode;
   unsigned long stride;

   time_t free_time;

   /** Array passed to the DRM containing relocation information. */
   struct drm_i915_gem_relocation_entry *relocs;
   /**
    * Array of info structs corresponding to relocs[i].target_handle etc
    */
   drm_bacon_bo **reloc_bos;
   /** Number of entries in relocs */
   int reloc_count;
   /** Mapped address for the buffer, saved across map/unmap cycles */
   void *mem_virtual;
   /** GTT virtual address for the buffer, saved across map/unmap cycles */
   void *gtt_virtual;
   /** WC CPU address for the buffer, saved across map/unmap cycles */
   void *wc_virtual;
   int map_count;
   struct list_head vma_list;

   /** BO cache list */
   struct list_head head;

   /**
    * Boolean of whether this BO and its children have been included in
    * the current drm_bacon_bufmgr_check_aperture_space() total.
    */
   bool included_in_check_aperture;

   /**
    * Boolean of whether this buffer has been used as a relocation
    * target and had its size accounted for, and thus can't have any
    * further relocations added to it.
    */
   bool used_as_reloc_target;

   /**
    * Boolean of whether we have encountered an error whilst building the
    * relocation tree.
    */
   bool has_error;

   /**
    * Boolean of whether this buffer can be re-used
    */
   bool reusable;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   /**
    * Size in bytes of this buffer and its relocation descendents.
    *
    * Used to avoid costly tree walking in
    * drm_bacon_bufmgr_check_aperture in the common case.
    */
   int reloc_tree_size;

   /** Flags that we may need to do the SW_FINISH ioctl on unmap. */
   bool mapped_cpu_write;
};
static unsigned int
drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);

static unsigned int
drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);

static int
drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
                                     uint32_t tiling_mode,
                                     uint32_t stride);

static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
                                                      time_t time);

static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
{
   return (drm_bacon_bo_gem *)bo;
}
static uint32_t
key_hash_uint(const void *key)
{
   return _mesa_hash_data(key, 4);
}
static bool
key_uint_equal(const void *a, const void *b)
{
   return *((unsigned *) a) == *((unsigned *) b);
}
static drm_bacon_bo_gem *
hash_find_bo(struct hash_table *ht, unsigned int key)
{
   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
   return entry ? (drm_bacon_bo_gem *) entry->data : NULL;
}
static unsigned long
drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr *bufmgr, unsigned long size,
                           uint32_t *tiling_mode)
{
   if (*tiling_mode == I915_TILING_NONE)
      return size;

   /* 965+ just need multiples of page size for tiling */
   return ALIGN(size, 4096);
}
/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr *bufmgr,
                            unsigned long pitch, uint32_t *tiling_mode)
{
   unsigned long tile_width;

   /* If untiled, then just align it so that we can do rendering
    * to it with the 3D engine.
    */
   if (*tiling_mode == I915_TILING_NONE)
      return ALIGN(pitch, 64);

   if (*tiling_mode == I915_TILING_X)
      tile_width = 512;
   else
      tile_width = 128;

   /* 965 is flexible */
   return ALIGN(pitch, tile_width);
}
static struct drm_bacon_gem_bo_bucket *
drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr *bufmgr,
                                 unsigned long size)
{
   int i;

   for (i = 0; i < bufmgr->num_buckets; i++) {
      struct drm_bacon_gem_bo_bucket *bucket =
         &bufmgr->cache_bucket[i];
      if (bucket->size >= size) {
         return bucket;
      }
   }

   return NULL;
}
static void
drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr *bufmgr)
{
   int i, j;

   for (i = 0; i < bufmgr->exec_count; i++) {
      drm_bacon_bo *bo = bufmgr->exec_bos[i];
      drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

      if (bo_gem->relocs == NULL) {
         DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
             bo_gem->name);
         continue;
      }

      for (j = 0; j < bo_gem->reloc_count; j++) {
         drm_bacon_bo *target_bo = bo_gem->reloc_bos[j];
         drm_bacon_bo_gem *target_gem =
             (drm_bacon_bo_gem *) target_bo;

         DBG("%2d: %d (%s)@0x%08x %08x -> "
             "%d (%s)@0x%08x %08x + 0x%08x\n",
             i,
             bo_gem->gem_handle,
             bo_gem->name,
             upper_32_bits(bo_gem->relocs[j].offset),
             lower_32_bits(bo_gem->relocs[j].offset),
             target_gem->gem_handle,
             target_gem->name,
             upper_32_bits(target_bo->offset64),
             lower_32_bits(target_bo->offset64),
             bo_gem->relocs[j].delta);
      }
   }
}
void
drm_bacon_bo_reference(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   p_atomic_inc(&bo_gem->refcount);
}
static void
drm_bacon_add_validate_buffer2(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
   int index;

   if (bo_gem->validate_index != -1)
      return;

   /* Extend the array of validation entries as necessary. */
   if (bufmgr->exec_count == bufmgr->exec_size) {
      int new_size = bufmgr->exec_size * 2;

      if (new_size == 0)
         new_size = 5;

      bufmgr->exec2_objects =
         realloc(bufmgr->exec2_objects,
                 sizeof(*bufmgr->exec2_objects) * new_size);
      bufmgr->exec_bos =
         realloc(bufmgr->exec_bos,
                 sizeof(*bufmgr->exec_bos) * new_size);
      bufmgr->exec_size = new_size;
   }

   index = bufmgr->exec_count;
   bo_gem->validate_index = index;
   /* Fill in array entry */
   bufmgr->exec2_objects[index].handle = bo_gem->gem_handle;
   bufmgr->exec2_objects[index].relocation_count = bo_gem->reloc_count;
   bufmgr->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
   bufmgr->exec2_objects[index].alignment = bo->align;
   bufmgr->exec2_objects[index].offset = bo->offset64;
   bufmgr->exec2_objects[index].flags = 0;
   bufmgr->exec2_objects[index].rsvd1 = 0;
   bufmgr->exec2_objects[index].rsvd2 = 0;
   bufmgr->exec_bos[index] = bo;
   bufmgr->exec_count++;
}
static void
drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr *bufmgr,
                                      drm_bacon_bo_gem *bo_gem,
                                      unsigned int alignment)
{
   unsigned int size;

   assert(!bo_gem->used_as_reloc_target);

   /* The older chipsets are far-less flexible in terms of tiling,
    * and require tiled buffer to be size aligned in the aperture.
    * This means that in the worst possible case we will need a hole
    * twice as large as the object in order for it to fit into the
    * aperture. Optimal packing is for wimps.
    */
   size = bo_gem->bo.size;

   bo_gem->reloc_tree_size = size + alignment;
}
static int
drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   unsigned int max_relocs = bufmgr->max_relocs;

   if (bo->size / 4 < max_relocs)
      max_relocs = bo->size / 4;

   bo_gem->relocs = malloc(max_relocs *
                           sizeof(struct drm_i915_gem_relocation_entry));
   bo_gem->reloc_bos = malloc(max_relocs * sizeof(drm_bacon_bo *));
   if (bo_gem->relocs == NULL || bo_gem->reloc_bos == NULL) {
      bo_gem->has_error = true;

      free (bo_gem->relocs);
      bo_gem->relocs = NULL;

      free (bo_gem->reloc_bos);
      bo_gem->reloc_bos = NULL;

      return 1;
   }

   return 0;
}
int
drm_bacon_bo_busy(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_busy busy;
   int ret;

   if (bo_gem->reusable && bo_gem->idle)
      return false;

   memclear(busy);
   busy.handle = bo_gem->gem_handle;

   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
   if (ret == 0) {
      bo_gem->idle = !busy.busy;
   }

   return (ret == 0 && busy.busy);
}
static int
drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr *bufmgr,
                                  drm_bacon_bo_gem *bo_gem, int state)
{
   struct drm_i915_gem_madvise madv;

   memclear(madv);
   madv.handle = bo_gem->gem_handle;
   madv.madv = state;
   madv.retained = 1;
   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}
int
drm_bacon_bo_madvise(drm_bacon_bo *bo, int madv)
{
   return drm_bacon_gem_bo_madvise_internal(bo->bufmgr,
                                            (drm_bacon_bo_gem *) bo,
                                            madv);
}
/* drop the oldest entries that have been purged by the kernel */
static void
drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr *bufmgr,
                                    struct drm_bacon_gem_bo_bucket *bucket)
{
   while (!list_empty(&bucket->head)) {
      drm_bacon_bo_gem *bo_gem;

      bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
                          bucket->head.next, head);
      if (drm_bacon_gem_bo_madvise_internal
          (bufmgr, bo_gem, I915_MADV_DONTNEED))
         break;

      list_del(&bo_gem->head);
      drm_bacon_gem_bo_free(&bo_gem->bo);
   }
}
static drm_bacon_bo *
drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
                                const char *name,
                                unsigned long size,
                                unsigned long flags,
                                uint32_t tiling_mode,
                                unsigned long stride,
                                unsigned int alignment)
{
   drm_bacon_bo_gem *bo_gem;
   unsigned int page_size = getpagesize();
   int ret;
   struct drm_bacon_gem_bo_bucket *bucket;
   bool alloc_from_cache;
   unsigned long bo_size;
   bool for_render = false;

   if (flags & BO_ALLOC_FOR_RENDER)
      for_render = true;

   /* Round the allocated size up to a power of two number of pages. */
   bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, size);

   /* If we don't have caching at this size, don't actually round the
    * allocation up.
    */
   if (bucket == NULL) {
      bo_size = size;
      if (bo_size < page_size)
         bo_size = page_size;
   } else {
      bo_size = bucket->size;
   }

   pthread_mutex_lock(&bufmgr->lock);
   /* Get a buffer out of the cache if available */
retry:
   alloc_from_cache = false;
   if (bucket != NULL && !list_empty(&bucket->head)) {
      if (for_render) {
         /* Allocate new render-target BOs from the tail (MRU)
          * of the list, as it will likely be hot in the GPU
          * cache and in the aperture for us.
          */
         bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
                             bucket->head.prev, head);
         list_del(&bo_gem->head);
         alloc_from_cache = true;
         bo_gem->bo.align = alignment;
      } else {
         assert(alignment == 0);
         /* For non-render-target BOs (where we're probably
          * going to map it first thing in order to fill it
          * with data), check if the last BO in the cache is
          * unbusy, and only reuse in that case. Otherwise,
          * allocating a new buffer is probably faster than
          * waiting for the GPU to finish.
          */
         bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
                             bucket->head.next, head);
         if (!drm_bacon_bo_busy(&bo_gem->bo)) {
            alloc_from_cache = true;
            list_del(&bo_gem->head);
         }
      }

      if (alloc_from_cache) {
         if (!drm_bacon_gem_bo_madvise_internal
             (bufmgr, bo_gem, I915_MADV_WILLNEED)) {
            drm_bacon_gem_bo_free(&bo_gem->bo);
            drm_bacon_gem_bo_cache_purge_bucket(bufmgr,
                                                bucket);
            goto retry;
         }

         if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
                                                  tiling_mode,
                                                  stride)) {
            drm_bacon_gem_bo_free(&bo_gem->bo);
            goto retry;
         }
      }
   }

   if (!alloc_from_cache) {
      struct drm_i915_gem_create create;

      bo_gem = calloc(1, sizeof(*bo_gem));
      if (!bo_gem)
         goto err;

      /* drm_bacon_gem_bo_free calls list_del() for an uninitialized
         list (vma_list), so better set the list head here */
      list_inithead(&bo_gem->vma_list);

      bo_gem->bo.size = bo_size;

      memclear(create);
      create.size = bo_size;

      ret = drmIoctl(bufmgr->fd,
                     DRM_IOCTL_I915_GEM_CREATE,
                     &create);
      if (ret != 0) {
         free(bo_gem);
         goto err;
      }

      bo_gem->gem_handle = create.handle;
      _mesa_hash_table_insert(bufmgr->handle_table,
                              &bo_gem->gem_handle, bo_gem);

      bo_gem->bo.handle = bo_gem->gem_handle;
      bo_gem->bo.bufmgr = bufmgr;
      bo_gem->bo.align = alignment;

      bo_gem->tiling_mode = I915_TILING_NONE;
      bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
      bo_gem->stride = 0;

      if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
                                               tiling_mode,
                                               stride))
         goto err_free;
   }

   bo_gem->name = name;
   p_atomic_set(&bo_gem->refcount, 1);
   bo_gem->validate_index = -1;
   bo_gem->used_as_reloc_target = false;
   bo_gem->has_error = false;
   bo_gem->reusable = true;

   drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, alignment);
   pthread_mutex_unlock(&bufmgr->lock);

   DBG("bo_create: buf %d (%s) %ldb\n",
       bo_gem->gem_handle, bo_gem->name, size);

   return &bo_gem->bo;

err_free:
   drm_bacon_gem_bo_free(&bo_gem->bo);
err:
   pthread_mutex_unlock(&bufmgr->lock);
   return NULL;
}
drm_bacon_bo *
drm_bacon_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
                              const char *name,
                              unsigned long size,
                              unsigned int alignment)
{
   return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
                                          BO_ALLOC_FOR_RENDER,
                                          I915_TILING_NONE, 0,
                                          alignment);
}
drm_bacon_bo *
drm_bacon_bo_alloc(drm_bacon_bufmgr *bufmgr,
                   const char *name,
                   unsigned long size,
                   unsigned int alignment)
{
   return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
                                          I915_TILING_NONE, 0, 0);
}
drm_bacon_bo *
drm_bacon_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
                         int x, int y, int cpp, uint32_t *tiling_mode,
                         unsigned long *pitch, unsigned long flags)
{
   unsigned long size, stride;
   uint32_t tiling;

   do {
      unsigned long aligned_y, height_alignment;

      tiling = *tiling_mode;

      /* If we're tiled, our allocations are in 8 or 32-row blocks,
       * so failure to align our height means that we won't allocate
       * enough pages.
       *
       * If we're untiled, we still have to align to 2 rows high
       * because the data port accesses 2x2 blocks even if the
       * bottom row isn't to be rendered, so failure to align means
       * we could walk off the end of the GTT and fault.  This is
       * documented on 965, and may be the case on older chipsets
       * too so we try to be careful.
       */
      aligned_y = y;
      height_alignment = 2;

      if (tiling == I915_TILING_X)
         height_alignment = 8;
      else if (tiling == I915_TILING_Y)
         height_alignment = 32;
      aligned_y = ALIGN(y, height_alignment);

      stride = x * cpp;
      stride = drm_bacon_gem_bo_tile_pitch(bufmgr, stride, tiling_mode);
      size = stride * aligned_y;
      size = drm_bacon_gem_bo_tile_size(bufmgr, size, tiling_mode);
   } while (*tiling_mode != tiling);

   *pitch = stride;

   if (tiling == I915_TILING_NONE)
      stride = 0;

   return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
                                          tiling, stride, 0);
}
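
/* Usage sketch (added commentary, not from the original source): allocate
 * an X-tiled 512x512, 4-byte-per-pixel surface.  'tiling' is in/out; the
 * manager may demote the tiling mode, so callers inspect 'tiling' and
 * 'pitch' afterwards.  Assumes UNUSED from util/macros.h.
 */
static UNUSED drm_bacon_bo *
example_alloc_tiled_surface(drm_bacon_bufmgr *bufmgr)
{
   uint32_t tiling = I915_TILING_X;
   unsigned long pitch = 0;

   return drm_bacon_bo_alloc_tiled(bufmgr, "example surface",
                                   512, 512, 4, &tiling, &pitch, 0);
}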
/**
 * Returns a drm_bacon_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_bacon_bo *
drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
                                  const char *name,
                                  unsigned int handle)
{
   drm_bacon_bo_gem *bo_gem;
   int ret;
   struct drm_gem_open open_arg;
   struct drm_i915_gem_get_tiling get_tiling;

   /* At the moment most applications only have a few named bo.
    * For instance, in a DRI client only the render buffers passed
    * between X and the client are named. And since X returns the
    * alternating names for the front/back buffer a linear search
    * provides a sufficiently fast match.
    */
   pthread_mutex_lock(&bufmgr->lock);
   bo_gem = hash_find_bo(bufmgr->name_table, handle);
   if (bo_gem) {
      drm_bacon_bo_reference(&bo_gem->bo);
      goto out;
   }

   memclear(open_arg);
   open_arg.name = handle;
   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_GEM_OPEN,
                  &open_arg);
   if (ret != 0) {
      DBG("Couldn't reference %s handle 0x%08x: %s\n",
          name, handle, strerror(errno));
      bo_gem = NULL;
      goto out;
   }
   /* Now see if someone has used a prime handle to get this
    * object from the kernel before by looking through the list
    * again for a matching gem_handle
    */
   bo_gem = hash_find_bo(bufmgr->handle_table, open_arg.handle);
   if (bo_gem) {
      drm_bacon_bo_reference(&bo_gem->bo);
      goto out;
   }

   bo_gem = calloc(1, sizeof(*bo_gem));
   if (!bo_gem)
      goto out;

   p_atomic_set(&bo_gem->refcount, 1);
   list_inithead(&bo_gem->vma_list);

   bo_gem->bo.size = open_arg.size;
   bo_gem->bo.offset64 = 0;
   bo_gem->bo.virtual = NULL;
   bo_gem->bo.bufmgr = bufmgr;
   bo_gem->name = name;
   bo_gem->validate_index = -1;
   bo_gem->gem_handle = open_arg.handle;
   bo_gem->bo.handle = open_arg.handle;
   bo_gem->global_name = handle;
   bo_gem->reusable = false;

   _mesa_hash_table_insert(bufmgr->handle_table,
                           &bo_gem->gem_handle, bo_gem);
   _mesa_hash_table_insert(bufmgr->name_table,
                           &bo_gem->global_name, bo_gem);

   memclear(get_tiling);
   get_tiling.handle = bo_gem->gem_handle;
   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_GET_TILING,
                  &get_tiling);
   if (ret != 0)
      goto err_unref;

   bo_gem->tiling_mode = get_tiling.tiling_mode;
   bo_gem->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */
   drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
   DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

out:
   pthread_mutex_unlock(&bufmgr->lock);
   return &bo_gem->bo;

err_unref:
   drm_bacon_gem_bo_free(&bo_gem->bo);
   pthread_mutex_unlock(&bufmgr->lock);
   return NULL;
}
static void
drm_bacon_gem_bo_free(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_gem_close close;
   struct hash_entry *entry;
   int ret;

   list_del(&bo_gem->vma_list);
   if (bo_gem->mem_virtual) {
      VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
      drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
      bufmgr->vma_count--;
   }
   if (bo_gem->wc_virtual) {
      VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
      drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
      bufmgr->vma_count--;
   }
   if (bo_gem->gtt_virtual) {
      drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
      bufmgr->vma_count--;
   }

   if (bo_gem->global_name) {
      entry = _mesa_hash_table_search(bufmgr->name_table,
                                      &bo_gem->global_name);
      _mesa_hash_table_remove(bufmgr->name_table, entry);
   }
   entry = _mesa_hash_table_search(bufmgr->handle_table,
                                   &bo_gem->gem_handle);
   _mesa_hash_table_remove(bufmgr->handle_table, entry);

   /* Close this object */
   memclear(close);
   close.handle = bo_gem->gem_handle;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
   if (ret != 0) {
      DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
          bo_gem->gem_handle, bo_gem->name, strerror(errno));
   }
   free(bo);
}
static void
drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
{
#ifdef HAVE_VALGRIND
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   if (bo_gem->mem_virtual)
      VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);

   if (bo_gem->wc_virtual)
      VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);

   if (bo_gem->gtt_virtual)
      VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
#endif
}
/** Frees all cached buffers significantly older than @time. */
static void
drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr *bufmgr, time_t time)
{
   int i;

   if (bufmgr->time == time)
      return;

   for (i = 0; i < bufmgr->num_buckets; i++) {
      struct drm_bacon_gem_bo_bucket *bucket =
         &bufmgr->cache_bucket[i];

      while (!list_empty(&bucket->head)) {
         drm_bacon_bo_gem *bo_gem;

         bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
                             bucket->head.next, head);
         if (time - bo_gem->free_time <= 1)
            break;

         list_del(&bo_gem->head);

         drm_bacon_gem_bo_free(&bo_gem->bo);
      }
   }

   bufmgr->time = time;
}
static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr *bufmgr)
{
   int limit;

   DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
       bufmgr->vma_count, bufmgr->vma_open, bufmgr->vma_max);

   if (bufmgr->vma_max < 0)
      return;

   /* We may need to evict a few entries in order to create new mmaps */
   limit = bufmgr->vma_max - 2*bufmgr->vma_open;
   if (limit < 0)
      limit = 0;

   while (bufmgr->vma_count > limit) {
      drm_bacon_bo_gem *bo_gem;

      bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
                          bufmgr->vma_cache.next,
                          vma_list);
      assert(bo_gem->map_count == 0);
      list_delinit(&bo_gem->vma_list);

      if (bo_gem->mem_virtual) {
         drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
         bo_gem->mem_virtual = NULL;
         bufmgr->vma_count--;
      }
      if (bo_gem->wc_virtual) {
         drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
         bo_gem->wc_virtual = NULL;
         bufmgr->vma_count--;
      }
      if (bo_gem->gtt_virtual) {
         drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
         bo_gem->gtt_virtual = NULL;
         bufmgr->vma_count--;
      }
   }
}
static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr *bufmgr,
                                       drm_bacon_bo_gem *bo_gem)
{
   bufmgr->vma_open--;
   list_addtail(&bo_gem->vma_list, &bufmgr->vma_cache);
   if (bo_gem->mem_virtual)
      bufmgr->vma_count++;
   if (bo_gem->wc_virtual)
      bufmgr->vma_count++;
   if (bo_gem->gtt_virtual)
      bufmgr->vma_count++;
   drm_bacon_gem_bo_purge_vma_cache(bufmgr);
}
static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr *bufmgr,
                                      drm_bacon_bo_gem *bo_gem)
{
   bufmgr->vma_open++;
   list_del(&bo_gem->vma_list);
   if (bo_gem->mem_virtual)
      bufmgr->vma_count--;
   if (bo_gem->wc_virtual)
      bufmgr->vma_count--;
   if (bo_gem->gtt_virtual)
      bufmgr->vma_count--;
   drm_bacon_gem_bo_purge_vma_cache(bufmgr);
}
static void
drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_bacon_gem_bo_bucket *bucket;
   int i;

   /* Unreference all the target buffers */
   for (i = 0; i < bo_gem->reloc_count; i++) {
      if (bo_gem->reloc_bos[i] != bo) {
         drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
                                                   reloc_bos[i],
                                                   time);
      }
   }
   bo_gem->reloc_count = 0;
   bo_gem->used_as_reloc_target = false;

   DBG("bo_unreference final: %d (%s)\n",
       bo_gem->gem_handle, bo_gem->name);

   /* release memory associated with this object */
   if (bo_gem->reloc_bos) {
      free(bo_gem->reloc_bos);
      bo_gem->reloc_bos = NULL;
   }
   if (bo_gem->relocs) {
      free(bo_gem->relocs);
      bo_gem->relocs = NULL;
   }

   /* Clear any left-over mappings */
   if (bo_gem->map_count) {
      DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
      bo_gem->map_count = 0;
      drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
      drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
   }

   bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, bo->size);
   /* Put the buffer into our internal cache for reuse if we can. */
   if (bufmgr->bo_reuse && bo_gem->reusable && bucket != NULL &&
       drm_bacon_gem_bo_madvise_internal(bufmgr, bo_gem,
                                         I915_MADV_DONTNEED)) {
      bo_gem->free_time = time;

      bo_gem->name = NULL;
      bo_gem->validate_index = -1;

      list_addtail(&bo_gem->head, &bucket->head);
   } else {
      drm_bacon_gem_bo_free(bo);
   }
}
static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
                                                      time_t time)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   assert(p_atomic_read(&bo_gem->refcount) > 0);
   if (p_atomic_dec_zero(&bo_gem->refcount))
      drm_bacon_gem_bo_unreference_final(bo, time);
}
void
drm_bacon_bo_unreference(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   if (bo == NULL)
      return;

   assert(p_atomic_read(&bo_gem->refcount) > 0);

   if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
      drm_bacon_bufmgr *bufmgr = bo->bufmgr;
      struct timespec time;

      clock_gettime(CLOCK_MONOTONIC, &time);

      pthread_mutex_lock(&bufmgr->lock);

      if (p_atomic_dec_zero(&bo_gem->refcount)) {
         drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
         drm_bacon_gem_cleanup_bo_cache(bufmgr, time.tv_sec);
      }

      pthread_mutex_unlock(&bufmgr->lock);
   }
}
int
drm_bacon_bo_map(drm_bacon_bo *bo, int write_enable)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_set_domain set_domain;
   int ret;

   pthread_mutex_lock(&bufmgr->lock);

   if (bo_gem->map_count++ == 0)
      drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);

   if (!bo_gem->mem_virtual) {
      struct drm_i915_gem_mmap mmap_arg;

      DBG("bo_map: %d (%s), map_count=%d\n",
          bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo_gem->gem_handle;
      mmap_arg.size = bo->size;
      ret = drmIoctl(bufmgr->fd,
                     DRM_IOCTL_I915_GEM_MMAP,
                     &mmap_arg);
      if (ret != 0) {
         ret = -errno;
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo_gem->gem_handle,
             bo_gem->name, strerror(errno));
         if (--bo_gem->map_count == 0)
            drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
         pthread_mutex_unlock(&bufmgr->lock);
         return ret;
      }
      VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
      bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
   }
   DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
       bo_gem->mem_virtual);
   bo->virtual = bo_gem->mem_virtual;

   memclear(set_domain);
   set_domain.handle = bo_gem->gem_handle;
   set_domain.read_domains = I915_GEM_DOMAIN_CPU;
   if (write_enable)
      set_domain.write_domain = I915_GEM_DOMAIN_CPU;
   else
      set_domain.write_domain = 0;
   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_SET_DOMAIN,
                  &set_domain);
   if (ret != 0) {
      DBG("%s:%d: Error setting to CPU domain %d: %s\n",
          __FILE__, __LINE__, bo_gem->gem_handle,
          strerror(errno));
   }

   if (write_enable)
      bo_gem->mapped_cpu_write = true;

   drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
   VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
   pthread_mutex_unlock(&bufmgr->lock);

   return 0;
}
static int
map_gtt(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   int ret;

   if (bo_gem->map_count++ == 0)
      drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);

   /* Get a mapping of the buffer if we haven't before. */
   if (bo_gem->gtt_virtual == NULL) {
      struct drm_i915_gem_mmap_gtt mmap_arg;

      DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
          bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo_gem->gem_handle;

      /* Get the fake offset back... */
      ret = drmIoctl(bufmgr->fd,
                     DRM_IOCTL_I915_GEM_MMAP_GTT,
                     &mmap_arg);
      if (ret != 0) {
         ret = -errno;
         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
             __FILE__, __LINE__,
             bo_gem->gem_handle, bo_gem->name,
             strerror(errno));
         if (--bo_gem->map_count == 0)
            drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
         return ret;
      }

      /* and mmap it */
      bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
                                     MAP_SHARED, bufmgr->fd,
                                     mmap_arg.offset);
      if (bo_gem->gtt_virtual == MAP_FAILED) {
         bo_gem->gtt_virtual = NULL;
         ret = -errno;
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__,
             bo_gem->gem_handle, bo_gem->name,
             strerror(errno));
         if (--bo_gem->map_count == 0)
            drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
         return ret;
      }
   }

   bo->virtual = bo_gem->gtt_virtual;

   DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
       bo_gem->gtt_virtual);

   return 0;
}
int
drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_set_domain set_domain;
   int ret;

   pthread_mutex_lock(&bufmgr->lock);

   ret = map_gtt(bo);
   if (ret != 0) {
      pthread_mutex_unlock(&bufmgr->lock);
      return ret;
   }

   /* Now move it to the GTT domain so that the GPU and CPU
    * caches are flushed and the GPU isn't actively using the
    * buffer.
    *
    * The pagefault handler does this domain change for us when
    * it has unbound the BO from the GTT, but it's up to us to
    * tell it when we're about to use things if we had done
    * rendering and it still happens to be bound to the GTT.
    */
   memclear(set_domain);
   set_domain.handle = bo_gem->gem_handle;
   set_domain.read_domains = I915_GEM_DOMAIN_GTT;
   set_domain.write_domain = I915_GEM_DOMAIN_GTT;
   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_SET_DOMAIN,
                  &set_domain);
   if (ret != 0) {
      DBG("%s:%d: Error setting domain %d: %s\n",
          __FILE__, __LINE__, bo_gem->gem_handle,
          strerror(errno));
   }

   drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
   VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
   pthread_mutex_unlock(&bufmgr->lock);

   return 0;
}
/**
 * Performs a mapping of the buffer object like the normal GTT
 * mapping, but avoids waiting for the GPU to be done reading from or
 * rendering to the buffer.
 *
 * This is used in the implementation of GL_ARB_map_buffer_range: The
 * user asks to create a buffer, then does a mapping, fills some
 * space, runs a drawing command, then asks to map it again without
 * synchronizing because it guarantees that it won't write over the
 * data that the GPU is busy using (or, more specifically, that if it
 * does write over the data, it acknowledges that rendering is
 * undefined).
 */
int
drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
#ifdef HAVE_VALGRIND
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
#endif
   int ret;

   /* If the CPU cache isn't coherent with the GTT, then use a
    * regular synchronized mapping.  The problem is that we don't
    * track where the buffer was last used on the CPU side in
    * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
    * we would potentially corrupt the buffer even when the user
    * does reasonable things.
    */
   if (!bufmgr->has_llc)
      return drm_bacon_gem_bo_map_gtt(bo);

   pthread_mutex_lock(&bufmgr->lock);

   ret = map_gtt(bo);
   if (ret == 0) {
      drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
      VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
   }

   pthread_mutex_unlock(&bufmgr->lock);

   return ret;
}
int
drm_bacon_bo_unmap(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   int ret = 0;

   if (bo == NULL)
      return 0;

   pthread_mutex_lock(&bufmgr->lock);

   if (bo_gem->map_count <= 0) {
      DBG("attempted to unmap an unmapped bo\n");
      pthread_mutex_unlock(&bufmgr->lock);
      /* Preserve the old behaviour of just treating this as a
       * no-op rather than reporting the error.
       */
      return 0;
   }

   if (bo_gem->mapped_cpu_write) {
      struct drm_i915_gem_sw_finish sw_finish;

      /* Cause a flush to happen if the buffer's pinned for
       * scanout, so the results show up in a timely manner.
       * Unlike GTT set domains, this only does work if the
       * buffer should be scanout-related.
       */
      memclear(sw_finish);
      sw_finish.handle = bo_gem->gem_handle;
      ret = drmIoctl(bufmgr->fd,
                     DRM_IOCTL_I915_GEM_SW_FINISH,
                     &sw_finish);
      ret = ret == -1 ? -errno : 0;

      bo_gem->mapped_cpu_write = false;
   }

   /* We need to unmap after every invocation as we cannot track
    * an open vma for every bo as that will exhaust the system
    * limits and cause later failures.
    */
   if (--bo_gem->map_count == 0) {
      drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
      drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
      bo->virtual = NULL;
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return ret;
}
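
/* Usage sketch (added commentary, not from the original source): the CPU
 * mapping contract above is map -> use bo->virtual -> unmap.  Assumes
 * UNUSED from util/macros.h.
 */
static UNUSED int
example_fill_bo(drm_bacon_bo *bo, uint32_t value)
{
   int ret = drm_bacon_bo_map(bo, 1 /* write_enable */);
   if (ret != 0)
      return ret;

   /* drm_bacon_bo_map() has moved the BO to the CPU domain. */
   uint32_t *map = bo->virtual;
   for (unsigned long i = 0; i < bo->size / 4; i++)
      map[i] = value;

   /* Drops the map_count and issues SW_FINISH for CPU-written scanout. */
   return drm_bacon_bo_unmap(bo);
}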
int
drm_bacon_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
                     unsigned long size, const void *data)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_pwrite pwrite;
   int ret;

   memclear(pwrite);
   pwrite.handle = bo_gem->gem_handle;
   pwrite.offset = offset;
   pwrite.size = size;
   pwrite.data_ptr = (uint64_t) (uintptr_t) data;
   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_PWRITE,
                  &pwrite);
   if (ret != 0) {
      ret = -errno;
      DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
          __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
          (int)size, strerror(errno));
   }

   return ret;
}
int
drm_bacon_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
                         unsigned long size, void *data)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_pread pread;
   int ret;

   memclear(pread);
   pread.handle = bo_gem->gem_handle;
   pread.offset = offset;
   pread.size = size;
   pread.data_ptr = (uint64_t) (uintptr_t) data;
   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_PREAD,
                  &pread);
   if (ret != 0) {
      ret = -errno;
      DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
          __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
          (int)size, strerror(errno));
   }

   return ret;
}
/** Waits for all GPU rendering with the object to have completed. */
void
drm_bacon_bo_wait_rendering(drm_bacon_bo *bo)
{
   drm_bacon_gem_bo_start_gtt_access(bo, 1);
}
/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time. Otherwise some negative return
 * value describes the error. Of particular interest is -ETIME when the wait has
 * failed to yield the desired result.
 *
 * Similar to drm_bacon_gem_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time. Another subtle
 * difference is the internal locking semantics are different (this variant does
 * not hold the lock for the duration of the wait). This makes the wait subject
 * to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait will
 * not guarantee that the buffer is re-issued via another thread, or a flinked
 * handle. Userspace must make sure this race does not occur if such precision
 * is important.
 *
 * Note that some kernels have broken the infinite wait for negative values
 * promise, upgrade to latest stable kernels if this is the case.
 */
int
drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_wait wait;
   int ret;

   memclear(wait);
   wait.bo_handle = bo_gem->gem_handle;
   wait.timeout_ns = timeout_ns;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   if (ret == -1)
      return -errno;

   return ret;
}
/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
 *
 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_set_domain set_domain;
   int ret;

   memclear(set_domain);
   set_domain.handle = bo_gem->gem_handle;
   set_domain.read_domains = I915_GEM_DOMAIN_GTT;
   set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_SET_DOMAIN,
                  &set_domain);
   if (ret != 0) {
      DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
          __FILE__, __LINE__, bo_gem->gem_handle,
          set_domain.read_domains, set_domain.write_domain,
          strerror(errno));
   }
}
static void
drm_bacon_bufmgr_gem_destroy(drm_bacon_bufmgr *bufmgr)
{
   free(bufmgr->exec2_objects);
   free(bufmgr->exec_bos);

   pthread_mutex_destroy(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int i = 0; i < bufmgr->num_buckets; i++) {
      struct drm_bacon_gem_bo_bucket *bucket =
         &bufmgr->cache_bucket[i];
      drm_bacon_bo_gem *bo_gem;

      while (!list_empty(&bucket->head)) {
         bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
                             bucket->head.next, head);
         list_del(&bo_gem->head);

         drm_bacon_gem_bo_free(&bo_gem->bo);
      }
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   free(bufmgr);
}
/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
int
drm_bacon_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
                        drm_bacon_bo *target_bo, uint32_t target_offset,
                        uint32_t read_domains, uint32_t write_domain)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;

   if (bo_gem->has_error)
      return -ENOMEM;

   if (target_bo_gem->has_error) {
      bo_gem->has_error = true;
      return -ENOMEM;
   }

   /* Create a new relocation list if needed */
   if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
      return -ENOMEM;

   /* Check overflow */
   assert(bo_gem->reloc_count < bufmgr->max_relocs);

   /* Check args */
   assert(offset <= bo->size - 4);
   assert((write_domain & (write_domain - 1)) == 0);

   /* Make sure that we're not adding a reloc to something whose size has
    * already been accounted for.
    */
   assert(!bo_gem->used_as_reloc_target);
   if (target_bo_gem != bo_gem) {
      target_bo_gem->used_as_reloc_target = true;
      bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
   }

   bo_gem->reloc_bos[bo_gem->reloc_count] = target_bo;
   if (target_bo != bo)
      drm_bacon_bo_reference(target_bo);

   bo_gem->relocs[bo_gem->reloc_count].offset = offset;
   bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
   bo_gem->relocs[bo_gem->reloc_count].target_handle =
      target_bo_gem->gem_handle;
   bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
   bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
   bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
   bo_gem->reloc_count++;

   return 0;
}
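
/* Usage sketch (added commentary, not from the original source): point a
 * dword inside a batch/state BO at another BO.  The value already written
 * at 'dword_offset' must match target_bo->offset64 + target_offset, or the
 * kernel rewrites it at execbuf time.  Assumes UNUSED from util/macros.h.
 */
static UNUSED int
example_emit_pointer(drm_bacon_bo *state_bo, uint32_t dword_offset,
                     drm_bacon_bo *target_bo)
{
   return drm_bacon_bo_emit_reloc(state_bo, dword_offset,
                                  target_bo, 0 /* target_offset */,
                                  I915_GEM_DOMAIN_RENDER,
                                  0 /* no write domain: read-only use */);
}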
int
drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   return bo_gem->reloc_count;
}
/**
 * Removes existing relocation entries in the BO after "start".
 *
 * This allows a user to avoid a two-step process for state setup with
 * counting up all the buffer objects and doing a
 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
 * relocations for the state setup.  Instead, save the state of the
 * batchbuffer including drm_bacon_gem_get_reloc_count(), emit all the
 * state, and then check if it still fits in the aperture.
 *
 * Any further drm_bacon_bufmgr_check_aperture_space() queries
 * involving this buffer in the tree are undefined after this call.
 */
void
drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   int i;
   struct timespec time;

   clock_gettime(CLOCK_MONOTONIC, &time);

   assert(bo_gem->reloc_count >= start);

   /* Unreference the cleared target buffers */
   pthread_mutex_lock(&bufmgr->lock);

   for (i = start; i < bo_gem->reloc_count; i++) {
      drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_bos[i];
      if (&target_bo_gem->bo != bo) {
         drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
                                                   time.tv_sec);
      }
   }
   bo_gem->reloc_count = start;

   pthread_mutex_unlock(&bufmgr->lock);
}
static void
drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
   int i;

   if (bo_gem->relocs == NULL)
      return;

   for (i = 0; i < bo_gem->reloc_count; i++) {
      drm_bacon_bo *target_bo = bo_gem->reloc_bos[i];

      if (target_bo == bo)
         continue;

      drm_bacon_gem_bo_mark_mmaps_incoherent(bo);

      /* Continue walking the tree depth-first. */
      drm_bacon_gem_bo_process_reloc2(target_bo);

      /* Add the target to the validate list */
      drm_bacon_add_validate_buffer2(target_bo);
   }
}
static void
drm_bacon_update_buffer_offsets2(drm_bacon_bufmgr *bufmgr)
{
   int i;

   for (i = 0; i < bufmgr->exec_count; i++) {
      drm_bacon_bo *bo = bufmgr->exec_bos[i];
      drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;

      /* Update the buffer offset */
      if (bufmgr->exec2_objects[i].offset != bo->offset64) {
         DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
             bo_gem->gem_handle, bo_gem->name,
             upper_32_bits(bo->offset64),
             lower_32_bits(bo->offset64),
             upper_32_bits(bufmgr->exec2_objects[i].offset),
             lower_32_bits(bufmgr->exec2_objects[i].offset));
         bo->offset64 = bufmgr->exec2_objects[i].offset;
      }
   }
}
static int
do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
         int in_fence, int *out_fence,
         unsigned int flags)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_execbuffer2 execbuf;
   int ret = 0;
   int i;

   if (to_bo_gem(bo)->has_error)
      return -ENOMEM;

   pthread_mutex_lock(&bufmgr->lock);
   /* Update indices and set up the validate list. */
   drm_bacon_gem_bo_process_reloc2(bo);

   /* Add the batch buffer to the validation list.  There are no relocations
    * pointing to it.
    */
   drm_bacon_add_validate_buffer2(bo);

   memclear(execbuf);
   execbuf.buffers_ptr = (uintptr_t)bufmgr->exec2_objects;
   execbuf.buffer_count = bufmgr->exec_count;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = used;
   execbuf.cliprects_ptr = 0;
   execbuf.num_cliprects = 0;
   execbuf.DR1 = 0;
   execbuf.DR4 = 0;
   execbuf.flags = flags;
   if (ctx == NULL)
      i915_execbuffer2_set_context_id(execbuf, 0);
   else
      i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
   execbuf.rsvd2 = 0;
   if (in_fence != -1) {
      execbuf.rsvd2 = in_fence;
      execbuf.flags |= I915_EXEC_FENCE_IN;
   }
   if (out_fence != NULL) {
      *out_fence = -1;
      execbuf.flags |= I915_EXEC_FENCE_OUT;
   }

   if (bufmgr->no_exec)
      goto skip_execution;

   ret = drmIoctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
                  &execbuf);
   if (ret != 0) {
      ret = -errno;
      if (ret == -ENOSPC) {
         DBG("Execbuffer fails to pin. "
             "Estimate: %u. Actual: %u. Available: %u\n",
             drm_bacon_gem_estimate_batch_space(bufmgr->exec_bos,
                                                bufmgr->exec_count),
             drm_bacon_gem_compute_batch_space(bufmgr->exec_bos,
                                               bufmgr->exec_count),
             (unsigned int) bufmgr->gtt_size);
      }
   }
   drm_bacon_update_buffer_offsets2(bufmgr);

   if (ret == 0 && out_fence != NULL)
      *out_fence = execbuf.rsvd2 >> 32;

skip_execution:
   if (INTEL_DEBUG & DEBUG_BUFMGR)
      drm_bacon_gem_dump_validation_list(bufmgr);

   for (i = 0; i < bufmgr->exec_count; i++) {
      drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr->exec_bos[i]);

      bo_gem->idle = false;

      /* Disconnect the buffer from the validate list */
      bo_gem->validate_index = -1;
      bufmgr->exec_bos[i] = NULL;
   }
   bufmgr->exec_count = 0;
   pthread_mutex_unlock(&bufmgr->lock);

   return ret;
}
int
drm_bacon_bo_exec(drm_bacon_bo *bo, int used)
{
   return do_exec2(bo, used, NULL, -1, NULL, I915_EXEC_RENDER);
}
int
drm_bacon_bo_mrb_exec(drm_bacon_bo *bo, int used, unsigned int flags)
{
   return do_exec2(bo, used, NULL, -1, NULL, flags);
}
int
drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
                              int used, unsigned int flags)
{
   return do_exec2(bo, used, ctx, -1, NULL, flags);
}
int
drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
                            drm_bacon_context *ctx,
                            int used,
                            int in_fence,
                            int *out_fence,
                            unsigned int flags)
{
   return do_exec2(bo, used, ctx, in_fence, out_fence, flags);
}
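
/* Usage sketch (added commentary, not from the original source): submit a
 * batch and collect a fence FD for it, as the _WR execbuf path above
 * allows.  'used' is the batch length in bytes.  Assumes UNUSED from
 * util/macros.h.
 */
static UNUSED int
example_submit_with_fence(drm_bacon_bo *batch_bo, int used,
                          drm_bacon_context *ctx, int *out_fence_fd)
{
   return drm_bacon_gem_bo_fence_exec(batch_bo, ctx, used,
                                      -1 /* no in-fence */,
                                      out_fence_fd, I915_EXEC_RENDER);
}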
static int
drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
                                     uint32_t tiling_mode,
                                     uint32_t stride)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   struct drm_i915_gem_set_tiling set_tiling;
   int ret;

   if (bo_gem->global_name == 0 &&
       tiling_mode == bo_gem->tiling_mode &&
       stride == bo_gem->stride)
      return 0;

   memset(&set_tiling, 0, sizeof(set_tiling));
   do {
      /* set_tiling is slightly broken and overwrites the
       * input on the error path, so we have to open code
       * drmIoctl.
       */
      set_tiling.handle = bo_gem->gem_handle;
      set_tiling.tiling_mode = tiling_mode;
      set_tiling.stride = stride;

      ret = ioctl(bufmgr->fd,
                  DRM_IOCTL_I915_GEM_SET_TILING,
                  &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   if (ret == -1)
      return -errno;

   bo_gem->tiling_mode = set_tiling.tiling_mode;
   bo_gem->swizzle_mode = set_tiling.swizzle_mode;
   bo_gem->stride = set_tiling.stride;
   return 0;
}
int
drm_bacon_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
                        uint32_t stride)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   int ret;

   /* Linear buffers have no stride. By ensuring that we only ever use
    * stride 0 with linear buffers, we simplify our code.
    */
   if (*tiling_mode == I915_TILING_NONE)
      stride = 0;

   ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
   if (ret == 0)
      drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);

   *tiling_mode = bo_gem->tiling_mode;
   return ret;
}
int
drm_bacon_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
                        uint32_t *swizzle_mode)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   *tiling_mode = bo_gem->tiling_mode;
   *swizzle_mode = bo_gem->swizzle_mode;
   return 0;
}
drm_bacon_bo *
drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
{
   int ret;
   uint32_t handle;
   drm_bacon_bo_gem *bo_gem;
   struct drm_i915_gem_get_tiling get_tiling;

   pthread_mutex_lock(&bufmgr->lock);
   ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
      pthread_mutex_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us. Just as
    * for named buffers, we must not create two bo's pointing at the same
    * kernel object
    */
   bo_gem = hash_find_bo(bufmgr->handle_table, handle);
   if (bo_gem) {
      drm_bacon_bo_reference(&bo_gem->bo);
      goto out;
   }

   bo_gem = calloc(1, sizeof(*bo_gem));
   if (!bo_gem)
      goto out;

   p_atomic_set(&bo_gem->refcount, 1);
   list_inithead(&bo_gem->vma_list);

   /* Determine size of bo.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  If we have kernel 3.12 or
    * later, we can lseek on the prime fd to get the size.  Older
    * kernels will just fail, in which case we fall back to the
    * provided (estimated or guess size). */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo_gem->bo.size = ret;
   else
      bo_gem->bo.size = size;

   bo_gem->bo.handle = handle;
   bo_gem->bo.bufmgr = bufmgr;

   bo_gem->gem_handle = handle;
   _mesa_hash_table_insert(bufmgr->handle_table,
                           &bo_gem->gem_handle, bo_gem);

   bo_gem->name = "prime";
   bo_gem->validate_index = -1;
   bo_gem->used_as_reloc_target = false;
   bo_gem->has_error = false;
   bo_gem->reusable = false;

   memclear(get_tiling);
   get_tiling.handle = bo_gem->gem_handle;
   if (drmIoctl(bufmgr->fd,
                DRM_IOCTL_I915_GEM_GET_TILING,
                &get_tiling))
      goto err;

   bo_gem->tiling_mode = get_tiling.tiling_mode;
   bo_gem->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */
   drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);

out:
   pthread_mutex_unlock(&bufmgr->lock);
   return &bo_gem->bo;

err:
   drm_bacon_gem_bo_free(&bo_gem->bo);
   pthread_mutex_unlock(&bufmgr->lock);
   return NULL;
}
int
drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   if (drmPrimeHandleToFD(bufmgr->fd, bo_gem->gem_handle,
                          DRM_CLOEXEC, prime_fd) != 0)
      return -errno;

   bo_gem->reusable = false;

   return 0;
}
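
/* Usage sketch (added commentary, not from the original source): dma-buf
 * round trip.  The exporter becomes non-reusable; the importer looks the
 * handle up in handle_table, so both sides share one bo_gem per kernel
 * object.  Assumes UNUSED from util/macros.h.
 */
static UNUSED drm_bacon_bo *
example_share_via_prime(drm_bacon_bo *bo, drm_bacon_bufmgr *importer)
{
   int fd;

   if (drm_bacon_bo_gem_export_to_prime(bo, &fd) != 0)
      return NULL;

   /* The size argument is only a fallback for pre-3.12 kernels. */
   drm_bacon_bo *imported =
      drm_bacon_bo_gem_create_from_prime(importer, fd, bo->size);
   close(fd);
   return imported;
}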
int
drm_bacon_bo_flink(drm_bacon_bo *bo, uint32_t *name)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   if (!bo_gem->global_name) {
      struct drm_gem_flink flink;

      memclear(flink);
      flink.handle = bo_gem->gem_handle;
      if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      pthread_mutex_lock(&bufmgr->lock);
      if (!bo_gem->global_name) {
         bo_gem->global_name = flink.name;
         bo_gem->reusable = false;

         _mesa_hash_table_insert(bufmgr->name_table,
                                 &bo_gem->global_name, bo_gem);
      }
      pthread_mutex_unlock(&bufmgr->lock);
   }

   *name = bo_gem->global_name;
   return 0;
}
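
/* Usage sketch (added commentary, not from the original source): legacy
 * flink sharing.  drm_bacon_bo_flink() publishes a global name; another
 * process (or the same one) turns it back into a BO with
 * drm_bacon_bo_gem_create_from_name().  Assumes UNUSED from util/macros.h.
 */
static UNUSED drm_bacon_bo *
example_reopen_by_name(drm_bacon_bo *bo, drm_bacon_bufmgr *other)
{
   uint32_t name;

   if (drm_bacon_bo_flink(bo, &name) != 0)
      return NULL;

   return drm_bacon_bo_gem_create_from_name(other, "shared", name);
}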
/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
{
   bufmgr->bo_reuse = true;
}
/**
 * Return the additional aperture space required by the tree of buffer objects
 * rooted at bo.
 */
static int
drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   int i;
   int total = 0;

   if (bo == NULL || bo_gem->included_in_check_aperture)
      return 0;

   total += bo->size;
   bo_gem->included_in_check_aperture = true;

   for (i = 0; i < bo_gem->reloc_count; i++)
      total +=
          drm_bacon_gem_bo_get_aperture_space(bo_gem->reloc_bos[i]);

   return total;
}
/**
 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
 * for the next drm_bacon_bufmgr_check_aperture_space() call.
 */
static void
drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   int i;

   if (bo == NULL || !bo_gem->included_in_check_aperture)
      return;

   bo_gem->included_in_check_aperture = false;

   for (i = 0; i < bo_gem->reloc_count; i++)
      drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->reloc_bos[i]);
}
/**
 * Return a conservative estimate for the amount of aperture required
 * for a collection of buffers. This may double-count some buffers.
 */
static unsigned int
drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
{
   int i;
   unsigned int total = 0;

   for (i = 0; i < count; i++) {
      drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];

      if (bo_gem != NULL)
         total += bo_gem->reloc_tree_size;
   }
   return total;
}
/**
 * Return the amount of aperture needed for a collection of buffers.
 * This avoids double counting any buffers, at the cost of looking
 * at every buffer in the set.
 */
static unsigned int
drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
{
   int i;
   unsigned int total = 0;

   for (i = 0; i < count; i++) {
      total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
      /* For the first buffer object in the array, we get an
       * accurate count back for its reloc_tree size (since nothing
       * had been flagged as being counted yet).  We can save that
       * value out as a more conservative reloc_tree_size that
       * avoids double-counting target buffers.  Since the first
       * buffer happens to usually be the batch buffer in our
       * callers, this can pull us back from doing the tree
       * walk on every new batch emit.
       */
      if (i == 0) {
         drm_bacon_bo_gem *bo_gem =
             (drm_bacon_bo_gem *) bo_array[i];
         bo_gem->reloc_tree_size = total;
      }
   }

   for (i = 0; i < count; i++)
      drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
   return total;
}
/**
 * Return -1 if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
int
drm_bacon_bufmgr_check_aperture_space(drm_bacon_bo **bo_array, int count)
{
   drm_bacon_bufmgr *bufmgr = bo_array[0]->bufmgr;
   unsigned int total = 0;
   unsigned int threshold = bufmgr->gtt_size * 3 / 4;

   total = drm_bacon_gem_estimate_batch_space(bo_array, count);

   if (total > threshold)
      total = drm_bacon_gem_compute_batch_space(bo_array, count);

   if (total > threshold) {
      DBG("check_space: overflowed available aperture, "
          "%dkb vs %dkb\n",
          total / 1024, (int)bufmgr->gtt_size / 1024);
      return -ENOSPC;
   } else {
      DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
          (int)bufmgr->gtt_size / 1024);
      return 0;
   }
}
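
/* Usage sketch (added commentary, not from the original source): the
 * intended call pattern is to test the batch plus its reloc tree before
 * emitting, and flush first on -ENOSPC.  Assumes UNUSED from util/macros.h.
 */
static UNUSED bool
example_batch_fits(drm_bacon_bo *batch_bo)
{
   drm_bacon_bo *check[] = { batch_bo };

   return drm_bacon_bufmgr_check_aperture_space(check, 1) == 0;
}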
/**
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
int
drm_bacon_bo_disable_reuse(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   bo_gem->reusable = false;
   return 0;
}
int
drm_bacon_bo_is_reusable(drm_bacon_bo *bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   return bo_gem->reusable;
}
static int
_drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
{
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
   int i;

   for (i = 0; i < bo_gem->reloc_count; i++) {
      if (bo_gem->reloc_bos[i] == target_bo)
         return 1;
      if (bo == bo_gem->reloc_bos[i])
         continue;
      if (_drm_bacon_gem_bo_references(bo_gem->reloc_bos[i],
                                       target_bo))
         return 1;
   }

   return 0;
}
/** Return true if target_bo is referenced by bo's relocation tree. */
int
drm_bacon_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
{
   drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;

   if (bo == NULL || target_bo == NULL)
      return 0;
   if (target_bo_gem->used_as_reloc_target)
      return _drm_bacon_gem_bo_references(bo, target_bo);
   return 0;
}
static void
add_bucket(drm_bacon_bufmgr *bufmgr, int size)
{
   unsigned int i = bufmgr->num_buckets;

   assert(i < ARRAY_SIZE(bufmgr->cache_bucket));

   list_inithead(&bufmgr->cache_bucket[i].head);
   bufmgr->cache_bucket[i].size = size;
   bufmgr->num_buckets++;
}
static void
init_cache_buckets(drm_bacon_bufmgr *bufmgr)
{
   unsigned long size, cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(bufmgr, 4096);
   add_bucket(bufmgr, 4096 * 2);
   add_bucket(bufmgr, 4096 * 3);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
      add_bucket(bufmgr, size);

      add_bucket(bufmgr, size + size * 1 / 4);
      add_bucket(bufmgr, size + size * 2 / 4);
      add_bucket(bufmgr, size + size * 3 / 4);
   }
}
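
/* Illustrative note (added commentary, not from the original source): the
 * loop above yields bucket sizes of 4KB, 8KB, 12KB, then 16KB, 20KB, 24KB,
 * 28KB, then 32KB, 40KB, 48KB, 56KB, and so on up to 64MB -- each power of
 * two plus three evenly spaced intermediate sizes, which is what the
 * cache_bucket[14 * 4] bound accounts for.
 */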
void
drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
{
   bufmgr->vma_max = limit;

   drm_bacon_gem_bo_purge_vma_cache(bufmgr);
}
drm_bacon_context *
drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
{
   struct drm_i915_gem_context_create create;
   drm_bacon_context *context = NULL;
   int ret;

   context = calloc(1, sizeof(*context));
   if (!context)
      return NULL;

   memclear(create);
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
   if (ret != 0) {
      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
          strerror(errno));
      free(context);
      return NULL;
   }

   context->ctx_id = create.ctx_id;
   context->bufmgr = bufmgr;

   return context;
}
int
drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
{
   if (ctx == NULL)
      return -EINVAL;

   *ctx_id = ctx->ctx_id;
   return 0;
}
void
drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
{
   struct drm_i915_gem_context_destroy destroy;
   int ret;

   if (ctx == NULL)
      return;

   memclear(destroy);

   destroy.ctx_id = ctx->ctx_id;
   ret = drmIoctl(ctx->bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
                  &destroy);
   if (ret != 0)
      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
              strerror(errno));

   free(ctx);
}
int
drm_bacon_get_reset_stats(drm_bacon_context *ctx,
                          uint32_t *reset_count,
                          uint32_t *active,
                          uint32_t *pending)
{
   struct drm_i915_reset_stats stats;
   int ret;

   if (ctx == NULL)
      return -EINVAL;

   memclear(stats);

   stats.ctx_id = ctx->ctx_id;
   ret = drmIoctl(ctx->bufmgr->fd,
                  DRM_IOCTL_I915_GET_RESET_STATS,
                  &stats);
   if (ret == 0) {
      if (reset_count != NULL)
         *reset_count = stats.reset_count;

      if (active != NULL)
         *active = stats.batch_active;

      if (pending != NULL)
         *pending = stats.batch_pending;
   }

   return ret;
}
int
drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
                   uint32_t offset,
                   uint64_t *result)
{
   struct drm_i915_reg_read reg_read;
   int ret;

   memclear(reg_read);
   reg_read.offset = offset;

   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

   *result = reg_read.val;
   return ret;
}
static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct list_head bufmgr_list = { &bufmgr_list, &bufmgr_list };
static drm_bacon_bufmgr *
drm_bacon_bufmgr_gem_find(int fd)
{
   list_for_each_entry(drm_bacon_bufmgr,
                       bufmgr, &bufmgr_list, managers) {
      if (bufmgr->fd == fd) {
         p_atomic_inc(&bufmgr->refcount);
         return bufmgr;
      }
   }

   return NULL;
}
void
drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr)
{
   if (atomic_add_unless(&bufmgr->refcount, -1, 1)) {
      pthread_mutex_lock(&bufmgr_list_mutex);

      if (p_atomic_dec_zero(&bufmgr->refcount)) {
         list_del(&bufmgr->managers);
         drm_bacon_bufmgr_gem_destroy(bufmgr);
      }

      pthread_mutex_unlock(&bufmgr_list_mutex);
   }
}
void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   if (bo_gem->gtt_virtual)
      return bo_gem->gtt_virtual;

   pthread_mutex_lock(&bufmgr->lock);
   if (bo_gem->gtt_virtual == NULL) {
      struct drm_i915_gem_mmap_gtt mmap_arg;
      void *ptr;

      DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
          bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

      if (bo_gem->map_count++ == 0)
         drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);

      memclear(mmap_arg);
      mmap_arg.handle = bo_gem->gem_handle;

      /* Get the fake offset back... */
      ptr = MAP_FAILED;
      if (drmIoctl(bufmgr->fd,
                   DRM_IOCTL_I915_GEM_MMAP_GTT,
                   &mmap_arg) == 0) {
         /* and mmap it */
         ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
                        MAP_SHARED, bufmgr->fd,
                        mmap_arg.offset);
      }
      if (ptr == MAP_FAILED) {
         if (--bo_gem->map_count == 0)
            drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
         ptr = NULL;
      }

      bo_gem->gtt_virtual = ptr;
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return bo_gem->gtt_virtual;
}
void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   if (bo_gem->mem_virtual)
      return bo_gem->mem_virtual;

   pthread_mutex_lock(&bufmgr->lock);
   if (!bo_gem->mem_virtual) {
      struct drm_i915_gem_mmap mmap_arg;

      if (bo_gem->map_count++ == 0)
         drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);

      DBG("bo_map: %d (%s), map_count=%d\n",
          bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo_gem->gem_handle;
      mmap_arg.size = bo->size;
      if (drmIoctl(bufmgr->fd,
                   DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo_gem->gem_handle,
             bo_gem->name, strerror(errno));
         if (--bo_gem->map_count == 0)
            drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
      } else {
         VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
         bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
      }
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return bo_gem->mem_virtual;
}
void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
{
   drm_bacon_bufmgr *bufmgr = bo->bufmgr;
   drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;

   if (bo_gem->wc_virtual)
      return bo_gem->wc_virtual;

   pthread_mutex_lock(&bufmgr->lock);
   if (!bo_gem->wc_virtual) {
      struct drm_i915_gem_mmap mmap_arg;

      if (bo_gem->map_count++ == 0)
         drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);

      DBG("bo_map: %d (%s), map_count=%d\n",
          bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo_gem->gem_handle;
      mmap_arg.size = bo->size;
      mmap_arg.flags = I915_MMAP_WC;
      if (drmIoctl(bufmgr->fd,
                   DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo_gem->gem_handle,
             bo_gem->name, strerror(errno));
         if (--bo_gem->map_count == 0)
            drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
      } else {
         VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
         bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
      }
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return bo_gem->wc_virtual;
}
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage map buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_bacon_bufmgr *
drm_bacon_bufmgr_gem_init(struct gen_device_info *devinfo,
                          int fd, int batch_size)
{
   drm_bacon_bufmgr *bufmgr;
   struct drm_i915_gem_get_aperture aperture;

   pthread_mutex_lock(&bufmgr_list_mutex);

   bufmgr = drm_bacon_bufmgr_gem_find(fd);
   if (bufmgr)
      goto exit;

   bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      goto exit;

   bufmgr->fd = fd;
   p_atomic_set(&bufmgr->refcount, 1);

   if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
      free(bufmgr);
      bufmgr = NULL;
      goto exit;
   }

   memclear(aperture);
   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
   bufmgr->gtt_size = aperture.aper_available_size;

   bufmgr->has_llc = devinfo->has_llc;

   /* Let's go with one relocation per every 2 dwords (but round down a bit
    * since a power of two will mean an extra page allocation for the reloc
    * buffer).
    *
    * Every 4 was too few for the blender benchmark.
    */
   bufmgr->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

   init_cache_buckets(bufmgr);

   list_inithead(&bufmgr->vma_cache);
   bufmgr->vma_max = -1; /* unlimited by default */

   list_add(&bufmgr->managers, &bufmgr_list);

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);

exit:
   pthread_mutex_unlock(&bufmgr_list_mutex);

   return bufmgr;
}