/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */
#include <util/u_atomic.h>
#include <sys/ioctl.h>
#include <sys/types.h>

#ifndef ETIME
#define ETIME ETIMEDOUT
#endif

#include "common/gen_debug.h"
#include "common/gen_device_info.h"
#include "libdrm_macros.h"
#include "main/macros.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "brw_bufmgr.h"

#define memclear(s) memset(&s, 0, sizeof(s))

#define FILE_DEBUG_FLAG DEBUG_BUFMGR
static inline int
atomic_add_unless(int *v, int add, int unless)
{
   int c, old;
   c = p_atomic_read(v);
   while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
      c = old;
   return c == unless;
}
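
/* Note the return value: non-zero means the add was *not* performed because
 * the counter already held `unless`.  brw_bo_unreference() below relies on
 * this, using atomic_add_unless(&bo->refcount, -1, 1) to detect a potential
 * final unreference that must then be re-checked under the bufmgr lock.
 */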
struct bo_cache_bucket {
   struct list_head head;
   unsigned long size;
};

struct brw_bufmgr {
   int fd;

   pthread_mutex_t lock;

   /** Array of lists of cached gem objects of power-of-two sizes */
   struct bo_cache_bucket cache_bucket[14 * 4];
   int num_buckets;
   time_t time;

   struct hash_table *name_table;
   struct hash_table *handle_table;

   struct list_head vma_cache;
   int vma_count, vma_open, vma_max;

   unsigned int has_llc:1;
   unsigned int bo_reuse:1;
};
static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
                                  uint32_t stride);

static void bo_free(struct brw_bo *bo);
static uint32_t
key_hash_uint(const void *key)
{
   return _mesa_hash_data(key, 4);
}

static bool
key_uint_equal(const void *a, const void *b)
{
   return *((unsigned *) a) == *((unsigned *) b);
}
static struct brw_bo *
hash_find_bo(struct hash_table *ht, unsigned int key)
{
   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
   return entry ? (struct brw_bo *) entry->data : NULL;
}
static unsigned long
bo_tile_size(struct brw_bufmgr *bufmgr, unsigned long size,
             uint32_t *tiling_mode)
{
   if (*tiling_mode == I915_TILING_NONE)
      return size;

   /* 965+ just need multiples of page size for tiling */
   return ALIGN(size, 4096);
}
/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
bo_tile_pitch(struct brw_bufmgr *bufmgr,
              unsigned long pitch, uint32_t *tiling_mode)
{
   unsigned long tile_width;

   /* If untiled, then just align it so that we can do rendering
    * to it with the 3D engine.
    */
   if (*tiling_mode == I915_TILING_NONE)
      return ALIGN(pitch, 64);

   if (*tiling_mode == I915_TILING_X)
      tile_width = 512;
   else
      tile_width = 128;

   /* 965 is flexible */
   return ALIGN(pitch, tile_width);
}
static struct bo_cache_bucket *
bucket_for_size(struct brw_bufmgr *bufmgr, unsigned long size)
{
   int i;

   for (i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
      if (bucket->size >= size) {
         return bucket;
      }
   }

   return NULL;
}
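
/* Example: with the buckets set up by init_cache_buckets() below, a
 * 5000-byte request matches the 8192-byte bucket, while a request larger
 * than the biggest bucket returns NULL and is allocated at its exact size
 * without ever being cached for reuse.
 */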
void
brw_bo_reference(struct brw_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}
bool
brw_bo_busy(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_busy busy;
   int ret;

   memclear(busy);
   busy.handle = bo->gem_handle;

   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
   if (ret == 0)
      bo->idle = !busy.busy;

   return (ret == 0 && busy.busy);
}
int
brw_bo_madvise(struct brw_bo *bo, int state)
{
   struct drm_i915_gem_madvise madv;

   memclear(madv);
   madv.handle = bo->gem_handle;
   madv.madv = state;
   madv.retained = 1;
   drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}
/* drop the oldest entries that have been purged by the kernel */
static void
brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
                          struct bo_cache_bucket *bucket)
{
   list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
      if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
         break;

      list_del(&bo->head);
      bo_free(bo);
   }
}
static struct brw_bo *
bo_alloc_internal(struct brw_bufmgr *bufmgr,
                  const char *name,
                  unsigned long size,
                  unsigned long flags,
                  uint32_t tiling_mode,
                  unsigned long stride, unsigned int alignment)
{
   struct brw_bo *bo;
   unsigned int page_size = getpagesize();
   int ret;
   struct bo_cache_bucket *bucket;
   bool alloc_from_cache;
   unsigned long bo_size;
   bool for_render = false;

   if (flags & BO_ALLOC_FOR_RENDER)
      for_render = true;

   /* Round the allocated size up to a power of two number of pages. */
   bucket = bucket_for_size(bufmgr, size);

   /* If we don't have caching at this size, don't actually round the
    * allocation up.
    */
   if (bucket == NULL) {
      bo_size = size;
      if (bo_size < page_size)
         bo_size = page_size;
   } else {
      bo_size = bucket->size;
   }

   pthread_mutex_lock(&bufmgr->lock);
   /* Get a buffer out of the cache if available */
retry:
   alloc_from_cache = false;
   if (bucket != NULL && !list_empty(&bucket->head)) {
      if (for_render) {
         /* Allocate new render-target BOs from the tail (MRU)
          * of the list, as it will likely be hot in the GPU
          * cache and in the aperture for us.
          */
         bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
         list_del(&bo->head);
         alloc_from_cache = true;
         bo->align = alignment;
      } else {
         assert(alignment == 0);
         /* For non-render-target BOs (where we're probably
          * going to map it first thing in order to fill it
          * with data), check if the last BO in the cache is
          * unbusy, and only reuse in that case. Otherwise,
          * allocating a new buffer is probably faster than
          * waiting for the GPU to finish.
          */
         bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
         if (!brw_bo_busy(bo)) {
            alloc_from_cache = true;
            list_del(&bo->head);
         }
      }

      if (alloc_from_cache) {
         if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
            bo_free(bo);
            brw_bo_cache_purge_bucket(bufmgr, bucket);
            goto retry;
         }

         if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
            bo_free(bo);
            goto retry;
         }
      }
   }

   if (!alloc_from_cache) {
      struct drm_i915_gem_create create;

      bo = calloc(1, sizeof(*bo));
      if (!bo)
         goto err;

      /* bo_free calls list_del() for an uninitialized
         list (vma_list), so better set the list head here */
      list_inithead(&bo->vma_list);

      bo->size = bo_size;

      memclear(create);
      create.size = bo_size;

      ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
      if (ret != 0) {
         free(bo);
         goto err;
      }

      bo->gem_handle = create.handle;
      _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

      bo->bufmgr = bufmgr;
      bo->align = alignment;

      bo->tiling_mode = I915_TILING_NONE;
      bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
      bo->stride = 0;

      if (bo_set_tiling_internal(bo, tiling_mode, stride))
         goto err_free;
   }

   bo->name = name;
   p_atomic_set(&bo->refcount, 1);
   bo->reusable = true;

   pthread_mutex_unlock(&bufmgr->lock);

   DBG("bo_create: buf %d (%s) %ldb\n", bo->gem_handle, bo->name, size);

   return bo;

err_free:
   bo_free(bo);
err:
   pthread_mutex_unlock(&bufmgr->lock);
   return NULL;
}
struct brw_bo *
brw_bo_alloc(struct brw_bufmgr *bufmgr,
             const char *name, unsigned long size, unsigned int alignment)
{
   return bo_alloc_internal(bufmgr, name, size, 0, I915_TILING_NONE, 0, 0);
}
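
/* Illustrative usage sketch (not part of this file): allocate a linear
 * buffer, fill it through a CPU mapping, and drop the local reference.
 *
 *    struct brw_bo *bo = brw_bo_alloc(bufmgr, "temp upload", 4096, 0);
 *    if (bo && brw_bo_map(bo, true) == 0) {
 *       memcpy(bo->virtual, data, 4096);
 *       brw_bo_unmap(bo);
 *    }
 *    brw_bo_unreference(bo);
 *
 * The name is only a debug label, and the requested size is rounded up to
 * the containing cache-bucket size when buffer reuse is enabled.
 */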
struct brw_bo *
brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
                   int x, int y, int cpp, uint32_t *tiling_mode,
                   unsigned long *pitch, unsigned long flags)
{
   unsigned long size, stride;
   uint32_t tiling;

   do {
      unsigned long aligned_y, height_alignment;

      tiling = *tiling_mode;

      /* If we're tiled, our allocations are in 8 or 32-row blocks,
       * so failure to align our height means that we won't allocate
       * enough pages.
       *
       * If we're untiled, we still have to align to 2 rows high
       * because the data port accesses 2x2 blocks even if the
       * bottom row isn't to be rendered, so failure to align means
       * we could walk off the end of the GTT and fault.  This is
       * documented on 965, and may be the case on older chipsets
       * too so we try to be careful.
       */
      height_alignment = 2;

      if (tiling == I915_TILING_X)
         height_alignment = 8;
      else if (tiling == I915_TILING_Y)
         height_alignment = 32;
      aligned_y = ALIGN(y, height_alignment);

      stride = x * cpp;
      stride = bo_tile_pitch(bufmgr, stride, tiling_mode);
      size = stride * aligned_y;
      size = bo_tile_size(bufmgr, size, tiling_mode);
   } while (*tiling_mode != tiling);

   if (tiling == I915_TILING_NONE)
      stride = 0;

   *pitch = stride;

   return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0);
}
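
/* Worked example (illustrative): a 100x100 RGBA8 surface requested as
 * I915_TILING_X rounds its 400-byte pitch (100 * 4 cpp) up to 512 via
 * bo_tile_pitch(), aligns the height to 8 rows (104), and so asks
 * bo_alloc_internal() for 512 * 104 bytes, page-rounded by bo_tile_size().
 */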
/**
 * Returns a brw_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
struct brw_bo *
brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
                            const char *name, unsigned int handle)
{
   struct brw_bo *bo;
   int ret;
   struct drm_gem_open open_arg;
   struct drm_i915_gem_get_tiling get_tiling;

   /* At the moment most applications only have a few named bo.
    * For instance, in a DRI client only the render buffers passed
    * between X and the client are named. And since X returns the
    * alternating names for the front/back buffer a linear search
    * provides a sufficiently fast match.
    */
   pthread_mutex_lock(&bufmgr->lock);
   bo = hash_find_bo(bufmgr->name_table, handle);
   if (bo) {
      brw_bo_reference(bo);
      goto out;
   }

   memclear(open_arg);
   open_arg.name = handle;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
   if (ret != 0) {
      DBG("Couldn't reference %s handle 0x%08x: %s\n",
          name, handle, strerror(errno));
      bo = NULL;
      goto out;
   }
   /* Now see if someone has used a prime handle to get this
    * object from the kernel before by looking through the list
    * again for a matching gem_handle
    */
   bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
   if (bo) {
      brw_bo_reference(bo);
      goto out;
   }

   bo = calloc(1, sizeof(*bo));
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);
   list_inithead(&bo->vma_list);

   bo->size = open_arg.size;
   bo->bufmgr = bufmgr;
   bo->gem_handle = open_arg.handle;
   bo->name = name;
   bo->global_name = handle;
   bo->reusable = false;

   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
   _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);

   memclear(get_tiling);
   get_tiling.handle = bo->gem_handle;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
   if (ret != 0)
      goto err_unref;

   bo->tiling_mode = get_tiling.tiling_mode;
   bo->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */
   DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);

out:
   pthread_mutex_unlock(&bufmgr->lock);
   return bo;

err_unref:
   bo_free(bo);
   pthread_mutex_unlock(&bufmgr->lock);
   return NULL;
}
static void
bo_free(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_gem_close close;
   struct hash_entry *entry;
   int ret;

   list_del(&bo->vma_list);
   if (bo->mem_virtual) {
      VG(VALGRIND_FREELIKE_BLOCK(bo->mem_virtual, 0));
      drm_munmap(bo->mem_virtual, bo->size);
   }
   if (bo->wc_virtual) {
      VG(VALGRIND_FREELIKE_BLOCK(bo->wc_virtual, 0));
      drm_munmap(bo->wc_virtual, bo->size);
   }
   if (bo->gtt_virtual) {
      drm_munmap(bo->gtt_virtual, bo->size);
   }

   if (bo->global_name) {
      entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
      _mesa_hash_table_remove(bufmgr->name_table, entry);
   }
   entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
   _mesa_hash_table_remove(bufmgr->handle_table, entry);

   /* Close this object */
   memclear(close);
   close.handle = bo->gem_handle;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
   if (ret != 0) {
      DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
          bo->gem_handle, bo->name, strerror(errno));
   }
   free(bo);
}
static void
bo_mark_mmaps_incoherent(struct brw_bo *bo)
{
#if HAVE_VALGRIND
   if (bo->mem_virtual)
      VALGRIND_MAKE_MEM_NOACCESS(bo->mem_virtual, bo->size);

   if (bo->wc_virtual)
      VALGRIND_MAKE_MEM_NOACCESS(bo->wc_virtual, bo->size);

   if (bo->gtt_virtual)
      VALGRIND_MAKE_MEM_NOACCESS(bo->gtt_virtual, bo->size);
#endif
}
/** Frees all cached buffers significantly older than @time. */
static void
cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time)
{
   int i;

   if (bufmgr->time == time)
      return;

   for (i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
         if (time - bo->free_time <= 1)
            break;

         list_del(&bo->head);

         bo_free(bo);
      }
   }

   bufmgr->time = time;
}
static void
bo_purge_vma_cache(struct brw_bufmgr *bufmgr)
{
   int limit;

   DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
       bufmgr->vma_count, bufmgr->vma_open, bufmgr->vma_max);

   if (bufmgr->vma_max < 0)
      return;

   /* We may need to evict a few entries in order to create new mmaps */
   limit = bufmgr->vma_max - 2 * bufmgr->vma_open;
   if (limit < 0)
      limit = 0;

   while (bufmgr->vma_count > limit) {
      struct brw_bo *bo;

      bo = LIST_ENTRY(struct brw_bo, bufmgr->vma_cache.next, vma_list);
      assert(bo->map_count == 0);
      list_delinit(&bo->vma_list);

      if (bo->mem_virtual) {
         drm_munmap(bo->mem_virtual, bo->size);
         bo->mem_virtual = NULL;
         bufmgr->vma_count--;
      }
      if (bo->wc_virtual) {
         drm_munmap(bo->wc_virtual, bo->size);
         bo->wc_virtual = NULL;
         bufmgr->vma_count--;
      }
      if (bo->gtt_virtual) {
         drm_munmap(bo->gtt_virtual, bo->size);
         bo->gtt_virtual = NULL;
         bufmgr->vma_count--;
      }
   }
}
static void
bo_close_vma(struct brw_bufmgr *bufmgr, struct brw_bo *bo)
{
   bufmgr->vma_open--;
   list_addtail(&bo->vma_list, &bufmgr->vma_cache);
   if (bo->mem_virtual)
      bufmgr->vma_count++;
   if (bo->wc_virtual)
      bufmgr->vma_count++;
   if (bo->gtt_virtual)
      bufmgr->vma_count++;
   bo_purge_vma_cache(bufmgr);
}
static void
bo_open_vma(struct brw_bufmgr *bufmgr, struct brw_bo *bo)
{
   bufmgr->vma_open++;
   list_del(&bo->vma_list);
   if (bo->mem_virtual)
      bufmgr->vma_count--;
   if (bo->wc_virtual)
      bufmgr->vma_count--;
   if (bo->gtt_virtual)
      bufmgr->vma_count--;
   bo_purge_vma_cache(bufmgr);
}
static void
bo_unreference_final(struct brw_bo *bo, time_t time)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct bo_cache_bucket *bucket;

   DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);

   /* Clear any left-over mappings */
   if (bo->map_count) {
      DBG("bo freed with non-zero map-count %d\n", bo->map_count);
      bo->map_count = 0;
      bo_close_vma(bufmgr, bo);
      bo_mark_mmaps_incoherent(bo);
   }

   bucket = bucket_for_size(bufmgr, bo->size);
   /* Put the buffer into our internal cache for reuse if we can. */
   if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
       brw_bo_madvise(bo, I915_MADV_DONTNEED)) {
      bo->free_time = time;
      bo->name = NULL;

      list_addtail(&bo->head, &bucket->head);
   } else {
      bo_free(bo);
   }
}
void
brw_bo_unreference(struct brw_bo *bo)
{
   if (bo == NULL)
      return;

   assert(p_atomic_read(&bo->refcount) > 0);

   if (atomic_add_unless(&bo->refcount, -1, 1)) {
      struct brw_bufmgr *bufmgr = bo->bufmgr;
      struct timespec time;

      clock_gettime(CLOCK_MONOTONIC, &time);

      pthread_mutex_lock(&bufmgr->lock);

      if (p_atomic_dec_zero(&bo->refcount)) {
         bo_unreference_final(bo, time.tv_sec);
         cleanup_bo_cache(bufmgr, time.tv_sec);
      }

      pthread_mutex_unlock(&bufmgr->lock);
   }
}
int
brw_bo_map(struct brw_bo *bo, int write_enable)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_domain set_domain;
   int ret;

   pthread_mutex_lock(&bufmgr->lock);

   if (bo->map_count++ == 0)
      bo_open_vma(bufmgr, bo);

   if (!bo->mem_virtual) {
      struct drm_i915_gem_mmap mmap_arg;

      DBG("bo_map: %d (%s), map_count=%d\n",
          bo->gem_handle, bo->name, bo->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo->gem_handle;
      mmap_arg.size = bo->size;
      ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
      if (ret != 0) {
         ret = -errno;
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         if (--bo->map_count == 0)
            bo_close_vma(bufmgr, bo);
         pthread_mutex_unlock(&bufmgr->lock);
         return ret;
      }
      VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
      bo->mem_virtual = (void *) (uintptr_t) mmap_arg.addr_ptr;
   }
   DBG("bo_map: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->mem_virtual);
   bo->virtual = bo->mem_virtual;

   memclear(set_domain);
   set_domain.handle = bo->gem_handle;
   set_domain.read_domains = I915_GEM_DOMAIN_CPU;
   if (write_enable)
      set_domain.write_domain = I915_GEM_DOMAIN_CPU;
   else
      set_domain.write_domain = 0;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
   if (ret != 0) {
      DBG("%s:%d: Error setting to CPU domain %d: %s\n",
          __FILE__, __LINE__, bo->gem_handle, strerror(errno));
   }

   bo_mark_mmaps_incoherent(bo);
   VG(VALGRIND_MAKE_MEM_DEFINED(bo->mem_virtual, bo->size));
   pthread_mutex_unlock(&bufmgr->lock);

   return 0;
}
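
/* brw_bo_map() gives a cached CPU view and moves the buffer to the CPU
 * domain, which is cheap on LLC systems but implies cache flushes on
 * non-LLC parts; tiled or GPU-written buffers are normally accessed
 * through the GTT mapping below instead, which detiles through the
 * aperture at the cost of write-combined (uncached) access.
 */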
static int
map_gtt(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   int ret;

   if (bo->map_count++ == 0)
      bo_open_vma(bufmgr, bo);

   /* Get a mapping of the buffer if we haven't before. */
   if (bo->gtt_virtual == NULL) {
      struct drm_i915_gem_mmap_gtt mmap_arg;

      DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
          bo->gem_handle, bo->name, bo->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo->gem_handle;

      /* Get the fake offset back... */
      ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
      if (ret != 0) {
         ret = -errno;
         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         if (--bo->map_count == 0)
            bo_close_vma(bufmgr, bo);
         return ret;
      }

      /* and mmap it */
      bo->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
                                 MAP_SHARED, bufmgr->fd, mmap_arg.offset);
      if (bo->gtt_virtual == MAP_FAILED) {
         bo->gtt_virtual = NULL;
         ret = -errno;
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         if (--bo->map_count == 0)
            bo_close_vma(bufmgr, bo);
         return ret;
      }
   }

   bo->virtual = bo->gtt_virtual;

   DBG("bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name,
       bo->gtt_virtual);

   return 0;
}
int
brw_bo_map_gtt(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_domain set_domain;
   int ret;

   pthread_mutex_lock(&bufmgr->lock);

   ret = map_gtt(bo);
   if (ret != 0) {
      pthread_mutex_unlock(&bufmgr->lock);
      return ret;
   }

   /* Now move it to the GTT domain so that the GPU and CPU
    * caches are flushed and the GPU isn't actively using the
    * buffer.
    *
    * The pagefault handler does this domain change for us when
    * it has unbound the BO from the GTT, but it's up to us to
    * tell it when we're about to use things if we had done
    * rendering and it still happens to be bound to the GTT.
    */
   memclear(set_domain);
   set_domain.handle = bo->gem_handle;
   set_domain.read_domains = I915_GEM_DOMAIN_GTT;
   set_domain.write_domain = I915_GEM_DOMAIN_GTT;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
   if (ret != 0) {
      DBG("%s:%d: Error setting domain %d: %s\n",
          __FILE__, __LINE__, bo->gem_handle, strerror(errno));
   }

   bo_mark_mmaps_incoherent(bo);
   VG(VALGRIND_MAKE_MEM_DEFINED(bo->gtt_virtual, bo->size));
   pthread_mutex_unlock(&bufmgr->lock);

   return 0;
}
/**
 * Performs a mapping of the buffer object like the normal GTT
 * mapping, but avoids waiting for the GPU to be done reading from or
 * rendering to the buffer.
 *
 * This is used in the implementation of GL_ARB_map_buffer_range: The
 * user asks to create a buffer, then does a mapping, fills some
 * space, runs a drawing command, then asks to map it again without
 * synchronizing because it guarantees that it won't write over the
 * data that the GPU is busy using (or, more specifically, that if it
 * does write over the data, it acknowledges that rendering is
 * undefined).
 */
int
brw_bo_map_unsynchronized(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   int ret;

   /* If the CPU cache isn't coherent with the GTT, then use a
    * regular synchronized mapping.  The problem is that we don't
    * track where the buffer was last used on the CPU side in
    * terms of brw_bo_map vs brw_bo_map_gtt, so
    * we would potentially corrupt the buffer even when the user
    * does reasonable things.
    */
   if (!bufmgr->has_llc)
      return brw_bo_map_gtt(bo);

   pthread_mutex_lock(&bufmgr->lock);

   ret = map_gtt(bo);
   if (ret == 0) {
      bo_mark_mmaps_incoherent(bo);
      VG(VALGRIND_MAKE_MEM_DEFINED(bo->gtt_virtual, bo->size));
   }

   pthread_mutex_unlock(&bufmgr->lock);

   return ret;
}
int
brw_bo_unmap(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   int ret = 0;

   pthread_mutex_lock(&bufmgr->lock);

   if (bo->map_count <= 0) {
      DBG("attempted to unmap an unmapped bo\n");
      pthread_mutex_unlock(&bufmgr->lock);
      /* Preserve the old behaviour of just treating this as a
       * no-op rather than reporting the error.
       */
      return 0;
   }

   /* We need to unmap after every invocation as we cannot track
    * an open vma for every bo as that will exhaust the system
    * limits and cause later failures.
    */
   if (--bo->map_count == 0) {
      bo_close_vma(bufmgr, bo);
      bo_mark_mmaps_incoherent(bo);
      bo->virtual = NULL;
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return ret;
}
int
brw_bo_subdata(struct brw_bo *bo, unsigned long offset,
               unsigned long size, const void *data)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_pwrite pwrite;
   int ret;

   memclear(pwrite);
   pwrite.handle = bo->gem_handle;
   pwrite.offset = offset;
   pwrite.size = size;
   pwrite.data_ptr = (uint64_t) (uintptr_t) data;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
   if (ret != 0) {
      ret = -errno;
      DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
          __FILE__, __LINE__, bo->gem_handle, (int) offset,
          (int) size, strerror(errno));
   }

   return ret;
}
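
/* Illustrative usage sketch (not part of this file): pwrite is handy for
 * small, one-shot uploads where setting up a CPU mapping is not worth it.
 *
 *    float constants[4] = { 0.0f, 0.25f, 0.5f, 1.0f };
 *    brw_bo_subdata(bo, 0, sizeof(constants), constants);
 */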
int
brw_bo_get_subdata(struct brw_bo *bo, unsigned long offset,
                   unsigned long size, void *data)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_pread pread;
   int ret;

   memclear(pread);
   pread.handle = bo->gem_handle;
   pread.offset = offset;
   pread.size = size;
   pread.data_ptr = (uint64_t) (uintptr_t) data;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
   if (ret != 0) {
      ret = -errno;
      DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
          __FILE__, __LINE__, bo->gem_handle, (int) offset,
          (int) size, strerror(errno));
   }

   return ret;
}
/** Waits for all GPU rendering with the object to have completed. */
void
brw_bo_wait_rendering(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_domain set_domain;
   int ret;

   memclear(set_domain);
   set_domain.handle = bo->gem_handle;
   set_domain.read_domains = I915_GEM_DOMAIN_GTT;
   set_domain.write_domain = I915_GEM_DOMAIN_GTT;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
   if (ret != 0) {
      DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
          __FILE__, __LINE__, bo->gem_handle,
          set_domain.read_domains, set_domain.write_domain, strerror(errno));
   }
}
/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time. Otherwise a negative return
 * value describes the error. Of particular interest is -ETIME, returned when
 * the wait has failed to yield the desired result.
 *
 * Similar to brw_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time. Another subtle
 * difference is the internal locking semantics are different (this variant
 * does not hold the lock for the duration of the wait). This makes the wait
 * subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait will
 * not guarantee that the buffer is re-issued via another thread, or a flinked
 * handle. Userspace must make sure this race does not occur if such precision
 * is important.
 *
 * Note that some kernels have broken the infinite wait for negative values
 * promise; upgrade to the latest stable kernels if this is the case.
 */
int
brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_wait wait;
   int ret;

   memclear(wait);
   wait.bo_handle = bo->gem_handle;
   wait.timeout_ns = timeout_ns;
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   if (ret == -1)
      return -errno;

   return ret;
}
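
/* Illustrative usage sketch (not part of this file): wait up to one second
 * for the GPU to release the buffer before reading it back.
 *
 *    if (brw_bo_wait(bo, 1000000000ll) == -ETIME) {
 *       ... still busy: retry, or fall back to a blocking path ...
 *    }
 *
 * A negative timeout_ns requests an infinite wait, subject to the kernel
 * caveat noted above.
 */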
void
brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
{
   pthread_mutex_destroy(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
         list_del(&bo->head);

         bo_free(bo);
      }
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   free(bufmgr);
}
static int
bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
                       uint32_t stride)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_tiling set_tiling;
   int ret;

   if (bo->global_name == 0 &&
       tiling_mode == bo->tiling_mode && stride == bo->stride)
      return 0;

   memset(&set_tiling, 0, sizeof(set_tiling));
   do {
      /* set_tiling is slightly broken and overwrites the
       * input on the error path, so we have to open code
       * drmIoctl.
       */
      set_tiling.handle = bo->gem_handle;
      set_tiling.tiling_mode = tiling_mode;
      set_tiling.stride = stride;

      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   if (ret == -1)
      return -errno;

   bo->tiling_mode = set_tiling.tiling_mode;
   bo->swizzle_mode = set_tiling.swizzle_mode;
   bo->stride = set_tiling.stride;
   return 0;
}
int
brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
                  uint32_t *swizzle_mode)
{
   *tiling_mode = bo->tiling_mode;
   *swizzle_mode = bo->swizzle_mode;
   return 0;
}
struct brw_bo *
brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd,
                             int size)
{
   int ret;
   uint32_t handle;
   struct brw_bo *bo;
   struct drm_i915_gem_get_tiling get_tiling;

   pthread_mutex_lock(&bufmgr->lock);
   ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("create_from_prime: failed to obtain handle from fd: %s\n",
          strerror(errno));
      pthread_mutex_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us. Just as
    * for named buffers, we must not create two bo's pointing at the same
    * kernel object
    */
   bo = hash_find_bo(bufmgr->handle_table, handle);
   if (bo) {
      brw_bo_reference(bo);
      goto out;
   }

   bo = calloc(1, sizeof(*bo));
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);
   list_inithead(&bo->vma_list);

   /* Determine size of bo.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  If we have kernel 3.12 or
    * later, we can lseek on the prime fd to get the size.  Older
    * kernels will just fail, in which case we fall back to the
    * provided (estimated or guess size). */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;
   else
      bo->size = size;

   bo->bufmgr = bufmgr;

   bo->gem_handle = handle;
   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   bo->reusable = false;

   memclear(get_tiling);
   get_tiling.handle = bo->gem_handle;
   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
      goto err;

   bo->tiling_mode = get_tiling.tiling_mode;
   bo->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */

out:
   pthread_mutex_unlock(&bufmgr->lock);
   return bo;

err:
   bo_free(bo);
   pthread_mutex_unlock(&bufmgr->lock);
   return NULL;
}
int
brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC, prime_fd) != 0)
      return -errno;

   bo->reusable = false;

   return 0;
}
int
brw_bo_flink(struct brw_bo *bo, uint32_t *name)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (!bo->global_name) {
      struct drm_gem_flink flink;

      memclear(flink);
      flink.handle = bo->gem_handle;
      if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      pthread_mutex_lock(&bufmgr->lock);
      if (!bo->global_name) {
         bo->global_name = flink.name;
         bo->reusable = false;

         _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
      }
      pthread_mutex_unlock(&bufmgr->lock);
   }

   *name = bo->global_name;
   return 0;
}
/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr)
{
   bufmgr->bo_reuse = true;
}
static void
add_bucket(struct brw_bufmgr *bufmgr, int size)
{
   unsigned int i = bufmgr->num_buckets;

   assert(i < ARRAY_SIZE(bufmgr->cache_bucket));

   list_inithead(&bufmgr->cache_bucket[i].head);
   bufmgr->cache_bucket[i].size = size;
   bufmgr->num_buckets++;
}
static void
init_cache_buckets(struct brw_bufmgr *bufmgr)
{
   unsigned long size, cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(bufmgr, 4096);
   add_bucket(bufmgr, 4096 * 2);
   add_bucket(bufmgr, 4096 * 3);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
      add_bucket(bufmgr, size);

      add_bucket(bufmgr, size + size * 1 / 4);
      add_bucket(bufmgr, size + size * 2 / 4);
      add_bucket(bufmgr, size + size * 3 / 4);
   }
}
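
/* The resulting bucket sizes are 4 KB, 8 KB and 12 KB, then 16 KB, 20 KB,
 * 24 KB, 28 KB, 32 KB, 40 KB, 48 KB, 56 KB, 64 KB, ... with three
 * intermediate steps per power of two up through the 64 MB cache_max_size.
 */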
void
brw_bufmgr_gem_set_vma_cache_size(struct brw_bufmgr *bufmgr, int limit)
{
   bufmgr->vma_max = limit;

   bo_purge_vma_cache(bufmgr);
}
uint32_t
brw_create_hw_context(struct brw_bufmgr *bufmgr)
{
   struct drm_i915_gem_context_create create;
   int ret;

   memclear(create);
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
   if (ret != 0) {
      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
      return 0;
   }

   return create.ctx_id;
}
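
/* A return value of 0 indicates failure; the kernel does not hand out
 * context id 0 for newly created contexts (it names the default context),
 * which is why brw_destroy_hw_context() below treats a zero ctx_id as a
 * no-op. Illustrative pairing (not part of this file):
 *
 *    uint32_t ctx_id = brw_create_hw_context(bufmgr);
 *    if (ctx_id)
 *       ... submit batches against ctx_id ...
 *    brw_destroy_hw_context(bufmgr, ctx_id);
 */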
void
brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
{
   struct drm_i915_gem_context_destroy d = {.ctx_id = ctx_id };

   if (ctx_id != 0 &&
       drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
              strerror(errno));
   }
}
int
brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
{
   struct drm_i915_reg_read reg_read;
   int ret;

   memclear(reg_read);
   reg_read.offset = offset;

   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

   *result = reg_read.val;
   return ret;
}
void *
brw_bo_map__gtt(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (bo->gtt_virtual)
      return bo->gtt_virtual;

   pthread_mutex_lock(&bufmgr->lock);
   if (bo->gtt_virtual == NULL) {
      struct drm_i915_gem_mmap_gtt mmap_arg;
      void *ptr;

      DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
          bo->gem_handle, bo->name, bo->map_count);

      if (bo->map_count++ == 0)
         bo_open_vma(bufmgr, bo);

      memclear(mmap_arg);
      mmap_arg.handle = bo->gem_handle;

      /* Get the fake offset back... */
      ptr = MAP_FAILED;
      if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg) == 0) {
         /* and mmap it */
         ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
                        MAP_SHARED, bufmgr->fd, mmap_arg.offset);
      }
      if (ptr == MAP_FAILED) {
         if (--bo->map_count == 0)
            bo_close_vma(bufmgr, bo);
         ptr = NULL;
      }

      bo->gtt_virtual = ptr;
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return bo->gtt_virtual;
}
void *
brw_bo_map__cpu(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (bo->mem_virtual)
      return bo->mem_virtual;

   pthread_mutex_lock(&bufmgr->lock);
   if (!bo->mem_virtual) {
      struct drm_i915_gem_mmap mmap_arg;

      if (bo->map_count++ == 0)
         bo_open_vma(bufmgr, bo);

      DBG("bo_map: %d (%s), map_count=%d\n",
          bo->gem_handle, bo->name, bo->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo->gem_handle;
      mmap_arg.size = bo->size;
      if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         if (--bo->map_count == 0)
            bo_close_vma(bufmgr, bo);
      } else {
         VG(VALGRIND_MALLOCLIKE_BLOCK
            (mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
         bo->mem_virtual = (void *) (uintptr_t) mmap_arg.addr_ptr;
      }
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return bo->mem_virtual;
}
void *
brw_bo_map__wc(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (bo->wc_virtual)
      return bo->wc_virtual;

   pthread_mutex_lock(&bufmgr->lock);
   if (!bo->wc_virtual) {
      struct drm_i915_gem_mmap mmap_arg;

      if (bo->map_count++ == 0)
         bo_open_vma(bufmgr, bo);

      DBG("bo_map: %d (%s), map_count=%d\n",
          bo->gem_handle, bo->name, bo->map_count);

      memclear(mmap_arg);
      mmap_arg.handle = bo->gem_handle;
      mmap_arg.size = bo->size;
      mmap_arg.flags = I915_MMAP_WC;
      if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         if (--bo->map_count == 0)
            bo_close_vma(bufmgr, bo);
      } else {
         VG(VALGRIND_MALLOCLIKE_BLOCK
            (mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
         bo->wc_virtual = (void *) (uintptr_t) mmap_arg.addr_ptr;
      }
   }
   pthread_mutex_unlock(&bufmgr->lock);

   return bo->wc_virtual;
}
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
struct brw_bufmgr *
brw_bufmgr_init(struct gen_device_info *devinfo, int fd, int batch_size)
{
   struct brw_bufmgr *bufmgr;

   bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      return NULL;

   /* Handles to buffer objects belong to the device fd and are not
    * reference counted by the kernel.  If the same fd is used by
    * multiple parties (threads sharing the same screen bufmgr, or
    * even worse the same device fd passed to multiple libraries)
    * ownership of those handles is shared by those independent parties.
    *
    * Don't do this! Ensure that each library/bufmgr has its own device
    * fd so that its namespace does not clash with another.
    */
   bufmgr->fd = fd;

   if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
      free(bufmgr);
      return NULL;
   }

   bufmgr->has_llc = devinfo->has_llc;

   init_cache_buckets(bufmgr);

   list_inithead(&bufmgr->vma_cache);
   bufmgr->vma_max = -1; /* unlimited by default */

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);

   return bufmgr;
}