i965: Drop bogus pthread_mutex_unlock in map_gtt error path.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_bufmgr.c
1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include "errno.h"
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "common/gen_clflush.h"
60 #include "common/gen_debug.h"
61 #include "common/gen_device_info.h"
62 #include "libdrm_macros.h"
63 #include "main/macros.h"
64 #include "util/macros.h"
65 #include "util/hash_table.h"
66 #include "util/list.h"
67 #include "brw_bufmgr.h"
68 #include "brw_context.h"
69 #include "string.h"
70
71 #include "i915_drm.h"
72
73 #ifdef HAVE_VALGRIND
74 #include <valgrind.h>
75 #include <memcheck.h>
76 #define VG(x) x
77 #else
78 #define VG(x)
79 #endif
80
81 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
82 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
83 * leaked. All because it does not call VG(cli_free) from its
84 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
85 * an allocation, we mark it available for use upon mmapping and remove
86 * it upon unmapping.
87 */
88 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
89 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
90
91 #define memclear(s) memset(&s, 0, sizeof(s))
92
93 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
94
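/* atomic_add_unless() adds "add" to *v unless *v currently equals "unless",
* and returns nonzero only when *v was already "unless" (in which case
* nothing was added).  A sketch of the reference-counting pattern this
* enables (see brw_bo_unreference() below):
*
*    if (atomic_add_unless(&bo->refcount, -1, 1)) {
*       // refcount was exactly 1: take the lock and do the final
*       // decrement (and possible free) under it
*    }
*    // otherwise the refcount was > 1 and was decremented locklessly
*/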
95 static inline int
96 atomic_add_unless(int *v, int add, int unless)
97 {
98 int c, old;
99 c = p_atomic_read(v);
100 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
101 c = old;
102 return c == unless;
103 }
104
105 struct bo_cache_bucket {
106 struct list_head head;
107 uint64_t size;
108 };
109
110 struct brw_bufmgr {
111 int fd;
112
113 pthread_mutex_t lock;
114
115 /** Array of lists of cached gem objects of power-of-two sizes */
116 struct bo_cache_bucket cache_bucket[14 * 4];
117 int num_buckets;
118 time_t time;
119
120 struct hash_table *name_table;
121 struct hash_table *handle_table;
122
123 bool has_llc:1;
124 bool bo_reuse:1;
125 };
126
127 static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
128 uint32_t stride);
129
130 static void bo_free(struct brw_bo *bo);
131
132 static uint32_t
133 key_hash_uint(const void *key)
134 {
135 return _mesa_hash_data(key, 4);
136 }
137
138 static bool
139 key_uint_equal(const void *a, const void *b)
140 {
141 return *((unsigned *) a) == *((unsigned *) b);
142 }
143
144 static struct brw_bo *
145 hash_find_bo(struct hash_table *ht, unsigned int key)
146 {
147 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
148 return entry ? (struct brw_bo *) entry->data : NULL;
149 }
150
151 static uint64_t
152 bo_tile_size(struct brw_bufmgr *bufmgr, uint64_t size, uint32_t tiling)
153 {
154 if (tiling == I915_TILING_NONE)
155 return size;
156
157 /* 965+ just need multiples of page size for tiling */
158 return ALIGN(size, 4096);
159 }
160
161 /*
162 * Round a given pitch up to the minimum required for X tiling on a
163 * given chip. We use 512 as the minimum to allow for a later tiling
164 * change.
165 */
166 static uint32_t
167 bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
168 {
169 unsigned long tile_width;
170
171 /* If untiled, then just align it so that we can do rendering
172 * to it with the 3D engine.
173 */
174 if (tiling == I915_TILING_NONE)
175 return ALIGN(pitch, 64);
176
177 if (tiling == I915_TILING_X)
178 tile_width = 512;
179 else
180 tile_width = 128;
181
182 /* 965 is flexible */
183 return ALIGN(pitch, tile_width);
184 }
185
186 static struct bo_cache_bucket *
187 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
188 {
189 int i;
190
191 for (i = 0; i < bufmgr->num_buckets; i++) {
192 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
193 if (bucket->size >= size) {
194 return bucket;
195 }
196 }
197
198 return NULL;
199 }
200
201 inline void
202 brw_bo_reference(struct brw_bo *bo)
203 {
204 p_atomic_inc(&bo->refcount);
205 }
206
207 int
208 brw_bo_busy(struct brw_bo *bo)
209 {
210 struct brw_bufmgr *bufmgr = bo->bufmgr;
211 struct drm_i915_gem_busy busy;
212 int ret;
213
214 memclear(busy);
215 busy.handle = bo->gem_handle;
216
217 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
218 if (ret == 0) {
219 bo->idle = !busy.busy;
220 return busy.busy;
221 }
222 return false;
223 }
224
225 int
226 brw_bo_madvise(struct brw_bo *bo, int state)
227 {
228 struct drm_i915_gem_madvise madv;
229
230 memclear(madv);
231 madv.handle = bo->gem_handle;
232 madv.madv = state;
233 madv.retained = 1;
234 drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
235
236 return madv.retained;
237 }
238
239 /* drop the oldest entries that have been purged by the kernel */
240 static void
241 brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
242 struct bo_cache_bucket *bucket)
243 {
244 list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
245 if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
246 break;
247
248 list_del(&bo->head);
249 bo_free(bo);
250 }
251 }
252
253 static struct brw_bo *
254 bo_alloc_internal(struct brw_bufmgr *bufmgr,
255 const char *name,
256 uint64_t size,
257 unsigned flags,
258 uint32_t tiling_mode,
259 uint32_t stride, uint64_t alignment)
260 {
261 struct brw_bo *bo;
262 unsigned int page_size = getpagesize();
263 int ret;
264 struct bo_cache_bucket *bucket;
265 bool alloc_from_cache;
266 uint64_t bo_size;
267 bool for_render = false;
268
269 if (flags & BO_ALLOC_FOR_RENDER)
270 for_render = true;
271
272 /* Round the allocated size up to a power of two number of pages. */
273 bucket = bucket_for_size(bufmgr, size);
274
275 /* If we don't have caching at this size, don't actually round the
276 * allocation up.
277 */
278 if (bucket == NULL) {
279 bo_size = size;
280 if (bo_size < page_size)
281 bo_size = page_size;
282 } else {
283 bo_size = bucket->size;
284 }
285
286 pthread_mutex_lock(&bufmgr->lock);
287 /* Get a buffer out of the cache if available */
288 retry:
289 alloc_from_cache = false;
290 if (bucket != NULL && !list_empty(&bucket->head)) {
291 if (for_render) {
292 /* Allocate new render-target BOs from the tail (MRU)
293 * of the list, as it will likely be hot in the GPU
294 * cache and in the aperture for us.
295 */
296 bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
297 list_del(&bo->head);
298 alloc_from_cache = true;
299 bo->align = alignment;
300 } else {
301 assert(alignment == 0);
302 /* For non-render-target BOs (where we're probably
303 * going to map it first thing in order to fill it
304 * with data), check if the last BO in the cache is
305 * unbusy, and only reuse in that case. Otherwise,
306 * allocating a new buffer is probably faster than
307 * waiting for the GPU to finish.
308 */
309 bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
310 if (!brw_bo_busy(bo)) {
311 alloc_from_cache = true;
312 list_del(&bo->head);
313 }
314 }
315
316 if (alloc_from_cache) {
317 if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
318 bo_free(bo);
319 brw_bo_cache_purge_bucket(bufmgr, bucket);
320 goto retry;
321 }
322
323 if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
324 bo_free(bo);
325 goto retry;
326 }
327 }
328 }
329
330 if (!alloc_from_cache) {
331 struct drm_i915_gem_create create;
332
333 bo = calloc(1, sizeof(*bo));
334 if (!bo)
335 goto err;
336
337 bo->size = bo_size;
338 bo->idle = true;
339
340 memclear(create);
341 create.size = bo_size;
342
343 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
344 if (ret != 0) {
345 free(bo);
346 goto err;
347 }
348
349 bo->gem_handle = create.handle;
350 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
351
352 bo->bufmgr = bufmgr;
353 bo->align = alignment;
354
355 bo->tiling_mode = I915_TILING_NONE;
356 bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
357 bo->stride = 0;
358
359 if (bo_set_tiling_internal(bo, tiling_mode, stride))
360 goto err_free;
361 }
362
363 bo->name = name;
364 p_atomic_set(&bo->refcount, 1);
365 bo->reusable = true;
366 bo->cache_coherent = bufmgr->has_llc;
367
368 pthread_mutex_unlock(&bufmgr->lock);
369
370 DBG("bo_create: buf %d (%s) %ldb\n", bo->gem_handle, bo->name, size);
371
372 return bo;
373
374 err_free:
375 bo_free(bo);
376 err:
377 pthread_mutex_unlock(&bufmgr->lock);
378 return NULL;
379 }
380
381 struct brw_bo *
382 brw_bo_alloc(struct brw_bufmgr *bufmgr,
383 const char *name, uint64_t size, uint64_t alignment)
384 {
385 return bo_alloc_internal(bufmgr, name, size, 0, I915_TILING_NONE, 0, 0);
386 }
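
/* A minimal usage sketch (the variable names are illustrative): allocate a
* page-sized linear bo, use it, then drop the reference.  The buffer goes
* back into the reuse cache if brw_bufmgr_enable_reuse() has been called.
*
*    struct brw_bo *bo = brw_bo_alloc(bufmgr, "scratch", 4096, 0);
*    if (bo) {
*       ... use the buffer ...
*       brw_bo_unreference(bo);
*    }
*/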
387
388 struct brw_bo *
389 brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
390 uint64_t size, uint32_t tiling_mode, uint32_t pitch,
391 unsigned flags)
392 {
393 return bo_alloc_internal(bufmgr, name, size, flags, tiling_mode, pitch, 0);
394 }
395
396 struct brw_bo *
397 brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
398 int x, int y, int cpp, uint32_t tiling,
399 uint32_t *pitch, unsigned flags)
400 {
401 uint64_t size;
402 uint32_t stride;
403 unsigned long aligned_y, height_alignment;
404
405 /* If we're tiled, our allocations are in 8 or 32-row blocks,
406 * so failure to align our height means that we won't allocate
407 * enough pages.
408 *
409 * If we're untiled, we still have to align to 2 rows high
410 * because the data port accesses 2x2 blocks even if the
411 * bottom row isn't to be rendered, so failure to align means
412 * we could walk off the end of the GTT and fault. This is
413 * documented on 965, and may be the case on older chipsets
414 * too so we try to be careful.
415 */
416 aligned_y = y;
417 height_alignment = 2;
418
419 if (tiling == I915_TILING_X)
420 height_alignment = 8;
421 else if (tiling == I915_TILING_Y)
422 height_alignment = 32;
423 aligned_y = ALIGN(y, height_alignment);
424
425 stride = x * cpp;
426 stride = bo_tile_pitch(bufmgr, stride, tiling);
427 size = stride * aligned_y;
428 size = bo_tile_size(bufmgr, size, tiling);
429 *pitch = stride;
430
431 if (tiling == I915_TILING_NONE)
432 stride = 0;
433
434 return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0);
435 }
436
437 /**
438 * Returns a brw_bo wrapping the given buffer object handle.
439 *
440 * This can be used when one application needs to pass a buffer object
441 * to another.
442 */
443 struct brw_bo *
444 brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
445 const char *name, unsigned int handle)
446 {
447 struct brw_bo *bo;
448 int ret;
449 struct drm_gem_open open_arg;
450 struct drm_i915_gem_get_tiling get_tiling;
451
452 /* At the moment most applications only have a few named bos.
453 * For instance, in a DRI client only the render buffers passed
454 * between X and the client are named. Since X returns alternating
455 * names for the front/back buffer, the name_table lookup below
456 * finds an already-imported bo quickly.
457 */
458 pthread_mutex_lock(&bufmgr->lock);
459 bo = hash_find_bo(bufmgr->name_table, handle);
460 if (bo) {
461 brw_bo_reference(bo);
462 goto out;
463 }
464
465 memclear(open_arg);
466 open_arg.name = handle;
467 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
468 if (ret != 0) {
469 DBG("Couldn't reference %s handle 0x%08x: %s\n",
470 name, handle, strerror(errno));
471 bo = NULL;
472 goto out;
473 }
474 /* Now see if someone has used a prime handle to get this
475 * object from the kernel before by looking through the list
476 * again for a matching gem_handle
477 */
478 bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
479 if (bo) {
480 brw_bo_reference(bo);
481 goto out;
482 }
483
484 bo = calloc(1, sizeof(*bo));
485 if (!bo)
486 goto out;
487
488 p_atomic_set(&bo->refcount, 1);
489
490 bo->size = open_arg.size;
491 bo->offset64 = 0;
492 bo->bufmgr = bufmgr;
493 bo->gem_handle = open_arg.handle;
494 bo->name = name;
495 bo->global_name = handle;
496 bo->reusable = false;
497 bo->external = true;
498
499 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
500 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
501
502 memclear(get_tiling);
503 get_tiling.handle = bo->gem_handle;
504 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
505 if (ret != 0)
506 goto err_unref;
507
508 bo->tiling_mode = get_tiling.tiling_mode;
509 bo->swizzle_mode = get_tiling.swizzle_mode;
510 /* XXX stride is unknown */
511 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
512
513 out:
514 pthread_mutex_unlock(&bufmgr->lock);
515 return bo;
516
517 err_unref:
518 bo_free(bo);
519 pthread_mutex_unlock(&bufmgr->lock);
520 return NULL;
521 }
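
/* Sketch of how flink names tie in with this function, assuming two
* processes that open the same DRM device: the exporter publishes a global
* name with brw_bo_flink() (defined below), and the importer wraps that
* name here.  send_name_to_peer() stands in for whatever IPC the caller
* uses.
*
*    // exporter
*    uint32_t name;
*    if (brw_bo_flink(bo, &name) == 0)
*       send_name_to_peer(name);
*
*    // importer
*    struct brw_bo *shared =
*       brw_bo_gem_create_from_name(bufmgr, "shared", name);
*/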
522
523 static void
524 bo_free(struct brw_bo *bo)
525 {
526 struct brw_bufmgr *bufmgr = bo->bufmgr;
527 struct drm_gem_close close;
528 struct hash_entry *entry;
529 int ret;
530
531 if (bo->map_cpu) {
532 VG_NOACCESS(bo->map_cpu, bo->size);
533 drm_munmap(bo->map_cpu, bo->size);
534 }
535 if (bo->map_wc) {
536 VG_NOACCESS(bo->map_wc, bo->size);
537 drm_munmap(bo->map_wc, bo->size);
538 }
539 if (bo->map_gtt) {
540 VG_NOACCESS(bo->map_gtt, bo->size);
541 drm_munmap(bo->map_gtt, bo->size);
542 }
543
544 if (bo->global_name) {
545 entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
546 _mesa_hash_table_remove(bufmgr->name_table, entry);
547 }
548 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
549 _mesa_hash_table_remove(bufmgr->handle_table, entry);
550
551 /* Close this object */
552 memclear(close);
553 close.handle = bo->gem_handle;
554 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
555 if (ret != 0) {
556 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
557 bo->gem_handle, bo->name, strerror(errno));
558 }
559 free(bo);
560 }
561
562 /** Frees all cached buffers significantly older than @time. */
563 static void
564 cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time)
565 {
566 int i;
567
568 if (bufmgr->time == time)
569 return;
570
571 for (i = 0; i < bufmgr->num_buckets; i++) {
572 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
573
574 list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
575 if (time - bo->free_time <= 1)
576 break;
577
578 list_del(&bo->head);
579
580 bo_free(bo);
581 }
582 }
583
584 bufmgr->time = time;
585 }
586
587 static void
588 bo_unreference_final(struct brw_bo *bo, time_t time)
589 {
590 struct brw_bufmgr *bufmgr = bo->bufmgr;
591 struct bo_cache_bucket *bucket;
592
593 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
594
595 bucket = bucket_for_size(bufmgr, bo->size);
596 /* Put the buffer into our internal cache for reuse if we can. */
597 if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
598 brw_bo_madvise(bo, I915_MADV_DONTNEED)) {
599 bo->free_time = time;
600
601 bo->name = NULL;
602 bo->kflags = 0;
603
604 list_addtail(&bo->head, &bucket->head);
605 } else {
606 bo_free(bo);
607 }
608 }
609
610 void
611 brw_bo_unreference(struct brw_bo *bo)
612 {
613 if (bo == NULL)
614 return;
615
616 assert(p_atomic_read(&bo->refcount) > 0);
617
618 if (atomic_add_unless(&bo->refcount, -1, 1)) {
619 struct brw_bufmgr *bufmgr = bo->bufmgr;
620 struct timespec time;
621
622 clock_gettime(CLOCK_MONOTONIC, &time);
623
624 pthread_mutex_lock(&bufmgr->lock);
625
626 if (p_atomic_dec_zero(&bo->refcount)) {
627 bo_unreference_final(bo, time.tv_sec);
628 cleanup_bo_cache(bufmgr, time.tv_sec);
629 }
630
631 pthread_mutex_unlock(&bufmgr->lock);
632 }
633 }
634
635 static void
636 set_domain(struct brw_context *brw, const char *action,
637 struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain)
638 {
639 struct drm_i915_gem_set_domain sd = {
640 .handle = bo->gem_handle,
641 .read_domains = read_domains,
642 .write_domain = write_domain,
643 };
644
645 double elapsed = unlikely(brw && brw->perf_debug) ? -get_time() : 0.0;
646
647 if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) {
648 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s.\n",
649 __FILE__, __LINE__, bo->gem_handle, read_domains, write_domain,
650 strerror(errno));
651 }
652
653 if (unlikely(brw && brw->perf_debug)) {
654 elapsed += get_time();
655 if (elapsed > 1e-5) /* 0.01ms */
656 perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
657 action, bo->name, elapsed * 1000);
658 }
659 }
660
661 static void
662 print_flags(unsigned flags)
663 {
664 if (flags & MAP_READ)
665 DBG("READ ");
666 if (flags & MAP_WRITE)
667 DBG("WRITE ");
668 if (flags & MAP_ASYNC)
669 DBG("ASYNC ");
670 if (flags & MAP_PERSISTENT)
671 DBG("PERSISTENT ");
672 if (flags & MAP_COHERENT)
673 DBG("COHERENT ");
674 if (flags & MAP_RAW)
675 DBG("RAW ");
676 DBG("\n");
677 }
678
679 static void *
680 brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
681 {
682 struct brw_bufmgr *bufmgr = bo->bufmgr;
683
684 /* We disallow CPU maps for writing to non-coherent buffers, as the
685 * CPU map can become invalidated when a batch is flushed out, which
686 * can happen at unpredictable times. You should use WC maps instead.
687 */
688 assert(bo->cache_coherent || !(flags & MAP_WRITE));
689
690 if (!bo->map_cpu) {
691 struct drm_i915_gem_mmap mmap_arg;
692 void *map;
693
694 DBG("brw_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);
695
696 memclear(mmap_arg);
697 mmap_arg.handle = bo->gem_handle;
698 mmap_arg.size = bo->size;
699 int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
700 if (ret != 0) {
701 ret = -errno;
702 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
703 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
704 return NULL;
705 }
706 map = (void *) (uintptr_t) mmap_arg.addr_ptr;
707 VG_DEFINED(map, bo->size);
708
709 if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
710 VG_NOACCESS(map, bo->size);
711 drm_munmap(map, bo->size);
712 }
713 }
714 assert(bo->map_cpu);
715
716 DBG("brw_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
717 bo->map_cpu);
718 print_flags(flags);
719
720 if (!(flags & MAP_ASYNC)) {
721 set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
722 flags & MAP_WRITE ? I915_GEM_DOMAIN_CPU : 0);
723 }
724
725 if (!bo->cache_coherent) {
726 /* If we're reusing an existing CPU mapping, the CPU caches may
727 * contain stale data from the last time we read from that mapping.
728 * (With the BO cache, it might even be data from a previous buffer!)
729 * Even if it's a brand new mapping, the kernel may have zeroed the
730 * buffer via CPU writes.
731 *
732 * We need to invalidate those cachelines so that we see the latest
733 * contents, and so long as we only read from the CPU mmap we do not
734 * need to write those cachelines back afterwards.
735 */
736 gen_invalidate_range(bo->map_cpu, bo->size);
737 }
738
739 return bo->map_cpu;
740 }
741
742 static void *
743 brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
744 {
745 struct brw_bufmgr *bufmgr = bo->bufmgr;
746
747 /* Get a mapping of the buffer if we haven't before. */
748 if (bo->map_gtt == NULL) {
749 struct drm_i915_gem_mmap_gtt mmap_arg;
750 void *map;
751
752 DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
753
754 memclear(mmap_arg);
755 mmap_arg.handle = bo->gem_handle;
756
757 /* Get the fake offset back... */
758 int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
759 if (ret != 0) {
760 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
761 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
762 return NULL;
763 }
764
765 /* and mmap it. */
766 map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
767 MAP_SHARED, bufmgr->fd, mmap_arg.offset);
768 if (map == MAP_FAILED) {
769 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
770 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
771 return NULL;
772 }
773
774 /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
775 * already intercept this mmap call. However, for consistency between
776 * all the mmap paths, we mark the pointer as defined now and mark it
777 * as inaccessible afterwards.
778 */
779 VG_DEFINED(map, bo->size);
780
781 if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
782 VG_NOACCESS(map, bo->size);
783 drm_munmap(map, bo->size);
784 }
785 }
786 assert(bo->map_gtt);
787
788 DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
789 print_flags(flags);
790
791 if (!(flags & MAP_ASYNC)) {
792 set_domain(brw, "GTT mapping", bo,
793 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
794 }
795
796 return bo->map_gtt;
797 }
798
799 static bool
800 can_map_cpu(struct brw_bo *bo, unsigned flags)
801 {
802 if (bo->cache_coherent)
803 return true;
804
805 /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
806 * across batch flushes where the kernel will change cache domains of the
807 * bo, invalidating continued access to the CPU mmap on non-LLC device.
808 *
809 * Similarly, ASYNC typically means that the buffer will be accessed via
810 * both the CPU and the GPU simultaneously. Batches may be executed that
811 * use the BO even while it is mapped. While OpenGL technically disallows
812 * most drawing while non-persistent mappings are active, we may still use
813 * the GPU for blits or other operations, causing batches to happen at
814 * inconvenient times.
815 */
816 if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC))
817 return false;
818
819 return !(flags & MAP_WRITE);
820 }
821
822 void *
823 brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
824 {
825 if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
826 return brw_bo_map_gtt(brw, bo, flags);
827 else if (can_map_cpu(bo, flags))
828 return brw_bo_map_cpu(brw, bo, flags);
829 else
830 return brw_bo_map_gtt(brw, bo, flags);
831 }
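
/* A minimal mapping sketch, assuming "data" and "size" fit inside the bo.
* The brw context may be NULL here; it is only used for perf_debug
* reporting in set_domain().
*
*    void *map = brw_bo_map(brw, bo, MAP_WRITE);
*    if (map)
*       memcpy(map, data, size);
*
* There is no unmap call: the CPU/WC/GTT mappings are cached on the bo and
* only torn down in bo_free().
*/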
832
833 int
834 brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
835 uint64_t size, const void *data)
836 {
837 struct brw_bufmgr *bufmgr = bo->bufmgr;
838 struct drm_i915_gem_pwrite pwrite;
839 int ret;
840
841 memclear(pwrite);
842 pwrite.handle = bo->gem_handle;
843 pwrite.offset = offset;
844 pwrite.size = size;
845 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
846 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
847 if (ret != 0) {
848 ret = -errno;
849 DBG("%s:%d: Error writing data to buffer %d: "
850 "(%"PRIu64" %"PRIu64") %s .\n",
851 __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
852 }
853
854 return ret;
855 }
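
/* Sketch: upload a small range without mapping, assuming "data" points at
* "size" bytes and offset + size fits inside the bo.
*
*    if (brw_bo_subdata(bo, offset, size, data) != 0)
*       fprintf(stderr, "pwrite failed\n");
*/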
856
857 /** Waits for all GPU rendering with the object to have completed. */
858 void
859 brw_bo_wait_rendering(struct brw_bo *bo)
860 {
861 /* We require a kernel recent enough for WAIT_IOCTL support.
862 * See intel_init_bufmgr()
863 */
864 brw_bo_wait(bo, -1);
865 }
866
867 /**
868 * Waits on a BO for the given amount of time.
869 *
870 * @bo: buffer object to wait for
871 * @timeout_ns: amount of time to wait in nanoseconds.
872 * If value is less than 0, an infinite wait will occur.
873 *
874 * Returns 0 if the wait was successful, i.e. the last batch referencing the
875 * object has completed within the allotted time. Otherwise some negative return
876 * value describes the error. Of particular interest is -ETIME when the wait has
877 * failed to yield the desired result.
878 *
879 * Similar to brw_bo_wait_rendering except a timeout parameter allows
880 * the operation to give up after a certain amount of time. Another subtle
881 * difference is in the internal locking semantics: this variant does not
882 * hold the lock for the duration of the wait, which makes the wait subject
883 * to a larger userspace race window.
884 *
885 * The implementation shall wait until the object is no longer actively
886 * referenced within a batch buffer at the time of the call. The wait does
887 * not guarantee that the buffer will not be re-issued via another thread or a
888 * flinked handle. Userspace must make sure this race does not occur if such
889 * precision is important.
890 *
891 * Note that some kernels have broken the promise of an infinite wait for
892 * negative values; upgrade to the latest stable kernel if this is the case.
893 */
894 int
895 brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
896 {
897 struct brw_bufmgr *bufmgr = bo->bufmgr;
898 struct drm_i915_gem_wait wait;
899 int ret;
900
901 memclear(wait);
902 wait.bo_handle = bo->gem_handle;
903 wait.timeout_ns = timeout_ns;
904 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
905 if (ret == -1)
906 return -errno;
907
908 return ret;
909 }
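
/* A bounded-wait sketch: give the GPU one millisecond to finish with the
* bo and fall back to other work on -ETIME instead of blocking forever the
* way brw_bo_wait_rendering() does.
*
*    int ret = brw_bo_wait(bo, 1000 * 1000);   // 1 ms in nanoseconds
*    if (ret == -ETIME) {
*       ... still busy, do something else ...
*    } else if (ret < 0) {
*       ... some other error ...
*    }
*/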
910
911 void
912 brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
913 {
914 pthread_mutex_destroy(&bufmgr->lock);
915
916 /* Free any cached buffer objects we were going to reuse */
917 for (int i = 0; i < bufmgr->num_buckets; i++) {
918 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
919
920 list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
921 list_del(&bo->head);
922
923 bo_free(bo);
924 }
925 }
926
927 _mesa_hash_table_destroy(bufmgr->name_table, NULL);
928 _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
929
930 free(bufmgr);
931 }
932
933 static int
934 bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
935 uint32_t stride)
936 {
937 struct brw_bufmgr *bufmgr = bo->bufmgr;
938 struct drm_i915_gem_set_tiling set_tiling;
939 int ret;
940
941 if (bo->global_name == 0 &&
942 tiling_mode == bo->tiling_mode && stride == bo->stride)
943 return 0;
944
945 memset(&set_tiling, 0, sizeof(set_tiling));
946 do {
947 /* set_tiling is slightly broken and overwrites the
948 * input on the error path, so we have to open code
949 * drmIoctl.
950 */
951 set_tiling.handle = bo->gem_handle;
952 set_tiling.tiling_mode = tiling_mode;
953 set_tiling.stride = stride;
954
955 ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
956 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
957 if (ret == -1)
958 return -errno;
959
960 bo->tiling_mode = set_tiling.tiling_mode;
961 bo->swizzle_mode = set_tiling.swizzle_mode;
962 bo->stride = set_tiling.stride;
963 return 0;
964 }
965
966 int
967 brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
968 uint32_t *swizzle_mode)
969 {
970 *tiling_mode = bo->tiling_mode;
971 *swizzle_mode = bo->swizzle_mode;
972 return 0;
973 }
974
975 struct brw_bo *
976 brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
977 {
978 int ret;
979 uint32_t handle;
980 struct brw_bo *bo;
981 struct drm_i915_gem_get_tiling get_tiling;
982
983 pthread_mutex_lock(&bufmgr->lock);
984 ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
985 if (ret) {
986 DBG("create_from_prime: failed to obtain handle from fd: %s\n",
987 strerror(errno));
988 pthread_mutex_unlock(&bufmgr->lock);
989 return NULL;
990 }
991
992 /*
993 * See if the kernel has already returned this buffer to us. Just as
994 * for named buffers, we must not create two bo's pointing at the same
995 * kernel object
996 */
997 bo = hash_find_bo(bufmgr->handle_table, handle);
998 if (bo) {
999 brw_bo_reference(bo);
1000 goto out;
1001 }
1002
1003 bo = calloc(1, sizeof(*bo));
1004 if (!bo)
1005 goto out;
1006
1007 p_atomic_set(&bo->refcount, 1);
1008
1009 /* Determine size of bo. The fd-to-handle ioctl really should
1010 * return the size, but it doesn't. If we have kernel 3.12 or
1011 * later, we can lseek on the prime fd to get the size. Older
1012 * kernels will just fail, in which case the size simply stays at zero
1013 * (unknown). */
1014 ret = lseek(prime_fd, 0, SEEK_END);
1015 if (ret != -1)
1016 bo->size = ret;
1017
1018 bo->bufmgr = bufmgr;
1019
1020 bo->gem_handle = handle;
1021 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1022
1023 bo->name = "prime";
1024 bo->reusable = false;
1025 bo->external = true;
1026
1027 memclear(get_tiling);
1028 get_tiling.handle = bo->gem_handle;
1029 if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
1030 goto err;
1031
1032 bo->tiling_mode = get_tiling.tiling_mode;
1033 bo->swizzle_mode = get_tiling.swizzle_mode;
1034 /* XXX stride is unknown */
1035
1036 out:
1037 pthread_mutex_unlock(&bufmgr->lock);
1038 return bo;
1039
1040 err:
1041 bo_free(bo);
1042 pthread_mutex_unlock(&bufmgr->lock);
1043 return NULL;
1044 }
1045
1046 int
1047 brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
1048 {
1049 struct brw_bufmgr *bufmgr = bo->bufmgr;
1050
1051 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1052 DRM_CLOEXEC, prime_fd) != 0)
1053 return -errno;
1054
1055 bo->reusable = false;
1056 bo->external = true;
1057
1058 return 0;
1059 }
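
/* Prime (dma-buf) sharing sketch, the fd-based analogue of flink.  The
* exported fd can be passed to another process (the transport is up to the
* caller; "other_bufmgr" below stands for whatever bufmgr the importer
* uses) and wrapped again with brw_bo_gem_create_from_prime():
*
*    int fd;
*    if (brw_bo_gem_export_to_prime(bo, &fd) == 0) {
*       struct brw_bo *imported =
*          brw_bo_gem_create_from_prime(other_bufmgr, fd);
*       ...
*       close(fd);
*    }
*/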
1060
1061 int
1062 brw_bo_flink(struct brw_bo *bo, uint32_t *name)
1063 {
1064 struct brw_bufmgr *bufmgr = bo->bufmgr;
1065
1066 if (!bo->global_name) {
1067 struct drm_gem_flink flink;
1068
1069 memclear(flink);
1070 flink.handle = bo->gem_handle;
1071 if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1072 return -errno;
1073
1074 pthread_mutex_lock(&bufmgr->lock);
1075 if (!bo->global_name) {
1076 bo->global_name = flink.name;
1077 bo->reusable = false;
1078 bo->external = true;
1079
1080 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
1081 }
1082 pthread_mutex_unlock(&bufmgr->lock);
1083 }
1084
1085 *name = bo->global_name;
1086 return 0;
1087 }
1088
1089 /**
1090 * Enables unlimited caching of buffer objects for reuse.
1091 *
1092 * This is potentially very memory expensive, as the cache at each bucket
1093 * size is only bounded by how many buffers of that size we've managed to have
1094 * in flight at once.
1095 */
1096 void
1097 brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr)
1098 {
1099 bufmgr->bo_reuse = true;
1100 }
1101
1102 static void
1103 add_bucket(struct brw_bufmgr *bufmgr, int size)
1104 {
1105 unsigned int i = bufmgr->num_buckets;
1106
1107 assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
1108
1109 list_inithead(&bufmgr->cache_bucket[i].head);
1110 bufmgr->cache_bucket[i].size = size;
1111 bufmgr->num_buckets++;
1112 }
1113
1114 static void
1115 init_cache_buckets(struct brw_bufmgr *bufmgr)
1116 {
1117 uint64_t size, cache_max_size = 64 * 1024 * 1024;
1118
1119 /* OK, so power of two buckets was too wasteful of memory.
1120 * Give 3 other sizes between each power of two, to hopefully
1121 * cover things accurately enough. (The alternative is
1122 * probably to just go for exact matching of sizes, and assume
1123 * that for things like composited window resize the tiled
1124 * width/height alignment and rounding of sizes to pages will
1125 * get us useful cache hit rates anyway)
1126 */
1127 add_bucket(bufmgr, 4096);
1128 add_bucket(bufmgr, 4096 * 2);
1129 add_bucket(bufmgr, 4096 * 3);
1130
1131 /* Initialize the linked lists for BO reuse cache. */
1132 for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
1133 add_bucket(bufmgr, size);
1134
1135 add_bucket(bufmgr, size + size * 1 / 4);
1136 add_bucket(bufmgr, size + size * 2 / 4);
1137 add_bucket(bufmgr, size + size * 3 / 4);
1138 }
1139 }
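
/* With the loop above, the bucket sizes end up being 4 KB, 8 KB and 12 KB,
* and then, for every power of two from 16 KB up to 64 MB, the power of two
* itself plus 1.25x, 1.5x and 1.75x of it (16 KB, 20 KB, 24 KB, 28 KB,
* 32 KB, 40 KB, ...).  bucket_for_size() returns the first bucket at least
* as large as the request, so a 17 KB allocation, for example, is rounded
* up to the 20 KB bucket.
*/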
1140
1141 uint32_t
1142 brw_create_hw_context(struct brw_bufmgr *bufmgr)
1143 {
1144 struct drm_i915_gem_context_create create;
1145 int ret;
1146
1147 memclear(create);
1148 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
1149 if (ret != 0) {
1150 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
1151 return 0;
1152 }
1153
1154 return create.ctx_id;
1155 }
1156
1157 void
1158 brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
1159 {
1160 struct drm_i915_gem_context_destroy d = {.ctx_id = ctx_id };
1161
1162 if (ctx_id != 0 &&
1163 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
1164 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
1165 strerror(errno));
1166 }
1167 }
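
/* Context lifecycle sketch: a hardware context keeps per-context GPU state
* isolated between clients.  brw_create_hw_context() returns 0 on failure
* and brw_destroy_hw_context() ignores a ctx_id of 0, so the pair can be
* used symmetrically:
*
*    uint32_t ctx_id = brw_create_hw_context(bufmgr);
*    if (ctx_id == 0) {
*       ... fall back or fail ...
*    }
*    ...
*    brw_destroy_hw_context(bufmgr, ctx_id);
*/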
1168
1169 int
1170 brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
1171 {
1172 struct drm_i915_reg_read reg_read;
1173 int ret;
1174
1175 memclear(reg_read);
1176 reg_read.offset = offset;
1177
1178 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
1179
1180 *result = reg_read.val;
1181 return ret;
1182 }
1183
1184 /**
1185 * Initializes the GEM buffer manager, which uses the kernel to allocate,
1186 * map, and manage buffer objects.
1187 *
1188 * \param fd File descriptor of the opened DRM device.
1189 */
1190 struct brw_bufmgr *
1191 brw_bufmgr_init(struct gen_device_info *devinfo, int fd, int batch_size)
1192 {
1193 struct brw_bufmgr *bufmgr;
1194
1195 bufmgr = calloc(1, sizeof(*bufmgr));
1196 if (bufmgr == NULL)
1197 return NULL;
1198
1199 /* Handles to buffer objects belong to the device fd and are not
1200 * reference counted by the kernel. If the same fd is used by
1201 * multiple parties (threads sharing the same screen bufmgr, or
1202 * even worse the same device fd passed to multiple libraries)
1203 * ownership of those handles is shared by those independent parties.
1204 *
1205 * Don't do this! Ensure that each library/bufmgr has its own device
1206 * fd so that its namespace does not clash with another.
1207 */
1208 bufmgr->fd = fd;
1209
1210 if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
1211 free(bufmgr);
1212 return NULL;
1213 }
1214
1215 bufmgr->has_llc = devinfo->has_llc;
1216
1217 init_cache_buckets(bufmgr);
1218
1219 bufmgr->name_table =
1220 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1221 bufmgr->handle_table =
1222 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1223
1224 return bufmgr;
1225 }
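
/* Typical setup sketch, assuming the caller (normally the screen creation
* code) supplies "devinfo" and the opened DRM fd; note that the batch_size
* argument is not used by this function as written:
*
*    struct brw_bufmgr *bufmgr = brw_bufmgr_init(devinfo, fd, 0);
*    if (bufmgr == NULL)
*       return false;
*    brw_bufmgr_enable_reuse(bufmgr);
*/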