i965: Implement brw_bo_map_unsynchronized() with MAP_ASYNC
[mesa.git] / src / mesa / drivers / dri / i965 / brw_bufmgr.c
1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include "errno.h"
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "common/gen_debug.h"
60 #include "common/gen_device_info.h"
61 #include "libdrm_macros.h"
62 #include "main/macros.h"
63 #include "util/macros.h"
64 #include "util/hash_table.h"
65 #include "util/list.h"
66 #include "brw_bufmgr.h"
67 #include "brw_context.h"
68 #include "string.h"
69
70 #include "i915_drm.h"
71
72 #ifdef HAVE_VALGRIND
73 #include <valgrind.h>
74 #include <memcheck.h>
75 #define VG(x) x
76 #else
77 #define VG(x)
78 #endif
79
80 #define memclear(s) memset(&s, 0, sizeof(s))
81
82 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
83
/**
 * Atomically adds \p add to \p *v, except when the value currently stored
 * there equals \p unless.
 *
 * Returns non-zero when the stored value was equal to \p unless (in which
 * case nothing was added), and zero once the addition has been applied.
 */
static inline int
atomic_add_unless(int *v, int add, int unless)
{
   int cur = p_atomic_read(v);

   while (cur != unless) {
      /* Retry with whatever value the compare-and-swap reported back. */
      const int seen = p_atomic_cmpxchg(v, cur, cur + add);
      if (seen == cur)
         break;
      cur = seen;
   }

   return cur == unless;
}
93
/** One size class of the buffer-object reuse cache. */
struct bo_cache_bucket {
   /** List of cached (currently unreferenced) buffer objects. */
   struct list_head head;
   /** Size that allocations served from this bucket are rounded up to. */
   uint64_t size;
};
98
/** Per-device buffer manager state. */
struct brw_bufmgr {
   /** DRM device file descriptor all ioctls are issued on. */
   int fd;

   /** Protects the cache buckets, the lookup tables and BO mapping state. */
   pthread_mutex_t lock;

   /**
    * Array of lists of cached gem objects, one list per size class.
    * The size classes are filled in by init_cache_buckets().
    */
   struct bo_cache_bucket cache_bucket[14 * 4];
   /** Number of valid entries in cache_bucket[]. */
   int num_buckets;
   /** Timestamp (seconds) of the last cleanup_bo_cache() pass. */
   time_t time;

   /** Maps a global (flink) name to its brw_bo. */
   struct hash_table *name_table;
   /** Maps a GEM handle to its brw_bo. */
   struct hash_table *handle_table;

   /** Copied from gen_device_info::has_llc at init time. */
   bool has_llc:1;
   /** Set by brw_bufmgr_enable_reuse(); gates use of the BO cache. */
   bool bo_reuse:1;
};
115
116 static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
117 uint32_t stride);
118
119 static void bo_free(struct brw_bo *bo);
120
/** Hash callback for tables keyed by a pointer to a 4-byte integer. */
static uint32_t
key_hash_uint(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint32_t));
}
126
/** Equality callback: compares the unsigned integers the two keys point at. */
static bool
key_uint_equal(const void *a, const void *b)
{
   const unsigned lhs = *((unsigned *) a);
   const unsigned rhs = *((unsigned *) b);

   return lhs == rhs;
}
132
133 static struct brw_bo *
134 hash_find_bo(struct hash_table *ht, unsigned int key)
135 {
136 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
137 return entry ? (struct brw_bo *) entry->data : NULL;
138 }
139
140 static uint64_t
141 bo_tile_size(struct brw_bufmgr *bufmgr, uint64_t size, uint32_t tiling)
142 {
143 if (tiling == I915_TILING_NONE)
144 return size;
145
146 /* 965+ just need multiples of page size for tiling */
147 return ALIGN(size, 4096);
148 }
149
150 /*
151 * Round a given pitch up to the minimum required for X tiling on a
152 * given chip. We use 512 as the minimum to allow for a later tiling
153 * change.
154 */
155 static uint32_t
156 bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
157 {
158 unsigned long tile_width;
159
160 /* If untiled, then just align it so that we can do rendering
161 * to it with the 3D engine.
162 */
163 if (tiling == I915_TILING_NONE)
164 return ALIGN(pitch, 64);
165
166 if (tiling == I915_TILING_X)
167 tile_width = 512;
168 else
169 tile_width = 128;
170
171 /* 965 is flexible */
172 return ALIGN(pitch, tile_width);
173 }
174
175 static struct bo_cache_bucket *
176 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
177 {
178 int i;
179
180 for (i = 0; i < bufmgr->num_buckets; i++) {
181 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
182 if (bucket->size >= size) {
183 return bucket;
184 }
185 }
186
187 return NULL;
188 }
189
190 inline void
191 brw_bo_reference(struct brw_bo *bo)
192 {
193 p_atomic_inc(&bo->refcount);
194 }
195
196 int
197 brw_bo_busy(struct brw_bo *bo)
198 {
199 struct brw_bufmgr *bufmgr = bo->bufmgr;
200 struct drm_i915_gem_busy busy;
201 int ret;
202
203 memclear(busy);
204 busy.handle = bo->gem_handle;
205
206 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
207 if (ret == 0) {
208 bo->idle = !busy.busy;
209 return busy.busy;
210 }
211 return false;
212 }
213
214 int
215 brw_bo_madvise(struct brw_bo *bo, int state)
216 {
217 struct drm_i915_gem_madvise madv;
218
219 memclear(madv);
220 madv.handle = bo->gem_handle;
221 madv.madv = state;
222 madv.retained = 1;
223 drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
224
225 return madv.retained;
226 }
227
228 /* drop the oldest entries that have been purged by the kernel */
229 static void
230 brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
231 struct bo_cache_bucket *bucket)
232 {
233 list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
234 if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
235 break;
236
237 list_del(&bo->head);
238 bo_free(bo);
239 }
240 }
241
242 static struct brw_bo *
243 bo_alloc_internal(struct brw_bufmgr *bufmgr,
244 const char *name,
245 uint64_t size,
246 unsigned flags,
247 uint32_t tiling_mode,
248 uint32_t stride, uint64_t alignment)
249 {
250 struct brw_bo *bo;
251 unsigned int page_size = getpagesize();
252 int ret;
253 struct bo_cache_bucket *bucket;
254 bool alloc_from_cache;
255 uint64_t bo_size;
256 bool for_render = false;
257
258 if (flags & BO_ALLOC_FOR_RENDER)
259 for_render = true;
260
261 /* Round the allocated size up to a power of two number of pages. */
262 bucket = bucket_for_size(bufmgr, size);
263
264 /* If we don't have caching at this size, don't actually round the
265 * allocation up.
266 */
267 if (bucket == NULL) {
268 bo_size = size;
269 if (bo_size < page_size)
270 bo_size = page_size;
271 } else {
272 bo_size = bucket->size;
273 }
274
275 pthread_mutex_lock(&bufmgr->lock);
276 /* Get a buffer out of the cache if available */
277 retry:
278 alloc_from_cache = false;
279 if (bucket != NULL && !list_empty(&bucket->head)) {
280 if (for_render) {
281 /* Allocate new render-target BOs from the tail (MRU)
282 * of the list, as it will likely be hot in the GPU
283 * cache and in the aperture for us.
284 */
285 bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
286 list_del(&bo->head);
287 alloc_from_cache = true;
288 bo->align = alignment;
289 } else {
290 assert(alignment == 0);
291 /* For non-render-target BOs (where we're probably
292 * going to map it first thing in order to fill it
293 * with data), check if the last BO in the cache is
294 * unbusy, and only reuse in that case. Otherwise,
295 * allocating a new buffer is probably faster than
296 * waiting for the GPU to finish.
297 */
298 bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
299 if (!brw_bo_busy(bo)) {
300 alloc_from_cache = true;
301 list_del(&bo->head);
302 }
303 }
304
305 if (alloc_from_cache) {
306 if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
307 bo_free(bo);
308 brw_bo_cache_purge_bucket(bufmgr, bucket);
309 goto retry;
310 }
311
312 if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
313 bo_free(bo);
314 goto retry;
315 }
316 }
317 }
318
319 if (!alloc_from_cache) {
320 struct drm_i915_gem_create create;
321
322 bo = calloc(1, sizeof(*bo));
323 if (!bo)
324 goto err;
325
326 bo->size = bo_size;
327
328 memclear(create);
329 create.size = bo_size;
330
331 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
332 if (ret != 0) {
333 free(bo);
334 goto err;
335 }
336
337 bo->gem_handle = create.handle;
338 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
339
340 bo->bufmgr = bufmgr;
341 bo->align = alignment;
342
343 bo->tiling_mode = I915_TILING_NONE;
344 bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
345 bo->stride = 0;
346
347 if (bo_set_tiling_internal(bo, tiling_mode, stride))
348 goto err_free;
349 }
350
351 bo->name = name;
352 p_atomic_set(&bo->refcount, 1);
353 bo->reusable = true;
354 bo->cache_coherent = bufmgr->has_llc;
355
356 pthread_mutex_unlock(&bufmgr->lock);
357
358 DBG("bo_create: buf %d (%s) %ldb\n", bo->gem_handle, bo->name, size);
359
360 return bo;
361
362 err_free:
363 bo_free(bo);
364 err:
365 pthread_mutex_unlock(&bufmgr->lock);
366 return NULL;
367 }
368
369 struct brw_bo *
370 brw_bo_alloc(struct brw_bufmgr *bufmgr,
371 const char *name, uint64_t size, uint64_t alignment)
372 {
373 return bo_alloc_internal(bufmgr, name, size, 0, I915_TILING_NONE, 0, 0);
374 }
375
376 struct brw_bo *
377 brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
378 int x, int y, int cpp, uint32_t tiling,
379 uint32_t *pitch, unsigned flags)
380 {
381 uint64_t size;
382 uint32_t stride;
383 unsigned long aligned_y, height_alignment;
384
385 /* If we're tiled, our allocations are in 8 or 32-row blocks,
386 * so failure to align our height means that we won't allocate
387 * enough pages.
388 *
389 * If we're untiled, we still have to align to 2 rows high
390 * because the data port accesses 2x2 blocks even if the
391 * bottom row isn't to be rendered, so failure to align means
392 * we could walk off the end of the GTT and fault. This is
393 * documented on 965, and may be the case on older chipsets
394 * too so we try to be careful.
395 */
396 aligned_y = y;
397 height_alignment = 2;
398
399 if (tiling == I915_TILING_X)
400 height_alignment = 8;
401 else if (tiling == I915_TILING_Y)
402 height_alignment = 32;
403 aligned_y = ALIGN(y, height_alignment);
404
405 stride = x * cpp;
406 stride = bo_tile_pitch(bufmgr, stride, tiling);
407 size = stride * aligned_y;
408 size = bo_tile_size(bufmgr, size, tiling);
409 *pitch = stride;
410
411 if (tiling == I915_TILING_NONE)
412 stride = 0;
413
414 return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0);
415 }
416
417 /**
418 * Returns a brw_bo wrapping the given buffer object handle.
419 *
420 * This can be used when one application needs to pass a buffer object
421 * to another.
422 */
423 struct brw_bo *
424 brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
425 const char *name, unsigned int handle)
426 {
427 struct brw_bo *bo;
428 int ret;
429 struct drm_gem_open open_arg;
430 struct drm_i915_gem_get_tiling get_tiling;
431
432 /* At the moment most applications only have a few named bo.
433 * For instance, in a DRI client only the render buffers passed
434 * between X and the client are named. And since X returns the
435 * alternating names for the front/back buffer a linear search
436 * provides a sufficiently fast match.
437 */
438 pthread_mutex_lock(&bufmgr->lock);
439 bo = hash_find_bo(bufmgr->name_table, handle);
440 if (bo) {
441 brw_bo_reference(bo);
442 goto out;
443 }
444
445 memclear(open_arg);
446 open_arg.name = handle;
447 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
448 if (ret != 0) {
449 DBG("Couldn't reference %s handle 0x%08x: %s\n",
450 name, handle, strerror(errno));
451 bo = NULL;
452 goto out;
453 }
454 /* Now see if someone has used a prime handle to get this
455 * object from the kernel before by looking through the list
456 * again for a matching gem_handle
457 */
458 bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
459 if (bo) {
460 brw_bo_reference(bo);
461 goto out;
462 }
463
464 bo = calloc(1, sizeof(*bo));
465 if (!bo)
466 goto out;
467
468 p_atomic_set(&bo->refcount, 1);
469
470 bo->size = open_arg.size;
471 bo->offset64 = 0;
472 bo->bufmgr = bufmgr;
473 bo->gem_handle = open_arg.handle;
474 bo->name = name;
475 bo->global_name = handle;
476 bo->reusable = false;
477
478 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
479 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
480
481 memclear(get_tiling);
482 get_tiling.handle = bo->gem_handle;
483 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
484 if (ret != 0)
485 goto err_unref;
486
487 bo->tiling_mode = get_tiling.tiling_mode;
488 bo->swizzle_mode = get_tiling.swizzle_mode;
489 /* XXX stride is unknown */
490 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
491
492 out:
493 pthread_mutex_unlock(&bufmgr->lock);
494 return bo;
495
496 err_unref:
497 bo_free(bo);
498 pthread_mutex_unlock(&bufmgr->lock);
499 return NULL;
500 }
501
502 static void
503 bo_free(struct brw_bo *bo)
504 {
505 struct brw_bufmgr *bufmgr = bo->bufmgr;
506 struct drm_gem_close close;
507 struct hash_entry *entry;
508 int ret;
509
510 if (bo->map_cpu) {
511 VG(VALGRIND_FREELIKE_BLOCK(bo->map_cpu, 0));
512 drm_munmap(bo->map_cpu, bo->size);
513 }
514 if (bo->map_wc) {
515 VG(VALGRIND_FREELIKE_BLOCK(bo->map_wc, 0));
516 drm_munmap(bo->map_wc, bo->size);
517 }
518 if (bo->map_gtt) {
519 drm_munmap(bo->map_gtt, bo->size);
520 }
521
522 if (bo->global_name) {
523 entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
524 _mesa_hash_table_remove(bufmgr->name_table, entry);
525 }
526 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
527 _mesa_hash_table_remove(bufmgr->handle_table, entry);
528
529 /* Close this object */
530 memclear(close);
531 close.handle = bo->gem_handle;
532 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
533 if (ret != 0) {
534 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
535 bo->gem_handle, bo->name, strerror(errno));
536 }
537 free(bo);
538 }
539
/**
 * Under Valgrind, marks every existing mapping of \p bo as inaccessible so
 * that use of a stale mapping is reported.  Does nothing when the build has
 * no Valgrind support.
 */
static void
bo_mark_mmaps_incoherent(struct brw_bo *bo)
{
/* Use #ifdef, matching the HAVE_VALGRIND guard that defines VG() in this file. */
#ifdef HAVE_VALGRIND
   if (bo->map_cpu)
      VALGRIND_MAKE_MEM_NOACCESS(bo->map_cpu, bo->size);

   if (bo->map_wc)
      VALGRIND_MAKE_MEM_NOACCESS(bo->map_wc, bo->size);

   if (bo->map_gtt)
      VALGRIND_MAKE_MEM_NOACCESS(bo->map_gtt, bo->size);
#endif
}
554
555 /** Frees all cached buffers significantly older than @time. */
556 static void
557 cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time)
558 {
559 int i;
560
561 if (bufmgr->time == time)
562 return;
563
564 for (i = 0; i < bufmgr->num_buckets; i++) {
565 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
566
567 list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
568 if (time - bo->free_time <= 1)
569 break;
570
571 list_del(&bo->head);
572
573 bo_free(bo);
574 }
575 }
576
577 bufmgr->time = time;
578 }
579
580 static void
581 bo_unreference_final(struct brw_bo *bo, time_t time)
582 {
583 struct brw_bufmgr *bufmgr = bo->bufmgr;
584 struct bo_cache_bucket *bucket;
585
586 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
587
588 /* Clear any left-over mappings */
589 if (bo->map_count) {
590 DBG("bo freed with non-zero map-count %d\n", bo->map_count);
591 bo->map_count = 0;
592 bo_mark_mmaps_incoherent(bo);
593 }
594
595 bucket = bucket_for_size(bufmgr, bo->size);
596 /* Put the buffer into our internal cache for reuse if we can. */
597 if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
598 brw_bo_madvise(bo, I915_MADV_DONTNEED)) {
599 bo->free_time = time;
600
601 bo->name = NULL;
602 bo->kflags = 0;
603
604 list_addtail(&bo->head, &bucket->head);
605 } else {
606 bo_free(bo);
607 }
608 }
609
610 void
611 brw_bo_unreference(struct brw_bo *bo)
612 {
613 if (bo == NULL)
614 return;
615
616 assert(p_atomic_read(&bo->refcount) > 0);
617
618 if (atomic_add_unless(&bo->refcount, -1, 1)) {
619 struct brw_bufmgr *bufmgr = bo->bufmgr;
620 struct timespec time;
621
622 clock_gettime(CLOCK_MONOTONIC, &time);
623
624 pthread_mutex_lock(&bufmgr->lock);
625
626 if (p_atomic_dec_zero(&bo->refcount)) {
627 bo_unreference_final(bo, time.tv_sec);
628 cleanup_bo_cache(bufmgr, time.tv_sec);
629 }
630
631 pthread_mutex_unlock(&bufmgr->lock);
632 }
633 }
634
635 static void
636 set_domain(struct brw_context *brw, const char *action,
637 struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain)
638 {
639 struct drm_i915_gem_set_domain sd = {
640 .handle = bo->gem_handle,
641 .read_domains = read_domains,
642 .write_domain = write_domain,
643 };
644
645 double elapsed = unlikely(brw && brw->perf_debug) ? -get_time() : 0.0;
646
647 if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) {
648 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s.\n",
649 __FILE__, __LINE__, bo->gem_handle, read_domains, write_domain,
650 strerror(errno));
651 }
652
653 if (unlikely(brw && brw->perf_debug)) {
654 elapsed += get_time();
655 if (elapsed > 1e-5) /* 0.01ms */
656 perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
657 action, bo->name, elapsed * 1000);
658 }
659 }
660
661 static void *
662 brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
663 {
664 struct brw_bufmgr *bufmgr = bo->bufmgr;
665
666 pthread_mutex_lock(&bufmgr->lock);
667
668 if (!bo->map_cpu) {
669 struct drm_i915_gem_mmap mmap_arg;
670
671 DBG("brw_bo_map_cpu: %d (%s), map_count=%d\n",
672 bo->gem_handle, bo->name, bo->map_count);
673
674 memclear(mmap_arg);
675 mmap_arg.handle = bo->gem_handle;
676 mmap_arg.size = bo->size;
677 int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
678 if (ret != 0) {
679 ret = -errno;
680 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
681 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
682 pthread_mutex_unlock(&bufmgr->lock);
683 return NULL;
684 }
685 bo->map_count++;
686 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
687 bo->map_cpu = (void *) (uintptr_t) mmap_arg.addr_ptr;
688 }
689 DBG("brw_bo_map_cpu: %d (%s) -> %p\n", bo->gem_handle, bo->name,
690 bo->map_cpu);
691
692 if (!(flags & MAP_ASYNC)) {
693 set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
694 flags & MAP_WRITE ? I915_GEM_DOMAIN_CPU : 0);
695 }
696
697 bo_mark_mmaps_incoherent(bo);
698 VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_cpu, bo->size));
699 pthread_mutex_unlock(&bufmgr->lock);
700
701 return bo->map_cpu;
702 }
703
704 static void *
705 brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
706 {
707 struct brw_bufmgr *bufmgr = bo->bufmgr;
708
709 pthread_mutex_lock(&bufmgr->lock);
710
711 /* Get a mapping of the buffer if we haven't before. */
712 if (bo->map_gtt == NULL) {
713 struct drm_i915_gem_mmap_gtt mmap_arg;
714
715 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
716 bo->gem_handle, bo->name, bo->map_count);
717
718 memclear(mmap_arg);
719 mmap_arg.handle = bo->gem_handle;
720
721 /* Get the fake offset back... */
722 int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
723 if (ret != 0) {
724 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
725 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
726 pthread_mutex_unlock(&bufmgr->lock);
727 return NULL;
728 }
729
730 /* and mmap it */
731 bo->map_gtt = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
732 MAP_SHARED, bufmgr->fd, mmap_arg.offset);
733 if (bo->map_gtt == MAP_FAILED) {
734 bo->map_gtt = NULL;
735 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
736 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
737 pthread_mutex_unlock(&bufmgr->lock);
738 return NULL;
739 }
740 bo->map_count++;
741 }
742
743 DBG("bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name,
744 bo->map_gtt);
745
746 if (!(flags & MAP_ASYNC)) {
747 set_domain(brw, "GTT mapping", bo,
748 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
749 }
750
751 bo_mark_mmaps_incoherent(bo);
752 VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_gtt, bo->size));
753 pthread_mutex_unlock(&bufmgr->lock);
754
755 return bo->map_gtt;
756 }
757
758 /**
759 * Performs a mapping of the buffer object like the normal GTT
760 * mapping, but avoids waiting for the GPU to be done reading from or
761 * rendering to the buffer.
762 *
763 * This is used in the implementation of GL_ARB_map_buffer_range: The
764 * user asks to create a buffer, then does a mapping, fills some
765 * space, runs a drawing command, then asks to map it again without
766 * synchronizing because it guarantees that it won't write over the
767 * data that the GPU is busy using (or, more specifically, that if it
768 * does write over the data, it acknowledges that rendering is
769 * undefined).
770 */
771
772 void *
773 brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo)
774 {
775 struct brw_bufmgr *bufmgr = bo->bufmgr;
776
777 /* If the CPU cache isn't coherent with the GTT, then use a
778 * regular synchronized mapping. The problem is that we don't
779 * track where the buffer was last used on the CPU side in
780 * terms of brw_bo_map_cpu vs brw_bo_map_gtt, so
781 * we would potentially corrupt the buffer even when the user
782 * does reasonable things.
783 */
784 if (!bufmgr->has_llc)
785 return brw_bo_map_gtt(brw, bo, MAP_READ | MAP_WRITE);
786 else
787 return brw_bo_map_gtt(brw, bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
788 }
789
790 static bool
791 can_map_cpu(struct brw_bo *bo, unsigned flags)
792 {
793 if (bo->cache_coherent)
794 return true;
795
796 if (flags & MAP_PERSISTENT)
797 return false;
798
799 if (flags & MAP_COHERENT)
800 return false;
801
802 return !(flags & MAP_WRITE);
803 }
804
805 void *
806 brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
807 {
808 if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
809 return brw_bo_map_gtt(brw, bo, flags);
810 else if (can_map_cpu(bo, flags))
811 return brw_bo_map_cpu(brw, bo, flags);
812 else
813 return brw_bo_map_gtt(brw, bo, flags);
814 }
815
816 int
817 brw_bo_unmap(struct brw_bo *bo)
818 {
819 struct brw_bufmgr *bufmgr = bo->bufmgr;
820 int ret = 0;
821
822 pthread_mutex_lock(&bufmgr->lock);
823
824 if (bo->map_count <= 0) {
825 DBG("attempted to unmap an unmapped bo\n");
826 pthread_mutex_unlock(&bufmgr->lock);
827 /* Preserve the old behaviour of just treating this as a
828 * no-op rather than reporting the error.
829 */
830 return 0;
831 }
832
833 if (--bo->map_count == 0) {
834 bo_mark_mmaps_incoherent(bo);
835 }
836 pthread_mutex_unlock(&bufmgr->lock);
837
838 return ret;
839 }
840
841 int
842 brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
843 uint64_t size, const void *data)
844 {
845 struct brw_bufmgr *bufmgr = bo->bufmgr;
846 struct drm_i915_gem_pwrite pwrite;
847 int ret;
848
849 memclear(pwrite);
850 pwrite.handle = bo->gem_handle;
851 pwrite.offset = offset;
852 pwrite.size = size;
853 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
854 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
855 if (ret != 0) {
856 ret = -errno;
857 DBG("%s:%d: Error writing data to buffer %d: "
858 "(%"PRIu64" %"PRIu64") %s .\n",
859 __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
860 }
861
862 return ret;
863 }
864
865 int
866 brw_bo_get_subdata(struct brw_bo *bo, uint64_t offset,
867 uint64_t size, void *data)
868 {
869 struct brw_bufmgr *bufmgr = bo->bufmgr;
870 struct drm_i915_gem_pread pread;
871 int ret;
872
873 memclear(pread);
874 pread.handle = bo->gem_handle;
875 pread.offset = offset;
876 pread.size = size;
877 pread.data_ptr = (uint64_t) (uintptr_t) data;
878 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
879 if (ret != 0) {
880 ret = -errno;
881 DBG("%s:%d: Error reading data from buffer %d: "
882 "(%"PRIu64" %"PRIu64") %s .\n",
883 __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
884 }
885
886 return ret;
887 }
888
889 /** Waits for all GPU rendering with the object to have completed. */
890 void
891 brw_bo_wait_rendering(struct brw_context *brw, struct brw_bo *bo)
892 {
893 set_domain(brw, "waiting for",
894 bo, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
895 }
896
897 /**
898 * Waits on a BO for the given amount of time.
899 *
900 * @bo: buffer object to wait for
901 * @timeout_ns: amount of time to wait in nanoseconds.
902 * If value is less than 0, an infinite wait will occur.
903 *
904 * Returns 0 if the wait was successful ie. the last batch referencing the
905 * object has completed within the allotted time. Otherwise some negative return
906 * value describes the error. Of particular interest is -ETIME when the wait has
907 * failed to yield the desired result.
908 *
909 * Similar to brw_bo_wait_rendering except a timeout parameter allows
910 * the operation to give up after a certain amount of time. Another subtle
911 * difference is the internal locking semantics are different (this variant does
912 * not hold the lock for the duration of the wait). This makes the wait subject
913 * to a larger userspace race window.
914 *
915 * The implementation shall wait until the object is no longer actively
916 * referenced within a batch buffer at the time of the call. The wait will
917 * not guarantee that the buffer is re-issued via another thread, or an flinked
918 * handle. Userspace must make sure this race does not occur if such precision
919 * is important.
920 *
921 * Note that some kernels have broken the inifite wait for negative values
922 * promise, upgrade to latest stable kernels if this is the case.
923 */
924 int
925 brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
926 {
927 struct brw_bufmgr *bufmgr = bo->bufmgr;
928 struct drm_i915_gem_wait wait;
929 int ret;
930
931 memclear(wait);
932 wait.bo_handle = bo->gem_handle;
933 wait.timeout_ns = timeout_ns;
934 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
935 if (ret == -1)
936 return -errno;
937
938 return ret;
939 }
940
941 void
942 brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
943 {
944 pthread_mutex_destroy(&bufmgr->lock);
945
946 /* Free any cached buffer objects we were going to reuse */
947 for (int i = 0; i < bufmgr->num_buckets; i++) {
948 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
949
950 list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
951 list_del(&bo->head);
952
953 bo_free(bo);
954 }
955 }
956
957 _mesa_hash_table_destroy(bufmgr->name_table, NULL);
958 _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
959
960 free(bufmgr);
961 }
962
963 static int
964 bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
965 uint32_t stride)
966 {
967 struct brw_bufmgr *bufmgr = bo->bufmgr;
968 struct drm_i915_gem_set_tiling set_tiling;
969 int ret;
970
971 if (bo->global_name == 0 &&
972 tiling_mode == bo->tiling_mode && stride == bo->stride)
973 return 0;
974
975 memset(&set_tiling, 0, sizeof(set_tiling));
976 do {
977 /* set_tiling is slightly broken and overwrites the
978 * input on the error path, so we have to open code
979 * rmIoctl.
980 */
981 set_tiling.handle = bo->gem_handle;
982 set_tiling.tiling_mode = tiling_mode;
983 set_tiling.stride = stride;
984
985 ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
986 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
987 if (ret == -1)
988 return -errno;
989
990 bo->tiling_mode = set_tiling.tiling_mode;
991 bo->swizzle_mode = set_tiling.swizzle_mode;
992 bo->stride = set_tiling.stride;
993 return 0;
994 }
995
996 int
997 brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
998 uint32_t *swizzle_mode)
999 {
1000 *tiling_mode = bo->tiling_mode;
1001 *swizzle_mode = bo->swizzle_mode;
1002 return 0;
1003 }
1004
1005 struct brw_bo *
1006 brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd,
1007 int size)
1008 {
1009 int ret;
1010 uint32_t handle;
1011 struct brw_bo *bo;
1012 struct drm_i915_gem_get_tiling get_tiling;
1013
1014 pthread_mutex_lock(&bufmgr->lock);
1015 ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1016 if (ret) {
1017 DBG("create_from_prime: failed to obtain handle from fd: %s\n",
1018 strerror(errno));
1019 pthread_mutex_unlock(&bufmgr->lock);
1020 return NULL;
1021 }
1022
1023 /*
1024 * See if the kernel has already returned this buffer to us. Just as
1025 * for named buffers, we must not create two bo's pointing at the same
1026 * kernel object
1027 */
1028 bo = hash_find_bo(bufmgr->handle_table, handle);
1029 if (bo) {
1030 brw_bo_reference(bo);
1031 goto out;
1032 }
1033
1034 bo = calloc(1, sizeof(*bo));
1035 if (!bo)
1036 goto out;
1037
1038 p_atomic_set(&bo->refcount, 1);
1039
1040 /* Determine size of bo. The fd-to-handle ioctl really should
1041 * return the size, but it doesn't. If we have kernel 3.12 or
1042 * later, we can lseek on the prime fd to get the size. Older
1043 * kernels will just fail, in which case we fall back to the
1044 * provided (estimated or guess size). */
1045 ret = lseek(prime_fd, 0, SEEK_END);
1046 if (ret != -1)
1047 bo->size = ret;
1048 else
1049 bo->size = size;
1050
1051 bo->bufmgr = bufmgr;
1052
1053 bo->gem_handle = handle;
1054 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1055
1056 bo->name = "prime";
1057 bo->reusable = false;
1058
1059 memclear(get_tiling);
1060 get_tiling.handle = bo->gem_handle;
1061 if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
1062 goto err;
1063
1064 bo->tiling_mode = get_tiling.tiling_mode;
1065 bo->swizzle_mode = get_tiling.swizzle_mode;
1066 /* XXX stride is unknown */
1067
1068 out:
1069 pthread_mutex_unlock(&bufmgr->lock);
1070 return bo;
1071
1072 err:
1073 bo_free(bo);
1074 pthread_mutex_unlock(&bufmgr->lock);
1075 return NULL;
1076 }
1077
1078 int
1079 brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
1080 {
1081 struct brw_bufmgr *bufmgr = bo->bufmgr;
1082
1083 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1084 DRM_CLOEXEC, prime_fd) != 0)
1085 return -errno;
1086
1087 bo->reusable = false;
1088
1089 return 0;
1090 }
1091
1092 int
1093 brw_bo_flink(struct brw_bo *bo, uint32_t *name)
1094 {
1095 struct brw_bufmgr *bufmgr = bo->bufmgr;
1096
1097 if (!bo->global_name) {
1098 struct drm_gem_flink flink;
1099
1100 memclear(flink);
1101 flink.handle = bo->gem_handle;
1102 if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1103 return -errno;
1104
1105 pthread_mutex_lock(&bufmgr->lock);
1106 if (!bo->global_name) {
1107 bo->global_name = flink.name;
1108 bo->reusable = false;
1109
1110 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
1111 }
1112 pthread_mutex_unlock(&bufmgr->lock);
1113 }
1114
1115 *name = bo->global_name;
1116 return 0;
1117 }
1118
/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 *
 * Sets the bo_reuse flag, which bo_unreference_final() checks before
 * parking a released buffer in the cache.
 */
void
brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr)
{
   bufmgr->bo_reuse = true;
}
1131
1132 static void
1133 add_bucket(struct brw_bufmgr *bufmgr, int size)
1134 {
1135 unsigned int i = bufmgr->num_buckets;
1136
1137 assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
1138
1139 list_inithead(&bufmgr->cache_bucket[i].head);
1140 bufmgr->cache_bucket[i].size = size;
1141 bufmgr->num_buckets++;
1142 }
1143
/**
 * Creates the size classes of the BO reuse cache: 1, 2 and 3 pages, then
 * every power of two from 4 pages up to 64 MiB, each followed by three
 * evenly spaced intermediate sizes.
 */
static void
init_cache_buckets(struct brw_bufmgr *bufmgr)
{
   const uint64_t cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   for (int pages = 1; pages <= 3; pages++)
      add_bucket(bufmgr, 4096 * pages);

   /* Initialize the linked lists for BO reuse cache. */
   for (uint64_t size = 4 * 4096; size <= cache_max_size; size *= 2) {
      /* size is always a multiple of 4 here, so the quarter is exact. */
      const uint64_t quarter = size / 4;

      /* Step 0 is the power of two itself, 1..3 the sizes in between. */
      for (unsigned step = 0; step < 4; step++)
         add_bucket(bufmgr, size + step * quarter);
   }
}
1170
1171 uint32_t
1172 brw_create_hw_context(struct brw_bufmgr *bufmgr)
1173 {
1174 struct drm_i915_gem_context_create create;
1175 int ret;
1176
1177 memclear(create);
1178 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
1179 if (ret != 0) {
1180 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
1181 return 0;
1182 }
1183
1184 return create.ctx_id;
1185 }
1186
1187 void
1188 brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
1189 {
1190 struct drm_i915_gem_context_destroy d = {.ctx_id = ctx_id };
1191
1192 if (ctx_id != 0 &&
1193 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
1194 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
1195 strerror(errno));
1196 }
1197 }
1198
1199 int
1200 brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
1201 {
1202 struct drm_i915_reg_read reg_read;
1203 int ret;
1204
1205 memclear(reg_read);
1206 reg_read.offset = offset;
1207
1208 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
1209
1210 *result = reg_read.val;
1211 return ret;
1212 }
1213
1214 /**
1215 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
1216 * and manage map buffer objections.
1217 *
1218 * \param fd File descriptor of the opened DRM device.
1219 */
1220 struct brw_bufmgr *
1221 brw_bufmgr_init(struct gen_device_info *devinfo, int fd, int batch_size)
1222 {
1223 struct brw_bufmgr *bufmgr;
1224
1225 bufmgr = calloc(1, sizeof(*bufmgr));
1226 if (bufmgr == NULL)
1227 return NULL;
1228
1229 /* Handles to buffer objects belong to the device fd and are not
1230 * reference counted by the kernel. If the same fd is used by
1231 * multiple parties (threads sharing the same screen bufmgr, or
1232 * even worse the same device fd passed to multiple libraries)
1233 * ownership of those handles is shared by those independent parties.
1234 *
1235 * Don't do this! Ensure that each library/bufmgr has its own device
1236 * fd so that its namespace does not clash with another.
1237 */
1238 bufmgr->fd = fd;
1239
1240 if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
1241 free(bufmgr);
1242 return NULL;
1243 }
1244
1245 bufmgr->has_llc = devinfo->has_llc;
1246
1247 init_cache_buckets(bufmgr);
1248
1249 bufmgr->name_table =
1250 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1251 bufmgr->handle_table =
1252 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1253
1254 return bufmgr;
1255 }