i965: initial attempt at fixing the aperture overflow
[mesa.git] / src / mesa / drivers / dri / intel / intel_bufmgr_ttm.c
1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #include <xf86drm.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <assert.h>
43
44 #include "errno.h"
45 #include "mtypes.h"
46 #include "dri_bufmgr.h"
47 #include "string.h"
48 #include "imports.h"
49
50 #include "i915_drm.h"
51
52 #include "intel_bufmgr_ttm.h"
53
/* Debug trace helper: forwards its arguments to fprintf(stderr, ...) only
 * when the buffer manager's debug flag is set.  A variable named
 * bufmgr_ttm must be in scope wherever the macro is used.
 */
#define DBG(...)                                \
   do {                                         \
      if (bufmgr_ttm->bufmgr.debug)             \
         fprintf(stderr, __VA_ARGS__);          \
   } while (0)
58
/*
 * Mask bits supplied with every validation request.  No other bits are
 * supported yet: working out the right mask value for them would need
 * further investigation.
 */
#define INTEL_BO_MASK  (DRM_BO_MASK_MEM   | \
                        DRM_BO_FLAG_READ  | \
                        DRM_BO_FLAG_WRITE | \
                        DRM_BO_FLAG_EXE)
69
70 struct intel_validate_entry {
71 dri_bo *bo;
72 struct drm_i915_op_arg bo_arg;
73 };
74
75 struct dri_ttm_bo_bucket_entry {
76 drmBO drm_bo;
77 struct dri_ttm_bo_bucket_entry *next;
78 };
79
/** FIFO of cached buffer objects that all have the same allocation size. */
struct dri_ttm_bo_bucket {
   /** Oldest cached entry; buffers are taken for reuse from this end. */
   struct dri_ttm_bo_bucket_entry *head;
   /** Address of the link through which the next entry will be appended. */
   struct dri_ttm_bo_bucket_entry **tail;
   /**
    * Cap on the number of entries kept in this bucket.
    *
    *  0: caching at this bucket size is disabled.
    * -1: caching at this bucket size is unlimited.
    */
   int max_entries;
   /** Number of entries currently linked into the bucket. */
   int num_entries;
};
92
/* Number of reuse-cache buckets; arbitrarily chosen.  Bucket i caches
 * buffers of (1 << i) pages of 4096 bytes, so with 16 buckets the largest
 * size cached for reuse is 1 << 15 pages, or 128MB.
 */
#define INTEL_TTM_BO_BUCKETS 16
97 typedef struct _dri_bufmgr_ttm {
98 dri_bufmgr bufmgr;
99
100 int fd;
101 unsigned int fence_type;
102 unsigned int fence_type_flush;
103
104 uint32_t max_relocs;
105
106 struct intel_validate_entry *validate_array;
107 int validate_array_size;
108 int validate_count;
109
110 /** Array of lists of cached drmBOs of power-of-two sizes */
111 struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
112 } dri_bufmgr_ttm;
113
114 /**
115 * Private information associated with a relocation that isn't already stored
116 * in the relocation buffer to be passed to the kernel.
117 */
118 struct dri_ttm_reloc {
119 dri_bo *target_buf;
120 uint64_t validate_flags;
121 /** Offset of target_buf after last execution of this relocation entry. */
122 unsigned int last_target_offset;
123 };
124
125 typedef struct _dri_bo_ttm {
126 dri_bo bo;
127
128 int refcount;
129 unsigned int map_count;
130 drmBO drm_bo;
131 const char *name;
132
133 uint64_t last_flags;
134
135 /**
136 * Index of the buffer within the validation list while preparing a
137 * batchbuffer execution.
138 */
139 int validate_index;
140
141 /** DRM buffer object containing relocation list */
142 uint32_t *reloc_buf_data;
143 struct dri_ttm_reloc *relocs;
144
145 /**
146 * Indicates that the buffer may be shared with other processes, so we
147 * can't hold maps beyond when the user does.
148 */
149 GLboolean shared;
150
151 GLboolean delayed_unmap;
152 /* Virtual address from the dri_bo_map whose unmap was delayed. */
153 void *saved_virtual;
154 } dri_bo_ttm;
155
156 typedef struct _dri_fence_ttm
157 {
158 dri_fence fence;
159
160 int refcount;
161 const char *name;
162 drmFence drm_fence;
163 } dri_fence_ttm;
164
/**
 * Returns the smallest exponent e such that (1 << e) >= n, i.e. the base-2
 * logarithm of n rounded up.
 *
 * Values of n that are <= 1 (including negative values) yield 0.  The
 * running power of two is kept in an unsigned variable so that inputs
 * above 1 << 30 cannot overflow it.
 *
 * \param n Value to take the logarithm of.
 * \return ceil(log2(n)) for n >= 1, otherwise 0.
 */
static int
logbase2(int n)
{
   unsigned int limit = (n > 0) ? (unsigned int)n : 0;
   unsigned int power = 1;
   int exponent = 0;

   while (limit > power) {
      power <<= 1;
      exponent++;
   }

   return exponent;
}
178
179 static struct dri_ttm_bo_bucket *
180 dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
181 {
182 int i;
183
184 /* We only do buckets in power of two increments */
185 if ((size & (size - 1)) != 0)
186 return NULL;
187
188 /* We should only see sizes rounded to pages. */
189 assert((size % 4096) == 0);
190
191 /* We always allocate in units of pages */
192 i = ffs(size / 4096) - 1;
193 if (i >= INTEL_TTM_BO_BUCKETS)
194 return NULL;
195
196 return &bufmgr_ttm->cache_bucket[i];
197 }
198
199
200 static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
201 {
202 int i, j;
203
204 for (i = 0; i < bufmgr_ttm->validate_count; i++) {
205 dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
206 dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
207
208 if (bo_ttm->reloc_buf_data != NULL) {
209 for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) {
210 uint32_t *reloc_entry = bo_ttm->reloc_buf_data +
211 I915_RELOC_HEADER +
212 j * I915_RELOC0_STRIDE;
213 dri_bo *target_bo = bo_ttm->relocs[j].target_buf;
214 dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo;
215
216 DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n",
217 i,
218 bo_ttm->name, reloc_entry[0],
219 target_ttm->name, target_bo->offset,
220 reloc_entry[1]);
221 }
222 } else {
223 DBG("%2d: %s\n", i, bo_ttm->name);
224 }
225 }
226 }
227
228 /**
229 * Adds the given buffer to the list of buffers to be validated (moved into the
230 * appropriate memory type) with the next batch submission.
231 *
232 * If a buffer is validated multiple times in a batch submission, it ends up
233 * with the intersection of the memory type flags and the union of the
234 * access flags.
235 */
236 static void
237 intel_add_validate_buffer(dri_bo *buf,
238 uint64_t flags)
239 {
240 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
241 dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
242
243 /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing,
244 * do that now.
245 */
246 if (ttm_buf->delayed_unmap) {
247 drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
248 ttm_buf->delayed_unmap = GL_FALSE;
249 }
250
251 if (ttm_buf->validate_index == -1) {
252 struct intel_validate_entry *entry;
253 struct drm_i915_op_arg *arg;
254 struct drm_bo_op_req *req;
255 int index;
256
257 /* Extend the array of validation entries as necessary. */
258 if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) {
259 int i, new_size = bufmgr_ttm->validate_array_size * 2;
260
261 if (new_size == 0)
262 new_size = 5;
263
264 bufmgr_ttm->validate_array =
265 realloc(bufmgr_ttm->validate_array,
266 sizeof(struct intel_validate_entry) * new_size);
267 bufmgr_ttm->validate_array_size = new_size;
268
269 /* Update pointers for realloced mem. */
270 for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) {
271 bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long)
272 &bufmgr_ttm->validate_array[i + 1].bo_arg;
273 }
274 }
275
276 /* Pick out the new array entry for ourselves */
277 index = bufmgr_ttm->validate_count;
278 ttm_buf->validate_index = index;
279 entry = &bufmgr_ttm->validate_array[index];
280 bufmgr_ttm->validate_count++;
281
282 /* Fill in array entry */
283 entry->bo = buf;
284 dri_bo_reference(buf);
285
286 /* Fill in kernel arg */
287 arg = &entry->bo_arg;
288 req = &arg->d.req;
289
290 memset(arg, 0, sizeof(*arg));
291 req->bo_req.handle = ttm_buf->drm_bo.handle;
292 req->op = drm_bo_validate;
293 req->bo_req.flags = flags;
294 req->bo_req.hint = 0;
295 #ifdef DRM_BO_HINT_PRESUMED_OFFSET
296 /* PRESUMED_OFFSET indicates that all relocations pointing at this
297 * buffer have the correct offset. If any of our relocations don't,
298 * this flag will be cleared off the buffer later in the relocation
299 * processing.
300 */
301 req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
302 req->bo_req.presumed_offset = buf->offset;
303 #endif
304 req->bo_req.mask = INTEL_BO_MASK;
305 req->bo_req.fence_class = 0; /* Backwards compat. */
306
307 if (ttm_buf->reloc_buf_data != NULL)
308 arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data;
309 else
310 arg->reloc_ptr = 0;
311
312 /* Hook up the linked list of args for the kernel */
313 arg->next = 0;
314 if (index != 0) {
315 bufmgr_ttm->validate_array[index - 1].bo_arg.next =
316 (unsigned long)arg;
317 }
318 } else {
319 struct intel_validate_entry *entry =
320 &bufmgr_ttm->validate_array[ttm_buf->validate_index];
321 struct drm_i915_op_arg *arg = &entry->bo_arg;
322 struct drm_bo_op_req *req = &arg->d.req;
323 uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM;
324 uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM;
325
326 /* Buffer was already in the validate list. Extend its flags as
327 * necessary.
328 */
329
330 if (memFlags == 0) {
331 fprintf(stderr,
332 "%s: No shared memory types between "
333 "0x%16llx and 0x%16llx\n",
334 __FUNCTION__, req->bo_req.flags, flags);
335 abort();
336 }
337 if (flags & ~INTEL_BO_MASK) {
338 fprintf(stderr,
339 "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n",
340 __FUNCTION__, flags & ~INTEL_BO_MASK);
341 abort();
342 }
343 req->bo_req.flags = memFlags | modeFlags;
344 }
345 }
346
347
/* Size in bytes of a relocation list with room for x entries: the header
 * dwords followed by x entries of I915_RELOC0_STRIDE dwords each.  The
 * argument is parenthesized so that expressions can be passed safely.
 */
#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + (x) * I915_RELOC0_STRIDE) * \
                           sizeof(uint32_t))
350
351 static int
352 intel_setup_reloc_list(dri_bo *bo)
353 {
354 dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
355 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
356
357 bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs,
358 sizeof(struct dri_ttm_reloc));
359 bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs));
360
361 /* Initialize the relocation list with the header:
362 * DWORD 0: relocation count
363 * DWORD 1: relocation type
364 * DWORD 2+3: handle to next relocation list (currently none) 64-bits
365 */
366 bo_ttm->reloc_buf_data[0] = 0;
367 bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0;
368 bo_ttm->reloc_buf_data[2] = 0;
369 bo_ttm->reloc_buf_data[3] = 0;
370
371 return 0;
372 }
373
#if 0
/* Compiled out.  NOTE(review): the body refers to bufmgr_ttm, which is
 * neither a parameter nor a local here, and to BM_CKFATAL, which is not
 * defined in this file, so it would need work before being re-enabled.
 */
int
driFenceSignaled(DriFenceObject * fence, unsigned type)
{
   int signaled;
   int ret;

   if (fence == NULL)
      return GL_TRUE;

   ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled);
   BM_CKFATAL(ret);

   return signaled;
}
#endif
389
390 static dri_bo *
391 dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
392 unsigned long size, unsigned int alignment,
393 uint64_t location_mask)
394 {
395 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
396 dri_bo_ttm *ttm_buf;
397 unsigned int pageSize = getpagesize();
398 int ret;
399 uint64_t flags;
400 unsigned int hint;
401 unsigned long alloc_size;
402 struct dri_ttm_bo_bucket *bucket;
403 GLboolean alloc_from_cache = GL_FALSE;
404
405 ttm_buf = calloc(1, sizeof(*ttm_buf));
406 if (!ttm_buf)
407 return NULL;
408
409 /* The mask argument doesn't do anything for us that we want other than
410 * determine which pool (TTM or local) the buffer is allocated into, so
411 * just pass all of the allocation class flags.
412 */
413 flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE |
414 DRM_BO_FLAG_EXE;
415 /* No hints we want to use. */
416 hint = 0;
417
418 /* Round the allocated size up to a power of two number of pages. */
419 alloc_size = 1 << logbase2(size);
420 if (alloc_size < pageSize)
421 alloc_size = pageSize;
422 bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
423
424 /* If we don't have caching at this size, don't actually round the
425 * allocation up.
426 */
427 if (bucket == NULL || bucket->max_entries == 0)
428 alloc_size = size;
429
430 /* Get a buffer out of the cache if available */
431 if (bucket != NULL && bucket->num_entries > 0) {
432 struct dri_ttm_bo_bucket_entry *entry = bucket->head;
433 int busy;
434
435 /* Check if the buffer is still in flight. If not, reuse it. */
436 ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
437 alloc_from_cache = (ret == 0 && busy == 0);
438
439 if (alloc_from_cache) {
440 bucket->head = entry->next;
441 if (entry->next == NULL)
442 bucket->tail = &bucket->head;
443 bucket->num_entries--;
444
445 ttm_buf->drm_bo = entry->drm_bo;
446 free(entry);
447 }
448 }
449
450 if (!alloc_from_cache) {
451 ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
452 NULL, flags, hint, &ttm_buf->drm_bo);
453 if (ret != 0) {
454 free(ttm_buf);
455 return NULL;
456 }
457 }
458
459 ttm_buf->bo.size = size;
460 ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
461 ttm_buf->bo.virtual = NULL;
462 ttm_buf->bo.bufmgr = bufmgr;
463 ttm_buf->name = name;
464 ttm_buf->refcount = 1;
465 ttm_buf->reloc_buf_data = NULL;
466 ttm_buf->relocs = NULL;
467 ttm_buf->last_flags = ttm_buf->drm_bo.flags;
468 ttm_buf->shared = GL_FALSE;
469 ttm_buf->delayed_unmap = GL_FALSE;
470 ttm_buf->validate_index = -1;
471
472 DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size);
473
474 return &ttm_buf->bo;
475 }
476
477 /* Our TTM backend doesn't allow creation of static buffers, as that requires
478 * privelege for the non-fake case, and the lock in the fake case where we were
479 * working around the X Server not creating buffers and passing handles to us.
480 */
481 static dri_bo *
482 dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name,
483 unsigned long offset, unsigned long size, void *virtual,
484 uint64_t location_mask)
485 {
486 return NULL;
487 }
488
489 /**
490 * Returns a dri_bo wrapping the given buffer object handle.
491 *
492 * This can be used when one application needs to pass a buffer object
493 * to another.
494 */
495 dri_bo *
496 intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
497 unsigned int handle)
498 {
499 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
500 dri_bo_ttm *ttm_buf;
501 int ret;
502
503 ttm_buf = calloc(1, sizeof(*ttm_buf));
504 if (!ttm_buf)
505 return NULL;
506
507 ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo);
508 if (ret != 0) {
509 fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
510 name, handle, strerror(-ret));
511 free(ttm_buf);
512 return NULL;
513 }
514 ttm_buf->bo.size = ttm_buf->drm_bo.size;
515 ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
516 ttm_buf->bo.virtual = NULL;
517 ttm_buf->bo.bufmgr = bufmgr;
518 ttm_buf->name = name;
519 ttm_buf->refcount = 1;
520 ttm_buf->reloc_buf_data = NULL;
521 ttm_buf->relocs = NULL;
522 ttm_buf->last_flags = ttm_buf->drm_bo.flags;
523 ttm_buf->shared = GL_TRUE;
524 ttm_buf->delayed_unmap = GL_FALSE;
525 ttm_buf->validate_index = -1;
526
527 DBG("bo_create_from_handle: %p %08x (%s)\n",
528 &ttm_buf->bo, handle, ttm_buf->name);
529
530 return &ttm_buf->bo;
531 }
532
533 static void
534 dri_ttm_bo_reference(dri_bo *buf)
535 {
536 dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
537
538 ttm_buf->refcount++;
539 }
540
541 static void
542 dri_ttm_bo_unreference(dri_bo *buf)
543 {
544 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
545 dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
546
547 if (!buf)
548 return;
549
550 if (--ttm_buf->refcount == 0) {
551 struct dri_ttm_bo_bucket *bucket;
552 int ret;
553
554 assert(ttm_buf->map_count == 0);
555
556 if (ttm_buf->reloc_buf_data) {
557 int i;
558
559 /* Unreference all the target buffers */
560 for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++)
561 dri_bo_unreference(ttm_buf->relocs[i].target_buf);
562 free(ttm_buf->relocs);
563
564 /* Free the kernel BO containing relocation entries */
565 free(ttm_buf->reloc_buf_data);
566 ttm_buf->reloc_buf_data = NULL;
567 }
568
569 if (ttm_buf->delayed_unmap) {
570 int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
571
572 if (ret != 0) {
573 fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
574 __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
575 }
576 }
577
578 bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
579 /* Put the buffer into our internal cache for reuse if we can. */
580 if (!ttm_buf->shared &&
581 bucket != NULL &&
582 (bucket->max_entries == -1 ||
583 (bucket->max_entries > 0 &&
584 bucket->num_entries < bucket->max_entries)))
585 {
586 struct dri_ttm_bo_bucket_entry *entry;
587
588 entry = calloc(1, sizeof(*entry));
589 entry->drm_bo = ttm_buf->drm_bo;
590
591 entry->next = NULL;
592 *bucket->tail = entry;
593 bucket->tail = &entry->next;
594 bucket->num_entries++;
595 } else {
596 /* Decrement the kernel refcount for the buffer. */
597 ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
598 if (ret != 0) {
599 fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
600 ttm_buf->name, strerror(-ret));
601 }
602 }
603
604 DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
605
606 free(buf);
607 return;
608 }
609 }
610
611 static int
612 dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable)
613 {
614 dri_bufmgr_ttm *bufmgr_ttm;
615 dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
616 uint64_t flags;
617 int ret;
618
619 bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
620
621 flags = DRM_BO_FLAG_READ;
622 if (write_enable)
623 flags |= DRM_BO_FLAG_WRITE;
624
625 /* Allow recursive mapping. Mesa may recursively map buffers with
626 * nested display loops.
627 */
628 if (ttm_buf->map_count++ != 0)
629 return 0;
630
631 assert(buf->virtual == NULL);
632
633 DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
634
635 /* XXX: What about if we're upgrading from READ to WRITE? */
636 if (ttm_buf->delayed_unmap) {
637 buf->virtual = ttm_buf->saved_virtual;
638 return 0;
639 }
640
641 ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual);
642 if (ret != 0) {
643 fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n",
644 __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
645 }
646
647 return ret;
648 }
649
650 static int
651 dri_ttm_bo_unmap(dri_bo *buf)
652 {
653 dri_bufmgr_ttm *bufmgr_ttm;
654 dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
655 int ret;
656
657 if (buf == NULL)
658 return 0;
659
660 assert(ttm_buf->map_count != 0);
661 if (--ttm_buf->map_count != 0)
662 return 0;
663
664 bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
665
666 assert(buf->virtual != NULL);
667
668 DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
669
670 if (!ttm_buf->shared) {
671 ttm_buf->saved_virtual = buf->virtual;
672 ttm_buf->delayed_unmap = GL_TRUE;
673 buf->virtual = NULL;
674
675 return 0;
676 }
677
678 buf->virtual = NULL;
679
680 ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
681 if (ret != 0) {
682 fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
683 __FILE__, __LINE__, ttm_buf->name, strerror(-ret));
684 }
685
686 return ret;
687 }
688
689 /**
690 * Returns a dri_bo wrapping the given buffer object handle.
691 *
692 * This can be used when one application needs to pass a buffer object
693 * to another.
694 */
695 dri_fence *
696 intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
697 drm_fence_arg_t *arg)
698 {
699 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
700 dri_fence_ttm *ttm_fence;
701
702 ttm_fence = malloc(sizeof(*ttm_fence));
703 if (!ttm_fence)
704 return NULL;
705
706 ttm_fence->drm_fence.handle = arg->handle;
707 ttm_fence->drm_fence.fence_class = arg->fence_class;
708 ttm_fence->drm_fence.type = arg->type;
709 ttm_fence->drm_fence.flags = arg->flags;
710 ttm_fence->drm_fence.signaled = 0;
711 ttm_fence->drm_fence.sequence = arg->sequence;
712
713 ttm_fence->fence.bufmgr = bufmgr;
714 ttm_fence->name = name;
715 ttm_fence->refcount = 1;
716
717 DBG("fence_create_from_handle: %p (%s)\n",
718 &ttm_fence->fence, ttm_fence->name);
719
720 return &ttm_fence->fence;
721 }
722
723
724 static void
725 dri_ttm_fence_reference(dri_fence *fence)
726 {
727 dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
728 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
729
730 ++fence_ttm->refcount;
731 DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
732 }
733
734 static void
735 dri_ttm_fence_unreference(dri_fence *fence)
736 {
737 dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
738 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
739
740 if (!fence)
741 return;
742
743 DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
744
745 if (--fence_ttm->refcount == 0) {
746 int ret;
747
748 ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence);
749 if (ret != 0) {
750 fprintf(stderr, "drmFenceUnreference failed (%s): %s\n",
751 fence_ttm->name, strerror(-ret));
752 }
753
754 free(fence);
755 return;
756 }
757 }
758
759 static void
760 dri_ttm_fence_wait(dri_fence *fence)
761 {
762 dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
763 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
764 int ret;
765
766 ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0);
767 if (ret != 0) {
768 fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n",
769 __FILE__, __LINE__, fence_ttm->name, strerror(-ret));
770 abort();
771 }
772
773 DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
774 }
775
776 static void
777 dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
778 {
779 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
780 int i;
781
782 free(bufmgr_ttm->validate_array);
783
784 /* Free any cached buffer objects we were going to reuse */
785 for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
786 struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
787 struct dri_ttm_bo_bucket_entry *entry;
788
789 while ((entry = bucket->head) != NULL) {
790 int ret;
791
792 bucket->head = entry->next;
793 if (entry->next == NULL)
794 bucket->tail = &bucket->head;
795 bucket->num_entries--;
796
797 /* Decrement the kernel refcount for the buffer. */
798 ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
799 if (ret != 0) {
800 fprintf(stderr, "drmBOUnreference failed: %s\n",
801 strerror(-ret));
802 }
803
804 free(entry);
805 }
806 }
807
808 free(bufmgr);
809 }
810
811 /**
812 * Adds the target buffer to the validation list and adds the relocation
813 * to the reloc_buffer's relocation list.
814 *
815 * The relocation entry at the given offset must already contain the
816 * precomputed relocation value, because the kernel will optimize out
817 * the relocation entry write when the buffer hasn't moved from the
818 * last known offset in target_buf.
819 */
820 static int
821 dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
822 GLuint offset, dri_bo *target_buf)
823 {
824 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr;
825 dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf;
826 dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf;
827 int num_relocs;
828 uint32_t *this_reloc;
829
830 /* Create a new relocation list if needed */
831 if (reloc_buf_ttm->reloc_buf_data == NULL)
832 intel_setup_reloc_list(reloc_buf);
833
834 num_relocs = reloc_buf_ttm->reloc_buf_data[0];
835
836 /* Check overflow */
837 assert(num_relocs < bufmgr_ttm->max_relocs);
838
839 this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER +
840 num_relocs * I915_RELOC0_STRIDE;
841
842 this_reloc[0] = offset;
843 this_reloc[1] = delta;
844 this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */
845 this_reloc[3] = 0;
846
847 reloc_buf_ttm->relocs[num_relocs].validate_flags = flags;
848 reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf;
849 dri_bo_reference(target_buf);
850
851 reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */
852 /* Check wraparound */
853 assert(reloc_buf_ttm->reloc_buf_data[0] != 0);
854 return 0;
855 }
856
857 /**
858 * Walk the tree of relocations rooted at BO and accumulate the list of
859 * validations to be performed and update the relocation buffers with
860 * index values into the validation list.
861 */
862 static void
863 dri_ttm_bo_process_reloc(dri_bo *bo)
864 {
865 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
866 dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
867 unsigned int nr_relocs;
868 int i;
869
870 if (bo_ttm->reloc_buf_data == NULL)
871 return;
872
873 nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
874
875 for (i = 0; i < nr_relocs; i++) {
876 struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
877
878 /* Continue walking the tree depth-first. */
879 dri_ttm_bo_process_reloc(r->target_buf);
880
881 /* Add the target to the validate list */
882 intel_add_validate_buffer(r->target_buf, r->validate_flags);
883
884 /* Clear the PRESUMED_OFFSET flag from the validate list entry of the
885 * target if this buffer has a stale relocated pointer at it.
886 */
887 if (r->last_target_offset != r->target_buf->offset) {
888 dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf;
889 struct intel_validate_entry *entry =
890 &bufmgr_ttm->validate_array[target_buf_ttm->validate_index];
891
892 entry->bo_arg.d.req.bo_req.flags &= ~DRM_BO_HINT_PRESUMED_OFFSET;
893 }
894 }
895 }
896
897 static void *
898 dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
899 {
900 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
901
902 /* Update indices and set up the validate list. */
903 dri_ttm_bo_process_reloc(batch_buf);
904
905 /* Add the batch buffer to the validation list. There are no relocations
906 * pointing to it.
907 */
908 intel_add_validate_buffer(batch_buf,
909 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
910
911 *count = bufmgr_ttm->validate_count;
912 return &bufmgr_ttm->validate_array[0].bo_arg;
913 }
914
915 static const char *
916 intel_get_flags_mem_type_string(uint64_t flags)
917 {
918 switch (flags & DRM_BO_MASK_MEM) {
919 case DRM_BO_FLAG_MEM_LOCAL: return "local";
920 case DRM_BO_FLAG_MEM_TT: return "ttm";
921 case DRM_BO_FLAG_MEM_VRAM: return "vram";
922 case DRM_BO_FLAG_MEM_PRIV0: return "priv0";
923 case DRM_BO_FLAG_MEM_PRIV1: return "priv1";
924 case DRM_BO_FLAG_MEM_PRIV2: return "priv2";
925 case DRM_BO_FLAG_MEM_PRIV3: return "priv3";
926 case DRM_BO_FLAG_MEM_PRIV4: return "priv4";
927 default: return NULL;
928 }
929 }
930
931 static const char *
932 intel_get_flags_caching_string(uint64_t flags)
933 {
934 switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) {
935 case 0: return "UU";
936 case DRM_BO_FLAG_CACHED: return "CU";
937 case DRM_BO_FLAG_CACHED_MAPPED: return "UC";
938 case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC";
939 default: return NULL;
940 }
941 }
942
943 static void
944 intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
945 {
946 int i;
947
948 for (i = 0; i < bufmgr_ttm->validate_count; i++) {
949 dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
950 dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
951 struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg;
952 struct drm_bo_arg_rep *rep = &arg->d.rep;
953
954 /* Update the flags */
955 if (rep->bo_info.flags != bo_ttm->last_flags) {
956 DBG("BO %s migrated: %s/%s -> %s/%s\n",
957 bo_ttm->name,
958 intel_get_flags_mem_type_string(bo_ttm->last_flags),
959 intel_get_flags_caching_string(bo_ttm->last_flags),
960 intel_get_flags_mem_type_string(rep->bo_info.flags),
961 intel_get_flags_caching_string(rep->bo_info.flags));
962
963 bo_ttm->last_flags = rep->bo_info.flags;
964 }
965 /* Update the buffer offset */
966 if (rep->bo_info.offset != bo->offset) {
967 DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n",
968 bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset);
969 bo->offset = rep->bo_info.offset;
970 }
971 }
972 }
973
974 /**
975 * Update the last target offset field of relocation entries for PRESUMED_OFFSET
976 * computation.
977 */
978 static void
979 dri_ttm_bo_post_submit(dri_bo *bo)
980 {
981 dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
982 unsigned int nr_relocs;
983 int i;
984
985 if (bo_ttm->reloc_buf_data == NULL)
986 return;
987
988 nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
989
990 for (i = 0; i < nr_relocs; i++) {
991 struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
992
993 /* Continue walking the tree depth-first. */
994 dri_ttm_bo_post_submit(r->target_buf);
995
996 r->last_target_offset = bo->offset;
997 }
998 }
999
1000 static void
1001 dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
1002 {
1003 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
1004 int i;
1005
1006 intel_update_buffer_offsets (bufmgr_ttm);
1007
1008 dri_ttm_bo_post_submit(batch_buf);
1009
1010 if (bufmgr_ttm->bufmgr.debug)
1011 dri_ttm_dump_validation_list(bufmgr_ttm);
1012
1013 for (i = 0; i < bufmgr_ttm->validate_count; i++) {
1014 dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
1015 dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
1016
1017 /* Disconnect the buffer from the validate list */
1018 bo_ttm->validate_index = -1;
1019 dri_bo_unreference(bo);
1020 bufmgr_ttm->validate_array[i].bo = NULL;
1021 }
1022 bufmgr_ttm->validate_count = 0;
1023 }
1024
1025 /**
1026 * Enables unlimited caching of buffer objects for reuse.
1027 *
1028 * This is potentially very memory expensive, as the cache at each bucket
1029 * size is only bounded by how many buffers of that size we've managed to have
1030 * in flight at once.
1031 */
1032 void
1033 intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
1034 {
1035 dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
1036 int i;
1037
1038 for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
1039 bufmgr_ttm->cache_bucket[i].max_entries = -1;
1040 }
1041 }
1042
1043 /*
1044 *
1045 */
1046 static int
1047 dri_ttm_check_aperture_space(dri_bo *bo)
1048 {
1049 return 0;
1050 }
1051
1052 /**
1053 * Initializes the TTM buffer manager, which uses the kernel to allocate, map,
1054 * and manage map buffer objections.
1055 *
1056 * \param fd File descriptor of the opened DRM device.
1057 * \param fence_type Driver-specific fence type used for fences with no flush.
1058 * \param fence_type_flush Driver-specific fence type used for fences with a
1059 * flush.
1060 */
1061 dri_bufmgr *
1062 intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
1063 unsigned int fence_type_flush, int batch_size)
1064 {
1065 dri_bufmgr_ttm *bufmgr_ttm;
1066 int i;
1067
1068 bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
1069 bufmgr_ttm->fd = fd;
1070 bufmgr_ttm->fence_type = fence_type;
1071 bufmgr_ttm->fence_type_flush = fence_type_flush;
1072
1073 /* Let's go with one relocation per every 2 dwords (but round down a bit
1074 * since a power of two will mean an extra page allocation for the reloc
1075 * buffer).
1076 *
1077 * Every 4 was too few for the blender benchmark.
1078 */
1079 bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
1080
1081 bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc;
1082 bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static;
1083 bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference;
1084 bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference;
1085 bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map;
1086 bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap;
1087 bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference;
1088 bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference;
1089 bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait;
1090 bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy;
1091 bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc;
1092 bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc;
1093 bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
1094 bufmgr_ttm->bufmgr.debug = GL_FALSE;
1095 bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space;
1096 /* Initialize the linked lists for BO reuse cache. */
1097 for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
1098 bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
1099
1100 return &bufmgr_ttm->bufmgr;
1101 }
1102