[mesa.git] / src / gallium / winsys / radeon / drm / radeon_drm_bo.c
1 /*
2 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 #include "radeon_drm_cs.h"
28
29 #include "util/u_hash_table.h"
30 #include "util/u_memory.h"
31 #include "util/simple_list.h"
32 #include "util/list.h"
33 #include "os/os_thread.h"
34 #include "os/os_mman.h"
35 #include "os/os_time.h"
36
37 #include "state_tracker/drm_driver.h"
38
39 #include <sys/ioctl.h>
40 #include <xf86drm.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44
45 static const struct pb_vtbl radeon_bo_vtbl;
46
47 static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
48 {
49 assert(bo->vtbl == &radeon_bo_vtbl);
50 return (struct radeon_bo *)bo;
51 }
52
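/* A free range in the GPU virtual address space managed by radeon_bomgr.
 * Holes are kept in mgr->va_holes, ordered from highest to lowest offset,
 * and are created, split and merged by radeon_bomgr_find_va() and
 * radeon_bomgr_free_va() below. */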
53 struct radeon_bo_va_hole {
54 struct list_head list;
55 uint64_t offset;
56 uint64_t size;
57 };
58
59 struct radeon_bomgr {
60 /* Base class. */
61 struct pb_manager base;
62
63 /* Winsys. */
64 struct radeon_drm_winsys *rws;
65
66 /* List of buffer GEM names. Protected by bo_handles_mutex. */
67 struct util_hash_table *bo_names;
69 * List of buffer handles. Protected by bo_handles_mutex. */
69 struct util_hash_table *bo_handles;
70 * List of buffer virtual memory ranges. Protected by bo_handles_mutex. */
71 struct util_hash_table *bo_vas;
72 pipe_mutex bo_handles_mutex;
73 pipe_mutex bo_va_mutex;
74
75 /* Is virtual address space supported? */
76 bool va;
77 uint64_t va_offset;
78 struct list_head va_holes;
79 };
80
81 static inline struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
82 {
83 return (struct radeon_bomgr *)mgr;
84 }
85
86 static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
87 {
88 struct radeon_bo *bo = NULL;
89
90 if (_buf->vtbl == &radeon_bo_vtbl) {
91 bo = radeon_bo(_buf);
92 } else {
93 struct pb_buffer *base_buf;
94 pb_size offset;
95 pb_get_base_buffer(_buf, &base_buf, &offset);
96
97 if (base_buf->vtbl == &radeon_bo_vtbl)
98 bo = radeon_bo(base_buf);
99 }
100
101 return bo;
102 }
103
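/* Wait for all GPU usage of the buffer to finish.
 *
 * With a zero timeout this only queries DRM_RADEON_GEM_BUSY and reports
 * whether the buffer is idle; with a non-zero timeout it blocks on
 * DRM_RADEON_GEM_WAIT_IDLE until the buffer is idle (the exact timeout
 * value is not honored yet, see the TODO below). Returns true if the
 * buffer is idle. */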
104 static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
105 enum radeon_bo_usage usage)
106 {
107 struct radeon_bo *bo = get_radeon_bo(_buf);
108
109 /* Wait if any ioctl is being submitted with this buffer. */
110 if (!os_wait_until_zero(&bo->num_active_ioctls, timeout))
111 return false;
112
113 /* TODO: handle arbitrary timeout */
114 if (!timeout) {
115 struct drm_radeon_gem_busy args = {0};
116
117 args.handle = bo->handle;
118 return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
119 &args, sizeof(args)) == 0;
120 } else {
121 struct drm_radeon_gem_wait_idle args = {0};
122
123 args.handle = bo->handle;
124 while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
125 &args, sizeof(args)) == -EBUSY);
126 return true;
127 }
128 }
129
130 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
131 {
132 /* Zero domains the driver doesn't understand. */
133 domain &= RADEON_DOMAIN_VRAM_GTT;
134
135 /* If no domain is set, we must set something... */
136 if (!domain)
137 domain = RADEON_DOMAIN_VRAM_GTT;
138
139 return domain;
140 }
141
142 static enum radeon_bo_domain radeon_bo_get_initial_domain(
143 struct radeon_winsys_cs_handle *buf)
144 {
145 struct radeon_bo *bo = (struct radeon_bo*)buf;
146 struct drm_radeon_gem_op args;
147
148 if (bo->rws->info.drm_minor < 38)
149 return RADEON_DOMAIN_VRAM_GTT;
150
151 memset(&args, 0, sizeof(args));
152 args.handle = bo->handle;
153 args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
154
155 drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
156 &args, sizeof(args));
157
158 /* GEM domains and winsys domains are defined the same. */
159 return get_valid_domain(args.value);
160 }
161
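/* Allocate a range of GPU virtual address space.
 *
 * This is a simple first-fit allocator over the hole list: sizes are
 * rounded up to 4 KiB and the alignment is at least 4 KiB. If alignment
 * padding ("waste") is needed inside a hole, the padding is kept as a new,
 * smaller hole. If no hole fits, the range is carved out at the top
 * (mgr->va_offset).
 *
 * Example: a 0x1000-byte request with 0x2000 alignment against a hole
 * [0x5000, 0x8000) returns 0x6000 and leaves the holes [0x5000, 0x6000)
 * and [0x7000, 0x8000). */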
162 static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size, uint64_t alignment)
163 {
164 struct radeon_bo_va_hole *hole, *n;
165 uint64_t offset = 0, waste = 0;
166
167 alignment = MAX2(alignment, 4096);
168 size = align(size, 4096);
169
170 pipe_mutex_lock(mgr->bo_va_mutex);
171 /* first look for a hole */
172 LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
173 offset = hole->offset;
174 waste = offset % alignment;
175 waste = waste ? alignment - waste : 0;
176 offset += waste;
177 if (offset >= (hole->offset + hole->size)) {
178 continue;
179 }
180 if (!waste && hole->size == size) {
181 offset = hole->offset;
182 list_del(&hole->list);
183 FREE(hole);
184 pipe_mutex_unlock(mgr->bo_va_mutex);
185 return offset;
186 }
187 if ((hole->size - waste) > size) {
188 if (waste) {
189 n = CALLOC_STRUCT(radeon_bo_va_hole);
190 n->size = waste;
191 n->offset = hole->offset;
192 list_add(&n->list, &hole->list);
193 }
194 hole->size -= (size + waste);
195 hole->offset += size + waste;
196 pipe_mutex_unlock(mgr->bo_va_mutex);
197 return offset;
198 }
199 if ((hole->size - waste) == size) {
200 hole->size = waste;
201 pipe_mutex_unlock(mgr->bo_va_mutex);
202 return offset;
203 }
204 }
205
206 offset = mgr->va_offset;
207 waste = offset % alignment;
208 waste = waste ? alignment - waste : 0;
209 if (waste) {
210 n = CALLOC_STRUCT(radeon_bo_va_hole);
211 n->size = waste;
212 n->offset = offset;
213 list_add(&n->list, &mgr->va_holes);
214 }
215 offset += waste;
216 mgr->va_offset += size + waste;
217 pipe_mutex_unlock(mgr->bo_va_mutex);
218 return offset;
219 }
220
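/* Return a range of GPU virtual address space to the allocator.
 *
 * If the range ends at the current top (mgr->va_offset), the top is simply
 * lowered, possibly swallowing the uppermost hole as well. Otherwise a hole
 * is inserted at the right position in the list and merged with adjacent
 * holes where possible. */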
221 static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
222 {
223 struct radeon_bo_va_hole *hole;
224
225 size = align(size, 4096);
226
227 pipe_mutex_lock(mgr->bo_va_mutex);
228 if ((va + size) == mgr->va_offset) {
229 mgr->va_offset = va;
230 /* Delete uppermost hole if it reaches the new top */
231 if (!LIST_IS_EMPTY(&mgr->va_holes)) {
232 hole = container_of(mgr->va_holes.next, hole, list);
233 if ((hole->offset + hole->size) == va) {
234 mgr->va_offset = hole->offset;
235 list_del(&hole->list);
236 FREE(hole);
237 }
238 }
239 } else {
240 struct radeon_bo_va_hole *next;
241
242 hole = container_of(&mgr->va_holes, hole, list);
243 LIST_FOR_EACH_ENTRY(next, &mgr->va_holes, list) {
244 if (next->offset < va)
245 break;
246 hole = next;
247 }
248
249 if (&hole->list != &mgr->va_holes) {
250 /* Grow upper hole if it's adjacent */
251 if (hole->offset == (va + size)) {
252 hole->offset = va;
253 hole->size += size;
254 /* Merge lower hole if it's adjacent */
255 if (next != hole && &next->list != &mgr->va_holes &&
256 (next->offset + next->size) == va) {
257 next->size += hole->size;
258 list_del(&hole->list);
259 FREE(hole);
260 }
261 goto out;
262 }
263 }
264
265 /* Grow lower hole if it's adjacent */
266 if (next != hole && &next->list != &mgr->va_holes &&
267 (next->offset + next->size) == va) {
268 next->size += size;
269 goto out;
270 }
271
272 /* FIXME: on allocation failure we just lose the virtual address range;
273 * maybe print a warning.
274 */
275 next = CALLOC_STRUCT(radeon_bo_va_hole);
276 if (next) {
277 next->size = size;
278 next->offset = va;
279 list_add(&next->list, &hole->list);
280 }
281 }
282 out:
283 pipe_mutex_unlock(mgr->bo_va_mutex);
284 }
285
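/* Destroy a buffer once its reference count drops to zero.
 *
 * This removes the buffer from the handle/name tables, unmaps any CPU
 * mapping, unmaps and releases its virtual address range (if VM is used),
 * closes the GEM handle and updates the VRAM/GTT accounting. */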
286 static void radeon_bo_destroy(struct pb_buffer *_buf)
287 {
288 struct radeon_bo *bo = radeon_bo(_buf);
289 struct radeon_bomgr *mgr = bo->mgr;
290 struct drm_gem_close args;
291
292 memset(&args, 0, sizeof(args));
293
294 pipe_mutex_lock(bo->mgr->bo_handles_mutex);
295 util_hash_table_remove(bo->mgr->bo_handles, (void*)(uintptr_t)bo->handle);
296 if (bo->flink_name) {
297 util_hash_table_remove(bo->mgr->bo_names,
298 (void*)(uintptr_t)bo->flink_name);
299 }
300 pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
301
302 if (bo->ptr)
303 os_munmap(bo->ptr, bo->base.size);
304
305 if (mgr->va) {
306 if (bo->rws->va_unmap_working) {
307 struct drm_radeon_gem_va va;
308
309 va.handle = bo->handle;
310 va.vm_id = 0;
311 va.operation = RADEON_VA_UNMAP;
312 va.flags = RADEON_VM_PAGE_READABLE |
313 RADEON_VM_PAGE_WRITEABLE |
314 RADEON_VM_PAGE_SNOOPED;
315 va.offset = bo->va;
316
317 if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_VA, &va,
318 sizeof(va)) != 0 &&
319 va.operation == RADEON_VA_RESULT_ERROR) {
320 fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
321 fprintf(stderr, "radeon: size : %d bytes\n", bo->base.size);
322 fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
323 }
324 }
325
326 radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
327 }
328
329 /* Close object. */
330 args.handle = bo->handle;
331 drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
332
333 pipe_mutex_destroy(bo->map_mutex);
334
335 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
336 bo->rws->allocated_vram -= align(bo->base.size, 4096);
337 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
338 bo->rws->allocated_gtt -= align(bo->base.size, 4096);
339 FREE(bo);
340 }
341
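/* Map the buffer into the CPU address space.
 *
 * The mapping is created lazily with DRM_RADEON_GEM_MMAP + mmap() and is
 * reference-counted under map_mutex, so nested map/unmap pairs are cheap.
 * Buffers created from user memory simply return the user pointer. */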
342 void *radeon_bo_do_map(struct radeon_bo *bo)
343 {
344 struct drm_radeon_gem_mmap args = {0};
345 void *ptr;
346
347 /* If the buffer is created from user memory, return the user pointer. */
348 if (bo->user_ptr)
349 return bo->user_ptr;
350
351 /* Map the buffer. */
352 pipe_mutex_lock(bo->map_mutex);
353 /* Return the pointer if it's already mapped. */
354 if (bo->ptr) {
355 bo->map_count++;
356 pipe_mutex_unlock(bo->map_mutex);
357 return bo->ptr;
358 }
359 args.handle = bo->handle;
360 args.offset = 0;
361 args.size = (uint64_t)bo->base.size;
362 if (drmCommandWriteRead(bo->rws->fd,
363 DRM_RADEON_GEM_MMAP,
364 &args,
365 sizeof(args))) {
366 pipe_mutex_unlock(bo->map_mutex);
367 fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
368 bo, bo->handle);
369 return NULL;
370 }
371
372 ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
373 bo->rws->fd, args.addr_ptr);
374 if (ptr == MAP_FAILED) {
375 pipe_mutex_unlock(bo->map_mutex);
376 fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
377 return NULL;
378 }
379 bo->ptr = ptr;
380 bo->map_count = 1;
381 pipe_mutex_unlock(bo->map_mutex);
382
383 return bo->ptr;
384 }
385
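/* buffer_map entry point for the winsys.
 *
 * Unless PIPE_TRANSFER_UNSYNCHRONIZED is set, this synchronizes with the
 * GPU first: the current CS is flushed if it references the buffer (for a
 * read-only mapping, only a write reference matters), and then the buffer
 * is waited on. With PIPE_TRANSFER_DONTBLOCK the check is non-blocking and
 * NULL is returned instead of stalling. */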
386 static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
387 struct radeon_winsys_cs *rcs,
388 enum pipe_transfer_usage usage)
389 {
390 struct radeon_bo *bo = (struct radeon_bo*)buf;
391 struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
392
393 /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
394 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
395 /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
396 if (usage & PIPE_TRANSFER_DONTBLOCK) {
397 if (!(usage & PIPE_TRANSFER_WRITE)) {
398 /* Mapping for read.
399 *
400 * Since we are mapping for read, we don't need to wait
401 * if the GPU is using the buffer for read too
402 * (neither one is changing it).
403 *
404 * Only check whether the buffer is being used for write. */
405 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
406 cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
407 return NULL;
408 }
409
410 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
411 RADEON_USAGE_WRITE)) {
412 return NULL;
413 }
414 } else {
415 if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
416 cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
417 return NULL;
418 }
419
420 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
421 RADEON_USAGE_READWRITE)) {
422 return NULL;
423 }
424 }
425 } else {
426 uint64_t time = os_time_get_nano();
427
428 if (!(usage & PIPE_TRANSFER_WRITE)) {
429 /* Mapping for read.
430 *
431 * Since we are mapping for read, we don't need to wait
432 * if the GPU is using the buffer for read too
433 * (neither one is changing it).
434 *
435 * Only check whether the buffer is being used for write. */
436 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
437 cs->flush_cs(cs->flush_data, 0, NULL);
438 }
439 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
440 RADEON_USAGE_WRITE);
441 } else {
442 /* Mapping for write. */
443 if (cs) {
444 if (radeon_bo_is_referenced_by_cs(cs, bo)) {
445 cs->flush_cs(cs->flush_data, 0, NULL);
446 } else {
447 /* Try to avoid busy-waiting in radeon_bo_wait. */
448 if (p_atomic_read(&bo->num_active_ioctls))
449 radeon_drm_cs_sync_flush(rcs);
450 }
451 }
452
453 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
454 RADEON_USAGE_READWRITE);
455 }
456
457 bo->mgr->rws->buffer_wait_time += os_time_get_nano() - time;
458 }
459 }
460
461 return radeon_bo_do_map(bo);
462 }
463
464 static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
465 {
466 struct radeon_bo *bo = (struct radeon_bo*)_buf;
467
468 if (bo->user_ptr)
469 return;
470
471 pipe_mutex_lock(bo->map_mutex);
472 if (!bo->ptr) {
473 pipe_mutex_unlock(bo->map_mutex);
474 return; /* it's not been mapped */
475 }
476
477 assert(bo->map_count);
478 if (--bo->map_count) {
479 pipe_mutex_unlock(bo->map_mutex);
480 return; /* it's been mapped multiple times */
481 }
482
483 os_munmap(bo->ptr, bo->base.size);
484 bo->ptr = NULL;
485 pipe_mutex_unlock(bo->map_mutex);
486 }
487
488 static void radeon_bo_get_base_buffer(struct pb_buffer *buf,
489 struct pb_buffer **base_buf,
490 unsigned *offset)
491 {
492 *base_buf = buf;
493 *offset = 0;
494 }
495
496 static enum pipe_error radeon_bo_validate(struct pb_buffer *_buf,
497 struct pb_validate *vl,
498 unsigned flags)
499 {
500 /* Always pinned */
501 return PIPE_OK;
502 }
503
504 static void radeon_bo_fence(struct pb_buffer *buf,
505 struct pipe_fence_handle *fence)
506 {
507 }
508
509 static const struct pb_vtbl radeon_bo_vtbl = {
510 radeon_bo_destroy,
511 NULL, /* never called */
512 NULL, /* never called */
513 radeon_bo_validate,
514 radeon_bo_fence,
515 radeon_bo_get_base_buffer,
516 };
517
518 #ifndef RADEON_GEM_GTT_WC
519 #define RADEON_GEM_GTT_WC (1 << 2)
520 #endif
521 #ifndef RADEON_GEM_CPU_ACCESS
522 /* BO is expected to be accessed by the CPU */
523 #define RADEON_GEM_CPU_ACCESS (1 << 3)
524 #endif
525 #ifndef RADEON_GEM_NO_CPU_ACCESS
526 /* CPU access is not expected to work for this BO */
527 #define RADEON_GEM_NO_CPU_ACCESS (1 << 4)
528 #endif
529
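/* Create a new buffer object through DRM_RADEON_GEM_CREATE.
 *
 * When virtual addressing is available, a VA range is reserved and mapped
 * with DRM_RADEON_GEM_VA. If the kernel reports RADEON_VA_RESULT_VA_EXIST,
 * another buffer already owns that range, so that existing buffer is
 * referenced and returned instead. */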
530 static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
531 pb_size size,
532 const struct pb_desc *desc)
533 {
534 struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
535 struct radeon_drm_winsys *rws = mgr->rws;
536 struct radeon_bo *bo;
537 struct drm_radeon_gem_create args;
538 struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
539 int r;
540
541 memset(&args, 0, sizeof(args));
542
543 assert(rdesc->initial_domains);
544 assert((rdesc->initial_domains &
545 ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
546
547 args.size = size;
548 args.alignment = desc->alignment;
549 args.initial_domain = rdesc->initial_domains;
550 args.flags = 0;
551
552 if (rdesc->flags & RADEON_FLAG_GTT_WC)
553 args.flags |= RADEON_GEM_GTT_WC;
554 if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
555 args.flags |= RADEON_GEM_CPU_ACCESS;
556 if (rdesc->flags & RADEON_FLAG_NO_CPU_ACCESS)
557 args.flags |= RADEON_GEM_NO_CPU_ACCESS;
558
559 if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
560 &args, sizeof(args))) {
561 fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
562 fprintf(stderr, "radeon: size : %d bytes\n", size);
563 fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
564 fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
565 fprintf(stderr, "radeon: flags : %d\n", args.flags);
566 return NULL;
567 }
568
569 bo = CALLOC_STRUCT(radeon_bo);
570 if (!bo)
571 return NULL;
572
573 pipe_reference_init(&bo->base.reference, 1);
574 bo->base.alignment = desc->alignment;
575 bo->base.usage = desc->usage;
576 bo->base.size = size;
577 bo->base.vtbl = &radeon_bo_vtbl;
578 bo->mgr = mgr;
579 bo->rws = mgr->rws;
580 bo->handle = args.handle;
581 bo->va = 0;
582 bo->initial_domain = rdesc->initial_domains;
583 pipe_mutex_init(bo->map_mutex);
584
585 if (mgr->va) {
586 struct drm_radeon_gem_va va;
587
588 bo->va = radeon_bomgr_find_va(mgr, size, desc->alignment);
589
590 va.handle = bo->handle;
591 va.vm_id = 0;
592 va.operation = RADEON_VA_MAP;
593 va.flags = RADEON_VM_PAGE_READABLE |
594 RADEON_VM_PAGE_WRITEABLE |
595 RADEON_VM_PAGE_SNOOPED;
596 va.offset = bo->va;
597 r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
598 if (r && va.operation == RADEON_VA_RESULT_ERROR) {
599 fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
600 fprintf(stderr, "radeon: size : %d bytes\n", size);
601 fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
602 fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
603 fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
604 radeon_bo_destroy(&bo->base);
605 return NULL;
606 }
607 pipe_mutex_lock(mgr->bo_handles_mutex);
608 if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
609 struct pb_buffer *b = &bo->base;
610 struct radeon_bo *old_bo =
611 util_hash_table_get(mgr->bo_vas, (void*)(uintptr_t)va.offset);
612
613 pipe_mutex_unlock(mgr->bo_handles_mutex);
614 pb_reference(&b, &old_bo->base);
615 return b;
616 }
617
618 util_hash_table_set(mgr->bo_vas, (void*)(uintptr_t)bo->va, bo);
619 pipe_mutex_unlock(mgr->bo_handles_mutex);
620 }
621
622 if (rdesc->initial_domains & RADEON_DOMAIN_VRAM)
623 rws->allocated_vram += align(size, 4096);
624 else if (rdesc->initial_domains & RADEON_DOMAIN_GTT)
625 rws->allocated_gtt += align(size, 4096);
626
627 return &bo->base;
628 }
629
630 static void radeon_bomgr_flush(struct pb_manager *mgr)
631 {
632 /* NOP */
633 }
634
635 /* This is for the cache bufmgr. */
636 static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr,
637 struct pb_buffer *_buf)
638 {
639 struct radeon_bo *bo = radeon_bo(_buf);
640
641 if (radeon_bo_is_referenced_by_any_cs(bo)) {
642 return TRUE;
643 }
644
645 if (!radeon_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) {
646 return TRUE;
647 }
648
649 return FALSE;
650 }
651
652 static void radeon_bomgr_destroy(struct pb_manager *_mgr)
653 {
654 struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
655 util_hash_table_destroy(mgr->bo_names);
656 util_hash_table_destroy(mgr->bo_handles);
657 util_hash_table_destroy(mgr->bo_vas);
658 pipe_mutex_destroy(mgr->bo_handles_mutex);
659 pipe_mutex_destroy(mgr->bo_va_mutex);
660 FREE(mgr);
661 }
662
663 #define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
664
665 static unsigned handle_hash(void *key)
666 {
667 return PTR_TO_UINT(key);
668 }
669
670 static int handle_compare(void *key1, void *key2)
671 {
672 return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
673 }
674
675 struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
676 {
677 struct radeon_bomgr *mgr;
678
679 mgr = CALLOC_STRUCT(radeon_bomgr);
680 if (!mgr)
681 return NULL;
682
683 mgr->base.destroy = radeon_bomgr_destroy;
684 mgr->base.create_buffer = radeon_bomgr_create_bo;
685 mgr->base.flush = radeon_bomgr_flush;
686 mgr->base.is_buffer_busy = radeon_bomgr_is_buffer_busy;
687
688 mgr->rws = rws;
689 mgr->bo_names = util_hash_table_create(handle_hash, handle_compare);
690 mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
691 mgr->bo_vas = util_hash_table_create(handle_hash, handle_compare);
692 pipe_mutex_init(mgr->bo_handles_mutex);
693 pipe_mutex_init(mgr->bo_va_mutex);
694
695 mgr->va = rws->info.r600_virtual_address;
696 mgr->va_offset = rws->va_start;
697 list_inithead(&mgr->va_holes);
698
699 return &mgr->base;
700 }
701
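/* eg_tile_split() converts the 3-bit Evergreen tile-split field from the
 * tiling flags into a size in bytes; eg_tile_split_rev() does the reverse
 * conversion. */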
702 static unsigned eg_tile_split(unsigned tile_split)
703 {
704 switch (tile_split) {
705 case 0: tile_split = 64; break;
706 case 1: tile_split = 128; break;
707 case 2: tile_split = 256; break;
708 case 3: tile_split = 512; break;
709 default:
710 case 4: tile_split = 1024; break;
711 case 5: tile_split = 2048; break;
712 case 6: tile_split = 4096; break;
713 }
714 return tile_split;
715 }
716
717 static unsigned eg_tile_split_rev(unsigned eg_tile_split)
718 {
719 switch (eg_tile_split) {
720 case 64: return 0;
721 case 128: return 1;
722 case 256: return 2;
723 case 512: return 3;
724 default:
725 case 1024: return 4;
726 case 2048: return 5;
727 case 4096: return 6;
728 }
729 }
730
731 static void radeon_bo_get_tiling(struct pb_buffer *_buf,
732 enum radeon_bo_layout *microtiled,
733 enum radeon_bo_layout *macrotiled,
734 unsigned *bankw, unsigned *bankh,
735 unsigned *tile_split,
736 unsigned *stencil_tile_split,
737 unsigned *mtilea,
738 bool *scanout)
739 {
740 struct radeon_bo *bo = get_radeon_bo(_buf);
741 struct drm_radeon_gem_set_tiling args;
742
743 memset(&args, 0, sizeof(args));
744
745 args.handle = bo->handle;
746
747 drmCommandWriteRead(bo->rws->fd,
748 DRM_RADEON_GEM_GET_TILING,
749 &args,
750 sizeof(args));
751
752 *microtiled = RADEON_LAYOUT_LINEAR;
753 *macrotiled = RADEON_LAYOUT_LINEAR;
754 if (args.tiling_flags & RADEON_TILING_MICRO)
755 *microtiled = RADEON_LAYOUT_TILED;
756 else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
757 *microtiled = RADEON_LAYOUT_SQUARETILED;
758
759 if (args.tiling_flags & RADEON_TILING_MACRO)
760 *macrotiled = RADEON_LAYOUT_TILED;
761 if (bankw && bankh && tile_split && stencil_tile_split && mtilea) {
762 *bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
763 *bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
764 *tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
765 *stencil_tile_split = (args.tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
766 *mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
767 *tile_split = eg_tile_split(*tile_split);
768 }
769 if (scanout)
770 *scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
771 }
772
773 static void radeon_bo_set_tiling(struct pb_buffer *_buf,
774 struct radeon_winsys_cs *rcs,
775 enum radeon_bo_layout microtiled,
776 enum radeon_bo_layout macrotiled,
777 unsigned pipe_config,
778 unsigned bankw, unsigned bankh,
779 unsigned tile_split,
780 unsigned stencil_tile_split,
781 unsigned mtilea, unsigned num_banks,
782 uint32_t pitch,
783 bool scanout)
784 {
785 struct radeon_bo *bo = get_radeon_bo(_buf);
786 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
787 struct drm_radeon_gem_set_tiling args;
788
789 memset(&args, 0, sizeof(args));
790
791 /* Tiling determines how DRM treats the buffer data.
792 * We must flush CS when changing it if the buffer is referenced. */
793 if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
794 cs->flush_cs(cs->flush_data, 0, NULL);
795 }
796
797 os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
798
799 if (microtiled == RADEON_LAYOUT_TILED)
800 args.tiling_flags |= RADEON_TILING_MICRO;
801 else if (microtiled == RADEON_LAYOUT_SQUARETILED)
802 args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
803
804 if (macrotiled == RADEON_LAYOUT_TILED)
805 args.tiling_flags |= RADEON_TILING_MACRO;
806
807 args.tiling_flags |= (bankw & RADEON_TILING_EG_BANKW_MASK) <<
808 RADEON_TILING_EG_BANKW_SHIFT;
809 args.tiling_flags |= (bankh & RADEON_TILING_EG_BANKH_MASK) <<
810 RADEON_TILING_EG_BANKH_SHIFT;
811 if (tile_split) {
812 args.tiling_flags |= (eg_tile_split_rev(tile_split) &
813 RADEON_TILING_EG_TILE_SPLIT_MASK) <<
814 RADEON_TILING_EG_TILE_SPLIT_SHIFT;
815 }
816 args.tiling_flags |= (stencil_tile_split &
817 RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK) <<
818 RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT;
819 args.tiling_flags |= (mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
820 RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
821
822 if (bo->rws->gen >= DRV_SI && !scanout)
823 args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
824
825 args.handle = bo->handle;
826 args.pitch = pitch;
827
828 drmCommandWriteRead(bo->rws->fd,
829 DRM_RADEON_GEM_SET_TILING,
830 &args,
831 sizeof(args));
832 }
833
834 static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle(struct pb_buffer *_buf)
835 {
836 /* return radeon_bo. */
837 return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf);
838 }
839
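/* buffer_create entry point for the winsys.
 *
 * The domain and flags are folded into desc.base.usage so that the caching
 * bufmgr never treats different domain/flag combinations as compatible:
 * assuming the usual RADEON_DOMAIN_* values, GTT maps to bit 0, VRAM to
 * bit 1 and VRAM_GTT to bit 2, and each distinct flag combination selects
 * one bit above bit 2. Sizes are rounded up to 4 KiB before allocation. */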
840 static struct pb_buffer *
841 radeon_winsys_bo_create(struct radeon_winsys *rws,
842 unsigned size,
843 unsigned alignment,
844 boolean use_reusable_pool,
845 enum radeon_bo_domain domain,
846 enum radeon_bo_flag flags)
847 {
848 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
849 struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
850 struct radeon_bo_desc desc;
851 struct pb_manager *provider;
852 struct pb_buffer *buffer;
853
854 memset(&desc, 0, sizeof(desc));
855 desc.base.alignment = alignment;
856
857 /* Align the size to the page size. This is the minimum alignment for
858 * normal BOs. Aligning it here helps the cached bufmgr: especially small
859 * BOs, like constant/uniform buffers, benefit from better reuse.
860 */
861 size = align(size, 4096);
862
863 /* Only set one usage bit each for domains and flags, or the cache manager
864 * might consider different sets of domains / flags compatible.
865 */
866 if (domain == RADEON_DOMAIN_VRAM_GTT)
867 desc.base.usage = 1 << 2;
868 else
869 desc.base.usage = domain >> 1;
870 assert(flags < sizeof(desc.base.usage) * 8 - 3);
871 desc.base.usage |= 1 << (flags + 3);
872
873 desc.initial_domains = domain;
874 desc.flags = flags;
875
876 /* Assign a buffer manager. */
877 if (use_reusable_pool)
878 provider = ws->cman;
879 else
880 provider = ws->kman;
881
882 buffer = provider->create_buffer(provider, size, &desc.base);
883 if (!buffer)
884 return NULL;
885
886 pipe_mutex_lock(mgr->bo_handles_mutex);
887 util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)get_radeon_bo(buffer)->handle, buffer);
888 pipe_mutex_unlock(mgr->bo_handles_mutex);
889
890 return (struct pb_buffer*)buffer;
891 }
892
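/* Create a buffer object backed by anonymous user memory.
 *
 * The pointer is registered through DRM_RADEON_GEM_USERPTR (anonymous
 * memory only; the kernel validates and tracks the pages), the size is
 * rounded up to the system page size, and the buffer is placed in the GTT
 * domain. */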
893 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
894 void *pointer, unsigned size)
895 {
896 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
897 struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
898 struct drm_radeon_gem_userptr args;
899 struct radeon_bo *bo;
900 int r;
901
902 bo = CALLOC_STRUCT(radeon_bo);
903 if (!bo)
904 return NULL;
905
906 memset(&args, 0, sizeof(args));
907 args.addr = (uintptr_t)pointer;
908 args.size = align(size, sysconf(_SC_PAGE_SIZE));
909 args.flags = RADEON_GEM_USERPTR_ANONONLY |
910 RADEON_GEM_USERPTR_VALIDATE |
911 RADEON_GEM_USERPTR_REGISTER;
912 if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
913 &args, sizeof(args))) {
914 FREE(bo);
915 return NULL;
916 }
917
918 pipe_mutex_lock(mgr->bo_handles_mutex);
919
920 /* Initialize it. */
921 pipe_reference_init(&bo->base.reference, 1);
922 bo->handle = args.handle;
923 bo->base.alignment = 0;
924 bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
925 bo->base.size = size;
926 bo->base.vtbl = &radeon_bo_vtbl;
927 bo->mgr = mgr;
928 bo->rws = mgr->rws;
929 bo->user_ptr = pointer;
930 bo->va = 0;
931 bo->initial_domain = RADEON_DOMAIN_GTT;
932 pipe_mutex_init(bo->map_mutex);
933
934 util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)bo->handle, bo);
935
936 pipe_mutex_unlock(mgr->bo_handles_mutex);
937
938 if (mgr->va) {
939 struct drm_radeon_gem_va va;
940
941 bo->va = radeon_bomgr_find_va(mgr, bo->base.size, 1 << 20);
942
943 va.handle = bo->handle;
944 va.operation = RADEON_VA_MAP;
945 va.vm_id = 0;
946 va.offset = bo->va;
947 va.flags = RADEON_VM_PAGE_READABLE |
948 RADEON_VM_PAGE_WRITEABLE |
949 RADEON_VM_PAGE_SNOOPED;
951 r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
952 if (r && va.operation == RADEON_VA_RESULT_ERROR) {
953 fprintf(stderr, "radeon: Failed to assign virtual address space\n");
954 radeon_bo_destroy(&bo->base);
955 return NULL;
956 }
957 pipe_mutex_lock(mgr->bo_handles_mutex);
958 if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
959 struct pb_buffer *b = &bo->base;
960 struct radeon_bo *old_bo =
961 util_hash_table_get(mgr->bo_vas, (void*)(uintptr_t)va.offset);
962
963 pipe_mutex_unlock(mgr->bo_handles_mutex);
964 pb_reference(&b, &old_bo->base);
965 return b;
966 }
967
968 util_hash_table_set(mgr->bo_vas, (void*)(uintptr_t)bo->va, bo);
969 pipe_mutex_unlock(mgr->bo_handles_mutex);
970 }
971
972 ws->allocated_gtt += align(bo->base.size, 4096);
973
974 return (struct pb_buffer*)bo;
975 }
976
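/* Import a buffer from a flink name or a dma-buf file descriptor.
 *
 * Flink names are opened with DRM_IOCTL_GEM_OPEN; prime fds are converted
 * to GEM handles with drmPrimeFDToHandle() and their size is read by
 * lseek()ing to the end of the fd. A per-manager table ensures that the
 * same underlying object always yields the same radeon_bo (see the comment
 * below). */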
977 static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
978 struct winsys_handle *whandle,
979 unsigned *stride)
980 {
981 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
982 struct radeon_bo *bo;
983 struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
984 int r;
985 unsigned handle;
986 uint64_t size = 0;
987
988 /* We must maintain a list of pairs <handle, bo>, so that we always return
989 * the same BO for one particular handle. If we didn't do that and created
990 * more than one BO for the same handle and then relocated them in a CS,
991 * we would hit a deadlock in the kernel.
992 *
993 * The list of pairs is guarded by a mutex, of course. */
994 pipe_mutex_lock(mgr->bo_handles_mutex);
995
996 if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
997 /* First check if there already is an existing bo for the handle. */
998 bo = util_hash_table_get(mgr->bo_names, (void*)(uintptr_t)whandle->handle);
999 } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1000 /* We must first get the GEM handle, as fds are unreliable keys */
1001 r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
1002 if (r)
1003 goto fail;
1004 bo = util_hash_table_get(mgr->bo_handles, (void*)(uintptr_t)handle);
1005 } else {
1006 /* Unknown handle type */
1007 goto fail;
1008 }
1009
1010 if (bo) {
1011 /* Increase the refcount. */
1012 struct pb_buffer *b = NULL;
1013 pb_reference(&b, &bo->base);
1014 goto done;
1015 }
1016
1017 /* There isn't, create a new one. */
1018 bo = CALLOC_STRUCT(radeon_bo);
1019 if (!bo) {
1020 goto fail;
1021 }
1022
1023 if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1024 struct drm_gem_open open_arg;
1025 memset(&open_arg, 0, sizeof(open_arg));
1026 /* Open the BO. */
1027 open_arg.name = whandle->handle;
1028 if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
1029 FREE(bo);
1030 goto fail;
1031 }
1032 handle = open_arg.handle;
1033 size = open_arg.size;
1034 bo->flink_name = whandle->handle;
1035 } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1036 size = lseek(whandle->handle, 0, SEEK_END);
1037 /*
1038 * Could check errno to determine whether the kernel is new enough, but
1039 * it doesn't really matter why this failed, just that it failed.
1040 */
1041 if (size == (off_t)-1) {
1042 FREE(bo);
1043 goto fail;
1044 }
1045 lseek(whandle->handle, 0, SEEK_SET);
1046 }
1047
1048 bo->handle = handle;
1049
1050 /* Initialize it. */
1051 pipe_reference_init(&bo->base.reference, 1);
1052 bo->base.alignment = 0;
1053 bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
1054 bo->base.size = (unsigned) size;
1055 bo->base.vtbl = &radeon_bo_vtbl;
1056 bo->mgr = mgr;
1057 bo->rws = mgr->rws;
1058 bo->va = 0;
1059 pipe_mutex_init(bo->map_mutex);
1060
1061 if (bo->flink_name)
1062 util_hash_table_set(mgr->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1063
1064 util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1065
1066 done:
1067 pipe_mutex_unlock(mgr->bo_handles_mutex);
1068
1069 if (stride)
1070 *stride = whandle->stride;
1071
1072 if (mgr->va && !bo->va) {
1073 struct drm_radeon_gem_va va;
1074
1075 bo->va = radeon_bomgr_find_va(mgr, bo->base.size, 1 << 20);
1076
1077 va.handle = bo->handle;
1078 va.operation = RADEON_VA_MAP;
1079 va.vm_id = 0;
1080 va.offset = bo->va;
1081 va.flags = RADEON_VM_PAGE_READABLE |
1082 RADEON_VM_PAGE_WRITEABLE |
1083 RADEON_VM_PAGE_SNOOPED;
1085 r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1086 if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1087 fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1088 radeon_bo_destroy(&bo->base);
1089 return NULL;
1090 }
1091 pipe_mutex_lock(mgr->bo_handles_mutex);
1092 if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1093 struct pb_buffer *b = &bo->base;
1094 struct radeon_bo *old_bo =
1095 util_hash_table_get(mgr->bo_vas, (void*)(uintptr_t)va.offset);
1096
1097 pipe_mutex_unlock(mgr->bo_handles_mutex);
1098 pb_reference(&b, &old_bo->base);
1099 return b;
1100 }
1101
1102 util_hash_table_set(mgr->bo_vas, (void*)(uintptr_t)bo->va, bo);
1103 pipe_mutex_unlock(mgr->bo_handles_mutex);
1104 }
1105
1106 bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
1107
1108 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1109 ws->allocated_vram += align(bo->base.size, 4096);
1110 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1111 ws->allocated_gtt += align(bo->base.size, 4096);
1112
1113 return (struct pb_buffer*)bo;
1114
1115 fail:
1116 pipe_mutex_unlock(mgr->bo_handles_mutex);
1117 return NULL;
1118 }
1119
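/* Export a buffer as a flink name, a KMS (GEM) handle or a dma-buf fd,
 * depending on whandle->type. Flink names are created on demand and cached
 * in bo->flink_name / mgr->bo_names. */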
1120 static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
1121 unsigned stride,
1122 struct winsys_handle *whandle)
1123 {
1124 struct drm_gem_flink flink;
1125 struct radeon_bo *bo = get_radeon_bo(buffer);
1126
1127 memset(&flink, 0, sizeof(flink));
1128
1129 if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1130 if (!bo->flink_name) {
1131 flink.handle = bo->handle;
1132
1133 if (ioctl(bo->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1134 return FALSE;
1135 }
1136
1137 bo->flink_name = flink.name;
1138
1139 pipe_mutex_lock(bo->mgr->bo_handles_mutex);
1140 util_hash_table_set(bo->mgr->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1141 pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
1142 }
1143 whandle->handle = bo->flink_name;
1144 } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
1145 whandle->handle = bo->handle;
1146 } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1147 if (drmPrimeHandleToFD(bo->rws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1148 return FALSE;
1149 }
1150
1151 whandle->stride = stride;
1152 return TRUE;
1153 }
1154
1155 static uint64_t radeon_winsys_bo_va(struct radeon_winsys_cs_handle *buf)
1156 {
1157 return ((struct radeon_bo*)buf)->va;
1158 }
1159
1160 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
1161 {
1162 ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
1163 ws->base.buffer_set_tiling = radeon_bo_set_tiling;
1164 ws->base.buffer_get_tiling = radeon_bo_get_tiling;
1165 ws->base.buffer_map = radeon_bo_map;
1166 ws->base.buffer_unmap = radeon_bo_unmap;
1167 ws->base.buffer_wait = radeon_bo_wait;
1168 ws->base.buffer_create = radeon_winsys_bo_create;
1169 ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1170 ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1171 ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1172 ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1173 ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1174 }