src/gallium/winsys/radeon/drm/radeon_drm_bo.c

   1 /*
   2  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 #include "radeon_drm_cs.h"
  28
  29 #include "util/u_hash_table.h"
  30 #include "util/u_memory.h"
  31 #include "util/simple_list.h"
  32 #include "os/os_thread.h"
  33 #include "os/os_mman.h"
  34 #include "util/os_time.h"
  35
  36 #include "state_tracker/drm_driver.h"
  37
  38 #include <sys/ioctl.h>
  39 #include <xf86drm.h>
  40 #include <errno.h>
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <inttypes.h>
  44
  45 static struct pb_buffer *
  46 radeon_winsys_bo_create(struct radeon_winsys *rws,
  47                         uint64_t size,
  48                         unsigned alignment,
  49                         enum radeon_bo_domain domain,
  50                         enum radeon_bo_flag flags);
  51
  52 static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
  53 {
  54     return (struct radeon_bo *)bo;
  55 }
  56
  57 struct radeon_bo_va_hole {
  58     struct list_head list;
  59     uint64_t         offset;
  60     uint64_t         size;
  61 };
  62
  63 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
  64 {
  65     struct drm_radeon_gem_busy args = {0};
  66
  67     args.handle = bo->handle;
  68     return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
  69                                &args, sizeof(args)) != 0;
  70 }
  71
  72 static bool radeon_bo_is_busy(struct radeon_bo *bo)
  73 {
  74     unsigned num_idle;
  75     bool busy = false;
  76
  77     if (bo->handle)
  78         return radeon_real_bo_is_busy(bo);
  79
  80     mtx_lock(&bo->rws->bo_fence_lock);
  81     for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
  82         if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
  83             busy = true;
  84             break;
  85         }
  86         radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
  87     }
  88     memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
  89             (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
  90     bo->u.slab.num_fences -= num_idle;
  91     mtx_unlock(&bo->rws->bo_fence_lock);
  92
  93     return busy;
  94 }
  95
  96 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
  97 {
  98     struct drm_radeon_gem_wait_idle args = {0};
  99
 100     args.handle = bo->handle;
 101     while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
 102                            &args, sizeof(args)) == -EBUSY);
 103 }
 104
 105 static void radeon_bo_wait_idle(struct radeon_bo *bo)
 106 {
 107     if (bo->handle) {
 108         radeon_real_bo_wait_idle(bo);
 109     } else {
 110         mtx_lock(&bo->rws->bo_fence_lock);
 111         while (bo->u.slab.num_fences) {
 112             struct radeon_bo *fence = NULL;
 113             radeon_bo_reference(&fence, bo->u.slab.fences[0]);
 114             mtx_unlock(&bo->rws->bo_fence_lock);
 115
 116             /* Wait without holding the fence lock. */
 117             radeon_real_bo_wait_idle(fence);
 118
 119             mtx_lock(&bo->rws->bo_fence_lock);
 120             if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
 121                 radeon_bo_reference(&bo->u.slab.fences[0], NULL);
 122                 memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
 123                         (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
 124                 bo->u.slab.num_fences--;
 125             }
 126             radeon_bo_reference(&fence, NULL);
 127         }
 128         mtx_unlock(&bo->rws->bo_fence_lock);
 129     }
 130 }
 131
 132 static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
 133                            enum radeon_bo_usage usage)
 134 {
 135     struct radeon_bo *bo = radeon_bo(_buf);
 136     int64_t abs_timeout;
 137
 138     /* No timeout. Just query. */
 139     if (timeout == 0)
 140         return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
 141
 142     abs_timeout = os_time_get_absolute_timeout(timeout);
 143
 144     /* Wait if any ioctl is being submitted with this buffer. */
 145     if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
 146         return false;
 147
 148     /* Infinite timeout. */
 149     if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
 150         radeon_bo_wait_idle(bo);
 151         return true;
 152     }
 153
 154     /* Other timeouts need to be emulated with a loop. */
 155     while (radeon_bo_is_busy(bo)) {
 156        if (os_time_get_nano() >= abs_timeout)
 157           return false;
 158        os_time_sleep(10);
 159     }
 160
 161     return true;
 162 }
 163
 164 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
 165 {
 166     /* Zero domains the driver doesn't understand. */
 167     domain &= RADEON_DOMAIN_VRAM_GTT;
 168
 169     /* If no domain is set, we must set something... */
 170     if (!domain)
 171         domain = RADEON_DOMAIN_VRAM_GTT;
 172
 173     return domain;
 174 }
 175
 176 static enum radeon_bo_domain radeon_bo_get_initial_domain(
 177                 struct pb_buffer *buf)
 178 {
 179     struct radeon_bo *bo = (struct radeon_bo*)buf;
 180     struct drm_radeon_gem_op args;
 181
 182     if (bo->rws->info.drm_minor < 38)
 183         return RADEON_DOMAIN_VRAM_GTT;
 184
 185     memset(&args, 0, sizeof(args));
 186     args.handle = bo->handle;
 187     args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
 188
 189     if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
 190                             &args, sizeof(args))) {
 191         fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
 192                 bo, bo->handle);
 193         /* Default domain as returned by get_valid_domain. */
 194         return RADEON_DOMAIN_VRAM_GTT;
 195     }
 196
 197     /* GEM domains and winsys domains are defined the same. */
 198     return get_valid_domain(args.value);
 199 }
 200
 201 static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
 202                                      struct radeon_vm_heap *heap,
 203                                      uint64_t size, uint64_t alignment)
 204 {
 205     struct radeon_bo_va_hole *hole, *n;
 206     uint64_t offset = 0, waste = 0;
 207
 208     /* All VM address space holes will implicitly start aligned to the
 209      * size alignment, so we don't need to sanitize the alignment here
 210      */
 211     size = align(size, info->gart_page_size);
 212
 213     mtx_lock(&heap->mutex);
 214     /* first look for a hole */
 215     LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
 216         offset = hole->offset;
 217         waste = offset % alignment;
 218         waste = waste ? alignment - waste : 0;
 219         offset += waste;
 220         if (offset >= (hole->offset + hole->size)) {
 221             continue;
 222         }
 223         if (!waste && hole->size == size) {
 224             offset = hole->offset;
 225             list_del(&hole->list);
 226             FREE(hole);
 227             mtx_unlock(&heap->mutex);
 228             return offset;
 229         }
 230         if ((hole->size - waste) > size) {
 231             if (waste) {
 232                 n = CALLOC_STRUCT(radeon_bo_va_hole);
 233                 n->size = waste;
 234                 n->offset = hole->offset;
 235                 list_add(&n->list, &hole->list);
 236             }
 237             hole->size -= (size + waste);
 238             hole->offset += size + waste;
 239             mtx_unlock(&heap->mutex);
 240             return offset;
 241         }
 242         if ((hole->size - waste) == size) {
 243             hole->size = waste;
 244             mtx_unlock(&heap->mutex);
 245             return offset;
 246         }
 247     }
 248
 249     offset = heap->start;
 250     waste = offset % alignment;
 251     waste = waste ? alignment - waste : 0;
 252
 253     if (offset + waste + size > heap->end) {
 254         mtx_unlock(&heap->mutex);
 255         return 0;
 256     }
 257
 258     if (waste) {
 259         n = CALLOC_STRUCT(radeon_bo_va_hole);
 260         n->size = waste;
 261         n->offset = offset;
 262         list_add(&n->list, &heap->holes);
 263     }
 264     offset += waste;
 265     heap->start += size + waste;
 266     mtx_unlock(&heap->mutex);
 267     return offset;
 268 }
 269
 270 static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
 271                                        uint64_t size, uint64_t alignment)
 272 {
 273     uint64_t va = 0;
 274
 275     /* Try to allocate from the 64-bit address space first.
 276      * If it doesn't exist (start = 0) or if it doesn't have enough space,
 277      * fall back to the 32-bit address space.
 278      */
 279     if (ws->vm64.start)
 280         va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
 281     if (!va)
 282         va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
 283     return va;
 284 }
 285
 286 static void radeon_bomgr_free_va(const struct radeon_info *info,
 287                                  struct radeon_vm_heap *heap,
 288                                  uint64_t va, uint64_t size)
 289 {
 290     struct radeon_bo_va_hole *hole = NULL;
 291
 292     size = align(size, info->gart_page_size);
 293
 294     mtx_lock(&heap->mutex);
 295     if ((va + size) == heap->start) {
 296         heap->start = va;
 297         /* Delete uppermost hole if it reaches the new top */
 298         if (!LIST_IS_EMPTY(&heap->holes)) {
 299             hole = container_of(heap->holes.next, hole, list);
 300             if ((hole->offset + hole->size) == va) {
 301                 heap->start = hole->offset;
 302                 list_del(&hole->list);
 303                 FREE(hole);
 304             }
 305         }
 306     } else {
 307         struct radeon_bo_va_hole *next;
 308
 309         hole = container_of(&heap->holes, hole, list);
 310         LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
 311             if (next->offset < va)
 312                 break;
 313             hole = next;
 314         }
 315
 316         if (&hole->list != &heap->holes) {
 317             /* Grow upper hole if it's adjacent */
 318             if (hole->offset == (va + size)) {
 319                 hole->offset = va;
 320                 hole->size += size;
 321                 /* Merge lower hole if it's adjacent */
 322                 if (next != hole && &next->list != &heap->holes &&
 323                     (next->offset + next->size) == va) {
 324                     next->size += hole->size;
 325                     list_del(&hole->list);
 326                     FREE(hole);
 327                 }
 328                 goto out;
 329             }
 330         }
 331
 332         /* Grow lower hole if it's adjacent */
 333         if (next != hole && &next->list != &heap->holes &&
 334             (next->offset + next->size) == va) {
 335             next->size += size;
 336             goto out;
 337         }
 338
 339         /* FIXME on allocation failure we just lose virtual address space
 340          * maybe print a warning
 341          */
 342         next = CALLOC_STRUCT(radeon_bo_va_hole);
 343         if (next) {
 344             next->size = size;
 345             next->offset = va;
 346             list_add(&next->list, &hole->list);
 347         }
 348     }
 349 out:
 350     mtx_unlock(&heap->mutex);
 351 }
 352
 353 void radeon_bo_destroy(struct pb_buffer *_buf)
 354 {
 355     struct radeon_bo *bo = radeon_bo(_buf);
 356     struct radeon_drm_winsys *rws = bo->rws;
 357     struct drm_gem_close args;
 358
 359     assert(bo->handle && "must not be called for slab entries");
 360
 361     memset(&args, 0, sizeof(args));
 362
 363     mtx_lock(&rws->bo_handles_mutex);
 364     util_hash_table_remove(rws->bo_handles, (void*)(uintptr_t)bo->handle);
 365     if (bo->flink_name) {
 366         util_hash_table_remove(rws->bo_names,
 367                                (void*)(uintptr_t)bo->flink_name);
 368     }
 369     mtx_unlock(&rws->bo_handles_mutex);
 370
 371     if (bo->u.real.ptr)
 372         os_munmap(bo->u.real.ptr, bo->base.size);
 373
 374     if (rws->info.has_virtual_memory) {
 375         if (rws->va_unmap_working) {
 376             struct drm_radeon_gem_va va;
 377
 378             va.handle = bo->handle;
 379             va.vm_id = 0;
 380             va.operation = RADEON_VA_UNMAP;
 381             va.flags = RADEON_VM_PAGE_READABLE |
 382                        RADEON_VM_PAGE_WRITEABLE |
 383                        RADEON_VM_PAGE_SNOOPED;
 384             va.offset = bo->va;
 385
 386             if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
 387                                     sizeof(va)) != 0 &&
 388                 va.operation == RADEON_VA_RESULT_ERROR) {
 389                 fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
 390                 fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
 391                 fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
 392             }
 393         }
 394
 395         radeon_bomgr_free_va(&rws->info,
 396                              bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
 397                              bo->va, bo->base.size);
 398     }
 399
 400     /* Close object. */
 401     args.handle = bo->handle;
 402     drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
 403
 404     mtx_destroy(&bo->u.real.map_mutex);
 405
 406     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 407         rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
 408     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
 409         rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
 410
 411     if (bo->u.real.map_count >= 1) {
 412         if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 413             bo->rws->mapped_vram -= bo->base.size;
 414         else
 415             bo->rws->mapped_gtt -= bo->base.size;
 416         bo->rws->num_mapped_buffers--;
 417     }
 418
 419     FREE(bo);
 420 }
 421
 422 static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
 423 {
 424    struct radeon_bo *bo = radeon_bo(_buf);
 425
 426     assert(bo->handle && "must not be called for slab entries");
 427
 428    if (bo->u.real.use_reusable_pool)
 429       pb_cache_add_buffer(&bo->u.real.cache_entry);
 430    else
 431       radeon_bo_destroy(_buf);
 432 }
 433
 434 void *radeon_bo_do_map(struct radeon_bo *bo)
 435 {
 436     struct drm_radeon_gem_mmap args = {0};
 437     void *ptr;
 438     unsigned offset;
 439
 440     /* If the buffer is created from user memory, return the user pointer. */
 441     if (bo->user_ptr)
 442         return bo->user_ptr;
 443
 444     if (bo->handle) {
 445         offset = 0;
 446     } else {
 447         offset = bo->va - bo->u.slab.real->va;
 448         bo = bo->u.slab.real;
 449     }
 450
 451     /* Map the buffer. */
 452     mtx_lock(&bo->u.real.map_mutex);
 453     /* Return the pointer if it's already mapped. */
 454     if (bo->u.real.ptr) {
 455         bo->u.real.map_count++;
 456         mtx_unlock(&bo->u.real.map_mutex);
 457         return (uint8_t*)bo->u.real.ptr + offset;
 458     }
 459     args.handle = bo->handle;
 460     args.offset = 0;
 461     args.size = (uint64_t)bo->base.size;
 462     if (drmCommandWriteRead(bo->rws->fd,
 463                             DRM_RADEON_GEM_MMAP,
 464                             &args,
 465                             sizeof(args))) {
 466         mtx_unlock(&bo->u.real.map_mutex);
 467         fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
 468                 bo, bo->handle);
 469         return NULL;
 470     }
 471
 472     ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
 473                bo->rws->fd, args.addr_ptr);
 474     if (ptr == MAP_FAILED) {
 475         /* Clear the cache and try again. */
 476         pb_cache_release_all_buffers(&bo->rws->bo_cache);
 477
 478         ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
 479                       bo->rws->fd, args.addr_ptr);
 480         if (ptr == MAP_FAILED) {
 481             mtx_unlock(&bo->u.real.map_mutex);
 482             fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
 483             return NULL;
 484         }
 485     }
 486     bo->u.real.ptr = ptr;
 487     bo->u.real.map_count = 1;
 488
 489     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 490        bo->rws->mapped_vram += bo->base.size;
 491     else
 492        bo->rws->mapped_gtt += bo->base.size;
 493     bo->rws->num_mapped_buffers++;
 494
 495     mtx_unlock(&bo->u.real.map_mutex);
 496     return (uint8_t*)bo->u.real.ptr + offset;
 497 }
 498
 499 static void *radeon_bo_map(struct pb_buffer *buf,
 500                            struct radeon_winsys_cs *rcs,
 501                            enum pipe_transfer_usage usage)
 502 {
 503     struct radeon_bo *bo = (struct radeon_bo*)buf;
 504     struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
 505
 506     /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
 507     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 508         /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
 509         if (usage & PIPE_TRANSFER_DONTBLOCK) {
 510             if (!(usage & PIPE_TRANSFER_WRITE)) {
 511                 /* Mapping for read.
 512                  *
 513                  * Since we are mapping for read, we don't need to wait
 514                  * if the GPU is using the buffer for read too
 515                  * (neither one is changing it).
 516                  *
 517                  * Only check whether the buffer is being used for write. */
 518                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
 519                     cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL);
 520                     return NULL;
 521                 }
 522
 523                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
 524                                     RADEON_USAGE_WRITE)) {
 525                     return NULL;
 526                 }
 527             } else {
 528                 if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
 529                     cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL);
 530                     return NULL;
 531                 }
 532
 533                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
 534                                     RADEON_USAGE_READWRITE)) {
 535                     return NULL;
 536                 }
 537             }
 538         } else {
 539             uint64_t time = os_time_get_nano();
 540
 541             if (!(usage & PIPE_TRANSFER_WRITE)) {
 542                 /* Mapping for read.
 543                  *
 544                  * Since we are mapping for read, we don't need to wait
 545                  * if the GPU is using the buffer for read too
 546                  * (neither one is changing it).
 547                  *
 548                  * Only check whether the buffer is being used for write. */
 549                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
 550                     cs->flush_cs(cs->flush_data, 0, NULL);
 551                 }
 552                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
 553                                RADEON_USAGE_WRITE);
 554             } else {
 555                 /* Mapping for write. */
 556                 if (cs) {
 557                     if (radeon_bo_is_referenced_by_cs(cs, bo)) {
 558                         cs->flush_cs(cs->flush_data, 0, NULL);
 559                     } else {
 560                         /* Try to avoid busy-waiting in radeon_bo_wait. */
 561                         if (p_atomic_read(&bo->num_active_ioctls))
 562                             radeon_drm_cs_sync_flush(rcs);
 563                     }
 564                 }
 565
 566                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
 567                                RADEON_USAGE_READWRITE);
 568             }
 569
 570             bo->rws->buffer_wait_time += os_time_get_nano() - time;
 571         }
 572     }
 573
 574     return radeon_bo_do_map(bo);
 575 }
 576
 577 static void radeon_bo_unmap(struct pb_buffer *_buf)
 578 {
 579     struct radeon_bo *bo = (struct radeon_bo*)_buf;
 580
 581     if (bo->user_ptr)
 582         return;
 583
 584     if (!bo->handle)
 585         bo = bo->u.slab.real;
 586
 587     mtx_lock(&bo->u.real.map_mutex);
 588     if (!bo->u.real.ptr) {
 589         mtx_unlock(&bo->u.real.map_mutex);
 590         return; /* it's not been mapped */
 591     }
 592
 593     assert(bo->u.real.map_count);
 594     if (--bo->u.real.map_count) {
 595         mtx_unlock(&bo->u.real.map_mutex);
 596         return; /* it's been mapped multiple times */
 597     }
 598
 599     os_munmap(bo->u.real.ptr, bo->base.size);
 600     bo->u.real.ptr = NULL;
 601
 602     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 603        bo->rws->mapped_vram -= bo->base.size;
 604     else
 605        bo->rws->mapped_gtt -= bo->base.size;
 606     bo->rws->num_mapped_buffers--;
 607
 608     mtx_unlock(&bo->u.real.map_mutex);
 609 }
 610
 611 static const struct pb_vtbl radeon_bo_vtbl = {
 612     radeon_bo_destroy_or_cache
 613     /* other functions are never called */
 614 };
 615
 616 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
 617                                           unsigned size, unsigned alignment,
 618                                           unsigned initial_domains,
 619                                           unsigned flags,
 620                                           int heap)
 621 {
 622     struct radeon_bo *bo;
 623     struct drm_radeon_gem_create args;
 624     int r;
 625
 626     memset(&args, 0, sizeof(args));
 627
 628     assert(initial_domains);
 629     assert((initial_domains &
 630             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
 631
 632     args.size = size;
 633     args.alignment = alignment;
 634     args.initial_domain = initial_domains;
 635     args.flags = 0;
 636
 637     /* If VRAM is just stolen system memory, allow both VRAM and
 638      * GTT, whichever has free space. If a buffer is evicted from
 639      * VRAM to GTT, it will stay there.
 640      */
 641     if (!rws->info.has_dedicated_vram)
 642         args.initial_domain |= RADEON_DOMAIN_GTT;
 643
 644     if (flags & RADEON_FLAG_GTT_WC)
 645         args.flags |= RADEON_GEM_GTT_WC;
 646     if (flags & RADEON_FLAG_NO_CPU_ACCESS)
 647         args.flags |= RADEON_GEM_NO_CPU_ACCESS;
 648
 649     if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
 650                             &args, sizeof(args))) {
 651         fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
 652         fprintf(stderr, "radeon:    size      : %u bytes\n", size);
 653         fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
 654         fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
 655         fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
 656         return NULL;
 657     }
 658
 659     assert(args.handle != 0);
 660
 661     bo = CALLOC_STRUCT(radeon_bo);
 662     if (!bo)
 663         return NULL;
 664
 665     pipe_reference_init(&bo->base.reference, 1);
 666     bo->base.alignment = alignment;
 667     bo->base.usage = 0;
 668     bo->base.size = size;
 669     bo->base.vtbl = &radeon_bo_vtbl;
 670     bo->rws = rws;
 671     bo->handle = args.handle;
 672     bo->va = 0;
 673     bo->initial_domain = initial_domains;
 674     bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
 675     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
 676
 677     if (heap >= 0) {
 678         pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
 679                             heap);
 680     }
 681
 682     if (rws->info.has_virtual_memory) {
 683         struct drm_radeon_gem_va va;
 684         unsigned va_gap_size;
 685
 686         va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
 687
 688         if (flags & RADEON_FLAG_32BIT) {
 689             bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
 690                                           size + va_gap_size, alignment);
 691             assert(bo->va + size < rws->vm32.end);
 692         } else {
 693             bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
 694         }
 695
 696         va.handle = bo->handle;
 697         va.vm_id = 0;
 698         va.operation = RADEON_VA_MAP;
 699         va.flags = RADEON_VM_PAGE_READABLE |
 700                    RADEON_VM_PAGE_WRITEABLE |
 701                    RADEON_VM_PAGE_SNOOPED;
 702         va.offset = bo->va;
 703         r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
 704         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
 705             fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
 706             fprintf(stderr, "radeon:    size      : %d bytes\n", size);
 707             fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
 708             fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
 709             fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
 710             radeon_bo_destroy(&bo->base);
 711             return NULL;
 712         }
 713         mtx_lock(&rws->bo_handles_mutex);
 714         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
 715             struct pb_buffer *b = &bo->base;
 716             struct radeon_bo *old_bo =
 717                 util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
 718
 719             mtx_unlock(&rws->bo_handles_mutex);
 720             pb_reference(&b, &old_bo->base);
 721             return radeon_bo(b);
 722         }
 723
 724         util_hash_table_set(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
 725         mtx_unlock(&rws->bo_handles_mutex);
 726     }
 727
 728     if (initial_domains & RADEON_DOMAIN_VRAM)
 729         rws->allocated_vram += align(size, rws->info.gart_page_size);
 730     else if (initial_domains & RADEON_DOMAIN_GTT)
 731         rws->allocated_gtt += align(size, rws->info.gart_page_size);
 732
 733     return bo;
 734 }
 735
 736 bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
 737 {
 738    struct radeon_bo *bo = radeon_bo(_buf);
 739
 740    if (radeon_bo_is_referenced_by_any_cs(bo))
 741       return false;
 742
 743    return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
 744 }
 745
 746 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
 747 {
 748     struct radeon_bo *bo = NULL; /* fix container_of */
 749     bo = container_of(entry, bo, u.slab.entry);
 750
 751     return radeon_bo_can_reclaim(&bo->base);
 752 }
 753
 754 static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
 755 {
 756     struct radeon_bo *bo = radeon_bo(_buf);
 757
 758     assert(!bo->handle);
 759
 760     pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
 761 }
 762
 763 static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
 764     radeon_bo_slab_destroy
 765     /* other functions are never called */
 766 };
 767
 768 struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
 769                                      unsigned entry_size,
 770                                      unsigned group_index)
 771 {
 772     struct radeon_drm_winsys *ws = priv;
 773     struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
 774     enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
 775     enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
 776     unsigned base_hash;
 777
 778     if (!slab)
 779         return NULL;
 780
 781     slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
 782                                                      64 * 1024, 64 * 1024,
 783                                                      domains, flags));
 784     if (!slab->buffer)
 785         goto fail;
 786
 787     assert(slab->buffer->handle);
 788
 789     slab->base.num_entries = slab->buffer->base.size / entry_size;
 790     slab->base.num_free = slab->base.num_entries;
 791     slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
 792     if (!slab->entries)
 793         goto fail_buffer;
 794
 795     LIST_INITHEAD(&slab->base.free);
 796
 797     base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
 798
 799     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
 800         struct radeon_bo *bo = &slab->entries[i];
 801
 802         bo->base.alignment = entry_size;
 803         bo->base.usage = slab->buffer->base.usage;
 804         bo->base.size = entry_size;
 805         bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
 806         bo->rws = ws;
 807         bo->va = slab->buffer->va + i * entry_size;
 808         bo->initial_domain = domains;
 809         bo->hash = base_hash + i;
 810         bo->u.slab.entry.slab = &slab->base;
 811         bo->u.slab.entry.group_index = group_index;
 812         bo->u.slab.real = slab->buffer;
 813
 814         LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
 815     }
 816
 817     return &slab->base;
 818
 819 fail_buffer:
 820     radeon_bo_reference(&slab->buffer, NULL);
 821 fail:
 822     FREE(slab);
 823     return NULL;
 824 }
 825
 826 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
 827 {
 828     struct radeon_slab *slab = (struct radeon_slab *)pslab;
 829
 830     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
 831         struct radeon_bo *bo = &slab->entries[i];
 832         for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
 833             radeon_bo_reference(&bo->u.slab.fences[j], NULL);
 834         FREE(bo->u.slab.fences);
 835     }
 836
 837     FREE(slab->entries);
 838     radeon_bo_reference(&slab->buffer, NULL);
 839     FREE(slab);
 840 }
 841
 842 static unsigned eg_tile_split(unsigned tile_split)
 843 {
 844     switch (tile_split) {
 845     case 0:     tile_split = 64;    break;
 846     case 1:     tile_split = 128;   break;
 847     case 2:     tile_split = 256;   break;
 848     case 3:     tile_split = 512;   break;
 849     default:
 850     case 4:     tile_split = 1024;  break;
 851     case 5:     tile_split = 2048;  break;
 852     case 6:     tile_split = 4096;  break;
 853     }
 854     return tile_split;
 855 }
 856
 857 static unsigned eg_tile_split_rev(unsigned eg_tile_split)
 858 {
 859     switch (eg_tile_split) {
 860     case 64:    return 0;
 861     case 128:   return 1;
 862     case 256:   return 2;
 863     case 512:   return 3;
 864     default:
 865     case 1024:  return 4;
 866     case 2048:  return 5;
 867     case 4096:  return 6;
 868     }
 869 }
 870
 871 static void radeon_bo_get_metadata(struct pb_buffer *_buf,
 872                                    struct radeon_bo_metadata *md)
 873 {
 874     struct radeon_bo *bo = radeon_bo(_buf);
 875     struct drm_radeon_gem_set_tiling args;
 876
 877     assert(bo->handle && "must not be called for slab entries");
 878
 879     memset(&args, 0, sizeof(args));
 880
 881     args.handle = bo->handle;
 882
 883     drmCommandWriteRead(bo->rws->fd,
 884                         DRM_RADEON_GEM_GET_TILING,
 885                         &args,
 886                         sizeof(args));
 887
 888     md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
 889     md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
 890     if (args.tiling_flags & RADEON_TILING_MICRO)
 891         md->u.legacy.microtile = RADEON_LAYOUT_TILED;
 892     else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
 893         md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;
 894
 895     if (args.tiling_flags & RADEON_TILING_MACRO)
 896         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
 897
 898     md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
 899     md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
 900     md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
 901     md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
 902     md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
 903     md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
 904 }
 905
 906 static void radeon_bo_set_metadata(struct pb_buffer *_buf,
 907                                    struct radeon_bo_metadata *md)
 908 {
 909     struct radeon_bo *bo = radeon_bo(_buf);
 910     struct drm_radeon_gem_set_tiling args;
 911
 912     assert(bo->handle && "must not be called for slab entries");
 913
 914     memset(&args, 0, sizeof(args));
 915
 916     os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
 917
 918     if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
 919         args.tiling_flags |= RADEON_TILING_MICRO;
 920     else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
 921         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
 922
 923     if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
 924         args.tiling_flags |= RADEON_TILING_MACRO;
 925
 926     args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
 927         RADEON_TILING_EG_BANKW_SHIFT;
 928     args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
 929         RADEON_TILING_EG_BANKH_SHIFT;
 930     if (md->u.legacy.tile_split) {
 931         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
 932                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
 933             RADEON_TILING_EG_TILE_SPLIT_SHIFT;
 934     }
 935     args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
 936         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
 937
 938     if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
 939         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
 940
 941     args.handle = bo->handle;
 942     args.pitch = md->u.legacy.stride;
 943
 944     drmCommandWriteRead(bo->rws->fd,
 945                         DRM_RADEON_GEM_SET_TILING,
 946                         &args,
 947                         sizeof(args));
 948 }
 949
 950 static struct pb_buffer *
 951 radeon_winsys_bo_create(struct radeon_winsys *rws,
 952                         uint64_t size,
 953                         unsigned alignment,
 954                         enum radeon_bo_domain domain,
 955                         enum radeon_bo_flag flags)
 956 {
 957     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
 958     struct radeon_bo *bo;
 959     int heap = -1;
 960
 961     assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
 962
 963     /* Only 32-bit sizes are supported. */
 964     if (size > UINT_MAX)
 965         return NULL;
 966
 967     /* VRAM implies WC. This is not optional. */
 968     if (domain & RADEON_DOMAIN_VRAM)
 969         flags |= RADEON_FLAG_GTT_WC;
 970     /* NO_CPU_ACCESS is valid with VRAM only. */
 971     if (domain != RADEON_DOMAIN_VRAM)
 972         flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
 973
 974     /* Sub-allocate small buffers from slabs. */
 975     if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
 976         size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
 977         ws->info.has_virtual_memory &&
 978         alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
 979         struct pb_slab_entry *entry;
 980         int heap = radeon_get_heap_index(domain, flags);
 981
 982         if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
 983             goto no_slab;
 984
 985         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
 986         if (!entry) {
 987             /* Clear the cache and try again. */
 988             pb_cache_release_all_buffers(&ws->bo_cache);
 989
 990             entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
 991         }
 992         if (!entry)
 993             return NULL;
 994
 995         bo = NULL;
 996         bo = container_of(entry, bo, u.slab.entry);
 997
 998         pipe_reference_init(&bo->base.reference, 1);
 999
1000         return &bo->base;
1001     }
1002 no_slab:
1003
1004     /* This flag is irrelevant for the cache. */
1005     flags &= ~RADEON_FLAG_NO_SUBALLOC;
1006
1007     /* Align size to page size. This is the minimum alignment for normal
1008      * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
1009      * like constant/uniform buffers, can benefit from better and more reuse.
1010      */
1011     size = align(size, ws->info.gart_page_size);
1012     alignment = align(alignment, ws->info.gart_page_size);
1013
1014     bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
1015
1016     /* Shared resources don't use cached heaps. */
1017     if (use_reusable_pool) {
1018         heap = radeon_get_heap_index(domain, flags);
1019         assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
1020
1021         bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
1022                                                0, heap));
1023         if (bo)
1024             return &bo->base;
1025     }
1026
1027     bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1028     if (!bo) {
1029         /* Clear the cache and try again. */
1030         if (ws->info.has_virtual_memory)
1031             pb_slabs_reclaim(&ws->bo_slabs);
1032         pb_cache_release_all_buffers(&ws->bo_cache);
1033         bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1034         if (!bo)
1035             return NULL;
1036     }
1037
1038     bo->u.real.use_reusable_pool = use_reusable_pool;
1039
1040     mtx_lock(&ws->bo_handles_mutex);
1041     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1042     mtx_unlock(&ws->bo_handles_mutex);
1043
1044     return &bo->base;
1045 }
1046
1047 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1048                                                    void *pointer, uint64_t size)
1049 {
1050     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1051     struct drm_radeon_gem_userptr args;
1052     struct radeon_bo *bo;
1053     int r;
1054
1055     bo = CALLOC_STRUCT(radeon_bo);
1056     if (!bo)
1057         return NULL;
1058
1059     memset(&args, 0, sizeof(args));
1060     args.addr = (uintptr_t)pointer;
1061     args.size = align(size, ws->info.gart_page_size);
1062     args.flags = RADEON_GEM_USERPTR_ANONONLY |
1063         RADEON_GEM_USERPTR_VALIDATE |
1064         RADEON_GEM_USERPTR_REGISTER;
1065     if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1066                             &args, sizeof(args))) {
1067         FREE(bo);
1068         return NULL;
1069     }
1070
1071     assert(args.handle != 0);
1072
1073     mtx_lock(&ws->bo_handles_mutex);
1074
1075     /* Initialize it. */
1076     pipe_reference_init(&bo->base.reference, 1);
1077     bo->handle = args.handle;
1078     bo->base.alignment = 0;
1079     bo->base.size = size;
1080     bo->base.vtbl = &radeon_bo_vtbl;
1081     bo->rws = ws;
1082     bo->user_ptr = pointer;
1083     bo->va = 0;
1084     bo->initial_domain = RADEON_DOMAIN_GTT;
1085     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1086     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1087
1088     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1089
1090     mtx_unlock(&ws->bo_handles_mutex);
1091
1092     if (ws->info.has_virtual_memory) {
1093         struct drm_radeon_gem_va va;
1094
1095         bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
1096
1097         va.handle = bo->handle;
1098         va.operation = RADEON_VA_MAP;
1099         va.vm_id = 0;
1100         va.offset = bo->va;
1101         va.flags = RADEON_VM_PAGE_READABLE |
1102                    RADEON_VM_PAGE_WRITEABLE |
1103                    RADEON_VM_PAGE_SNOOPED;
1104         va.offset = bo->va;
1105         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1106         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1107             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1108             radeon_bo_destroy(&bo->base);
1109             return NULL;
1110         }
1111         mtx_lock(&ws->bo_handles_mutex);
1112         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1113             struct pb_buffer *b = &bo->base;
1114             struct radeon_bo *old_bo =
1115                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1116
1117             mtx_unlock(&ws->bo_handles_mutex);
1118             pb_reference(&b, &old_bo->base);
1119             return b;
1120         }
1121
1122         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1123         mtx_unlock(&ws->bo_handles_mutex);
1124     }
1125
1126     ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1127
1128     return (struct pb_buffer*)bo;
1129 }
1130
1131 static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
1132                                                       struct winsys_handle *whandle,
1133                                                       unsigned *stride,
1134                                                       unsigned *offset)
1135 {
1136     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1137     struct radeon_bo *bo;
1138     int r;
1139     unsigned handle;
1140     uint64_t size = 0;
1141
1142     if (!offset && whandle->offset != 0) {
1143         fprintf(stderr, "attempt to import unsupported winsys offset %u\n",
1144                 whandle->offset);
1145         return NULL;
1146     }
1147
1148     /* We must maintain a list of pairs <handle, bo>, so that we always return
1149      * the same BO for one particular handle. If we didn't do that and created
1150      * more than one BO for the same handle and then relocated them in a CS,
1151      * we would hit a deadlock in the kernel.
1152      *
1153      * The list of pairs is guarded by a mutex, of course. */
1154     mtx_lock(&ws->bo_handles_mutex);
1155
1156     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1157         /* First check if there already is an existing bo for the handle. */
1158         bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
1159     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1160         /* We must first get the GEM handle, as fds are unreliable keys */
1161         r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
1162         if (r)
1163             goto fail;
1164         bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
1165     } else {
1166         /* Unknown handle type */
1167         goto fail;
1168     }
1169
1170     if (bo) {
1171         /* Increase the refcount. */
1172         struct pb_buffer *b = NULL;
1173         pb_reference(&b, &bo->base);
1174         goto done;
1175     }
1176
1177     /* There isn't, create a new one. */
1178     bo = CALLOC_STRUCT(radeon_bo);
1179     if (!bo) {
1180         goto fail;
1181     }
1182
1183     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1184         struct drm_gem_open open_arg = {};
1185         memset(&open_arg, 0, sizeof(open_arg));
1186         /* Open the BO. */
1187         open_arg.name = whandle->handle;
1188         if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
1189             FREE(bo);
1190             goto fail;
1191         }
1192         handle = open_arg.handle;
1193         size = open_arg.size;
1194         bo->flink_name = whandle->handle;
1195     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1196         size = lseek(whandle->handle, 0, SEEK_END);
1197         /*
1198          * Could check errno to determine whether the kernel is new enough, but
1199          * it doesn't really matter why this failed, just that it failed.
1200          */
1201         if (size == (off_t)-1) {
1202             FREE(bo);
1203             goto fail;
1204         }
1205         lseek(whandle->handle, 0, SEEK_SET);
1206     }
1207
1208     assert(handle != 0);
1209
1210     bo->handle = handle;
1211
1212     /* Initialize it. */
1213     pipe_reference_init(&bo->base.reference, 1);
1214     bo->base.alignment = 0;
1215     bo->base.size = (unsigned) size;
1216     bo->base.vtbl = &radeon_bo_vtbl;
1217     bo->rws = ws;
1218     bo->va = 0;
1219     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1220     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1221
1222     if (bo->flink_name)
1223         util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1224
1225     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1226
1227 done:
1228     mtx_unlock(&ws->bo_handles_mutex);
1229
1230     if (stride)
1231         *stride = whandle->stride;
1232     if (offset)
1233         *offset = whandle->offset;
1234
1235     if (ws->info.has_virtual_memory && !bo->va) {
1236         struct drm_radeon_gem_va va;
1237
1238         bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
1239
1240         va.handle = bo->handle;
1241         va.operation = RADEON_VA_MAP;
1242         va.vm_id = 0;
1243         va.offset = bo->va;
1244         va.flags = RADEON_VM_PAGE_READABLE |
1245                    RADEON_VM_PAGE_WRITEABLE |
1246                    RADEON_VM_PAGE_SNOOPED;
1247         va.offset = bo->va;
1248         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1249         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1250             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1251             radeon_bo_destroy(&bo->base);
1252             return NULL;
1253         }
1254         mtx_lock(&ws->bo_handles_mutex);
1255         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1256             struct pb_buffer *b = &bo->base;
1257             struct radeon_bo *old_bo =
1258                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1259
1260             mtx_unlock(&ws->bo_handles_mutex);
1261             pb_reference(&b, &old_bo->base);
1262             return b;
1263         }
1264
1265         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1266         mtx_unlock(&ws->bo_handles_mutex);
1267     }
1268
1269     bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
1270
1271     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1272         ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
1273     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1274         ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1275
1276     return (struct pb_buffer*)bo;
1277
1278 fail:
1279     mtx_unlock(&ws->bo_handles_mutex);
1280     return NULL;
1281 }
1282
1283 static bool radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
1284                                         unsigned stride, unsigned offset,
1285                                         unsigned slice_size,
1286                                         struct winsys_handle *whandle)
1287 {
1288     struct drm_gem_flink flink;
1289     struct radeon_bo *bo = radeon_bo(buffer);
1290     struct radeon_drm_winsys *ws = bo->rws;
1291
1292     /* Don't allow exports of slab entries. */
1293     if (!bo->handle)
1294         return false;
1295
1296     memset(&flink, 0, sizeof(flink));
1297
1298     bo->u.real.use_reusable_pool = false;
1299
1300     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1301         if (!bo->flink_name) {
1302             flink.handle = bo->handle;
1303
1304             if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1305                 return false;
1306             }
1307
1308             bo->flink_name = flink.name;
1309
1310             mtx_lock(&ws->bo_handles_mutex);
1311             util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1312             mtx_unlock(&ws->bo_handles_mutex);
1313         }
1314         whandle->handle = bo->flink_name;
1315     } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
1316         whandle->handle = bo->handle;
1317     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1318         if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1319             return false;
1320     }
1321
1322     whandle->stride = stride;
1323     whandle->offset = offset;
1324     whandle->offset += slice_size * whandle->layer;
1325
1326     return true;
1327 }
1328
1329 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
1330 {
1331    return ((struct radeon_bo*)buf)->user_ptr != NULL;
1332 }
1333
1334 static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf)
1335 {
1336    return !((struct radeon_bo*)buf)->handle;
1337 }
1338
1339 static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
1340 {
1341     return ((struct radeon_bo*)buf)->va;
1342 }
1343
1344 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
1345 {
1346     struct radeon_bo *bo = radeon_bo(buf);
1347
1348     if (bo->handle)
1349         return 0;
1350
1351     return bo->va - bo->u.slab.real->va;
1352 }
1353
1354 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1355 {
1356     ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1357     ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1358     ws->base.buffer_map = radeon_bo_map;
1359     ws->base.buffer_unmap = radeon_bo_unmap;
1360     ws->base.buffer_wait = radeon_bo_wait;
1361     ws->base.buffer_create = radeon_winsys_bo_create;
1362     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1363     ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1364     ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1365     ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
1366     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1367     ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1368     ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1369     ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1370 }