src/gallium/winsys/radeon/drm/radeon_drm_bo.c

   1 /*
   2  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 #include "radeon_drm_cs.h"
  28
  29 #include "util/u_hash_table.h"
  30 #include "util/u_memory.h"
  31 #include "util/simple_list.h"
  32 #include "os/os_thread.h"
  33 #include "os/os_mman.h"
  34 #include "os/os_time.h"
  35
  36 #include "state_tracker/drm_driver.h"
  37
  38 #include <sys/ioctl.h>
  39 #include <xf86drm.h>
  40 #include <errno.h>
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <inttypes.h>
  44
  45 static struct pb_buffer *
  46 radeon_winsys_bo_create(struct radeon_winsys *rws,
  47                         uint64_t size,
  48                         unsigned alignment,
  49                         enum radeon_bo_domain domain,
  50                         enum radeon_bo_flag flags);
  51
  52 static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
  53 {
  54     return (struct radeon_bo *)bo;
  55 }
  56
  57 struct radeon_bo_va_hole {
  58     struct list_head list;
  59     uint64_t         offset;
  60     uint64_t         size;
  61 };
  62
  63 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
  64 {
  65     struct drm_radeon_gem_busy args = {0};
  66
  67     args.handle = bo->handle;
  68     return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
  69                                &args, sizeof(args)) != 0;
  70 }
  71
  72 static bool radeon_bo_is_busy(struct radeon_bo *bo)
  73 {
  74     unsigned num_idle;
  75     bool busy = false;
  76
  77     if (bo->handle)
  78         return radeon_real_bo_is_busy(bo);
  79
  80     pipe_mutex_lock(bo->rws->bo_fence_lock);
  81     for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
  82         if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
  83             busy = true;
  84             break;
  85         }
  86         radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
  87     }
  88     memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
  89             (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
  90     bo->u.slab.num_fences -= num_idle;
  91     pipe_mutex_unlock(bo->rws->bo_fence_lock);
  92
  93     return busy;
  94 }
  95
  96 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
  97 {
  98     struct drm_radeon_gem_wait_idle args = {0};
  99
 100     args.handle = bo->handle;
 101     while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
 102                            &args, sizeof(args)) == -EBUSY);
 103 }
 104
 105 static void radeon_bo_wait_idle(struct radeon_bo *bo)
 106 {
 107     if (bo->handle) {
 108         radeon_real_bo_wait_idle(bo);
 109     } else {
 110         pipe_mutex_lock(bo->rws->bo_fence_lock);
 111         while (bo->u.slab.num_fences) {
 112             struct radeon_bo *fence = NULL;
 113             radeon_bo_reference(&fence, bo->u.slab.fences[0]);
 114             pipe_mutex_unlock(bo->rws->bo_fence_lock);
 115
 116             /* Wait without holding the fence lock. */
 117             radeon_real_bo_wait_idle(fence);
 118
 119             pipe_mutex_lock(bo->rws->bo_fence_lock);
 120             if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
 121                 radeon_bo_reference(&bo->u.slab.fences[0], NULL);
 122                 memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
 123                         (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
 124                 bo->u.slab.num_fences--;
 125             }
 126             radeon_bo_reference(&fence, NULL);
 127         }
 128         pipe_mutex_unlock(bo->rws->bo_fence_lock);
 129     }
 130 }
 131
 132 static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
 133                            enum radeon_bo_usage usage)
 134 {
 135     struct radeon_bo *bo = radeon_bo(_buf);
 136     int64_t abs_timeout;
 137
 138     /* No timeout. Just query. */
 139     if (timeout == 0)
 140         return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
 141
 142     abs_timeout = os_time_get_absolute_timeout(timeout);
 143
 144     /* Wait if any ioctl is being submitted with this buffer. */
 145     if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
 146         return false;
 147
 148     /* Infinite timeout. */
 149     if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
 150         radeon_bo_wait_idle(bo);
 151         return true;
 152     }
 153
 154     /* Other timeouts need to be emulated with a loop. */
 155     while (radeon_bo_is_busy(bo)) {
 156        if (os_time_get_nano() >= abs_timeout)
 157           return false;
 158        os_time_sleep(10);
 159     }
 160
 161     return true;
 162 }
 163
 164 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
 165 {
 166     /* Zero domains the driver doesn't understand. */
 167     domain &= RADEON_DOMAIN_VRAM_GTT;
 168
 169     /* If no domain is set, we must set something... */
 170     if (!domain)
 171         domain = RADEON_DOMAIN_VRAM_GTT;
 172
 173     return domain;
 174 }
 175
 176 static enum radeon_bo_domain radeon_bo_get_initial_domain(
 177                 struct pb_buffer *buf)
 178 {
 179     struct radeon_bo *bo = (struct radeon_bo*)buf;
 180     struct drm_radeon_gem_op args;
 181
 182     if (bo->rws->info.drm_minor < 38)
 183         return RADEON_DOMAIN_VRAM_GTT;
 184
 185     memset(&args, 0, sizeof(args));
 186     args.handle = bo->handle;
 187     args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
 188
 189     drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
 190                         &args, sizeof(args));
 191
 192     /* GEM domains and winsys domains are defined the same. */
 193     return get_valid_domain(args.value);
 194 }
 195
 196 static uint64_t radeon_bomgr_find_va(struct radeon_drm_winsys *rws,
 197                                      uint64_t size, uint64_t alignment)
 198 {
 199     struct radeon_bo_va_hole *hole, *n;
 200     uint64_t offset = 0, waste = 0;
 201
 202     /* All VM address space holes will implicitly start aligned to the
 203      * size alignment, so we don't need to sanitize the alignment here
 204      */
 205     size = align(size, rws->info.gart_page_size);
 206
 207     pipe_mutex_lock(rws->bo_va_mutex);
 208     /* first look for a hole */
 209     LIST_FOR_EACH_ENTRY_SAFE(hole, n, &rws->va_holes, list) {
 210         offset = hole->offset;
 211         waste = offset % alignment;
 212         waste = waste ? alignment - waste : 0;
 213         offset += waste;
 214         if (offset >= (hole->offset + hole->size)) {
 215             continue;
 216         }
 217         if (!waste && hole->size == size) {
 218             offset = hole->offset;
 219             list_del(&hole->list);
 220             FREE(hole);
 221             pipe_mutex_unlock(rws->bo_va_mutex);
 222             return offset;
 223         }
 224         if ((hole->size - waste) > size) {
 225             if (waste) {
 226                 n = CALLOC_STRUCT(radeon_bo_va_hole);
 227                 n->size = waste;
 228                 n->offset = hole->offset;
 229                 list_add(&n->list, &hole->list);
 230             }
 231             hole->size -= (size + waste);
 232             hole->offset += size + waste;
 233             pipe_mutex_unlock(rws->bo_va_mutex);
 234             return offset;
 235         }
 236         if ((hole->size - waste) == size) {
 237             hole->size = waste;
 238             pipe_mutex_unlock(rws->bo_va_mutex);
 239             return offset;
 240         }
 241     }
 242
 243     offset = rws->va_offset;
 244     waste = offset % alignment;
 245     waste = waste ? alignment - waste : 0;
 246     if (waste) {
 247         n = CALLOC_STRUCT(radeon_bo_va_hole);
 248         n->size = waste;
 249         n->offset = offset;
 250         list_add(&n->list, &rws->va_holes);
 251     }
 252     offset += waste;
 253     rws->va_offset += size + waste;
 254     pipe_mutex_unlock(rws->bo_va_mutex);
 255     return offset;
 256 }
 257
 258 static void radeon_bomgr_free_va(struct radeon_drm_winsys *rws,
 259                                  uint64_t va, uint64_t size)
 260 {
 261     struct radeon_bo_va_hole *hole;
 262
 263     size = align(size, rws->info.gart_page_size);
 264
 265     pipe_mutex_lock(rws->bo_va_mutex);
 266     if ((va + size) == rws->va_offset) {
 267         rws->va_offset = va;
 268         /* Delete uppermost hole if it reaches the new top */
 269         if (!LIST_IS_EMPTY(&rws->va_holes)) {
 270             hole = container_of(rws->va_holes.next, hole, list);
 271             if ((hole->offset + hole->size) == va) {
 272                 rws->va_offset = hole->offset;
 273                 list_del(&hole->list);
 274                 FREE(hole);
 275             }
 276         }
 277     } else {
 278         struct radeon_bo_va_hole *next;
 279
 280         hole = container_of(&rws->va_holes, hole, list);
 281         LIST_FOR_EACH_ENTRY(next, &rws->va_holes, list) {
 282             if (next->offset < va)
 283                 break;
 284             hole = next;
 285         }
 286
 287         if (&hole->list != &rws->va_holes) {
 288             /* Grow upper hole if it's adjacent */
 289             if (hole->offset == (va + size)) {
 290                 hole->offset = va;
 291                 hole->size += size;
 292                 /* Merge lower hole if it's adjacent */
 293                 if (next != hole && &next->list != &rws->va_holes &&
 294                     (next->offset + next->size) == va) {
 295                     next->size += hole->size;
 296                     list_del(&hole->list);
 297                     FREE(hole);
 298                 }
 299                 goto out;
 300             }
 301         }
 302
 303         /* Grow lower hole if it's adjacent */
 304         if (next != hole && &next->list != &rws->va_holes &&
 305             (next->offset + next->size) == va) {
 306             next->size += size;
 307             goto out;
 308         }
 309
 310         /* FIXME on allocation failure we just lose virtual address space
 311          * maybe print a warning
 312          */
 313         next = CALLOC_STRUCT(radeon_bo_va_hole);
 314         if (next) {
 315             next->size = size;
 316             next->offset = va;
 317             list_add(&next->list, &hole->list);
 318         }
 319     }
 320 out:
 321     pipe_mutex_unlock(rws->bo_va_mutex);
 322 }
 323
 324 void radeon_bo_destroy(struct pb_buffer *_buf)
 325 {
 326     struct radeon_bo *bo = radeon_bo(_buf);
 327     struct radeon_drm_winsys *rws = bo->rws;
 328     struct drm_gem_close args;
 329
 330     assert(bo->handle && "must not be called for slab entries");
 331
 332     memset(&args, 0, sizeof(args));
 333
 334     pipe_mutex_lock(rws->bo_handles_mutex);
 335     util_hash_table_remove(rws->bo_handles, (void*)(uintptr_t)bo->handle);
 336     if (bo->flink_name) {
 337         util_hash_table_remove(rws->bo_names,
 338                                (void*)(uintptr_t)bo->flink_name);
 339     }
 340     pipe_mutex_unlock(rws->bo_handles_mutex);
 341
 342     if (bo->u.real.ptr)
 343         os_munmap(bo->u.real.ptr, bo->base.size);
 344
 345     if (rws->info.has_virtual_memory) {
 346         if (rws->va_unmap_working) {
 347             struct drm_radeon_gem_va va;
 348
 349             va.handle = bo->handle;
 350             va.vm_id = 0;
 351             va.operation = RADEON_VA_UNMAP;
 352             va.flags = RADEON_VM_PAGE_READABLE |
 353                        RADEON_VM_PAGE_WRITEABLE |
 354                        RADEON_VM_PAGE_SNOOPED;
 355             va.offset = bo->va;
 356
 357             if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
 358                                     sizeof(va)) != 0 &&
 359                 va.operation == RADEON_VA_RESULT_ERROR) {
 360                 fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
 361                 fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
 362                 fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
 363             }
 364         }
 365
 366         radeon_bomgr_free_va(rws, bo->va, bo->base.size);
 367     }
 368
 369     /* Close object. */
 370     args.handle = bo->handle;
 371     drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
 372
 373     pipe_mutex_destroy(bo->u.real.map_mutex);
 374
 375     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 376         rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
 377     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
 378         rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
 379
 380     if (bo->u.real.map_count >= 1) {
 381         if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 382             bo->rws->mapped_vram -= bo->base.size;
 383         else
 384             bo->rws->mapped_gtt -= bo->base.size;
 385         bo->rws->num_mapped_buffers--;
 386     }
 387
 388     FREE(bo);
 389 }
 390
 391 static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
 392 {
 393    struct radeon_bo *bo = radeon_bo(_buf);
 394
 395     assert(bo->handle && "must not be called for slab entries");
 396
 397    if (bo->u.real.use_reusable_pool)
 398       pb_cache_add_buffer(&bo->u.real.cache_entry);
 399    else
 400       radeon_bo_destroy(_buf);
 401 }
 402
 403 void *radeon_bo_do_map(struct radeon_bo *bo)
 404 {
 405     struct drm_radeon_gem_mmap args = {0};
 406     void *ptr;
 407     unsigned offset;
 408
 409     /* If the buffer is created from user memory, return the user pointer. */
 410     if (bo->user_ptr)
 411         return bo->user_ptr;
 412
 413     if (bo->handle) {
 414         offset = 0;
 415     } else {
 416         offset = bo->va - bo->u.slab.real->va;
 417         bo = bo->u.slab.real;
 418     }
 419
 420     /* Map the buffer. */
 421     pipe_mutex_lock(bo->u.real.map_mutex);
 422     /* Return the pointer if it's already mapped. */
 423     if (bo->u.real.ptr) {
 424         bo->u.real.map_count++;
 425         pipe_mutex_unlock(bo->u.real.map_mutex);
 426         return (uint8_t*)bo->u.real.ptr + offset;
 427     }
 428     args.handle = bo->handle;
 429     args.offset = 0;
 430     args.size = (uint64_t)bo->base.size;
 431     if (drmCommandWriteRead(bo->rws->fd,
 432                             DRM_RADEON_GEM_MMAP,
 433                             &args,
 434                             sizeof(args))) {
 435         pipe_mutex_unlock(bo->u.real.map_mutex);
 436         fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
 437                 bo, bo->handle);
 438         return NULL;
 439     }
 440
 441     ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
 442                bo->rws->fd, args.addr_ptr);
 443     if (ptr == MAP_FAILED) {
 444         /* Clear the cache and try again. */
 445         pb_cache_release_all_buffers(&bo->rws->bo_cache);
 446
 447         ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
 448                       bo->rws->fd, args.addr_ptr);
 449         if (ptr == MAP_FAILED) {
 450             pipe_mutex_unlock(bo->u.real.map_mutex);
 451             fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
 452             return NULL;
 453         }
 454     }
 455     bo->u.real.ptr = ptr;
 456     bo->u.real.map_count = 1;
 457
 458     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 459        bo->rws->mapped_vram += bo->base.size;
 460     else
 461        bo->rws->mapped_gtt += bo->base.size;
 462     bo->rws->num_mapped_buffers++;
 463
 464     pipe_mutex_unlock(bo->u.real.map_mutex);
 465     return (uint8_t*)bo->u.real.ptr + offset;
 466 }
 467
 468 static void *radeon_bo_map(struct pb_buffer *buf,
 469                            struct radeon_winsys_cs *rcs,
 470                            enum pipe_transfer_usage usage)
 471 {
 472     struct radeon_bo *bo = (struct radeon_bo*)buf;
 473     struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
 474
 475     /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
 476     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 477         /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
 478         if (usage & PIPE_TRANSFER_DONTBLOCK) {
 479             if (!(usage & PIPE_TRANSFER_WRITE)) {
 480                 /* Mapping for read.
 481                  *
 482                  * Since we are mapping for read, we don't need to wait
 483                  * if the GPU is using the buffer for read too
 484                  * (neither one is changing it).
 485                  *
 486                  * Only check whether the buffer is being used for write. */
 487                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
 488                     cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
 489                     return NULL;
 490                 }
 491
 492                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
 493                                     RADEON_USAGE_WRITE)) {
 494                     return NULL;
 495                 }
 496             } else {
 497                 if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
 498                     cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
 499                     return NULL;
 500                 }
 501
 502                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
 503                                     RADEON_USAGE_READWRITE)) {
 504                     return NULL;
 505                 }
 506             }
 507         } else {
 508             uint64_t time = os_time_get_nano();
 509
 510             if (!(usage & PIPE_TRANSFER_WRITE)) {
 511                 /* Mapping for read.
 512                  *
 513                  * Since we are mapping for read, we don't need to wait
 514                  * if the GPU is using the buffer for read too
 515                  * (neither one is changing it).
 516                  *
 517                  * Only check whether the buffer is being used for write. */
 518                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
 519                     cs->flush_cs(cs->flush_data, 0, NULL);
 520                 }
 521                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
 522                                RADEON_USAGE_WRITE);
 523             } else {
 524                 /* Mapping for write. */
 525                 if (cs) {
 526                     if (radeon_bo_is_referenced_by_cs(cs, bo)) {
 527                         cs->flush_cs(cs->flush_data, 0, NULL);
 528                     } else {
 529                         /* Try to avoid busy-waiting in radeon_bo_wait. */
 530                         if (p_atomic_read(&bo->num_active_ioctls))
 531                             radeon_drm_cs_sync_flush(rcs);
 532                     }
 533                 }
 534
 535                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
 536                                RADEON_USAGE_READWRITE);
 537             }
 538
 539             bo->rws->buffer_wait_time += os_time_get_nano() - time;
 540         }
 541     }
 542
 543     return radeon_bo_do_map(bo);
 544 }
 545
 546 static void radeon_bo_unmap(struct pb_buffer *_buf)
 547 {
 548     struct radeon_bo *bo = (struct radeon_bo*)_buf;
 549
 550     if (bo->user_ptr)
 551         return;
 552
 553     if (!bo->handle)
 554         bo = bo->u.slab.real;
 555
 556     pipe_mutex_lock(bo->u.real.map_mutex);
 557     if (!bo->u.real.ptr) {
 558         pipe_mutex_unlock(bo->u.real.map_mutex);
 559         return; /* it's not been mapped */
 560     }
 561
 562     assert(bo->u.real.map_count);
 563     if (--bo->u.real.map_count) {
 564         pipe_mutex_unlock(bo->u.real.map_mutex);
 565         return; /* it's been mapped multiple times */
 566     }
 567
 568     os_munmap(bo->u.real.ptr, bo->base.size);
 569     bo->u.real.ptr = NULL;
 570
 571     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 572        bo->rws->mapped_vram -= bo->base.size;
 573     else
 574        bo->rws->mapped_gtt -= bo->base.size;
 575     bo->rws->num_mapped_buffers--;
 576
 577     pipe_mutex_unlock(bo->u.real.map_mutex);
 578 }
 579
 580 static const struct pb_vtbl radeon_bo_vtbl = {
 581     radeon_bo_destroy_or_cache
 582     /* other functions are never called */
 583 };
 584
 585 #ifndef RADEON_GEM_GTT_WC
 586 #define RADEON_GEM_GTT_WC               (1 << 2)
 587 #endif
 588 #ifndef RADEON_GEM_CPU_ACCESS
 589 /* BO is expected to be accessed by the CPU */
 590 #define RADEON_GEM_CPU_ACCESS           (1 << 3)
 591 #endif
 592 #ifndef RADEON_GEM_NO_CPU_ACCESS
 593 /* CPU access is not expected to work for this BO */
 594 #define RADEON_GEM_NO_CPU_ACCESS        (1 << 4)
 595 #endif
 596
 597 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
 598                                           unsigned size, unsigned alignment,
 599                                           unsigned usage,
 600                                           unsigned initial_domains,
 601                                           unsigned flags,
 602                                           unsigned pb_cache_bucket)
 603 {
 604     struct radeon_bo *bo;
 605     struct drm_radeon_gem_create args;
 606     int r;
 607
 608     memset(&args, 0, sizeof(args));
 609
 610     assert(initial_domains);
 611     assert((initial_domains &
 612             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
 613
 614     args.size = size;
 615     args.alignment = alignment;
 616     args.initial_domain = initial_domains;
 617     args.flags = 0;
 618
 619     if (flags & RADEON_FLAG_GTT_WC)
 620         args.flags |= RADEON_GEM_GTT_WC;
 621     if (flags & RADEON_FLAG_CPU_ACCESS)
 622         args.flags |= RADEON_GEM_CPU_ACCESS;
 623     if (flags & RADEON_FLAG_NO_CPU_ACCESS)
 624         args.flags |= RADEON_GEM_NO_CPU_ACCESS;
 625
 626     if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
 627                             &args, sizeof(args))) {
 628         fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
 629         fprintf(stderr, "radeon:    size      : %u bytes\n", size);
 630         fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
 631         fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
 632         fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
 633         return NULL;
 634     }
 635
 636     assert(args.handle != 0);
 637
 638     bo = CALLOC_STRUCT(radeon_bo);
 639     if (!bo)
 640         return NULL;
 641
 642     pipe_reference_init(&bo->base.reference, 1);
 643     bo->base.alignment = alignment;
 644     bo->base.usage = usage;
 645     bo->base.size = size;
 646     bo->base.vtbl = &radeon_bo_vtbl;
 647     bo->rws = rws;
 648     bo->handle = args.handle;
 649     bo->va = 0;
 650     bo->initial_domain = initial_domains;
 651     bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
 652     pipe_mutex_init(bo->u.real.map_mutex);
 653     pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
 654                         pb_cache_bucket);
 655
 656     if (rws->info.has_virtual_memory) {
 657         struct drm_radeon_gem_va va;
 658         unsigned va_gap_size;
 659
 660         va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
 661         bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);
 662
 663         va.handle = bo->handle;
 664         va.vm_id = 0;
 665         va.operation = RADEON_VA_MAP;
 666         va.flags = RADEON_VM_PAGE_READABLE |
 667                    RADEON_VM_PAGE_WRITEABLE |
 668                    RADEON_VM_PAGE_SNOOPED;
 669         va.offset = bo->va;
 670         r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
 671         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
 672             fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
 673             fprintf(stderr, "radeon:    size      : %d bytes\n", size);
 674             fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
 675             fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
 676             fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
 677             radeon_bo_destroy(&bo->base);
 678             return NULL;
 679         }
 680         pipe_mutex_lock(rws->bo_handles_mutex);
 681         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
 682             struct pb_buffer *b = &bo->base;
 683             struct radeon_bo *old_bo =
 684                 util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
 685
 686             pipe_mutex_unlock(rws->bo_handles_mutex);
 687             pb_reference(&b, &old_bo->base);
 688             return radeon_bo(b);
 689         }
 690
 691         util_hash_table_set(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
 692         pipe_mutex_unlock(rws->bo_handles_mutex);
 693     }
 694
 695     if (initial_domains & RADEON_DOMAIN_VRAM)
 696         rws->allocated_vram += align(size, rws->info.gart_page_size);
 697     else if (initial_domains & RADEON_DOMAIN_GTT)
 698         rws->allocated_gtt += align(size, rws->info.gart_page_size);
 699
 700     return bo;
 701 }
 702
 703 bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
 704 {
 705    struct radeon_bo *bo = radeon_bo(_buf);
 706
 707    if (radeon_bo_is_referenced_by_any_cs(bo))
 708       return false;
 709
 710    return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
 711 }
 712
 713 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
 714 {
 715     struct radeon_bo *bo = NULL; /* fix container_of */
 716     bo = container_of(entry, bo, u.slab.entry);
 717
 718     return radeon_bo_can_reclaim(&bo->base);
 719 }
 720
 721 static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
 722 {
 723     struct radeon_bo *bo = radeon_bo(_buf);
 724
 725     assert(!bo->handle);
 726
 727     pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
 728 }
 729
 730 static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
 731     radeon_bo_slab_destroy
 732     /* other functions are never called */
 733 };
 734
 735 struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
 736                                      unsigned entry_size,
 737                                      unsigned group_index)
 738 {
 739     struct radeon_drm_winsys *ws = priv;
 740     struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
 741     enum radeon_bo_domain domains;
 742     enum radeon_bo_flag flags = 0;
 743     unsigned base_hash;
 744
 745     if (!slab)
 746         return NULL;
 747
 748     if (heap & 1)
 749         flags |= RADEON_FLAG_GTT_WC;
 750     if (heap & 2)
 751         flags |= RADEON_FLAG_CPU_ACCESS;
 752
 753     switch (heap >> 2) {
 754     case 0:
 755         domains = RADEON_DOMAIN_VRAM;
 756         break;
 757     default:
 758     case 1:
 759         domains = RADEON_DOMAIN_VRAM_GTT;
 760         break;
 761     case 2:
 762         domains = RADEON_DOMAIN_GTT;
 763         break;
 764     }
 765
 766     slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
 767                                                      64 * 1024, 64 * 1024,
 768                                                      domains, flags));
 769     if (!slab->buffer)
 770         goto fail;
 771
 772     assert(slab->buffer->handle);
 773
 774     slab->base.num_entries = slab->buffer->base.size / entry_size;
 775     slab->base.num_free = slab->base.num_entries;
 776     slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
 777     if (!slab->entries)
 778         goto fail_buffer;
 779
 780     LIST_INITHEAD(&slab->base.free);
 781
 782     base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
 783
 784     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
 785         struct radeon_bo *bo = &slab->entries[i];
 786
 787         bo->base.alignment = entry_size;
 788         bo->base.usage = slab->buffer->base.usage;
 789         bo->base.size = entry_size;
 790         bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
 791         bo->rws = ws;
 792         bo->va = slab->buffer->va + i * entry_size;
 793         bo->initial_domain = domains;
 794         bo->hash = base_hash + i;
 795         bo->u.slab.entry.slab = &slab->base;
 796         bo->u.slab.entry.group_index = group_index;
 797         bo->u.slab.real = slab->buffer;
 798
 799         LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
 800     }
 801
 802     return &slab->base;
 803
 804 fail_buffer:
 805     radeon_bo_reference(&slab->buffer, NULL);
 806 fail:
 807     FREE(slab);
 808     return NULL;
 809 }
 810
 811 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
 812 {
 813     struct radeon_slab *slab = (struct radeon_slab *)pslab;
 814
 815     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
 816         struct radeon_bo *bo = &slab->entries[i];
 817         for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
 818             radeon_bo_reference(&bo->u.slab.fences[j], NULL);
 819         FREE(bo->u.slab.fences);
 820     }
 821
 822     FREE(slab->entries);
 823     radeon_bo_reference(&slab->buffer, NULL);
 824     FREE(slab);
 825 }
 826
 827 static unsigned eg_tile_split(unsigned tile_split)
 828 {
 829     switch (tile_split) {
 830     case 0:     tile_split = 64;    break;
 831     case 1:     tile_split = 128;   break;
 832     case 2:     tile_split = 256;   break;
 833     case 3:     tile_split = 512;   break;
 834     default:
 835     case 4:     tile_split = 1024;  break;
 836     case 5:     tile_split = 2048;  break;
 837     case 6:     tile_split = 4096;  break;
 838     }
 839     return tile_split;
 840 }
 841
 842 static unsigned eg_tile_split_rev(unsigned eg_tile_split)
 843 {
 844     switch (eg_tile_split) {
 845     case 64:    return 0;
 846     case 128:   return 1;
 847     case 256:   return 2;
 848     case 512:   return 3;
 849     default:
 850     case 1024:  return 4;
 851     case 2048:  return 5;
 852     case 4096:  return 6;
 853     }
 854 }
 855
 856 static void radeon_bo_get_metadata(struct pb_buffer *_buf,
 857                                    struct radeon_bo_metadata *md)
 858 {
 859     struct radeon_bo *bo = radeon_bo(_buf);
 860     struct drm_radeon_gem_set_tiling args;
 861
 862     assert(bo->handle && "must not be called for slab entries");
 863
 864     memset(&args, 0, sizeof(args));
 865
 866     args.handle = bo->handle;
 867
 868     drmCommandWriteRead(bo->rws->fd,
 869                         DRM_RADEON_GEM_GET_TILING,
 870                         &args,
 871                         sizeof(args));
 872
 873     md->microtile = RADEON_LAYOUT_LINEAR;
 874     md->macrotile = RADEON_LAYOUT_LINEAR;
 875     if (args.tiling_flags & RADEON_TILING_MICRO)
 876         md->microtile = RADEON_LAYOUT_TILED;
 877     else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
 878         md->microtile = RADEON_LAYOUT_SQUARETILED;
 879
 880     if (args.tiling_flags & RADEON_TILING_MACRO)
 881         md->macrotile = RADEON_LAYOUT_TILED;
 882
 883     md->bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
 884     md->bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
 885     md->tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
 886     md->mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
 887     md->tile_split = eg_tile_split(md->tile_split);
 888     md->scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
 889 }
 890
 891 static void radeon_bo_set_metadata(struct pb_buffer *_buf,
 892                                    struct radeon_bo_metadata *md)
 893 {
 894     struct radeon_bo *bo = radeon_bo(_buf);
 895     struct drm_radeon_gem_set_tiling args;
 896
 897     assert(bo->handle && "must not be called for slab entries");
 898
 899     memset(&args, 0, sizeof(args));
 900
 901     os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
 902
 903     if (md->microtile == RADEON_LAYOUT_TILED)
 904         args.tiling_flags |= RADEON_TILING_MICRO;
 905     else if (md->microtile == RADEON_LAYOUT_SQUARETILED)
 906         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
 907
 908     if (md->macrotile == RADEON_LAYOUT_TILED)
 909         args.tiling_flags |= RADEON_TILING_MACRO;
 910
 911     args.tiling_flags |= (md->bankw & RADEON_TILING_EG_BANKW_MASK) <<
 912         RADEON_TILING_EG_BANKW_SHIFT;
 913     args.tiling_flags |= (md->bankh & RADEON_TILING_EG_BANKH_MASK) <<
 914         RADEON_TILING_EG_BANKH_SHIFT;
 915     if (md->tile_split) {
 916         args.tiling_flags |= (eg_tile_split_rev(md->tile_split) &
 917                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
 918             RADEON_TILING_EG_TILE_SPLIT_SHIFT;
 919     }
 920     args.tiling_flags |= (md->mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
 921         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
 922
 923     if (bo->rws->gen >= DRV_SI && !md->scanout)
 924         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
 925
 926     args.handle = bo->handle;
 927     args.pitch = md->stride;
 928
 929     drmCommandWriteRead(bo->rws->fd,
 930                         DRM_RADEON_GEM_SET_TILING,
 931                         &args,
 932                         sizeof(args));
 933 }
 934
 935 static struct pb_buffer *
 936 radeon_winsys_bo_create(struct radeon_winsys *rws,
 937                         uint64_t size,
 938                         unsigned alignment,
 939                         enum radeon_bo_domain domain,
 940                         enum radeon_bo_flag flags)
 941 {
 942     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
 943     struct radeon_bo *bo;
 944     unsigned usage = 0, pb_cache_bucket;
 945
 946     /* Only 32-bit sizes are supported. */
 947     if (size > UINT_MAX)
 948         return NULL;
 949
 950     /* Sub-allocate small buffers from slabs. */
 951     if (!(flags & RADEON_FLAG_HANDLE) &&
 952         size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
 953         ws->info.has_virtual_memory &&
 954         alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
 955         struct pb_slab_entry *entry;
 956         unsigned heap = 0;
 957
 958         if (flags & RADEON_FLAG_GTT_WC)
 959             heap |= 1;
 960         if (flags & RADEON_FLAG_CPU_ACCESS)
 961             heap |= 2;
 962         if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
 963             goto no_slab;
 964
 965         switch (domain) {
 966         case RADEON_DOMAIN_VRAM:
 967             heap |= 0 * 4;
 968             break;
 969         case RADEON_DOMAIN_VRAM_GTT:
 970             heap |= 1 * 4;
 971             break;
 972         case RADEON_DOMAIN_GTT:
 973             heap |= 2 * 4;
 974             break;
 975         default:
 976             goto no_slab;
 977         }
 978
 979         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
 980         if (!entry) {
 981             /* Clear the cache and try again. */
 982             pb_cache_release_all_buffers(&ws->bo_cache);
 983
 984             entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
 985         }
 986         if (!entry)
 987             return NULL;
 988
 989         bo = NULL;
 990         bo = container_of(entry, bo, u.slab.entry);
 991
 992         pipe_reference_init(&bo->base.reference, 1);
 993
 994         return &bo->base;
 995     }
 996 no_slab:
 997
 998     /* This flag is irrelevant for the cache. */
 999     flags &= ~RADEON_FLAG_HANDLE;
1000
1001     /* Align size to page size. This is the minimum alignment for normal
1002      * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
1003      * like constant/uniform buffers, can benefit from better and more reuse.
1004      */
1005     size = align(size, ws->info.gart_page_size);
1006     alignment = align(alignment, ws->info.gart_page_size);
1007
1008     /* Only set one usage bit each for domains and flags, or the cache manager
1009      * might consider different sets of domains / flags compatible
1010      */
1011     if (domain == RADEON_DOMAIN_VRAM_GTT)
1012         usage = 1 << 2;
1013     else
1014         usage = (unsigned)domain >> 1;
1015     assert(flags < sizeof(usage) * 8 - 3);
1016     usage |= 1 << (flags + 3);
1017
1018     /* Determine the pb_cache bucket for minimizing pb_cache misses. */
1019     pb_cache_bucket = 0;
1020     if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
1021        pb_cache_bucket += 1;
1022     if (flags == RADEON_FLAG_GTT_WC) /* WC */
1023        pb_cache_bucket += 2;
1024     assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
1025
1026     bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
1027                                            usage, pb_cache_bucket));
1028     if (bo)
1029         return &bo->base;
1030
1031     bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
1032                           pb_cache_bucket);
1033     if (!bo) {
1034         /* Clear the cache and try again. */
1035         pb_slabs_reclaim(&ws->bo_slabs);
1036         pb_cache_release_all_buffers(&ws->bo_cache);
1037         bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
1038                               pb_cache_bucket);
1039         if (!bo)
1040             return NULL;
1041     }
1042
1043     bo->u.real.use_reusable_pool = true;
1044
1045     pipe_mutex_lock(ws->bo_handles_mutex);
1046     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1047     pipe_mutex_unlock(ws->bo_handles_mutex);
1048
1049     return &bo->base;
1050 }
1051
1052 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1053                                                    void *pointer, uint64_t size)
1054 {
1055     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1056     struct drm_radeon_gem_userptr args;
1057     struct radeon_bo *bo;
1058     int r;
1059
1060     bo = CALLOC_STRUCT(radeon_bo);
1061     if (!bo)
1062         return NULL;
1063
1064     memset(&args, 0, sizeof(args));
1065     args.addr = (uintptr_t)pointer;
1066     args.size = align(size, ws->info.gart_page_size);
1067     args.flags = RADEON_GEM_USERPTR_ANONONLY |
1068         RADEON_GEM_USERPTR_VALIDATE |
1069         RADEON_GEM_USERPTR_REGISTER;
1070     if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1071                             &args, sizeof(args))) {
1072         FREE(bo);
1073         return NULL;
1074     }
1075
1076     assert(args.handle != 0);
1077
1078     pipe_mutex_lock(ws->bo_handles_mutex);
1079
1080     /* Initialize it. */
1081     pipe_reference_init(&bo->base.reference, 1);
1082     bo->handle = args.handle;
1083     bo->base.alignment = 0;
1084     bo->base.size = size;
1085     bo->base.vtbl = &radeon_bo_vtbl;
1086     bo->rws = ws;
1087     bo->user_ptr = pointer;
1088     bo->va = 0;
1089     bo->initial_domain = RADEON_DOMAIN_GTT;
1090     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1091     pipe_mutex_init(bo->u.real.map_mutex);
1092
1093     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1094
1095     pipe_mutex_unlock(ws->bo_handles_mutex);
1096
1097     if (ws->info.has_virtual_memory) {
1098         struct drm_radeon_gem_va va;
1099
1100         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1101
1102         va.handle = bo->handle;
1103         va.operation = RADEON_VA_MAP;
1104         va.vm_id = 0;
1105         va.offset = bo->va;
1106         va.flags = RADEON_VM_PAGE_READABLE |
1107                    RADEON_VM_PAGE_WRITEABLE |
1108                    RADEON_VM_PAGE_SNOOPED;
1109         va.offset = bo->va;
1110         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1111         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1112             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1113             radeon_bo_destroy(&bo->base);
1114             return NULL;
1115         }
1116         pipe_mutex_lock(ws->bo_handles_mutex);
1117         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1118             struct pb_buffer *b = &bo->base;
1119             struct radeon_bo *old_bo =
1120                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1121
1122             pipe_mutex_unlock(ws->bo_handles_mutex);
1123             pb_reference(&b, &old_bo->base);
1124             return b;
1125         }
1126
1127         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1128         pipe_mutex_unlock(ws->bo_handles_mutex);
1129     }
1130
1131     ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1132
1133     return (struct pb_buffer*)bo;
1134 }
1135
1136 static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
1137                                                       struct winsys_handle *whandle,
1138                                                       unsigned *stride,
1139                                                       unsigned *offset)
1140 {
1141     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1142     struct radeon_bo *bo;
1143     int r;
1144     unsigned handle;
1145     uint64_t size = 0;
1146
1147     if (!offset && whandle->offset != 0) {
1148         fprintf(stderr, "attempt to import unsupported winsys offset %u\n",
1149                 whandle->offset);
1150         return NULL;
1151     }
1152
1153     /* We must maintain a list of pairs <handle, bo>, so that we always return
1154      * the same BO for one particular handle. If we didn't do that and created
1155      * more than one BO for the same handle and then relocated them in a CS,
1156      * we would hit a deadlock in the kernel.
1157      *
1158      * The list of pairs is guarded by a mutex, of course. */
1159     pipe_mutex_lock(ws->bo_handles_mutex);
1160
1161     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1162         /* First check if there already is an existing bo for the handle. */
1163         bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
1164     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1165         /* We must first get the GEM handle, as fds are unreliable keys */
1166         r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
1167         if (r)
1168             goto fail;
1169         bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
1170     } else {
1171         /* Unknown handle type */
1172         goto fail;
1173     }
1174
1175     if (bo) {
1176         /* Increase the refcount. */
1177         struct pb_buffer *b = NULL;
1178         pb_reference(&b, &bo->base);
1179         goto done;
1180     }
1181
1182     /* There isn't, create a new one. */
1183     bo = CALLOC_STRUCT(radeon_bo);
1184     if (!bo) {
1185         goto fail;
1186     }
1187
1188     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1189         struct drm_gem_open open_arg = {};
1190         memset(&open_arg, 0, sizeof(open_arg));
1191         /* Open the BO. */
1192         open_arg.name = whandle->handle;
1193         if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
1194             FREE(bo);
1195             goto fail;
1196         }
1197         handle = open_arg.handle;
1198         size = open_arg.size;
1199         bo->flink_name = whandle->handle;
1200     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1201         size = lseek(whandle->handle, 0, SEEK_END);
1202         /*
1203          * Could check errno to determine whether the kernel is new enough, but
1204          * it doesn't really matter why this failed, just that it failed.
1205          */
1206         if (size == (off_t)-1) {
1207             FREE(bo);
1208             goto fail;
1209         }
1210         lseek(whandle->handle, 0, SEEK_SET);
1211     }
1212
1213     assert(handle != 0);
1214
1215     bo->handle = handle;
1216
1217     /* Initialize it. */
1218     pipe_reference_init(&bo->base.reference, 1);
1219     bo->base.alignment = 0;
1220     bo->base.size = (unsigned) size;
1221     bo->base.vtbl = &radeon_bo_vtbl;
1222     bo->rws = ws;
1223     bo->va = 0;
1224     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1225     pipe_mutex_init(bo->u.real.map_mutex);
1226
1227     if (bo->flink_name)
1228         util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1229
1230     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1231
1232 done:
1233     pipe_mutex_unlock(ws->bo_handles_mutex);
1234
1235     if (stride)
1236         *stride = whandle->stride;
1237     if (offset)
1238         *offset = whandle->offset;
1239
1240     if (ws->info.has_virtual_memory && !bo->va) {
1241         struct drm_radeon_gem_va va;
1242
1243         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1244
1245         va.handle = bo->handle;
1246         va.operation = RADEON_VA_MAP;
1247         va.vm_id = 0;
1248         va.offset = bo->va;
1249         va.flags = RADEON_VM_PAGE_READABLE |
1250                    RADEON_VM_PAGE_WRITEABLE |
1251                    RADEON_VM_PAGE_SNOOPED;
1252         va.offset = bo->va;
1253         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1254         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1255             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1256             radeon_bo_destroy(&bo->base);
1257             return NULL;
1258         }
1259         pipe_mutex_lock(ws->bo_handles_mutex);
1260         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1261             struct pb_buffer *b = &bo->base;
1262             struct radeon_bo *old_bo =
1263                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1264
1265             pipe_mutex_unlock(ws->bo_handles_mutex);
1266             pb_reference(&b, &old_bo->base);
1267             return b;
1268         }
1269
1270         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1271         pipe_mutex_unlock(ws->bo_handles_mutex);
1272     }
1273
1274     bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
1275
1276     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1277         ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
1278     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1279         ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1280
1281     return (struct pb_buffer*)bo;
1282
1283 fail:
1284     pipe_mutex_unlock(ws->bo_handles_mutex);
1285     return NULL;
1286 }
1287
1288 static bool radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
1289                                         unsigned stride, unsigned offset,
1290                                         unsigned slice_size,
1291                                         struct winsys_handle *whandle)
1292 {
1293     struct drm_gem_flink flink;
1294     struct radeon_bo *bo = radeon_bo(buffer);
1295     struct radeon_drm_winsys *ws = bo->rws;
1296
1297     if (!bo->handle) {
1298         offset += bo->va - bo->u.slab.real->va;
1299         bo = bo->u.slab.real;
1300     }
1301
1302     memset(&flink, 0, sizeof(flink));
1303
1304     bo->u.real.use_reusable_pool = false;
1305
1306     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1307         if (!bo->flink_name) {
1308             flink.handle = bo->handle;
1309
1310             if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1311                 return false;
1312             }
1313
1314             bo->flink_name = flink.name;
1315
1316             pipe_mutex_lock(ws->bo_handles_mutex);
1317             util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1318             pipe_mutex_unlock(ws->bo_handles_mutex);
1319         }
1320         whandle->handle = bo->flink_name;
1321     } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
1322         whandle->handle = bo->handle;
1323     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1324         if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1325             return false;
1326     }
1327
1328     whandle->stride = stride;
1329     whandle->offset = offset;
1330     whandle->offset += slice_size * whandle->layer;
1331
1332     return true;
1333 }
1334
1335 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
1336 {
1337    return ((struct radeon_bo*)buf)->user_ptr != NULL;
1338 }
1339
1340 static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
1341 {
1342     return ((struct radeon_bo*)buf)->va;
1343 }
1344
1345 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
1346 {
1347     struct radeon_bo *bo = radeon_bo(buf);
1348
1349     if (bo->handle)
1350         return 0;
1351
1352     return bo->va - bo->u.slab.real->va;
1353 }
1354
1355 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1356 {
1357     ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1358     ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1359     ws->base.buffer_map = radeon_bo_map;
1360     ws->base.buffer_unmap = radeon_bo_unmap;
1361     ws->base.buffer_wait = radeon_bo_wait;
1362     ws->base.buffer_create = radeon_winsys_bo_create;
1363     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1364     ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1365     ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1366     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1367     ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1368     ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1369     ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1370 }