winsys/radeon: add buffer_get_reloc_offset
src/gallium/winsys/radeon/drm/radeon_drm_bo.c
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "radeon_drm_cs.h"

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/simple_list.h"
#include "os/os_thread.h"
#include "os/os_mman.h"
#include "os/os_time.h"

#include "state_tracker/drm_driver.h"

#include <sys/ioctl.h>
#include <xf86drm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);

static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
    return (struct radeon_bo *)bo;
}

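/* A hole in the GEM virtual address space. The winsys keeps a list of free
 * VA ranges ("holes"), so that addresses released by destroyed buffers can
 * be handed out again by radeon_bomgr_find_va(). */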
struct radeon_bo_va_hole {
    struct list_head list;
    uint64_t offset;
    uint64_t size;
};

static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
    struct drm_radeon_gem_busy args = {0};

    args.handle = bo->handle;
    return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                               &args, sizeof(args)) != 0;
}

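/* A real BO (bo->handle != 0) is queried directly with the GEM_BUSY ioctl.
 * A slab entry has no kernel handle of its own: it is busy as long as any
 * of the fences it is associated with is still busy. Fences that have
 * already signalled are unreferenced along the way, so the fence list
 * keeps shrinking. */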
static bool radeon_bo_is_busy(struct radeon_bo *bo)
{
    unsigned num_idle;
    bool busy = false;

    if (bo->handle)
        return radeon_real_bo_is_busy(bo);

    pipe_mutex_lock(bo->rws->bo_fence_lock);
    for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
        if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
            busy = true;
            break;
        }
        radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
    }
    memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
            (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
    bo->u.slab.num_fences -= num_idle;
    pipe_mutex_unlock(bo->rws->bo_fence_lock);

    return busy;
}

static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
    struct drm_radeon_gem_wait_idle args = {0};

    args.handle = bo->handle;
    while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                           &args, sizeof(args)) == -EBUSY);
}

static void radeon_bo_wait_idle(struct radeon_bo *bo)
{
    if (bo->handle) {
        radeon_real_bo_wait_idle(bo);
    } else {
        pipe_mutex_lock(bo->rws->bo_fence_lock);
        while (bo->u.slab.num_fences) {
            struct radeon_bo *fence = NULL;
            radeon_bo_reference(&fence, bo->u.slab.fences[0]);
            pipe_mutex_unlock(bo->rws->bo_fence_lock);

            /* Wait without holding the fence lock. */
            radeon_real_bo_wait_idle(fence);

            pipe_mutex_lock(bo->rws->bo_fence_lock);
            if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
                radeon_bo_reference(&bo->u.slab.fences[0], NULL);
                memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                        (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
                bo->u.slab.num_fences--;
            }
            radeon_bo_reference(&fence, NULL);
        }
        pipe_mutex_unlock(bo->rws->bo_fence_lock);
    }
}

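/* Wait for the buffer to become idle, or query its busy status.
 * timeout == 0 only queries, PIPE_TIMEOUT_INFINITE blocks in the kernel,
 * and any other timeout is emulated by polling the busy status. */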
static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
    struct radeon_bo *bo = radeon_bo(_buf);
    int64_t abs_timeout;

    /* No timeout. Just query. */
    if (timeout == 0)
        return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);

    abs_timeout = os_time_get_absolute_timeout(timeout);

    /* Wait if any ioctl is being submitted with this buffer. */
    if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
        return false;

    /* Infinite timeout. */
    if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
        radeon_bo_wait_idle(bo);
        return true;
    }

    /* Other timeouts need to be emulated with a loop. */
    while (radeon_bo_is_busy(bo)) {
        if (os_time_get_nano() >= abs_timeout)
            return false;
        os_time_sleep(10);
    }

    return true;
}

static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
    /* Zero out the domains the driver doesn't understand. */
    domain &= RADEON_DOMAIN_VRAM_GTT;

    /* If no domain is set, we must set something... */
    if (!domain)
        domain = RADEON_DOMAIN_VRAM_GTT;

    return domain;
}

static enum radeon_bo_domain radeon_bo_get_initial_domain(
        struct pb_buffer *buf)
{
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    struct drm_radeon_gem_op args;

    if (bo->rws->info.drm_minor < 38)
        return RADEON_DOMAIN_VRAM_GTT;

    memset(&args, 0, sizeof(args));
    args.handle = bo->handle;
    args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;

    drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
                        &args, sizeof(args));

    /* GEM domains and winsys domains are defined the same. */
    return get_valid_domain(args.value);
}

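/* Allocate a range of the GEM virtual address space.
 *
 * This is a simple first-fit allocator over the hole list: the first hole
 * that can satisfy "size" at the requested alignment is used, splitting
 * off any leading alignment waste and any trailing remainder into holes.
 * If no hole fits, the range is carved from the top of the address space
 * at rws->va_offset. */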
static uint64_t radeon_bomgr_find_va(struct radeon_drm_winsys *rws,
                                     uint64_t size, uint64_t alignment)
{
    struct radeon_bo_va_hole *hole, *n;
    uint64_t offset = 0, waste = 0;

    /* All VM address space holes will implicitly start aligned to the
     * size alignment, so we don't need to sanitize the alignment here
     */
    size = align(size, rws->info.gart_page_size);

    pipe_mutex_lock(rws->bo_va_mutex);
    /* first look for a hole */
    LIST_FOR_EACH_ENTRY_SAFE(hole, n, &rws->va_holes, list) {
        offset = hole->offset;
        waste = offset % alignment;
        waste = waste ? alignment - waste : 0;
        offset += waste;
        if (offset >= (hole->offset + hole->size)) {
            continue;
        }
        if (!waste && hole->size == size) {
            offset = hole->offset;
            list_del(&hole->list);
            FREE(hole);
            pipe_mutex_unlock(rws->bo_va_mutex);
            return offset;
        }
        if ((hole->size - waste) > size) {
            if (waste) {
                n = CALLOC_STRUCT(radeon_bo_va_hole);
                n->size = waste;
                n->offset = hole->offset;
                list_add(&n->list, &hole->list);
            }
            hole->size -= (size + waste);
            hole->offset += size + waste;
            pipe_mutex_unlock(rws->bo_va_mutex);
            return offset;
        }
        if ((hole->size - waste) == size) {
            hole->size = waste;
            pipe_mutex_unlock(rws->bo_va_mutex);
            return offset;
        }
    }

    offset = rws->va_offset;
    waste = offset % alignment;
    waste = waste ? alignment - waste : 0;
    if (waste) {
        n = CALLOC_STRUCT(radeon_bo_va_hole);
        n->size = waste;
        n->offset = offset;
        list_add(&n->list, &rws->va_holes);
    }
    offset += waste;
    rws->va_offset += size + waste;
    pipe_mutex_unlock(rws->bo_va_mutex);
    return offset;
}

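/* Return a virtual address range to the allocator. A range freed from the
 * top of the address space simply lowers rws->va_offset; anything else
 * becomes a hole, which is merged with adjacent holes where possible. */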
static void radeon_bomgr_free_va(struct radeon_drm_winsys *rws,
                                 uint64_t va, uint64_t size)
{
    struct radeon_bo_va_hole *hole;

    size = align(size, rws->info.gart_page_size);

    pipe_mutex_lock(rws->bo_va_mutex);
    if ((va + size) == rws->va_offset) {
        rws->va_offset = va;
        /* Delete the uppermost hole if it reaches the new top. */
        if (!LIST_IS_EMPTY(&rws->va_holes)) {
            hole = container_of(rws->va_holes.next, hole, list);
            if ((hole->offset + hole->size) == va) {
                rws->va_offset = hole->offset;
                list_del(&hole->list);
                FREE(hole);
            }
        }
    } else {
        struct radeon_bo_va_hole *next;

        hole = container_of(&rws->va_holes, hole, list);
        LIST_FOR_EACH_ENTRY(next, &rws->va_holes, list) {
            if (next->offset < va)
                break;
            hole = next;
        }

        if (&hole->list != &rws->va_holes) {
            /* Grow the upper hole if it's adjacent. */
            if (hole->offset == (va + size)) {
                hole->offset = va;
                hole->size += size;
                /* Merge the lower hole if it's adjacent. */
                if (next != hole && &next->list != &rws->va_holes &&
                    (next->offset + next->size) == va) {
                    next->size += hole->size;
                    list_del(&hole->list);
                    FREE(hole);
                }
                goto out;
            }
        }

        /* Grow the lower hole if it's adjacent. */
        if (next != hole && &next->list != &rws->va_holes &&
            (next->offset + next->size) == va) {
            next->size += size;
            goto out;
        }

        /* FIXME: On allocation failure we just lose virtual address space.
         * Maybe print a warning.
         */
        next = CALLOC_STRUCT(radeon_bo_va_hole);
        if (next) {
            next->size = size;
            next->offset = va;
            list_add(&next->list, &hole->list);
        }
    }
out:
    pipe_mutex_unlock(rws->bo_va_mutex);
}

void radeon_bo_destroy(struct pb_buffer *_buf)
{
    struct radeon_bo *bo = radeon_bo(_buf);
    struct radeon_drm_winsys *rws = bo->rws;
    struct drm_gem_close args;

    assert(bo->handle && "must not be called for slab entries");

    memset(&args, 0, sizeof(args));

    pipe_mutex_lock(rws->bo_handles_mutex);
    util_hash_table_remove(rws->bo_handles, (void*)(uintptr_t)bo->handle);
    if (bo->flink_name) {
        util_hash_table_remove(rws->bo_names,
                               (void*)(uintptr_t)bo->flink_name);
    }
    pipe_mutex_unlock(rws->bo_handles_mutex);

    if (bo->u.real.ptr)
        os_munmap(bo->u.real.ptr, bo->base.size);

    if (rws->info.has_virtual_memory) {
        if (rws->va_unmap_working) {
            struct drm_radeon_gem_va va;

            va.handle = bo->handle;
            va.vm_id = 0;
            va.operation = RADEON_VA_UNMAP;
            va.flags = RADEON_VM_PAGE_READABLE |
                       RADEON_VM_PAGE_WRITEABLE |
                       RADEON_VM_PAGE_SNOOPED;
            va.offset = bo->va;

            if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                    sizeof(va)) != 0 &&
                va.operation == RADEON_VA_RESULT_ERROR) {
                fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
                fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
                fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
            }
        }

        radeon_bomgr_free_va(rws, bo->va, bo->base.size);
    }

    /* Close the object. */
    args.handle = bo->handle;
    drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

    pipe_mutex_destroy(bo->u.real.map_mutex);

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
        rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
        rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

    if (bo->u.real.map_count >= 1) {
        if (bo->initial_domain & RADEON_DOMAIN_VRAM)
            bo->rws->mapped_vram -= bo->base.size;
        else
            bo->rws->mapped_gtt -= bo->base.size;
    }

    FREE(bo);
}

static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
{
    struct radeon_bo *bo = radeon_bo(_buf);

    assert(bo->handle && "must not be called for slab entries");

    if (bo->u.real.use_reusable_pool)
        pb_cache_add_buffer(&bo->u.real.cache_entry);
    else
        radeon_bo_destroy(_buf);
}

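/* Map a buffer and return the CPU pointer. A slab entry is mapped through
 * its backing ("real") BO; the entry's pointer is the real BO's mapping
 * plus the entry's offset within it. The mapping itself is cached and
 * refcounted via map_count. */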
void *radeon_bo_do_map(struct radeon_bo *bo)
{
    struct drm_radeon_gem_mmap args = {0};
    void *ptr;
    unsigned offset;

    /* If the buffer is created from user memory, return the user pointer. */
    if (bo->user_ptr)
        return bo->user_ptr;

    if (bo->handle) {
        offset = 0;
    } else {
        offset = bo->va - bo->u.slab.real->va;
        bo = bo->u.slab.real;
    }

    /* Map the buffer. */
    pipe_mutex_lock(bo->u.real.map_mutex);
    /* Return the pointer if it's already mapped. */
    if (bo->u.real.ptr) {
        bo->u.real.map_count++;
        pipe_mutex_unlock(bo->u.real.map_mutex);
        return (uint8_t*)bo->u.real.ptr + offset;
    }
    args.handle = bo->handle;
    args.offset = 0;
    args.size = (uint64_t)bo->base.size;
    if (drmCommandWriteRead(bo->rws->fd,
                            DRM_RADEON_GEM_MMAP,
                            &args,
                            sizeof(args))) {
        pipe_mutex_unlock(bo->u.real.map_mutex);
        fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
                bo, bo->handle);
        return NULL;
    }

    ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                  bo->rws->fd, args.addr_ptr);
    if (ptr == MAP_FAILED) {
        /* Clear the cache and try again. */
        pb_cache_release_all_buffers(&bo->rws->bo_cache);

        ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                      bo->rws->fd, args.addr_ptr);
        if (ptr == MAP_FAILED) {
            pipe_mutex_unlock(bo->u.real.map_mutex);
            fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
            return NULL;
        }
    }
    bo->u.real.ptr = ptr;
    bo->u.real.map_count = 1;

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
        bo->rws->mapped_vram += bo->base.size;
    else
        bo->rws->mapped_gtt += bo->base.size;

    pipe_mutex_unlock(bo->u.real.map_mutex);
    return (uint8_t*)bo->u.real.ptr + offset;
}

static void *radeon_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;

    /* If it's not an unsynchronized mapping, flush the CS if needed and then wait. */
    if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
        /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
        if (usage & PIPE_TRANSFER_DONTBLOCK) {
            if (!(usage & PIPE_TRANSFER_WRITE)) {
                /* Mapping for read.
                 *
                 * Since we are mapping for read, we don't need to wait
                 * if the GPU is using the buffer for read too
                 * (neither one is changing it).
                 *
                 * Only check whether the buffer is being used for write. */
                if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
                    return NULL;
                }

                if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
                                    RADEON_USAGE_WRITE)) {
                    return NULL;
                }
            } else {
                if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
                    return NULL;
                }

                if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
                                    RADEON_USAGE_READWRITE)) {
                    return NULL;
                }
            }
        } else {
            uint64_t time = os_time_get_nano();

            if (!(usage & PIPE_TRANSFER_WRITE)) {
                /* Mapping for read.
                 *
                 * Since we are mapping for read, we don't need to wait
                 * if the GPU is using the buffer for read too
                 * (neither one is changing it).
                 *
                 * Only check whether the buffer is being used for write. */
                if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                    cs->flush_cs(cs->flush_data, 0, NULL);
                }
                radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                               RADEON_USAGE_WRITE);
            } else {
                /* Mapping for write. */
                if (cs) {
                    if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                        cs->flush_cs(cs->flush_data, 0, NULL);
                    } else {
                        /* Try to avoid busy-waiting in radeon_bo_wait. */
                        if (p_atomic_read(&bo->num_active_ioctls))
                            radeon_drm_cs_sync_flush(rcs);
                    }
                }

                radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                               RADEON_USAGE_READWRITE);
            }

            bo->rws->buffer_wait_time += os_time_get_nano() - time;
        }
    }

    return radeon_bo_do_map(bo);
}

static void radeon_bo_unmap(struct pb_buffer *_buf)
{
    struct radeon_bo *bo = (struct radeon_bo*)_buf;

    if (bo->user_ptr)
        return;

    if (!bo->handle)
        bo = bo->u.slab.real;

    pipe_mutex_lock(bo->u.real.map_mutex);
    if (!bo->u.real.ptr) {
        pipe_mutex_unlock(bo->u.real.map_mutex);
        return; /* it hasn't been mapped */
    }

    assert(bo->u.real.map_count);
    if (--bo->u.real.map_count) {
        pipe_mutex_unlock(bo->u.real.map_mutex);
        return; /* it's been mapped multiple times */
    }

    os_munmap(bo->u.real.ptr, bo->base.size);
    bo->u.real.ptr = NULL;

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
        bo->rws->mapped_vram -= bo->base.size;
    else
        bo->rws->mapped_gtt -= bo->base.size;

    pipe_mutex_unlock(bo->u.real.map_mutex);
}

static const struct pb_vtbl radeon_bo_vtbl = {
    radeon_bo_destroy_or_cache
    /* other functions are never called */
};

#ifndef RADEON_GEM_GTT_WC
#define RADEON_GEM_GTT_WC       (1 << 2)
#endif
#ifndef RADEON_GEM_CPU_ACCESS
/* The BO is expected to be accessed by the CPU. */
#define RADEON_GEM_CPU_ACCESS   (1 << 3)
#endif
#ifndef RADEON_GEM_NO_CPU_ACCESS
/* CPU access is not expected to work for this BO. */
#define RADEON_GEM_NO_CPU_ACCESS (1 << 4)
#endif

static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned usage,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          unsigned pb_cache_bucket)
{
    struct radeon_bo *bo;
    struct drm_radeon_gem_create args;
    int r;

    memset(&args, 0, sizeof(args));

    assert(initial_domains);
    assert((initial_domains &
            ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

    args.size = size;
    args.alignment = alignment;
    args.initial_domain = initial_domains;
    args.flags = 0;

    if (flags & RADEON_FLAG_GTT_WC)
        args.flags |= RADEON_GEM_GTT_WC;
    if (flags & RADEON_FLAG_CPU_ACCESS)
        args.flags |= RADEON_GEM_CPU_ACCESS;
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
        args.flags |= RADEON_GEM_NO_CPU_ACCESS;

    if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                            &args, sizeof(args))) {
        fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
        fprintf(stderr, "radeon:    size      : %u bytes\n", size);
        fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
        fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
        fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
        return NULL;
    }

    assert(args.handle != 0);

    bo = CALLOC_STRUCT(radeon_bo);
    if (!bo)
        return NULL;

    pipe_reference_init(&bo->base.reference, 1);
    bo->base.alignment = alignment;
    bo->base.usage = usage;
    bo->base.size = size;
    bo->base.vtbl = &radeon_bo_vtbl;
    bo->rws = rws;
    bo->handle = args.handle;
    bo->va = 0;
    bo->initial_domain = initial_domains;
    bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
    pipe_mutex_init(bo->u.real.map_mutex);
    pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                        pb_cache_bucket);

    if (rws->info.has_virtual_memory) {
        struct drm_radeon_gem_va va;
        unsigned va_gap_size;

        va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
        bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);

        va.handle = bo->handle;
        va.vm_id = 0;
        va.operation = RADEON_VA_MAP;
        va.flags = RADEON_VM_PAGE_READABLE |
                   RADEON_VM_PAGE_WRITEABLE |
                   RADEON_VM_PAGE_SNOOPED;
        va.offset = bo->va;
        r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
            fprintf(stderr, "radeon:    size      : %d bytes\n", size);
            fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
            fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
            fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
            radeon_bo_destroy(&bo->base);
            return NULL;
        }
        pipe_mutex_lock(rws->bo_handles_mutex);
        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
            struct pb_buffer *b = &bo->base;
            struct radeon_bo *old_bo =
                util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);

            pipe_mutex_unlock(rws->bo_handles_mutex);
            pb_reference(&b, &old_bo->base);
            return radeon_bo(b);
        }

        util_hash_table_set(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
        pipe_mutex_unlock(rws->bo_handles_mutex);
    }

    if (initial_domains & RADEON_DOMAIN_VRAM)
        rws->allocated_vram += align(size, rws->info.gart_page_size);
    else if (initial_domains & RADEON_DOMAIN_GTT)
        rws->allocated_gtt += align(size, rws->info.gart_page_size);

    return bo;
}

bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
{
    struct radeon_bo *bo = radeon_bo(_buf);

    if (radeon_bo_is_referenced_by_any_cs(bo))
        return false;

    return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
    struct radeon_bo *bo = NULL; /* fix container_of */
    bo = container_of(entry, bo, u.slab.entry);

    return radeon_bo_can_reclaim(&bo->base);
}

static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
{
    struct radeon_bo *bo = radeon_bo(_buf);

    assert(!bo->handle);

    pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}

static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
    radeon_bo_slab_destroy
    /* other functions are never called */
};

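/* Create a new slab for sub-allocating small buffers. The "heap" index
 * encodes the allocation parameters: bit 0 = GTT_WC, bit 1 = CPU_ACCESS,
 * and bits 2+ select the domain (0 = VRAM, 1 = VRAM_GTT, 2 = GTT). This
 * matches the encoding used by radeon_winsys_bo_create() below. */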
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
    struct radeon_drm_winsys *ws = priv;
    struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
    enum radeon_bo_domain domains;
    enum radeon_bo_flag flags = 0;
    unsigned base_hash;

    if (!slab)
        return NULL;

    if (heap & 1)
        flags |= RADEON_FLAG_GTT_WC;
    if (heap & 2)
        flags |= RADEON_FLAG_CPU_ACCESS;

    switch (heap >> 2) {
    case 0:
        domains = RADEON_DOMAIN_VRAM;
        break;
    default:
    case 1:
        domains = RADEON_DOMAIN_VRAM_GTT;
        break;
    case 2:
        domains = RADEON_DOMAIN_GTT;
        break;
    }

    slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                     64 * 1024, 64 * 1024,
                                                     domains, flags));
    if (!slab->buffer)
        goto fail;

    assert(slab->buffer->handle);

    slab->base.num_entries = slab->buffer->base.size / entry_size;
    slab->base.num_free = slab->base.num_entries;
    slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
    if (!slab->entries)
        goto fail_buffer;

    LIST_INITHEAD(&slab->base.free);

    base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
        struct radeon_bo *bo = &slab->entries[i];

        bo->base.alignment = entry_size;
        bo->base.usage = slab->buffer->base.usage;
        bo->base.size = entry_size;
        bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
        bo->rws = ws;
        bo->va = slab->buffer->va + i * entry_size;
        bo->initial_domain = domains;
        bo->hash = base_hash + i;
        bo->u.slab.entry.slab = &slab->base;
        bo->u.slab.entry.group_index = group_index;
        bo->u.slab.real = slab->buffer;

        LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
    }

    return &slab->base;

fail_buffer:
    radeon_bo_reference(&slab->buffer, NULL);
fail:
    FREE(slab);
    return NULL;
}

void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
    struct radeon_slab *slab = (struct radeon_slab *)pslab;

    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
        struct radeon_bo *bo = &slab->entries[i];
        for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
            radeon_bo_reference(&bo->u.slab.fences[j], NULL);
        FREE(bo->u.slab.fences);
    }

    FREE(slab->entries);
    radeon_bo_reference(&slab->buffer, NULL);
    FREE(slab);
}

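/* The Evergreen tile split is stored in the tiling flags as an index in
 * the 0..6 range; these helpers convert between that index and the split
 * in bytes (64..4096). Out-of-range values fall back to 1024 bytes
 * (index 4). */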
static unsigned eg_tile_split(unsigned tile_split)
{
    switch (tile_split) {
    case 0:     tile_split = 64;    break;
    case 1:     tile_split = 128;   break;
    case 2:     tile_split = 256;   break;
    case 3:     tile_split = 512;   break;
    default:
    case 4:     tile_split = 1024;  break;
    case 5:     tile_split = 2048;  break;
    case 6:     tile_split = 4096;  break;
    }
    return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
    switch (eg_tile_split) {
    case 64:    return 0;
    case 128:   return 1;
    case 256:   return 2;
    case 512:   return 3;
    default:
    case 1024:  return 4;
    case 2048:  return 5;
    case 4096:  return 6;
    }
}

static void radeon_bo_get_metadata(struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md)
{
    struct radeon_bo *bo = radeon_bo(_buf);
    /* The GET_TILING and SET_TILING ioctls share the same argument layout. */
    struct drm_radeon_gem_set_tiling args;

    assert(bo->handle && "must not be called for slab entries");

    memset(&args, 0, sizeof(args));

    args.handle = bo->handle;

    drmCommandWriteRead(bo->rws->fd,
                        DRM_RADEON_GEM_GET_TILING,
                        &args,
                        sizeof(args));

    md->microtile = RADEON_LAYOUT_LINEAR;
    md->macrotile = RADEON_LAYOUT_LINEAR;
    if (args.tiling_flags & RADEON_TILING_MICRO)
        md->microtile = RADEON_LAYOUT_TILED;
    else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
        md->microtile = RADEON_LAYOUT_SQUARETILED;

    if (args.tiling_flags & RADEON_TILING_MACRO)
        md->macrotile = RADEON_LAYOUT_TILED;

    md->bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
    md->bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
    md->tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
    md->mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
    md->tile_split = eg_tile_split(md->tile_split);
    md->scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}

static void radeon_bo_set_metadata(struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md)
{
    struct radeon_bo *bo = radeon_bo(_buf);
    struct drm_radeon_gem_set_tiling args;

    assert(bo->handle && "must not be called for slab entries");

    memset(&args, 0, sizeof(args));

    os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);

    if (md->microtile == RADEON_LAYOUT_TILED)
        args.tiling_flags |= RADEON_TILING_MICRO;
    else if (md->microtile == RADEON_LAYOUT_SQUARETILED)
        args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

    if (md->macrotile == RADEON_LAYOUT_TILED)
        args.tiling_flags |= RADEON_TILING_MACRO;

    args.tiling_flags |= (md->bankw & RADEON_TILING_EG_BANKW_MASK) <<
        RADEON_TILING_EG_BANKW_SHIFT;
    args.tiling_flags |= (md->bankh & RADEON_TILING_EG_BANKH_MASK) <<
        RADEON_TILING_EG_BANKH_SHIFT;
    if (md->tile_split) {
        args.tiling_flags |= (eg_tile_split_rev(md->tile_split) &
                              RADEON_TILING_EG_TILE_SPLIT_MASK) <<
            RADEON_TILING_EG_TILE_SPLIT_SHIFT;
    }
    args.tiling_flags |= (md->mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
        RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

    if (bo->rws->gen >= DRV_SI && !md->scanout)
        args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

    args.handle = bo->handle;
    args.pitch = md->stride;

    drmCommandWriteRead(bo->rws->fd,
                        DRM_RADEON_GEM_SET_TILING,
                        &args,
                        sizeof(args));
}

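/* Allocate a winsys buffer. Small buffers (allocated without
 * RADEON_FLAG_HANDLE and with virtual memory available) are sub-allocated
 * from slabs; everything else is rounded up to the page size and served
 * from the pb_cache, falling back to a fresh GEM allocation on a cache
 * miss. */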
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_bo *bo;
    unsigned usage = 0, pb_cache_bucket;

    /* Only 32-bit sizes are supported. */
    if (size > UINT_MAX)
        return NULL;

    /* Sub-allocate small buffers from slabs. */
    if (!(flags & RADEON_FLAG_HANDLE) &&
        size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
        ws->info.has_virtual_memory &&
        alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
        struct pb_slab_entry *entry;
        unsigned heap = 0;

        if (flags & RADEON_FLAG_GTT_WC)
            heap |= 1;
        if (flags & RADEON_FLAG_CPU_ACCESS)
            heap |= 2;
        if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
            goto no_slab;

        switch (domain) {
        case RADEON_DOMAIN_VRAM:
            heap |= 0 * 4;
            break;
        case RADEON_DOMAIN_VRAM_GTT:
            heap |= 1 * 4;
            break;
        case RADEON_DOMAIN_GTT:
            heap |= 2 * 4;
            break;
        default:
            goto no_slab;
        }

        entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
        if (!entry) {
            /* Clear the cache and try again. */
            pb_cache_release_all_buffers(&ws->bo_cache);

            entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
        }
        if (!entry)
            return NULL;

        bo = NULL;
        bo = container_of(entry, bo, u.slab.entry);

        pipe_reference_init(&bo->base.reference, 1);

        return &bo->base;
    }
no_slab:

    /* This flag is irrelevant for the cache. */
    flags &= ~RADEON_FLAG_HANDLE;

    /* Align size to page size. This is the minimum alignment for normal
     * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
     * like constant/uniform buffers, can benefit from better and more reuse.
     */
    size = align(size, ws->info.gart_page_size);
    alignment = align(alignment, ws->info.gart_page_size);

    /* Only set one usage bit each for domains and flags, or the cache manager
     * might consider different sets of domains / flags compatible.
     */
    if (domain == RADEON_DOMAIN_VRAM_GTT)
        usage = 1 << 2;
    else
        usage = (unsigned)domain >> 1;
    assert(flags < sizeof(usage) * 8 - 3);
    usage |= 1 << (flags + 3);

    /* Determine the pb_cache bucket for minimizing pb_cache misses. */
    pb_cache_bucket = 0;
    if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
        pb_cache_bucket += 1;
    if (flags == RADEON_FLAG_GTT_WC) /* WC */
        pb_cache_bucket += 2;
    assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));

    bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
                                           usage, pb_cache_bucket));
    if (bo)
        return &bo->base;

    bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
                          pb_cache_bucket);
    if (!bo) {
        /* Clear the cache and try again. */
        pb_slabs_reclaim(&ws->bo_slabs);
        pb_cache_release_all_buffers(&ws->bo_cache);
        bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
                              pb_cache_bucket);
        if (!bo)
            return NULL;
    }

    bo->u.real.use_reusable_pool = true;

    pipe_mutex_lock(ws->bo_handles_mutex);
    util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
    pipe_mutex_unlock(ws->bo_handles_mutex);

    return &bo->base;
}

static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
                                                   void *pointer, uint64_t size)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct drm_radeon_gem_userptr args;
    struct radeon_bo *bo;
    int r;

    bo = CALLOC_STRUCT(radeon_bo);
    if (!bo)
        return NULL;

    memset(&args, 0, sizeof(args));
    args.addr = (uintptr_t)pointer;
    args.size = align(size, ws->info.gart_page_size);
    args.flags = RADEON_GEM_USERPTR_ANONONLY |
                 RADEON_GEM_USERPTR_VALIDATE |
                 RADEON_GEM_USERPTR_REGISTER;
    if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                            &args, sizeof(args))) {
        FREE(bo);
        return NULL;
    }

    assert(args.handle != 0);

    pipe_mutex_lock(ws->bo_handles_mutex);

    /* Initialize it. */
    pipe_reference_init(&bo->base.reference, 1);
    bo->handle = args.handle;
    bo->base.alignment = 0;
    bo->base.size = size;
    bo->base.vtbl = &radeon_bo_vtbl;
    bo->rws = ws;
    bo->user_ptr = pointer;
    bo->va = 0;
    bo->initial_domain = RADEON_DOMAIN_GTT;
    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
    pipe_mutex_init(bo->u.real.map_mutex);

    util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

    pipe_mutex_unlock(ws->bo_handles_mutex);

    if (ws->info.has_virtual_memory) {
        struct drm_radeon_gem_va va;

        bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);

        va.handle = bo->handle;
        va.operation = RADEON_VA_MAP;
        va.vm_id = 0;
        va.flags = RADEON_VM_PAGE_READABLE |
                   RADEON_VM_PAGE_WRITEABLE |
                   RADEON_VM_PAGE_SNOOPED;
        va.offset = bo->va;
        r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to assign virtual address space\n");
            radeon_bo_destroy(&bo->base);
            return NULL;
        }
        pipe_mutex_lock(ws->bo_handles_mutex);
        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
            struct pb_buffer *b = &bo->base;
            struct radeon_bo *old_bo =
                util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);

            pipe_mutex_unlock(ws->bo_handles_mutex);
            pb_reference(&b, &old_bo->base);
            return b;
        }

        util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
        pipe_mutex_unlock(ws->bo_handles_mutex);
    }

    ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

    return (struct pb_buffer*)bo;
}

static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                      struct winsys_handle *whandle,
                                                      unsigned *stride,
                                                      unsigned *offset)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_bo *bo;
    int r;
    unsigned handle;
    uint64_t size = 0;

    if (!offset && whandle->offset != 0) {
        fprintf(stderr, "attempt to import unsupported winsys offset %u\n",
                whandle->offset);
        return NULL;
    }

    /* We must maintain a list of pairs <handle, bo>, so that we always return
     * the same BO for one particular handle. If we didn't do that and created
     * more than one BO for the same handle and then relocated them in a CS,
     * we would hit a deadlock in the kernel.
     *
     * The list of pairs is guarded by a mutex, of course. */
    pipe_mutex_lock(ws->bo_handles_mutex);

    if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
        /* First check if there already is an existing bo for the handle. */
        bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
    } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
        /* We must first get the GEM handle, as fds are unreliable keys */
        r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
        if (r)
            goto fail;
        bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
    } else {
        /* Unknown handle type */
        goto fail;
    }

    if (bo) {
        /* Increase the refcount. */
        struct pb_buffer *b = NULL;
        pb_reference(&b, &bo->base);
        goto done;
    }

    /* There isn't, create a new one. */
    bo = CALLOC_STRUCT(radeon_bo);
    if (!bo) {
        goto fail;
    }

    if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
        struct drm_gem_open open_arg = {};
        /* Open the BO. */
        open_arg.name = whandle->handle;
        if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
            FREE(bo);
            goto fail;
        }
        handle = open_arg.handle;
        size = open_arg.size;
        bo->flink_name = whandle->handle;
    } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
        size = lseek(whandle->handle, 0, SEEK_END);
        /*
         * Could check errno to determine whether the kernel is new enough, but
         * it doesn't really matter why this failed, just that it failed.
         */
        if (size == (off_t)-1) {
            FREE(bo);
            goto fail;
        }
        lseek(whandle->handle, 0, SEEK_SET);
    }

    assert(handle != 0);

    bo->handle = handle;

    /* Initialize it. */
    pipe_reference_init(&bo->base.reference, 1);
    bo->base.alignment = 0;
    bo->base.size = (unsigned) size;
    bo->base.vtbl = &radeon_bo_vtbl;
    bo->rws = ws;
    bo->va = 0;
    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
    pipe_mutex_init(bo->u.real.map_mutex);

    if (bo->flink_name)
        util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

    util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
    pipe_mutex_unlock(ws->bo_handles_mutex);

    if (stride)
        *stride = whandle->stride;
    if (offset)
        *offset = whandle->offset;

    if (ws->info.has_virtual_memory && !bo->va) {
        struct drm_radeon_gem_va va;

        bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);

        va.handle = bo->handle;
        va.operation = RADEON_VA_MAP;
        va.vm_id = 0;
        va.flags = RADEON_VM_PAGE_READABLE |
                   RADEON_VM_PAGE_WRITEABLE |
                   RADEON_VM_PAGE_SNOOPED;
        va.offset = bo->va;
        r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to assign virtual address space\n");
            radeon_bo_destroy(&bo->base);
            return NULL;
        }
        pipe_mutex_lock(ws->bo_handles_mutex);
        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
            struct pb_buffer *b = &bo->base;
            struct radeon_bo *old_bo =
                util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);

            pipe_mutex_unlock(ws->bo_handles_mutex);
            pb_reference(&b, &old_bo->base);
            return b;
        }

        util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
        pipe_mutex_unlock(ws->bo_handles_mutex);
    }

    bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
        ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
        ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

    return (struct pb_buffer*)bo;

fail:
    pipe_mutex_unlock(ws->bo_handles_mutex);
    return NULL;
}

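/* Export a buffer as a winsys handle: a global flink name (SHARED), the
 * raw GEM handle (KMS), or a dma-buf fd (FD). A slab entry is exported
 * through its backing BO, with the entry's start folded into the handle's
 * offset. An exported buffer must never return to the reusable pool. */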
static bool radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
                                        unsigned stride, unsigned offset,
                                        unsigned slice_size,
                                        struct winsys_handle *whandle)
{
    struct drm_gem_flink flink;
    struct radeon_bo *bo = radeon_bo(buffer);
    struct radeon_drm_winsys *ws = bo->rws;

    if (!bo->handle) {
        offset += bo->va - bo->u.slab.real->va;
        bo = bo->u.slab.real;
    }

    memset(&flink, 0, sizeof(flink));

    bo->u.real.use_reusable_pool = false;

    if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
        if (!bo->flink_name) {
            flink.handle = bo->handle;

            if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
                return false;
            }

            bo->flink_name = flink.name;

            pipe_mutex_lock(ws->bo_handles_mutex);
            util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
            pipe_mutex_unlock(ws->bo_handles_mutex);
        }
        whandle->handle = bo->flink_name;
    } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
        whandle->handle = bo->handle;
    } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
        if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
            return false;
    }

    whandle->stride = stride;
    whandle->offset = offset;
    whandle->offset += slice_size * whandle->layer;

    return true;
}

static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
{
    return ((struct radeon_bo*)buf)->user_ptr != NULL;
}

static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
{
    return ((struct radeon_bo*)buf)->va;
}

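/* Return the offset that relocations against this buffer must be biased
 * by: 0 for a real BO, or the sub-allocation offset inside the backing BO
 * for a slab entry (relocations can only name the real BO's handle).
 *
 * Illustrative use only (hypothetical caller; these names are not part of
 * this winsys):
 *
 *    unsigned bias = ws->buffer_get_reloc_offset(buf);
 *    emit_reloc(cs, buf, bias + offset_within_buf);
 */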
static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
{
    struct radeon_bo *bo = radeon_bo(buf);

    if (bo->handle)
        return 0;

    return bo->va - bo->u.slab.real->va;
}

void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.buffer_set_metadata = radeon_bo_set_metadata;
    ws->base.buffer_get_metadata = radeon_bo_get_metadata;
    ws->base.buffer_map = radeon_bo_map;
    ws->base.buffer_unmap = radeon_bo_unmap;
    ws->base.buffer_wait = radeon_bo_wait;
    ws->base.buffer_create = radeon_winsys_bo_create;
    ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
    ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
    ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
    ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
    ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
    ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
    ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}