src/gallium/winsys/radeon/drm/radeon_drm_bo.c

   1 /*
   2  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining
   6  * a copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
  17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  */
  26
  27 #include "radeon_drm_cs.h"
  28
  29 #include "util/u_hash_table.h"
  30 #include "util/u_memory.h"
  31 #include "util/simple_list.h"
  32 #include "os/os_thread.h"
  33 #include "os/os_mman.h"
  34 #include "os/os_time.h"
  35
  36 #include "state_tracker/drm_driver.h"
  37
  38 #include <sys/ioctl.h>
  39 #include <xf86drm.h>
  40 #include <errno.h>
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <inttypes.h>
  44
/* Forward declaration: radeon_bo_slab_alloc() below calls this to allocate
 * the backing buffer of a slab before the definition appears in the file.
 */
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);
  51
  52 static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
  53 {
  54     return (struct radeon_bo *)bo;
  55 }
  56
  57 struct radeon_bo_va_hole {
  58     struct list_head list;
  59     uint64_t         offset;
  60     uint64_t         size;
  61 };
  62
  63 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
  64 {
  65     struct drm_radeon_gem_busy args = {0};
  66
  67     args.handle = bo->handle;
  68     return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
  69                                &args, sizeof(args)) != 0;
  70 }
  71
  72 static bool radeon_bo_is_busy(struct radeon_bo *bo)
  73 {
  74     unsigned num_idle;
  75     bool busy = false;
  76
  77     if (bo->handle)
  78         return radeon_real_bo_is_busy(bo);
  79
  80     mtx_lock(&bo->rws->bo_fence_lock);
  81     for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
  82         if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
  83             busy = true;
  84             break;
  85         }
  86         radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
  87     }
  88     memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
  89             (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
  90     bo->u.slab.num_fences -= num_idle;
  91     pipe_mutex_unlock(bo->rws->bo_fence_lock);
  92
  93     return busy;
  94 }
  95
  96 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
  97 {
  98     struct drm_radeon_gem_wait_idle args = {0};
  99
 100     args.handle = bo->handle;
 101     while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
 102                            &args, sizeof(args)) == -EBUSY);
 103 }
 104
 105 static void radeon_bo_wait_idle(struct radeon_bo *bo)
 106 {
 107     if (bo->handle) {
 108         radeon_real_bo_wait_idle(bo);
 109     } else {
 110         mtx_lock(&bo->rws->bo_fence_lock);
 111         while (bo->u.slab.num_fences) {
 112             struct radeon_bo *fence = NULL;
 113             radeon_bo_reference(&fence, bo->u.slab.fences[0]);
 114             pipe_mutex_unlock(bo->rws->bo_fence_lock);
 115
 116             /* Wait without holding the fence lock. */
 117             radeon_real_bo_wait_idle(fence);
 118
 119             mtx_lock(&bo->rws->bo_fence_lock);
 120             if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
 121                 radeon_bo_reference(&bo->u.slab.fences[0], NULL);
 122                 memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
 123                         (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
 124                 bo->u.slab.num_fences--;
 125             }
 126             radeon_bo_reference(&fence, NULL);
 127         }
 128         pipe_mutex_unlock(bo->rws->bo_fence_lock);
 129     }
 130 }
 131
 132 static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
 133                            enum radeon_bo_usage usage)
 134 {
 135     struct radeon_bo *bo = radeon_bo(_buf);
 136     int64_t abs_timeout;
 137
 138     /* No timeout. Just query. */
 139     if (timeout == 0)
 140         return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
 141
 142     abs_timeout = os_time_get_absolute_timeout(timeout);
 143
 144     /* Wait if any ioctl is being submitted with this buffer. */
 145     if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
 146         return false;
 147
 148     /* Infinite timeout. */
 149     if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
 150         radeon_bo_wait_idle(bo);
 151         return true;
 152     }
 153
 154     /* Other timeouts need to be emulated with a loop. */
 155     while (radeon_bo_is_busy(bo)) {
 156        if (os_time_get_nano() >= abs_timeout)
 157           return false;
 158        os_time_sleep(10);
 159     }
 160
 161     return true;
 162 }
 163
 164 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
 165 {
 166     /* Zero domains the driver doesn't understand. */
 167     domain &= RADEON_DOMAIN_VRAM_GTT;
 168
 169     /* If no domain is set, we must set something... */
 170     if (!domain)
 171         domain = RADEON_DOMAIN_VRAM_GTT;
 172
 173     return domain;
 174 }
 175
 176 static enum radeon_bo_domain radeon_bo_get_initial_domain(
 177                 struct pb_buffer *buf)
 178 {
 179     struct radeon_bo *bo = (struct radeon_bo*)buf;
 180     struct drm_radeon_gem_op args;
 181
 182     if (bo->rws->info.drm_minor < 38)
 183         return RADEON_DOMAIN_VRAM_GTT;
 184
 185     memset(&args, 0, sizeof(args));
 186     args.handle = bo->handle;
 187     args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
 188
 189     drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
 190                         &args, sizeof(args));
 191
 192     /* GEM domains and winsys domains are defined the same. */
 193     return get_valid_domain(args.value);
 194 }
 195
 196 static uint64_t radeon_bomgr_find_va(struct radeon_drm_winsys *rws,
 197                                      uint64_t size, uint64_t alignment)
 198 {
 199     struct radeon_bo_va_hole *hole, *n;
 200     uint64_t offset = 0, waste = 0;
 201
 202     /* All VM address space holes will implicitly start aligned to the
 203      * size alignment, so we don't need to sanitize the alignment here
 204      */
 205     size = align(size, rws->info.gart_page_size);
 206
 207     mtx_lock(&rws->bo_va_mutex);
 208     /* first look for a hole */
 209     LIST_FOR_EACH_ENTRY_SAFE(hole, n, &rws->va_holes, list) {
 210         offset = hole->offset;
 211         waste = offset % alignment;
 212         waste = waste ? alignment - waste : 0;
 213         offset += waste;
 214         if (offset >= (hole->offset + hole->size)) {
 215             continue;
 216         }
 217         if (!waste && hole->size == size) {
 218             offset = hole->offset;
 219             list_del(&hole->list);
 220             FREE(hole);
 221             pipe_mutex_unlock(rws->bo_va_mutex);
 222             return offset;
 223         }
 224         if ((hole->size - waste) > size) {
 225             if (waste) {
 226                 n = CALLOC_STRUCT(radeon_bo_va_hole);
 227                 n->size = waste;
 228                 n->offset = hole->offset;
 229                 list_add(&n->list, &hole->list);
 230             }
 231             hole->size -= (size + waste);
 232             hole->offset += size + waste;
 233             pipe_mutex_unlock(rws->bo_va_mutex);
 234             return offset;
 235         }
 236         if ((hole->size - waste) == size) {
 237             hole->size = waste;
 238             pipe_mutex_unlock(rws->bo_va_mutex);
 239             return offset;
 240         }
 241     }
 242
 243     offset = rws->va_offset;
 244     waste = offset % alignment;
 245     waste = waste ? alignment - waste : 0;
 246     if (waste) {
 247         n = CALLOC_STRUCT(radeon_bo_va_hole);
 248         n->size = waste;
 249         n->offset = offset;
 250         list_add(&n->list, &rws->va_holes);
 251     }
 252     offset += waste;
 253     rws->va_offset += size + waste;
 254     pipe_mutex_unlock(rws->bo_va_mutex);
 255     return offset;
 256 }
 257
 258 static void radeon_bomgr_free_va(struct radeon_drm_winsys *rws,
 259                                  uint64_t va, uint64_t size)
 260 {
 261     struct radeon_bo_va_hole *hole;
 262
 263     size = align(size, rws->info.gart_page_size);
 264
 265     mtx_lock(&rws->bo_va_mutex);
 266     if ((va + size) == rws->va_offset) {
 267         rws->va_offset = va;
 268         /* Delete uppermost hole if it reaches the new top */
 269         if (!LIST_IS_EMPTY(&rws->va_holes)) {
 270             hole = container_of(rws->va_holes.next, hole, list);
 271             if ((hole->offset + hole->size) == va) {
 272                 rws->va_offset = hole->offset;
 273                 list_del(&hole->list);
 274                 FREE(hole);
 275             }
 276         }
 277     } else {
 278         struct radeon_bo_va_hole *next;
 279
 280         hole = container_of(&rws->va_holes, hole, list);
 281         LIST_FOR_EACH_ENTRY(next, &rws->va_holes, list) {
 282             if (next->offset < va)
 283                 break;
 284             hole = next;
 285         }
 286
 287         if (&hole->list != &rws->va_holes) {
 288             /* Grow upper hole if it's adjacent */
 289             if (hole->offset == (va + size)) {
 290                 hole->offset = va;
 291                 hole->size += size;
 292                 /* Merge lower hole if it's adjacent */
 293                 if (next != hole && &next->list != &rws->va_holes &&
 294                     (next->offset + next->size) == va) {
 295                     next->size += hole->size;
 296                     list_del(&hole->list);
 297                     FREE(hole);
 298                 }
 299                 goto out;
 300             }
 301         }
 302
 303         /* Grow lower hole if it's adjacent */
 304         if (next != hole && &next->list != &rws->va_holes &&
 305             (next->offset + next->size) == va) {
 306             next->size += size;
 307             goto out;
 308         }
 309
 310         /* FIXME on allocation failure we just lose virtual address space
 311          * maybe print a warning
 312          */
 313         next = CALLOC_STRUCT(radeon_bo_va_hole);
 314         if (next) {
 315             next->size = size;
 316             next->offset = va;
 317             list_add(&next->list, &hole->list);
 318         }
 319     }
 320 out:
 321     pipe_mutex_unlock(rws->bo_va_mutex);
 322 }
 323
 324 void radeon_bo_destroy(struct pb_buffer *_buf)
 325 {
 326     struct radeon_bo *bo = radeon_bo(_buf);
 327     struct radeon_drm_winsys *rws = bo->rws;
 328     struct drm_gem_close args;
 329
 330     assert(bo->handle && "must not be called for slab entries");
 331
 332     memset(&args, 0, sizeof(args));
 333
 334     mtx_lock(&rws->bo_handles_mutex);
 335     util_hash_table_remove(rws->bo_handles, (void*)(uintptr_t)bo->handle);
 336     if (bo->flink_name) {
 337         util_hash_table_remove(rws->bo_names,
 338                                (void*)(uintptr_t)bo->flink_name);
 339     }
 340     pipe_mutex_unlock(rws->bo_handles_mutex);
 341
 342     if (bo->u.real.ptr)
 343         os_munmap(bo->u.real.ptr, bo->base.size);
 344
 345     if (rws->info.has_virtual_memory) {
 346         if (rws->va_unmap_working) {
 347             struct drm_radeon_gem_va va;
 348
 349             va.handle = bo->handle;
 350             va.vm_id = 0;
 351             va.operation = RADEON_VA_UNMAP;
 352             va.flags = RADEON_VM_PAGE_READABLE |
 353                        RADEON_VM_PAGE_WRITEABLE |
 354                        RADEON_VM_PAGE_SNOOPED;
 355             va.offset = bo->va;
 356
 357             if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
 358                                     sizeof(va)) != 0 &&
 359                 va.operation == RADEON_VA_RESULT_ERROR) {
 360                 fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
 361                 fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
 362                 fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
 363             }
 364         }
 365
 366         radeon_bomgr_free_va(rws, bo->va, bo->base.size);
 367     }
 368
 369     /* Close object. */
 370     args.handle = bo->handle;
 371     drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
 372
 373     mtx_destroy(&bo->u.real.map_mutex);
 374
 375     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 376         rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
 377     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
 378         rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
 379
 380     if (bo->u.real.map_count >= 1) {
 381         if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 382             bo->rws->mapped_vram -= bo->base.size;
 383         else
 384             bo->rws->mapped_gtt -= bo->base.size;
 385         bo->rws->num_mapped_buffers--;
 386     }
 387
 388     FREE(bo);
 389 }
 390
 391 static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
 392 {
 393    struct radeon_bo *bo = radeon_bo(_buf);
 394
 395     assert(bo->handle && "must not be called for slab entries");
 396
 397    if (bo->u.real.use_reusable_pool)
 398       pb_cache_add_buffer(&bo->u.real.cache_entry);
 399    else
 400       radeon_bo_destroy(_buf);
 401 }
 402
 403 void *radeon_bo_do_map(struct radeon_bo *bo)
 404 {
 405     struct drm_radeon_gem_mmap args = {0};
 406     void *ptr;
 407     unsigned offset;
 408
 409     /* If the buffer is created from user memory, return the user pointer. */
 410     if (bo->user_ptr)
 411         return bo->user_ptr;
 412
 413     if (bo->handle) {
 414         offset = 0;
 415     } else {
 416         offset = bo->va - bo->u.slab.real->va;
 417         bo = bo->u.slab.real;
 418     }
 419
 420     /* Map the buffer. */
 421     mtx_lock(&bo->u.real.map_mutex);
 422     /* Return the pointer if it's already mapped. */
 423     if (bo->u.real.ptr) {
 424         bo->u.real.map_count++;
 425         pipe_mutex_unlock(bo->u.real.map_mutex);
 426         return (uint8_t*)bo->u.real.ptr + offset;
 427     }
 428     args.handle = bo->handle;
 429     args.offset = 0;
 430     args.size = (uint64_t)bo->base.size;
 431     if (drmCommandWriteRead(bo->rws->fd,
 432                             DRM_RADEON_GEM_MMAP,
 433                             &args,
 434                             sizeof(args))) {
 435         pipe_mutex_unlock(bo->u.real.map_mutex);
 436         fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
 437                 bo, bo->handle);
 438         return NULL;
 439     }
 440
 441     ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
 442                bo->rws->fd, args.addr_ptr);
 443     if (ptr == MAP_FAILED) {
 444         /* Clear the cache and try again. */
 445         pb_cache_release_all_buffers(&bo->rws->bo_cache);
 446
 447         ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
 448                       bo->rws->fd, args.addr_ptr);
 449         if (ptr == MAP_FAILED) {
 450             pipe_mutex_unlock(bo->u.real.map_mutex);
 451             fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
 452             return NULL;
 453         }
 454     }
 455     bo->u.real.ptr = ptr;
 456     bo->u.real.map_count = 1;
 457
 458     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 459        bo->rws->mapped_vram += bo->base.size;
 460     else
 461        bo->rws->mapped_gtt += bo->base.size;
 462     bo->rws->num_mapped_buffers++;
 463
 464     pipe_mutex_unlock(bo->u.real.map_mutex);
 465     return (uint8_t*)bo->u.real.ptr + offset;
 466 }
 467
 468 static void *radeon_bo_map(struct pb_buffer *buf,
 469                            struct radeon_winsys_cs *rcs,
 470                            enum pipe_transfer_usage usage)
 471 {
 472     struct radeon_bo *bo = (struct radeon_bo*)buf;
 473     struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
 474
 475     /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
 476     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 477         /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
 478         if (usage & PIPE_TRANSFER_DONTBLOCK) {
 479             if (!(usage & PIPE_TRANSFER_WRITE)) {
 480                 /* Mapping for read.
 481                  *
 482                  * Since we are mapping for read, we don't need to wait
 483                  * if the GPU is using the buffer for read too
 484                  * (neither one is changing it).
 485                  *
 486                  * Only check whether the buffer is being used for write. */
 487                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
 488                     cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
 489                     return NULL;
 490                 }
 491
 492                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
 493                                     RADEON_USAGE_WRITE)) {
 494                     return NULL;
 495                 }
 496             } else {
 497                 if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
 498                     cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
 499                     return NULL;
 500                 }
 501
 502                 if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
 503                                     RADEON_USAGE_READWRITE)) {
 504                     return NULL;
 505                 }
 506             }
 507         } else {
 508             uint64_t time = os_time_get_nano();
 509
 510             if (!(usage & PIPE_TRANSFER_WRITE)) {
 511                 /* Mapping for read.
 512                  *
 513                  * Since we are mapping for read, we don't need to wait
 514                  * if the GPU is using the buffer for read too
 515                  * (neither one is changing it).
 516                  *
 517                  * Only check whether the buffer is being used for write. */
 518                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
 519                     cs->flush_cs(cs->flush_data, 0, NULL);
 520                 }
 521                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
 522                                RADEON_USAGE_WRITE);
 523             } else {
 524                 /* Mapping for write. */
 525                 if (cs) {
 526                     if (radeon_bo_is_referenced_by_cs(cs, bo)) {
 527                         cs->flush_cs(cs->flush_data, 0, NULL);
 528                     } else {
 529                         /* Try to avoid busy-waiting in radeon_bo_wait. */
 530                         if (p_atomic_read(&bo->num_active_ioctls))
 531                             radeon_drm_cs_sync_flush(rcs);
 532                     }
 533                 }
 534
 535                 radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
 536                                RADEON_USAGE_READWRITE);
 537             }
 538
 539             bo->rws->buffer_wait_time += os_time_get_nano() - time;
 540         }
 541     }
 542
 543     return radeon_bo_do_map(bo);
 544 }
 545
 546 static void radeon_bo_unmap(struct pb_buffer *_buf)
 547 {
 548     struct radeon_bo *bo = (struct radeon_bo*)_buf;
 549
 550     if (bo->user_ptr)
 551         return;
 552
 553     if (!bo->handle)
 554         bo = bo->u.slab.real;
 555
 556     mtx_lock(&bo->u.real.map_mutex);
 557     if (!bo->u.real.ptr) {
 558         pipe_mutex_unlock(bo->u.real.map_mutex);
 559         return; /* it's not been mapped */
 560     }
 561
 562     assert(bo->u.real.map_count);
 563     if (--bo->u.real.map_count) {
 564         pipe_mutex_unlock(bo->u.real.map_mutex);
 565         return; /* it's been mapped multiple times */
 566     }
 567
 568     os_munmap(bo->u.real.ptr, bo->base.size);
 569     bo->u.real.ptr = NULL;
 570
 571     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 572        bo->rws->mapped_vram -= bo->base.size;
 573     else
 574        bo->rws->mapped_gtt -= bo->base.size;
 575     bo->rws->num_mapped_buffers--;
 576
 577     pipe_mutex_unlock(bo->u.real.map_mutex);
 578 }
 579
 580 static const struct pb_vtbl radeon_bo_vtbl = {
 581     radeon_bo_destroy_or_cache
 582     /* other functions are never called */
 583 };
 584
 585 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
 586                                           unsigned size, unsigned alignment,
 587                                           unsigned usage,
 588                                           unsigned initial_domains,
 589                                           unsigned flags,
 590                                           unsigned pb_cache_bucket)
 591 {
 592     struct radeon_bo *bo;
 593     struct drm_radeon_gem_create args;
 594     int r;
 595
 596     memset(&args, 0, sizeof(args));
 597
 598     assert(initial_domains);
 599     assert((initial_domains &
 600             ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
 601
 602     args.size = size;
 603     args.alignment = alignment;
 604     args.initial_domain = initial_domains;
 605     args.flags = 0;
 606
 607     if (flags & RADEON_FLAG_GTT_WC)
 608         args.flags |= RADEON_GEM_GTT_WC;
 609     if (flags & RADEON_FLAG_CPU_ACCESS)
 610         args.flags |= RADEON_GEM_CPU_ACCESS;
 611     if (flags & RADEON_FLAG_NO_CPU_ACCESS)
 612         args.flags |= RADEON_GEM_NO_CPU_ACCESS;
 613
 614     if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
 615                             &args, sizeof(args))) {
 616         fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
 617         fprintf(stderr, "radeon:    size      : %u bytes\n", size);
 618         fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
 619         fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
 620         fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
 621         return NULL;
 622     }
 623
 624     assert(args.handle != 0);
 625
 626     bo = CALLOC_STRUCT(radeon_bo);
 627     if (!bo)
 628         return NULL;
 629
 630     pipe_reference_init(&bo->base.reference, 1);
 631     bo->base.alignment = alignment;
 632     bo->base.usage = usage;
 633     bo->base.size = size;
 634     bo->base.vtbl = &radeon_bo_vtbl;
 635     bo->rws = rws;
 636     bo->handle = args.handle;
 637     bo->va = 0;
 638     bo->initial_domain = initial_domains;
 639     bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
 640     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
 641     pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
 642                         pb_cache_bucket);
 643
 644     if (rws->info.has_virtual_memory) {
 645         struct drm_radeon_gem_va va;
 646         unsigned va_gap_size;
 647
 648         va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
 649         bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);
 650
 651         va.handle = bo->handle;
 652         va.vm_id = 0;
 653         va.operation = RADEON_VA_MAP;
 654         va.flags = RADEON_VM_PAGE_READABLE |
 655                    RADEON_VM_PAGE_WRITEABLE |
 656                    RADEON_VM_PAGE_SNOOPED;
 657         va.offset = bo->va;
 658         r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
 659         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
 660             fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
 661             fprintf(stderr, "radeon:    size      : %d bytes\n", size);
 662             fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
 663             fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
 664             fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
 665             radeon_bo_destroy(&bo->base);
 666             return NULL;
 667         }
 668         mtx_lock(&rws->bo_handles_mutex);
 669         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
 670             struct pb_buffer *b = &bo->base;
 671             struct radeon_bo *old_bo =
 672                 util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
 673
 674             pipe_mutex_unlock(rws->bo_handles_mutex);
 675             pb_reference(&b, &old_bo->base);
 676             return radeon_bo(b);
 677         }
 678
 679         util_hash_table_set(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
 680         pipe_mutex_unlock(rws->bo_handles_mutex);
 681     }
 682
 683     if (initial_domains & RADEON_DOMAIN_VRAM)
 684         rws->allocated_vram += align(size, rws->info.gart_page_size);
 685     else if (initial_domains & RADEON_DOMAIN_GTT)
 686         rws->allocated_gtt += align(size, rws->info.gart_page_size);
 687
 688     return bo;
 689 }
 690
 691 bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
 692 {
 693    struct radeon_bo *bo = radeon_bo(_buf);
 694
 695    if (radeon_bo_is_referenced_by_any_cs(bo))
 696       return false;
 697
 698    return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
 699 }
 700
 701 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
 702 {
 703     struct radeon_bo *bo = NULL; /* fix container_of */
 704     bo = container_of(entry, bo, u.slab.entry);
 705
 706     return radeon_bo_can_reclaim(&bo->base);
 707 }
 708
 709 static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
 710 {
 711     struct radeon_bo *bo = radeon_bo(_buf);
 712
 713     assert(!bo->handle);
 714
 715     pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
 716 }
 717
 718 static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
 719     radeon_bo_slab_destroy
 720     /* other functions are never called */
 721 };
 722
 723 struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
 724                                      unsigned entry_size,
 725                                      unsigned group_index)
 726 {
 727     struct radeon_drm_winsys *ws = priv;
 728     struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
 729     enum radeon_bo_domain domains;
 730     enum radeon_bo_flag flags = 0;
 731     unsigned base_hash;
 732
 733     if (!slab)
 734         return NULL;
 735
 736     if (heap & 1)
 737         flags |= RADEON_FLAG_GTT_WC;
 738     if (heap & 2)
 739         flags |= RADEON_FLAG_CPU_ACCESS;
 740
 741     switch (heap >> 2) {
 742     case 0:
 743         domains = RADEON_DOMAIN_VRAM;
 744         break;
 745     default:
 746     case 1:
 747         domains = RADEON_DOMAIN_VRAM_GTT;
 748         break;
 749     case 2:
 750         domains = RADEON_DOMAIN_GTT;
 751         break;
 752     }
 753
 754     slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
 755                                                      64 * 1024, 64 * 1024,
 756                                                      domains, flags));
 757     if (!slab->buffer)
 758         goto fail;
 759
 760     assert(slab->buffer->handle);
 761
 762     slab->base.num_entries = slab->buffer->base.size / entry_size;
 763     slab->base.num_free = slab->base.num_entries;
 764     slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
 765     if (!slab->entries)
 766         goto fail_buffer;
 767
 768     LIST_INITHEAD(&slab->base.free);
 769
 770     base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
 771
 772     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
 773         struct radeon_bo *bo = &slab->entries[i];
 774
 775         bo->base.alignment = entry_size;
 776         bo->base.usage = slab->buffer->base.usage;
 777         bo->base.size = entry_size;
 778         bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
 779         bo->rws = ws;
 780         bo->va = slab->buffer->va + i * entry_size;
 781         bo->initial_domain = domains;
 782         bo->hash = base_hash + i;
 783         bo->u.slab.entry.slab = &slab->base;
 784         bo->u.slab.entry.group_index = group_index;
 785         bo->u.slab.real = slab->buffer;
 786
 787         LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
 788     }
 789
 790     return &slab->base;
 791
 792 fail_buffer:
 793     radeon_bo_reference(&slab->buffer, NULL);
 794 fail:
 795     FREE(slab);
 796     return NULL;
 797 }
 798
 799 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
 800 {
 801     struct radeon_slab *slab = (struct radeon_slab *)pslab;
 802
 803     for (unsigned i = 0; i < slab->base.num_entries; ++i) {
 804         struct radeon_bo *bo = &slab->entries[i];
 805         for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
 806             radeon_bo_reference(&bo->u.slab.fences[j], NULL);
 807         FREE(bo->u.slab.fences);
 808     }
 809
 810     FREE(slab->entries);
 811     radeon_bo_reference(&slab->buffer, NULL);
 812     FREE(slab);
 813 }
 814
 815 static unsigned eg_tile_split(unsigned tile_split)
 816 {
 817     switch (tile_split) {
 818     case 0:     tile_split = 64;    break;
 819     case 1:     tile_split = 128;   break;
 820     case 2:     tile_split = 256;   break;
 821     case 3:     tile_split = 512;   break;
 822     default:
 823     case 4:     tile_split = 1024;  break;
 824     case 5:     tile_split = 2048;  break;
 825     case 6:     tile_split = 4096;  break;
 826     }
 827     return tile_split;
 828 }
 829
 830 static unsigned eg_tile_split_rev(unsigned eg_tile_split)
 831 {
 832     switch (eg_tile_split) {
 833     case 64:    return 0;
 834     case 128:   return 1;
 835     case 256:   return 2;
 836     case 512:   return 3;
 837     default:
 838     case 1024:  return 4;
 839     case 2048:  return 5;
 840     case 4096:  return 6;
 841     }
 842 }
 843
 844 static void radeon_bo_get_metadata(struct pb_buffer *_buf,
 845                                    struct radeon_bo_metadata *md)
 846 {
 847     struct radeon_bo *bo = radeon_bo(_buf);
 848     struct drm_radeon_gem_set_tiling args;
 849
 850     assert(bo->handle && "must not be called for slab entries");
 851
 852     memset(&args, 0, sizeof(args));
 853
 854     args.handle = bo->handle;
 855
 856     drmCommandWriteRead(bo->rws->fd,
 857                         DRM_RADEON_GEM_GET_TILING,
 858                         &args,
 859                         sizeof(args));
 860
 861     md->microtile = RADEON_LAYOUT_LINEAR;
 862     md->macrotile = RADEON_LAYOUT_LINEAR;
 863     if (args.tiling_flags & RADEON_TILING_MICRO)
 864         md->microtile = RADEON_LAYOUT_TILED;
 865     else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
 866         md->microtile = RADEON_LAYOUT_SQUARETILED;
 867
 868     if (args.tiling_flags & RADEON_TILING_MACRO)
 869         md->macrotile = RADEON_LAYOUT_TILED;
 870
 871     md->bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
 872     md->bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
 873     md->tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
 874     md->mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
 875     md->tile_split = eg_tile_split(md->tile_split);
 876     md->scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
 877 }
 878
 879 static void radeon_bo_set_metadata(struct pb_buffer *_buf,
 880                                    struct radeon_bo_metadata *md)
 881 {
 882     struct radeon_bo *bo = radeon_bo(_buf);
 883     struct drm_radeon_gem_set_tiling args;
 884
 885     assert(bo->handle && "must not be called for slab entries");
 886
 887     memset(&args, 0, sizeof(args));
 888
 889     os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
 890
 891     if (md->microtile == RADEON_LAYOUT_TILED)
 892         args.tiling_flags |= RADEON_TILING_MICRO;
 893     else if (md->microtile == RADEON_LAYOUT_SQUARETILED)
 894         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
 895
 896     if (md->macrotile == RADEON_LAYOUT_TILED)
 897         args.tiling_flags |= RADEON_TILING_MACRO;
 898
 899     args.tiling_flags |= (md->bankw & RADEON_TILING_EG_BANKW_MASK) <<
 900         RADEON_TILING_EG_BANKW_SHIFT;
 901     args.tiling_flags |= (md->bankh & RADEON_TILING_EG_BANKH_MASK) <<
 902         RADEON_TILING_EG_BANKH_SHIFT;
 903     if (md->tile_split) {
 904         args.tiling_flags |= (eg_tile_split_rev(md->tile_split) &
 905                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
 906             RADEON_TILING_EG_TILE_SPLIT_SHIFT;
 907     }
 908     args.tiling_flags |= (md->mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
 909         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
 910
 911     if (bo->rws->gen >= DRV_SI && !md->scanout)
 912         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
 913
 914     args.handle = bo->handle;
 915     args.pitch = md->stride;
 916
 917     drmCommandWriteRead(bo->rws->fd,
 918                         DRM_RADEON_GEM_SET_TILING,
 919                         &args,
 920                         sizeof(args));
 921 }
 922
 923 static struct pb_buffer *
 924 radeon_winsys_bo_create(struct radeon_winsys *rws,
 925                         uint64_t size,
 926                         unsigned alignment,
 927                         enum radeon_bo_domain domain,
 928                         enum radeon_bo_flag flags)
 929 {
 930     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
 931     struct radeon_bo *bo;
 932     unsigned usage = 0, pb_cache_bucket;
 933
 934     /* Only 32-bit sizes are supported. */
 935     if (size > UINT_MAX)
 936         return NULL;
 937
 938     /* Sub-allocate small buffers from slabs. */
 939     if (!(flags & RADEON_FLAG_HANDLE) &&
 940         size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
 941         ws->info.has_virtual_memory &&
 942         alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
 943         struct pb_slab_entry *entry;
 944         unsigned heap = 0;
 945
 946         if (flags & RADEON_FLAG_GTT_WC)
 947             heap |= 1;
 948         if (flags & RADEON_FLAG_CPU_ACCESS)
 949             heap |= 2;
 950         if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
 951             goto no_slab;
 952
 953         switch (domain) {
 954         case RADEON_DOMAIN_VRAM:
 955             heap |= 0 * 4;
 956             break;
 957         case RADEON_DOMAIN_VRAM_GTT:
 958             heap |= 1 * 4;
 959             break;
 960         case RADEON_DOMAIN_GTT:
 961             heap |= 2 * 4;
 962             break;
 963         default:
 964             goto no_slab;
 965         }
 966
 967         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
 968         if (!entry) {
 969             /* Clear the cache and try again. */
 970             pb_cache_release_all_buffers(&ws->bo_cache);
 971
 972             entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
 973         }
 974         if (!entry)
 975             return NULL;
 976
 977         bo = NULL;
 978         bo = container_of(entry, bo, u.slab.entry);
 979
 980         pipe_reference_init(&bo->base.reference, 1);
 981
 982         return &bo->base;
 983     }
 984 no_slab:
 985
 986     /* This flag is irrelevant for the cache. */
 987     flags &= ~RADEON_FLAG_HANDLE;
 988
 989     /* Align size to page size. This is the minimum alignment for normal
 990      * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
 991      * like constant/uniform buffers, can benefit from better and more reuse.
 992      */
 993     size = align(size, ws->info.gart_page_size);
 994     alignment = align(alignment, ws->info.gart_page_size);
 995
 996     /* Only set one usage bit each for domains and flags, or the cache manager
 997      * might consider different sets of domains / flags compatible
 998      */
 999     if (domain == RADEON_DOMAIN_VRAM_GTT)
1000         usage = 1 << 2;
1001     else
1002         usage = (unsigned)domain >> 1;
1003     assert(flags < sizeof(usage) * 8 - 3);
1004     usage |= 1 << (flags + 3);
1005
1006     /* Determine the pb_cache bucket for minimizing pb_cache misses. */
1007     pb_cache_bucket = 0;
1008     if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
1009        pb_cache_bucket += 1;
1010     if (flags == RADEON_FLAG_GTT_WC) /* WC */
1011        pb_cache_bucket += 2;
1012     assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
1013
1014     bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
1015                                            usage, pb_cache_bucket));
1016     if (bo)
1017         return &bo->base;
1018
1019     bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
1020                           pb_cache_bucket);
1021     if (!bo) {
1022         /* Clear the cache and try again. */
1023         pb_slabs_reclaim(&ws->bo_slabs);
1024         pb_cache_release_all_buffers(&ws->bo_cache);
1025         bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
1026                               pb_cache_bucket);
1027         if (!bo)
1028             return NULL;
1029     }
1030
1031     bo->u.real.use_reusable_pool = true;
1032
1033     mtx_lock(&ws->bo_handles_mutex);
1034     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1035     pipe_mutex_unlock(ws->bo_handles_mutex);
1036
1037     return &bo->base;
1038 }
1039
1040 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1041                                                    void *pointer, uint64_t size)
1042 {
1043     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1044     struct drm_radeon_gem_userptr args;
1045     struct radeon_bo *bo;
1046     int r;
1047
1048     bo = CALLOC_STRUCT(radeon_bo);
1049     if (!bo)
1050         return NULL;
1051
1052     memset(&args, 0, sizeof(args));
1053     args.addr = (uintptr_t)pointer;
1054     args.size = align(size, ws->info.gart_page_size);
1055     args.flags = RADEON_GEM_USERPTR_ANONONLY |
1056         RADEON_GEM_USERPTR_VALIDATE |
1057         RADEON_GEM_USERPTR_REGISTER;
1058     if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1059                             &args, sizeof(args))) {
1060         FREE(bo);
1061         return NULL;
1062     }
1063
1064     assert(args.handle != 0);
1065
1066     mtx_lock(&ws->bo_handles_mutex);
1067
1068     /* Initialize it. */
1069     pipe_reference_init(&bo->base.reference, 1);
1070     bo->handle = args.handle;
1071     bo->base.alignment = 0;
1072     bo->base.size = size;
1073     bo->base.vtbl = &radeon_bo_vtbl;
1074     bo->rws = ws;
1075     bo->user_ptr = pointer;
1076     bo->va = 0;
1077     bo->initial_domain = RADEON_DOMAIN_GTT;
1078     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1079     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1080
1081     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1082
1083     pipe_mutex_unlock(ws->bo_handles_mutex);
1084
1085     if (ws->info.has_virtual_memory) {
1086         struct drm_radeon_gem_va va;
1087
1088         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1089
1090         va.handle = bo->handle;
1091         va.operation = RADEON_VA_MAP;
1092         va.vm_id = 0;
1093         va.offset = bo->va;
1094         va.flags = RADEON_VM_PAGE_READABLE |
1095                    RADEON_VM_PAGE_WRITEABLE |
1096                    RADEON_VM_PAGE_SNOOPED;
1097         va.offset = bo->va;
1098         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1099         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1100             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1101             radeon_bo_destroy(&bo->base);
1102             return NULL;
1103         }
1104         mtx_lock(&ws->bo_handles_mutex);
1105         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1106             struct pb_buffer *b = &bo->base;
1107             struct radeon_bo *old_bo =
1108                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1109
1110             pipe_mutex_unlock(ws->bo_handles_mutex);
1111             pb_reference(&b, &old_bo->base);
1112             return b;
1113         }
1114
1115         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1116         pipe_mutex_unlock(ws->bo_handles_mutex);
1117     }
1118
1119     ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1120
1121     return (struct pb_buffer*)bo;
1122 }
1123
1124 static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
1125                                                       struct winsys_handle *whandle,
1126                                                       unsigned *stride,
1127                                                       unsigned *offset)
1128 {
1129     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1130     struct radeon_bo *bo;
1131     int r;
1132     unsigned handle;
1133     uint64_t size = 0;
1134
1135     if (!offset && whandle->offset != 0) {
1136         fprintf(stderr, "attempt to import unsupported winsys offset %u\n",
1137                 whandle->offset);
1138         return NULL;
1139     }
1140
1141     /* We must maintain a list of pairs <handle, bo>, so that we always return
1142      * the same BO for one particular handle. If we didn't do that and created
1143      * more than one BO for the same handle and then relocated them in a CS,
1144      * we would hit a deadlock in the kernel.
1145      *
1146      * The list of pairs is guarded by a mutex, of course. */
1147     mtx_lock(&ws->bo_handles_mutex);
1148
1149     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1150         /* First check if there already is an existing bo for the handle. */
1151         bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
1152     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1153         /* We must first get the GEM handle, as fds are unreliable keys */
1154         r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
1155         if (r)
1156             goto fail;
1157         bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
1158     } else {
1159         /* Unknown handle type */
1160         goto fail;
1161     }
1162
1163     if (bo) {
1164         /* Increase the refcount. */
1165         struct pb_buffer *b = NULL;
1166         pb_reference(&b, &bo->base);
1167         goto done;
1168     }
1169
1170     /* There isn't, create a new one. */
1171     bo = CALLOC_STRUCT(radeon_bo);
1172     if (!bo) {
1173         goto fail;
1174     }
1175
1176     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1177         struct drm_gem_open open_arg = {};
1178         memset(&open_arg, 0, sizeof(open_arg));
1179         /* Open the BO. */
1180         open_arg.name = whandle->handle;
1181         if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
1182             FREE(bo);
1183             goto fail;
1184         }
1185         handle = open_arg.handle;
1186         size = open_arg.size;
1187         bo->flink_name = whandle->handle;
1188     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1189         size = lseek(whandle->handle, 0, SEEK_END);
1190         /*
1191          * Could check errno to determine whether the kernel is new enough, but
1192          * it doesn't really matter why this failed, just that it failed.
1193          */
1194         if (size == (off_t)-1) {
1195             FREE(bo);
1196             goto fail;
1197         }
1198         lseek(whandle->handle, 0, SEEK_SET);
1199     }
1200
1201     assert(handle != 0);
1202
1203     bo->handle = handle;
1204
1205     /* Initialize it. */
1206     pipe_reference_init(&bo->base.reference, 1);
1207     bo->base.alignment = 0;
1208     bo->base.size = (unsigned) size;
1209     bo->base.vtbl = &radeon_bo_vtbl;
1210     bo->rws = ws;
1211     bo->va = 0;
1212     bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1213     (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1214
1215     if (bo->flink_name)
1216         util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1217
1218     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1219
1220 done:
1221     pipe_mutex_unlock(ws->bo_handles_mutex);
1222
1223     if (stride)
1224         *stride = whandle->stride;
1225     if (offset)
1226         *offset = whandle->offset;
1227
1228     if (ws->info.has_virtual_memory && !bo->va) {
1229         struct drm_radeon_gem_va va;
1230
1231         bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
1232
1233         va.handle = bo->handle;
1234         va.operation = RADEON_VA_MAP;
1235         va.vm_id = 0;
1236         va.offset = bo->va;
1237         va.flags = RADEON_VM_PAGE_READABLE |
1238                    RADEON_VM_PAGE_WRITEABLE |
1239                    RADEON_VM_PAGE_SNOOPED;
1240         va.offset = bo->va;
1241         r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1242         if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1243             fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1244             radeon_bo_destroy(&bo->base);
1245             return NULL;
1246         }
1247         mtx_lock(&ws->bo_handles_mutex);
1248         if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1249             struct pb_buffer *b = &bo->base;
1250             struct radeon_bo *old_bo =
1251                 util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
1252
1253             pipe_mutex_unlock(ws->bo_handles_mutex);
1254             pb_reference(&b, &old_bo->base);
1255             return b;
1256         }
1257
1258         util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
1259         pipe_mutex_unlock(ws->bo_handles_mutex);
1260     }
1261
1262     bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
1263
1264     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1265         ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
1266     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1267         ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1268
1269     return (struct pb_buffer*)bo;
1270
1271 fail:
1272     pipe_mutex_unlock(ws->bo_handles_mutex);
1273     return NULL;
1274 }
1275
1276 static bool radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
1277                                         unsigned stride, unsigned offset,
1278                                         unsigned slice_size,
1279                                         struct winsys_handle *whandle)
1280 {
1281     struct drm_gem_flink flink;
1282     struct radeon_bo *bo = radeon_bo(buffer);
1283     struct radeon_drm_winsys *ws = bo->rws;
1284
1285     if (!bo->handle) {
1286         offset += bo->va - bo->u.slab.real->va;
1287         bo = bo->u.slab.real;
1288     }
1289
1290     memset(&flink, 0, sizeof(flink));
1291
1292     bo->u.real.use_reusable_pool = false;
1293
1294     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
1295         if (!bo->flink_name) {
1296             flink.handle = bo->handle;
1297
1298             if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1299                 return false;
1300             }
1301
1302             bo->flink_name = flink.name;
1303
1304             mtx_lock(&ws->bo_handles_mutex);
1305             util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1306             pipe_mutex_unlock(ws->bo_handles_mutex);
1307         }
1308         whandle->handle = bo->flink_name;
1309     } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
1310         whandle->handle = bo->handle;
1311     } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
1312         if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1313             return false;
1314     }
1315
1316     whandle->stride = stride;
1317     whandle->offset = offset;
1318     whandle->offset += slice_size * whandle->layer;
1319
1320     return true;
1321 }
1322
1323 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
1324 {
1325    return ((struct radeon_bo*)buf)->user_ptr != NULL;
1326 }
1327
1328 static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
1329 {
1330     return ((struct radeon_bo*)buf)->va;
1331 }
1332
1333 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
1334 {
1335     struct radeon_bo *bo = radeon_bo(buf);
1336
1337     if (bo->handle)
1338         return 0;
1339
1340     return bo->va - bo->u.slab.real->va;
1341 }
1342
1343 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1344 {
1345     ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1346     ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1347     ws->base.buffer_map = radeon_bo_map;
1348     ws->base.buffer_unmap = radeon_bo_unmap;
1349     ws->base.buffer_wait = radeon_bo_wait;
1350     ws->base.buffer_create = radeon_winsys_bo_create;
1351     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1352     ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1353     ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1354     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1355     ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1356     ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1357     ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1358 }