src/gallium/drivers/iris/iris_bufmgr.c

   1 /*
   2  * Copyright © 2017 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * @file iris_bufmgr.c
  26  *
  27  * The Iris buffer manager.
  28  *
  29  * XXX: write better comments
  30  * - BOs
  31  * - Explain BO cache
  32  * - main interface to GEM in the kernel
  33  */
  34
  35 #ifdef HAVE_CONFIG_H
  36 #include "config.h"
  37 #endif
  38
  39 #include <xf86drm.h>
  40 #include <util/u_atomic.h>
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45 #include <unistd.h>
  46 #include <assert.h>
  47 #include <sys/ioctl.h>
  48 #include <sys/mman.h>
  49 #include <sys/stat.h>
  50 #include <sys/types.h>
  51 #include <stdbool.h>
  52 #include <time.h>
  53
  54 #include "errno.h"
  55 #ifndef ETIME
  56 #define ETIME ETIMEDOUT
  57 #endif
  58 #include "common/gen_clflush.h"
  59 #include "common/gen_debug.h"
  60 #include "common/gen_gem.h"
  61 #include "dev/gen_device_info.h"
  62 #include "main/macros.h"
  63 #include "util/debug.h"
  64 #include "util/macros.h"
  65 #include "util/hash_table.h"
  66 #include "util/list.h"
  67 #include "util/u_dynarray.h"
  68 #include "util/vma.h"
  69 #include "iris_bufmgr.h"
  70 #include "iris_context.h"
  71 #include "string.h"
  72
  73 #include "drm-uapi/i915_drm.h"
  74
/* VG(x) expands to x when built with Valgrind support (HAVE_VALGRIND) and to
 * nothing otherwise, so Valgrind client requests compile away in normal
 * builds.
 */
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
 * leaked. All because it does not call VG(cli_free) from its
 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
 * an allocation, we mark it available for use upon mmapping and remove
 * it upon unmapping.
 */
#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))

/* Page granularity, in bytes, used for the bucket size calculations below. */
#define PAGE_SIZE 4096

/* NOTE(review): presumably selects which debug category the DBG() macro
 * checks for this file - confirm against the DBG definition.
 */
#define FILE_DEBUG_FLAG DEBUG_BUFMGR
  96
  97 /**
  98  * Call ioctl, restarting if it is interupted
  99  */
 100 int
 101 drm_ioctl(int fd, unsigned long request, void *arg)
 102 {
 103     int ret;
 104
 105     do {
 106         ret = ioctl(fd, request, arg);
 107     } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
 108     return ret;
 109 }
 110
 111 static inline int
 112 atomic_add_unless(int *v, int add, int unless)
 113 {
 114    int c, old;
 115    c = p_atomic_read(v);
 116    while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
 117       c = old;
 118    return c == unless;
 119 }
 120
/**
 * Iris fixed-size bucketing VMA allocator.
 *
 * The BO cache maintains "cache buckets" for buffers of various sizes.
 * All buffers in a given bucket are identically sized - when allocating,
 * we always round up to the bucket size.  This means that virtually all
 * allocations are fixed-size; only buffers which are too large to fit in
 * a bucket can be variably-sized.
 *
 * We create an allocator for each bucket.  Each contains a free-list, where
 * each node contains a <starting address, 64-bit bitmap> pair.  Each bit
 * represents a bucket-sized block of memory.  (At the first level, each
 * bit corresponds to a page.  For the second bucket, bits correspond to
 * two pages, and so on.)  1 means a block is free, and 0 means it's in-use.
 * The lowest bit in the bitmap is for the first block.
 *
 * This makes allocations cheap - any bit of any node will do.  We can pick
 * the last node of the list and use ffsll() to find a free block.  If there
 * are none, we allocate 64 blocks from a larger allocator - either a bigger
 * bucketing allocator, or a fallback top-level allocator for large objects.
 */
struct vma_bucket_node {
   /** Address of the first of the 64 blocks covered by this node (stored
    *  in 48-bit, non-canonical form; aligned to 64 * bucket size).
    */
   uint64_t start_address;

   /** One bit per block: 1 = free, 0 = in use.  Bit 0 is the first block. */
   uint64_t bitmap;
};
 146
/**
 * One size class of the BO cache, together with the per-memory-zone
 * fixed-size VMA allocators for blocks of that size.
 */
struct bo_cache_bucket {
   /** List of cached BOs.  Freed BOs are appended at the tail. */
   struct list_head head;

   /** Size of this bucket, in bytes. */
   uint64_t size;

   /** Per-memzone arrays of vma_bucket_nodes that still have at least one
    *  free block (a node is dropped once its bitmap becomes zero and is
    *  re-added when one of its blocks is freed).
    */
   struct util_dynarray vma_list[IRIS_MEMZONE_COUNT];
};
 157
/**
 * Buffer manager state: the DRM file descriptor, the BO cache, lookup
 * tables for shared BOs and the virtual address allocators.
 */
struct iris_bufmgr {
   /** DRM file descriptor all ioctls are issued on. */
   int fd;

   /** Taken around BO cache, hash table and VMA allocator manipulation. */
   mtx_t lock;

   /** Array of lists of cached gem objects, one per bucket size.
    *
    * Bucket sizes are not only powers of two: bucket_for_size() indexes
    * this array as row * 4 + column, with four sizes per row.
    * NOTE(review): 14 * 4 presumably means 14 rows of 4 columns - confirm
    * against the code that initializes the buckets.
    */
   struct bo_cache_bucket cache_bucket[14 * 4];

   /** Number of valid entries in cache_bucket. */
   int num_buckets;

   /** Timestamp (seconds) of the last cleanup_bo_cache() pass. */
   time_t time;

   /** Maps a global name to its iris_bo. */
   struct hash_table *name_table;

   /** Maps a GEM handle to its iris_bo (for BOs marked external). */
   struct hash_table *handle_table;

   /** Per-memzone heap used for allocations that don't fit a bucket. */
   struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];

   /** Copied into bo->cache_coherent for newly allocated BOs. */
   bool has_llc:1;

   /** When set, freed reusable BOs are kept in the cache for reuse. */
   bool bo_reuse:1;
};
 176
/* Forward declarations for helpers that are used before their definition. */

/* Used by bo_alloc_internal(); a non-zero return is treated as failure. */
static int bo_set_tiling_internal(struct iris_bo *bo, uint32_t tiling_mode,
                                  uint32_t stride);

/* Used by the cache purge/allocation paths ahead of its definition. */
static void bo_free(struct iris_bo *bo);

/* Needed because bucket_vma_alloc() and vma_alloc() call each other. */
static uint64_t vma_alloc(struct iris_bufmgr *bufmgr,
                          enum iris_memory_zone memzone,
                          uint64_t size, uint64_t alignment);
 185
 186 static uint32_t
 187 key_hash_uint(const void *key)
 188 {
 189    return _mesa_hash_data(key, 4);
 190 }
 191
 192 static bool
 193 key_uint_equal(const void *a, const void *b)
 194 {
 195    return *((unsigned *) a) == *((unsigned *) b);
 196 }
 197
 198 static struct iris_bo *
 199 hash_find_bo(struct hash_table *ht, unsigned int key)
 200 {
 201    struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
 202    return entry ? (struct iris_bo *) entry->data : NULL;
 203 }
 204
 205 /**
 206  * This function finds the correct bucket fit for the input size.
 207  * The function works with O(1) complexity when the requested size
 208  * was queried instead of iterating the size through all the buckets.
 209  */
 210 static struct bo_cache_bucket *
 211 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size)
 212 {
 213    /* Calculating the pages and rounding up to the page size. */
 214    const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 215
 216    /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
 217     *        in pages                      stride   size
 218     *   0:   1  2  3  4 -> 30 30 30 30        4       1
 219     *   1:   5  6  7  8 -> 29 29 29 29        4       1
 220     *   2:  10 12 14 16 -> 28 28 28 28        8       2
 221     *   3:  20 24 28 32 -> 27 27 27 27       16       4
 222     */
 223    const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
 224    const unsigned row_max_pages = 4 << row;
 225
 226    /* The '& ~2' is the special case for row 1. In row 1, max pages /
 227     * 2 is 2, but the previous row maximum is zero (because there is
 228     * no previous row). All row maximum sizes are power of 2, so that
 229     * is the only case where that bit will be set.
 230     */
 231    const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
 232    int col_size_log2 = row - 1;
 233    col_size_log2 += (col_size_log2 < 0);
 234
 235    const unsigned col = (pages - prev_row_max_pages +
 236                         ((1 << col_size_log2) - 1)) >> col_size_log2;
 237
 238    /* Calculating the index based on the row and column. */
 239    const unsigned index = (row * 4) + (col - 1);
 240
 241    return (index < bufmgr->num_buckets) ?
 242           &bufmgr->cache_bucket[index] : NULL;
 243 }
 244
 245 static enum iris_memory_zone
 246 memzone_for_address(uint64_t address)
 247 {
 248    STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START);
 249    STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START);
 250    STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_SHADER_START);
 251    STATIC_ASSERT(IRIS_BINDER_ADDRESS == IRIS_MEMZONE_SURFACE_START);
 252    STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
 253
 254    if (address >= IRIS_MEMZONE_OTHER_START)
 255       return IRIS_MEMZONE_OTHER;
 256
 257    if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
 258       return IRIS_MEMZONE_BORDER_COLOR_POOL;
 259
 260    if (address > IRIS_MEMZONE_DYNAMIC_START)
 261       return IRIS_MEMZONE_DYNAMIC;
 262
 263    if (address == IRIS_BINDER_ADDRESS)
 264       return IRIS_MEMZONE_BINDER;
 265
 266    if (address > IRIS_MEMZONE_SURFACE_START)
 267       return IRIS_MEMZONE_SURFACE;
 268
 269    return IRIS_MEMZONE_SHADER;
 270 }
 271
 272 static uint64_t
 273 bucket_vma_alloc(struct iris_bufmgr *bufmgr,
 274                  struct bo_cache_bucket *bucket,
 275                  enum iris_memory_zone memzone)
 276 {
 277    struct util_dynarray *vma_list = &bucket->vma_list[memzone];
 278    struct vma_bucket_node *node;
 279
 280    if (vma_list->size == 0) {
 281       /* This bucket allocator is out of space - allocate a new block of
 282        * memory for 64 blocks from a larger allocator (either a larger
 283        * bucket or util_vma).
 284        *
 285        * We align the address to the node size (64 blocks) so that
 286        * bucket_vma_free can easily compute the starting address of this
 287        * block by rounding any address we return down to the node size.
 288        *
 289        * Set the first bit used, and return the start address.
 290        */
 291       const uint64_t node_size = 64ull * bucket->size;
 292       node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
 293
 294       if (unlikely(!node))
 295          return 0ull;
 296
 297       uint64_t addr = vma_alloc(bufmgr, memzone, node_size, node_size);
 298       node->start_address = gen_48b_address(addr);
 299       node->bitmap = ~1ull;
 300       return node->start_address;
 301    }
 302
 303    /* Pick any bit from any node - they're all the right size and free. */
 304    node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node);
 305    int bit = ffsll(node->bitmap) - 1;
 306    assert(bit >= 0 && bit <= 63);
 307
 308    /* Reserve the memory by clearing the bit. */
 309    assert((node->bitmap & (1ull << bit)) != 0ull);
 310    node->bitmap &= ~(1ull << bit);
 311
 312    uint64_t addr = node->start_address + bit * bucket->size;
 313
 314    /* If this node is now completely full, remove it from the free list. */
 315    if (node->bitmap == 0ull) {
 316       (void) util_dynarray_pop(vma_list, struct vma_bucket_node);
 317    }
 318
 319    return addr;
 320 }
 321
 322 static void
 323 bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address)
 324 {
 325    enum iris_memory_zone memzone = memzone_for_address(address);
 326    struct util_dynarray *vma_list = &bucket->vma_list[memzone];
 327    const uint64_t node_bytes = 64ull * bucket->size;
 328    struct vma_bucket_node *node = NULL;
 329
 330    /* bucket_vma_alloc allocates 64 blocks at a time, and aligns it to
 331     * that 64 block size.  So, we can round down to get the starting address.
 332     */
 333    uint64_t start = (address / node_bytes) * node_bytes;
 334
 335    /* Dividing the offset from start by bucket size gives us the bit index. */
 336    int bit = (address - start) / bucket->size;
 337
 338    assert(start + bit * bucket->size == address);
 339
 340    util_dynarray_foreach(vma_list, struct vma_bucket_node, cur) {
 341       if (cur->start_address == start) {
 342          node = cur;
 343          break;
 344       }
 345    }
 346
 347    if (!node) {
 348       /* No node - the whole group of 64 blocks must have been in-use. */
 349       node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
 350
 351       if (unlikely(!node))
 352          return; /* bogus, leaks some GPU VMA, but nothing we can do... */
 353
 354       node->start_address = start;
 355       node->bitmap = 0ull;
 356    }
 357
 358    /* Set the bit to return the memory. */
 359    assert((node->bitmap & (1ull << bit)) == 0ull);
 360    node->bitmap |= 1ull << bit;
 361
 362    /* The block might be entirely free now, and if so, we could return it
 363     * to the larger allocator.  But we may as well hang on to it, in case
 364     * we get more allocations at this block size.
 365     */
 366 }
 367
 368 static struct bo_cache_bucket *
 369 get_bucket_allocator(struct iris_bufmgr *bufmgr, uint64_t size)
 370 {
 371    /* Skip using the bucket allocator for very large sizes, as it allocates
 372     * 64 of them and this can balloon rather quickly.
 373     */
 374    if (size > 1024 * PAGE_SIZE)
 375       return NULL;
 376
 377    struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size);
 378
 379    if (bucket && bucket->size == size)
 380       return bucket;
 381
 382    return NULL;
 383 }
 384
 385 /**
 386  * Allocate a section of virtual memory for a buffer, assigning an address.
 387  *
 388  * This uses either the bucket allocator for the given size, or the large
 389  * object allocator (util_vma).
 390  */
 391 static uint64_t
 392 vma_alloc(struct iris_bufmgr *bufmgr,
 393           enum iris_memory_zone memzone,
 394           uint64_t size,
 395           uint64_t alignment)
 396 {
 397    if (memzone == IRIS_MEMZONE_BINDER)
 398       return IRIS_BINDER_ADDRESS;
 399    else if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
 400       return IRIS_BORDER_COLOR_POOL_ADDRESS;
 401
 402    struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
 403    uint64_t addr;
 404
 405    if (bucket) {
 406       addr = bucket_vma_alloc(bufmgr, bucket, memzone);
 407    } else {
 408       addr = util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size,
 409                                  alignment);
 410    }
 411
 412    assert((addr >> 48ull) == 0);
 413    assert((addr % alignment) == 0);
 414
 415    return gen_canonical_address(addr);
 416 }
 417
 418 static void
 419 vma_free(struct iris_bufmgr *bufmgr,
 420          uint64_t address,
 421          uint64_t size)
 422 {
 423    if (address == IRIS_BINDER_ADDRESS ||
 424        address == IRIS_BORDER_COLOR_POOL_ADDRESS)
 425       return;
 426
 427    /* Un-canonicalize the address. */
 428    address = gen_48b_address(address);
 429
 430    if (address == 0ull)
 431       return;
 432
 433    struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
 434
 435    if (bucket) {
 436       bucket_vma_free(bucket, address);
 437    } else {
 438       enum iris_memory_zone memzone = memzone_for_address(address);
 439       util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
 440    }
 441 }
 442
 443 int
 444 iris_bo_busy(struct iris_bo *bo)
 445 {
 446    struct iris_bufmgr *bufmgr = bo->bufmgr;
 447    struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };
 448
 449    int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
 450    if (ret == 0) {
 451       bo->idle = !busy.busy;
 452       return busy.busy;
 453    }
 454    return false;
 455 }
 456
 457 int
 458 iris_bo_madvise(struct iris_bo *bo, int state)
 459 {
 460    struct drm_i915_gem_madvise madv = {
 461       .handle = bo->gem_handle,
 462       .madv = state,
 463       .retained = 1,
 464    };
 465
 466    drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 467
 468    return madv.retained;
 469 }
 470
 471 /* drop the oldest entries that have been purged by the kernel */
 472 static void
 473 iris_bo_cache_purge_bucket(struct iris_bufmgr *bufmgr,
 474                           struct bo_cache_bucket *bucket)
 475 {
 476    list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
 477       if (iris_bo_madvise(bo, I915_MADV_DONTNEED))
 478          break;
 479
 480       list_del(&bo->head);
 481       bo_free(bo);
 482    }
 483 }
 484
/**
 * Allocate a buffer object, preferring an idle one from the BO cache.
 *
 * The requested size is rounded up to the size of the matching cache
 * bucket (if there is one).  If the BO at the front of that bucket is idle
 * it is reused; otherwise a new GEM object is created.  Finally the BO gets
 * a GPU virtual address in \p memzone, unless it already has one there.
 *
 * The whole cache lookup / creation / address assignment runs with
 * bufmgr->lock held.
 *
 * \param bufmgr       buffer manager to allocate from
 * \param name         debug name stored in the BO (pointer is kept, not copied)
 * \param size         requested size in bytes
 * \param memzone      memory zone the BO's address must come from
 * \param flags        BO_ALLOC_ZEROED requests zero-filled contents
 * \param tiling_mode  tiling mode to set on the BO
 * \param stride       stride passed along with the tiling mode
 *
 * \return the new BO with a reference count of 1, or NULL on failure.
 */
static struct iris_bo *
bo_alloc_internal(struct iris_bufmgr *bufmgr,
                  const char *name,
                  uint64_t size,
                  enum iris_memory_zone memzone,
                  unsigned flags,
                  uint32_t tiling_mode,
                  uint32_t stride)
{
   struct iris_bo *bo;
   unsigned int page_size = getpagesize();
   int ret;
   struct bo_cache_bucket *bucket;
   bool alloc_from_cache;
   uint64_t bo_size;
   bool zeroed = false;

   if (flags & BO_ALLOC_ZEROED)
      zeroed = true;

   /* Round the allocated size up to the size of the matching bucket. */
   bucket = bucket_for_size(bufmgr, size);

   /* If we don't have caching at this size, don't actually round the
    * allocation up (beyond a minimum of one page).
    */
   if (bucket == NULL) {
      bo_size = size;
      if (bo_size < page_size)
         bo_size = page_size;
   } else {
      bo_size = bucket->size;
   }

   mtx_lock(&bufmgr->lock);
   /* Get a buffer out of the cache if available */
retry:
   alloc_from_cache = false;
   if (bucket != NULL && !list_empty(&bucket->head)) {
      /* If the BO at the front of the bucket's list is idle, then reuse
       * it.  (Freed BOs are appended at the tail, so this is the one that
       * has been in the cache the longest.)  Otherwise, allocate a fresh
       * buffer to avoid stalling.
       */
      bo = LIST_ENTRY(struct iris_bo, bucket->head.next, head);
      if (!iris_bo_busy(bo)) {
         alloc_from_cache = true;
         list_del(&bo->head);
      }

      if (alloc_from_cache) {
         /* The kernel may have purged the pages while the BO sat in the
          * cache.  If so, throw it away along with any other purged BOs
          * in this bucket, and try again.
          */
         if (!iris_bo_madvise(bo, I915_MADV_WILLNEED)) {
            bo_free(bo);
            iris_bo_cache_purge_bucket(bufmgr, bucket);
            goto retry;
         }

         if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
            bo_free(bo);
            goto retry;
         }

         /* Cached BOs still hold their old contents, so clear them by
          * hand when zeroed memory was requested.
          */
         if (zeroed) {
            void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
            if (!map) {
               bo_free(bo);
               goto retry;
            }
            memset(map, 0, bo_size);
         }
      }
   }

   if (alloc_from_cache) {
      /* If the cached BO isn't in the right memory zone, free the old
       * memory and assign it a new address.
       */
      if (memzone != memzone_for_address(bo->gtt_offset)) {
         vma_free(bufmgr, bo->gtt_offset, bo_size);
         bo->gtt_offset = 0ull;
      }
   } else {
      bo = calloc(1, sizeof(*bo));
      if (!bo)
         goto err;

      bo->size = bo_size;
      bo->idle = true;

      struct drm_i915_gem_create create = { .size = bo_size };

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
      if (ret != 0) {
         /* No GEM handle exists yet, so a plain free() is enough here. */
         free(bo);
         goto err;
      }

      bo->gem_handle = create.handle;

      bo->bufmgr = bufmgr;

      bo->tiling_mode = I915_TILING_NONE;
      bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
      bo->stride = 0;

      if (bo_set_tiling_internal(bo, tiling_mode, stride))
         goto err_free;

      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      struct drm_i915_gem_set_domain sd = {
         .handle = bo->gem_handle,
         .read_domains = I915_GEM_DOMAIN_CPU,
         .write_domain = 0,
      };

      if (drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
         goto err_free;
   }

   /* Common (re)initialization for both cached and freshly created BOs. */
   bo->name = name;
   p_atomic_set(&bo->refcount, 1);
   bo->reusable = true;
   bo->cache_coherent = bufmgr->has_llc;
   bo->index = -1;
   bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;

   /* A zero offset means "no address yet": either a brand new BO, or a
    * cached one whose address was released above.
    */
   if (bo->gtt_offset == 0ull) {
      bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1);

      if (bo->gtt_offset == 0ull)
         goto err_free;
   }

   mtx_unlock(&bufmgr->lock);

   DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
       (unsigned long long) size);

   return bo;

err_free:
   bo_free(bo);
err:
   mtx_unlock(&bufmgr->lock);
   return NULL;
}
 635
 636 struct iris_bo *
 637 iris_bo_alloc(struct iris_bufmgr *bufmgr,
 638               const char *name,
 639               uint64_t size,
 640               enum iris_memory_zone memzone)
 641 {
 642    return bo_alloc_internal(bufmgr, name, size, memzone,
 643                             0, I915_TILING_NONE, 0);
 644 }
 645
 646 struct iris_bo *
 647 iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr, const char *name,
 648                     uint64_t size, enum iris_memory_zone memzone,
 649                     uint32_t tiling_mode, uint32_t pitch, unsigned flags)
 650 {
 651    return bo_alloc_internal(bufmgr, name, size, memzone,
 652                             flags, tiling_mode, pitch);
 653 }
 654
 655 /**
 656  * Returns a iris_bo wrapping the given buffer object handle.
 657  *
 658  * This can be used when one application needs to pass a buffer object
 659  * to another.
 660  */
 661 struct iris_bo *
 662 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
 663                              const char *name, unsigned int handle)
 664 {
 665    struct iris_bo *bo;
 666
 667    /* At the moment most applications only have a few named bo.
 668     * For instance, in a DRI client only the render buffers passed
 669     * between X and the client are named. And since X returns the
 670     * alternating names for the front/back buffer a linear search
 671     * provides a sufficiently fast match.
 672     */
 673    mtx_lock(&bufmgr->lock);
 674    bo = hash_find_bo(bufmgr->name_table, handle);
 675    if (bo) {
 676       iris_bo_reference(bo);
 677       goto out;
 678    }
 679
 680    struct drm_gem_open open_arg = { .name = handle };
 681    int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
 682    if (ret != 0) {
 683       DBG("Couldn't reference %s handle 0x%08x: %s\n",
 684           name, handle, strerror(errno));
 685       bo = NULL;
 686       goto out;
 687    }
 688    /* Now see if someone has used a prime handle to get this
 689     * object from the kernel before by looking through the list
 690     * again for a matching gem_handle
 691     */
 692    bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
 693    if (bo) {
 694       iris_bo_reference(bo);
 695       goto out;
 696    }
 697
 698    bo = calloc(1, sizeof(*bo));
 699    if (!bo)
 700       goto out;
 701
 702    p_atomic_set(&bo->refcount, 1);
 703
 704    bo->size = open_arg.size;
 705    bo->gtt_offset = 0;
 706    bo->bufmgr = bufmgr;
 707    bo->gem_handle = open_arg.handle;
 708    bo->name = name;
 709    bo->global_name = handle;
 710    bo->reusable = false;
 711    bo->external = true;
 712    bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
 713    bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
 714
 715    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
 716    _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
 717
 718    struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
 719    ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
 720    if (ret != 0)
 721       goto err_unref;
 722
 723    bo->tiling_mode = get_tiling.tiling_mode;
 724    bo->swizzle_mode = get_tiling.swizzle_mode;
 725    /* XXX stride is unknown */
 726    DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
 727
 728 out:
 729    mtx_unlock(&bufmgr->lock);
 730    return bo;
 731
 732 err_unref:
 733    bo_free(bo);
 734    mtx_unlock(&bufmgr->lock);
 735    return NULL;
 736 }
 737
 738 static void
 739 bo_free(struct iris_bo *bo)
 740 {
 741    struct iris_bufmgr *bufmgr = bo->bufmgr;
 742
 743    if (bo->map_cpu) {
 744       VG_NOACCESS(bo->map_cpu, bo->size);
 745       munmap(bo->map_cpu, bo->size);
 746    }
 747    if (bo->map_wc) {
 748       VG_NOACCESS(bo->map_wc, bo->size);
 749       munmap(bo->map_wc, bo->size);
 750    }
 751    if (bo->map_gtt) {
 752       VG_NOACCESS(bo->map_gtt, bo->size);
 753       munmap(bo->map_gtt, bo->size);
 754    }
 755
 756    if (bo->external) {
 757       struct hash_entry *entry;
 758
 759       if (bo->global_name) {
 760          entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
 761          _mesa_hash_table_remove(bufmgr->name_table, entry);
 762       }
 763
 764       entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
 765       _mesa_hash_table_remove(bufmgr->handle_table, entry);
 766    }
 767
 768    /* Close this object */
 769    struct drm_gem_close close = { .handle = bo->gem_handle };
 770    int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
 771    if (ret != 0) {
 772       DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
 773           bo->gem_handle, bo->name, strerror(errno));
 774    }
 775
 776    vma_free(bo->bufmgr, bo->gtt_offset, bo->size);
 777
 778    free(bo);
 779 }
 780
 781 /** Frees all cached buffers significantly older than @time. */
 782 static void
 783 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
 784 {
 785    int i;
 786
 787    if (bufmgr->time == time)
 788       return;
 789
 790    for (i = 0; i < bufmgr->num_buckets; i++) {
 791       struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
 792
 793       list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
 794          if (time - bo->free_time <= 1)
 795             break;
 796
 797          list_del(&bo->head);
 798
 799          bo_free(bo);
 800       }
 801    }
 802
 803    bufmgr->time = time;
 804 }
 805
 806 static void
 807 bo_unreference_final(struct iris_bo *bo, time_t time)
 808 {
 809    struct iris_bufmgr *bufmgr = bo->bufmgr;
 810    struct bo_cache_bucket *bucket;
 811
 812    DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
 813
 814    bucket = bucket_for_size(bufmgr, bo->size);
 815    /* Put the buffer into our internal cache for reuse if we can. */
 816    if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
 817        iris_bo_madvise(bo, I915_MADV_DONTNEED)) {
 818       bo->free_time = time;
 819       bo->name = NULL;
 820
 821       list_addtail(&bo->head, &bucket->head);
 822    } else {
 823       bo_free(bo);
 824    }
 825 }
 826
 827 void
 828 iris_bo_unreference(struct iris_bo *bo)
 829 {
 830    if (bo == NULL)
 831       return;
 832
 833    assert(p_atomic_read(&bo->refcount) > 0);
 834
 835    if (atomic_add_unless(&bo->refcount, -1, 1)) {
 836       struct iris_bufmgr *bufmgr = bo->bufmgr;
 837       struct timespec time;
 838
 839       clock_gettime(CLOCK_MONOTONIC, &time);
 840
 841       mtx_lock(&bufmgr->lock);
 842
 843       if (p_atomic_dec_zero(&bo->refcount)) {
 844          bo_unreference_final(bo, time.tv_sec);
 845          cleanup_bo_cache(bufmgr, time.tv_sec);
 846       }
 847
 848       mtx_unlock(&bufmgr->lock);
 849    }
 850 }
 851
 852 static void
 853 bo_wait_with_stall_warning(struct pipe_debug_callback *dbg,
 854                            struct iris_bo *bo,
 855                            const char *action)
 856 {
 857    bool busy = dbg && !bo->idle;
 858    double elapsed = unlikely(busy) ? -get_time() : 0.0;
 859
 860    iris_bo_wait_rendering(bo);
 861
 862    if (unlikely(busy)) {
 863       elapsed += get_time();
 864       if (elapsed > 1e-5) /* 0.01ms */ {
 865          perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
 866                     action, bo->name, elapsed * 1000);
 867       }
 868    }
 869 }
 870
 871 static void
 872 print_flags(unsigned flags)
 873 {
 874    if (flags & MAP_READ)
 875       DBG("READ ");
 876    if (flags & MAP_WRITE)
 877       DBG("WRITE ");
 878    if (flags & MAP_ASYNC)
 879       DBG("ASYNC ");
 880    if (flags & MAP_PERSISTENT)
 881       DBG("PERSISTENT ");
 882    if (flags & MAP_COHERENT)
 883       DBG("COHERENT ");
 884    if (flags & MAP_RAW)
 885       DBG("RAW ");
 886    DBG("\n");
 887 }
 888
 889 static void *
 890 iris_bo_map_cpu(struct pipe_debug_callback *dbg,
 891                 struct iris_bo *bo, unsigned flags)
 892 {
 893    struct iris_bufmgr *bufmgr = bo->bufmgr;
 894
 895    /* We disallow CPU maps for writing to non-coherent buffers, as the
 896     * CPU map can become invalidated when a batch is flushed out, which
 897     * can happen at unpredictable times.  You should use WC maps instead.
 898     */
 899    assert(bo->cache_coherent || !(flags & MAP_WRITE));
 900
 901    if (!bo->map_cpu) {
 902       DBG("iris_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);
 903
 904       struct drm_i915_gem_mmap mmap_arg = {
 905          .handle = bo->gem_handle,
 906          .size = bo->size,
 907       };
 908       int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
 909       if (ret != 0) {
 910          ret = -errno;
 911          DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
 912              __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
 913          return NULL;
 914       }
 915       void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
 916       VG_DEFINED(map, bo->size);
 917
 918       if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
 919          VG_NOACCESS(map, bo->size);
 920          munmap(map, bo->size);
 921       }
 922    }
 923    assert(bo->map_cpu);
 924
 925    DBG("iris_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
 926        bo->map_cpu);
 927    print_flags(flags);
 928
 929    if (!(flags & MAP_ASYNC)) {
 930       bo_wait_with_stall_warning(dbg, bo, "CPU mapping");
 931    }
 932
 933    if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
 934       /* If we're reusing an existing CPU mapping, the CPU caches may
 935        * contain stale data from the last time we read from that mapping.
 936        * (With the BO cache, it might even be data from a previous buffer!)
 937        * Even if it's a brand new mapping, the kernel may have zeroed the
 938        * buffer via CPU writes.
 939        *
 940        * We need to invalidate those cachelines so that we see the latest
 941        * contents, and so long as we only read from the CPU mmap we do not
 942        * need to write those cachelines back afterwards.
 943        *
 944        * On LLC, the emprical evidence suggests that writes from the GPU
 945        * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
 946        * cachelines. (Other reads, such as the display engine, bypass the
 947        * LLC entirely requiring us to keep dirty pixels for the scanout
 948        * out of any cache.)
 949        */
 950       gen_invalidate_range(bo->map_cpu, bo->size);
 951    }
 952
 953    return bo->map_cpu;
 954 }
 955
 956 static void *
 957 iris_bo_map_wc(struct pipe_debug_callback *dbg,
 958                struct iris_bo *bo, unsigned flags)
 959 {
 960    struct iris_bufmgr *bufmgr = bo->bufmgr;
 961
 962    if (!bo->map_wc) {
 963       DBG("iris_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);
 964
 965       struct drm_i915_gem_mmap mmap_arg = {
 966          .handle = bo->gem_handle,
 967          .size = bo->size,
 968          .flags = I915_MMAP_WC,
 969       };
 970       int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
 971       if (ret != 0) {
 972          ret = -errno;
 973          DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
 974              __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
 975          return NULL;
 976       }
 977
 978       void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
 979       VG_DEFINED(map, bo->size);
 980
 981       if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
 982          VG_NOACCESS(map, bo->size);
 983          munmap(map, bo->size);
 984       }
 985    }
 986    assert(bo->map_wc);
 987
 988    DBG("iris_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
 989    print_flags(flags);
 990
 991    if (!(flags & MAP_ASYNC)) {
 992       bo_wait_with_stall_warning(dbg, bo, "WC mapping");
 993    }
 994
 995    return bo->map_wc;
 996 }
 997
 998 /**
 999  * Perform an uncached mapping via the GTT.
1000  *
1001  * Write access through the GTT is not quite fully coherent. On low power
1002  * systems especially, like modern Atoms, we can observe reads from RAM before
1003  * the write via GTT has landed. A write memory barrier that flushes the Write
1004  * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
1005  * read after the write as the GTT write suffers a small delay through the GTT
1006  * indirection. The kernel uses an uncached mmio read to ensure the GTT write
1007  * is ordered with reads (either by the GPU, WB or WC) and unconditionally
1008  * flushes prior to execbuf submission. However, if we are not informing the
1009  * kernel about our GTT writes, it will not flush before earlier access, such
1010  * as when using the cmdparser. Similarly, we need to be careful if we should
1011  * ever issue a CPU read immediately following a GTT write.
1012  *
1013  * Telling the kernel about write access also has one more important
1014  * side-effect. Upon receiving notification about the write, it cancels any
1015  * scanout buffering for FBC/PSR and friends. Later FBC/PSR is then flushed by
1016  * either SW_FINISH or DIRTYFB. The presumption is that we never write to the
1017  * actual scanout via a mmaping, only to a backbuffer and so all the FBC/PSR
1018  * tracking is handled on the buffer exchange instead.
1019  */
1020 static void *
1021 iris_bo_map_gtt(struct pipe_debug_callback *dbg,
1022                 struct iris_bo *bo, unsigned flags)
1023 {
1024    struct iris_bufmgr *bufmgr = bo->bufmgr;
1025
1026    /* Get a mapping of the buffer if we haven't before. */
1027    if (bo->map_gtt == NULL) {
1028       DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
1029
1030       struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };
1031
1032       /* Get the fake offset back... */
1033       int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
1034       if (ret != 0) {
1035          DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1036              __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1037          return NULL;
1038       }
1039
1040       /* and mmap it. */
1041       void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE,
1042                        MAP_SHARED, bufmgr->fd, mmap_arg.offset);
1043       if (map == MAP_FAILED) {
1044          DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1045              __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
1046          return NULL;
1047       }
1048
1049       /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
1050        * already intercept this mmap call. However, for consistency between
1051        * all the mmap paths, we mark the pointer as defined now and mark it
1052        * as inaccessible afterwards.
1053        */
1054       VG_DEFINED(map, bo->size);
1055
1056       if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
1057          VG_NOACCESS(map, bo->size);
1058          munmap(map, bo->size);
1059       }
1060    }
1061    assert(bo->map_gtt);
1062
1063    DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
1064    print_flags(flags);
1065
1066    if (!(flags & MAP_ASYNC)) {
1067       bo_wait_with_stall_warning(dbg, bo, "GTT mapping");
1068    }
1069
1070    return bo->map_gtt;
1071 }
1072
1073 static bool
1074 can_map_cpu(struct iris_bo *bo, unsigned flags)
1075 {
1076    if (bo->cache_coherent)
1077       return true;
1078
1079    /* Even if the buffer itself is not cache-coherent (such as a scanout), on
1080     * an LLC platform reads always are coherent (as they are performed via the
1081     * central system agent). It is just the writes that we need to take special
1082     * care to ensure that land in main memory and not stick in the CPU cache.
1083     */
1084    if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
1085       return true;
1086
1087    /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
1088     * across batch flushes where the kernel will change cache domains of the
1089     * bo, invalidating continued access to the CPU mmap on non-LLC device.
1090     *
1091     * Similarly, ASYNC typically means that the buffer will be accessed via
1092     * both the CPU and the GPU simultaneously.  Batches may be executed that
1093     * use the BO even while it is mapped.  While OpenGL technically disallows
1094     * most drawing while non-persistent mappings are active, we may still use
1095     * the GPU for blits or other operations, causing batches to happen at
1096     * inconvenient times.
1097     */
1098    if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC))
1099       return false;
1100
1101    return !(flags & MAP_WRITE);
1102 }
1103
1104 void *
1105 iris_bo_map(struct pipe_debug_callback *dbg,
1106             struct iris_bo *bo, unsigned flags)
1107 {
1108    if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
1109       return iris_bo_map_gtt(dbg, bo, flags);
1110
1111    void *map;
1112
1113    if (can_map_cpu(bo, flags))
1114       map = iris_bo_map_cpu(dbg, bo, flags);
1115    else
1116       map = iris_bo_map_wc(dbg, bo, flags);
1117
1118    /* Allow the attempt to fail by falling back to the GTT where necessary.
1119     *
1120     * Not every buffer can be mmaped directly using the CPU (or WC), for
1121     * example buffers that wrap stolen memory or are imported from other
1122     * devices. For those, we have little choice but to use a GTT mmapping.
1123     * However, if we use a slow GTT mmapping for reads where we expected fast
1124     * access, that order of magnitude difference in throughput will be clearly
1125     * expressed by angry users.
1126     *
1127     * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
1128     */
1129    if (!map && !(flags & MAP_RAW)) {
1130       perf_debug(dbg, "Fallback GTT mapping for %s with access flags %x\n",
1131                  bo->name, flags);
1132       map = iris_bo_map_gtt(dbg, bo, flags);
1133    }
1134
1135    return map;
1136 }
1137
1138 int
1139 iris_bo_subdata(struct iris_bo *bo, uint64_t offset,
1140                uint64_t size, const void *data)
1141 {
1142    struct iris_bufmgr *bufmgr = bo->bufmgr;
1143
1144    struct drm_i915_gem_pwrite pwrite = {
1145       .handle = bo->gem_handle,
1146       .offset = offset,
1147       .size = size,
1148       .data_ptr = (uint64_t) (uintptr_t) data,
1149    };
1150
1151    int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
1152    if (ret != 0) {
1153       ret = -errno;
1154       DBG("%s:%d: Error writing data to buffer %d: "
1155           "(%"PRIu64" %"PRIu64") %s .\n",
1156           __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
1157    }
1158
1159    return ret;
1160 }
1161
/** Block until all GPU rendering that uses the object has completed. */
void
iris_bo_wait_rendering(struct iris_bo *bo)
{
   /* A negative timeout requests an infinite wait from iris_bo_wait().
    *
    * This relies on a kernel recent enough to support the WAIT ioctl.
    * See intel_init_bufmgr()
    */
   const int64_t wait_forever = -1;

   iris_bo_wait(bo, wait_forever);
}
1171
1172 /**
1173  * Waits on a BO for the given amount of time.
1174  *
1175  * @bo: buffer object to wait for
1176  * @timeout_ns: amount of time to wait in nanoseconds.
1177  *   If value is less than 0, an infinite wait will occur.
1178  *
1179  * Returns 0 if the wait was successful ie. the last batch referencing the
1180  * object has completed within the allotted time. Otherwise some negative return
1181  * value describes the error. Of particular interest is -ETIME when the wait has
1182  * failed to yield the desired result.
1183  *
1184  * Similar to iris_bo_wait_rendering except a timeout parameter allows
1185  * the operation to give up after a certain amount of time. Another subtle
1186  * difference is the internal locking semantics are different (this variant does
1187  * not hold the lock for the duration of the wait). This makes the wait subject
1188  * to a larger userspace race window.
1189  *
1190  * The implementation shall wait until the object is no longer actively
1191  * referenced within a batch buffer at the time of the call. The wait will
1192  * not guarantee that the buffer is re-issued via another thread, or an flinked
1193  * handle. Userspace must make sure this race does not occur if such precision
1194  * is important.
1195  *
1196  * Note that some kernels have broken the inifite wait for negative values
1197  * promise, upgrade to latest stable kernels if this is the case.
1198  */
1199 int
1200 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1201 {
1202    struct iris_bufmgr *bufmgr = bo->bufmgr;
1203
1204    /* If we know it's idle, don't bother with the kernel round trip */
1205    if (bo->idle && !bo->external)
1206       return 0;
1207
1208    struct drm_i915_gem_wait wait = {
1209       .bo_handle = bo->gem_handle,
1210       .timeout_ns = timeout_ns,
1211    };
1212    int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1213    if (ret != 0)
1214       return -errno;
1215
1216    bo->idle = true;
1217
1218    return ret;
1219 }
1220
1221 void
1222 iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1223 {
1224    mtx_destroy(&bufmgr->lock);
1225
1226    /* Free any cached buffer objects we were going to reuse */
1227    for (int i = 0; i < bufmgr->num_buckets; i++) {
1228       struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
1229
1230       list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1231          list_del(&bo->head);
1232
1233          bo_free(bo);
1234       }
1235
1236       for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1237          util_dynarray_fini(&bucket->vma_list[z]);
1238    }
1239
1240    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1241    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1242
1243    for (int z = 0; z < IRIS_MEMZONE_COUNT; z++) {
1244       util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1245    }
1246
1247    free(bufmgr);
1248 }
1249
1250 static int
1251 bo_set_tiling_internal(struct iris_bo *bo, uint32_t tiling_mode,
1252                        uint32_t stride)
1253 {
1254    struct iris_bufmgr *bufmgr = bo->bufmgr;
1255    struct drm_i915_gem_set_tiling set_tiling;
1256    int ret;
1257
1258    if (bo->global_name == 0 &&
1259        tiling_mode == bo->tiling_mode && stride == bo->stride)
1260       return 0;
1261
1262    memset(&set_tiling, 0, sizeof(set_tiling));
1263    do {
1264       /* set_tiling is slightly broken and overwrites the
1265        * input on the error path, so we have to open code
1266        * drm_ioctl.
1267        */
1268       set_tiling.handle = bo->gem_handle;
1269       set_tiling.tiling_mode = tiling_mode;
1270       set_tiling.stride = stride;
1271
1272       ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
1273    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
1274    if (ret == -1)
1275       return -errno;
1276
1277    bo->tiling_mode = set_tiling.tiling_mode;
1278    bo->swizzle_mode = set_tiling.swizzle_mode;
1279    bo->stride = set_tiling.stride;
1280    return 0;
1281 }
1282
1283 int
1284 iris_bo_get_tiling(struct iris_bo *bo, uint32_t *tiling_mode,
1285                   uint32_t *swizzle_mode)
1286 {
1287    *tiling_mode = bo->tiling_mode;
1288    *swizzle_mode = bo->swizzle_mode;
1289    return 0;
1290 }
1291
1292 struct iris_bo *
1293 iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd)
1294 {
1295    uint32_t handle;
1296    struct iris_bo *bo;
1297
1298    mtx_lock(&bufmgr->lock);
1299    int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1300    if (ret) {
1301       DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1302           strerror(errno));
1303       mtx_unlock(&bufmgr->lock);
1304       return NULL;
1305    }
1306
1307    /*
1308     * See if the kernel has already returned this buffer to us. Just as
1309     * for named buffers, we must not create two bo's pointing at the same
1310     * kernel object
1311     */
1312    bo = hash_find_bo(bufmgr->handle_table, handle);
1313    if (bo) {
1314       iris_bo_reference(bo);
1315       goto out;
1316    }
1317
1318    bo = calloc(1, sizeof(*bo));
1319    if (!bo)
1320       goto out;
1321
1322    p_atomic_set(&bo->refcount, 1);
1323
1324    /* Determine size of bo.  The fd-to-handle ioctl really should
1325     * return the size, but it doesn't.  If we have kernel 3.12 or
1326     * later, we can lseek on the prime fd to get the size.  Older
1327     * kernels will just fail, in which case we fall back to the
1328     * provided (estimated or guess size). */
1329    ret = lseek(prime_fd, 0, SEEK_END);
1330    if (ret != -1)
1331       bo->size = ret;
1332
1333    bo->bufmgr = bufmgr;
1334
1335    bo->gem_handle = handle;
1336    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1337
1338    bo->name = "prime";
1339    bo->reusable = false;
1340    bo->external = true;
1341    bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
1342    bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1343
1344    struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
1345    if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
1346       goto err;
1347
1348    bo->tiling_mode = get_tiling.tiling_mode;
1349    bo->swizzle_mode = get_tiling.swizzle_mode;
1350    /* XXX stride is unknown */
1351
1352 out:
1353    mtx_unlock(&bufmgr->lock);
1354    return bo;
1355
1356 err:
1357    bo_free(bo);
1358    mtx_unlock(&bufmgr->lock);
1359    return NULL;
1360 }
1361
1362 static void
1363 iris_bo_make_external(struct iris_bo *bo)
1364 {
1365    struct iris_bufmgr *bufmgr = bo->bufmgr;
1366
1367    if (!bo->external) {
1368       mtx_lock(&bufmgr->lock);
1369       if (!bo->external) {
1370          _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1371          bo->external = true;
1372       }
1373       mtx_unlock(&bufmgr->lock);
1374    }
1375 }
1376
1377 int
1378 iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
1379 {
1380    struct iris_bufmgr *bufmgr = bo->bufmgr;
1381
1382    iris_bo_make_external(bo);
1383
1384    if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1385                           DRM_CLOEXEC, prime_fd) != 0)
1386       return -errno;
1387
1388    bo->reusable = false;
1389
1390    return 0;
1391 }
1392
1393 uint32_t
1394 iris_bo_export_gem_handle(struct iris_bo *bo)
1395 {
1396    iris_bo_make_external(bo);
1397
1398    return bo->gem_handle;
1399 }
1400
1401 int
1402 iris_bo_flink(struct iris_bo *bo, uint32_t *name)
1403 {
1404    struct iris_bufmgr *bufmgr = bo->bufmgr;
1405
1406    if (!bo->global_name) {
1407       struct drm_gem_flink flink = { .handle = bo->gem_handle };
1408
1409       if (drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
1410          return -errno;
1411
1412       iris_bo_make_external(bo);
1413       mtx_lock(&bufmgr->lock);
1414       if (!bo->global_name) {
1415          bo->global_name = flink.name;
1416          _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
1417       }
1418       mtx_unlock(&bufmgr->lock);
1419
1420       bo->reusable = false;
1421    }
1422
1423    *name = bo->global_name;
1424    return 0;
1425 }
1426
1427 static void
1428 add_bucket(struct iris_bufmgr *bufmgr, int size)
1429 {
1430    unsigned int i = bufmgr->num_buckets;
1431
1432    assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
1433
1434    list_inithead(&bufmgr->cache_bucket[i].head);
1435    for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1436       util_dynarray_init(&bufmgr->cache_bucket[i].vma_list[z], NULL);
1437    bufmgr->cache_bucket[i].size = size;
1438    bufmgr->num_buckets++;
1439
1440    assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
1441    assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
1442    assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
1443 }
1444
1445 static void
1446 init_cache_buckets(struct iris_bufmgr *bufmgr)
1447 {
1448    uint64_t size, cache_max_size = 64 * 1024 * 1024;
1449
1450    /* OK, so power of two buckets was too wasteful of memory.
1451     * Give 3 other sizes between each power of two, to hopefully
1452     * cover things accurately enough.  (The alternative is
1453     * probably to just go for exact matching of sizes, and assume
1454     * that for things like composited window resize the tiled
1455     * width/height alignment and rounding of sizes to pages will
1456     * get us useful cache hit rates anyway)
1457     */
1458    add_bucket(bufmgr, PAGE_SIZE);
1459    add_bucket(bufmgr, PAGE_SIZE * 2);
1460    add_bucket(bufmgr, PAGE_SIZE * 3);
1461
1462    /* Initialize the linked lists for BO reuse cache. */
1463    for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
1464       add_bucket(bufmgr, size);
1465
1466       add_bucket(bufmgr, size + size * 1 / 4);
1467       add_bucket(bufmgr, size + size * 2 / 4);
1468       add_bucket(bufmgr, size + size * 3 / 4);
1469    }
1470 }
1471
1472 uint32_t
1473 iris_create_hw_context(struct iris_bufmgr *bufmgr)
1474 {
1475    struct drm_i915_gem_context_create create = { };
1476    int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
1477    if (ret != 0) {
1478       DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
1479       return 0;
1480    }
1481
1482    return create.ctx_id;
1483 }
1484
1485 int
1486 iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
1487                             uint32_t ctx_id,
1488                             int priority)
1489 {
1490    struct drm_i915_gem_context_param p = {
1491       .ctx_id = ctx_id,
1492       .param = I915_CONTEXT_PARAM_PRIORITY,
1493       .value = priority,
1494    };
1495    int err;
1496
1497    err = 0;
1498    if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
1499       err = -errno;
1500
1501    return err;
1502 }
1503
1504 void
1505 iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id)
1506 {
1507    struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };
1508
1509    if (ctx_id != 0 &&
1510        drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
1511       fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
1512               strerror(errno));
1513    }
1514 }
1515
1516 int
1517 iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
1518 {
1519    struct drm_i915_reg_read reg_read = { .offset = offset };
1520    int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
1521
1522    *result = reg_read.val;
1523    return ret;
1524 }
1525
1526 /**
1527  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
1528  * and manage map buffer objections.
1529  *
1530  * \param fd File descriptor of the opened DRM device.
1531  */
1532 struct iris_bufmgr *
1533 iris_bufmgr_init(struct gen_device_info *devinfo, int fd)
1534 {
1535    struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
1536    if (bufmgr == NULL)
1537       return NULL;
1538
1539    /* Handles to buffer objects belong to the device fd and are not
1540     * reference counted by the kernel.  If the same fd is used by
1541     * multiple parties (threads sharing the same screen bufmgr, or
1542     * even worse the same device fd passed to multiple libraries)
1543     * ownership of those handles is shared by those independent parties.
1544     *
1545     * Don't do this! Ensure that each library/bufmgr has its own device
1546     * fd so that its namespace does not clash with another.
1547     */
1548    bufmgr->fd = fd;
1549
1550    if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
1551       free(bufmgr);
1552       return NULL;
1553    }
1554
1555    bufmgr->has_llc = devinfo->has_llc;
1556
1557    STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
1558    const uint64_t _4GB = 1ull << 32;
1559
1560    util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER],
1561                       PAGE_SIZE, _4GB);
1562    util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE],
1563                       IRIS_MEMZONE_SURFACE_START + IRIS_BINDER_SIZE,
1564                       _4GB - IRIS_BINDER_SIZE);
1565    util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC],
1566                       IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
1567                       _4GB - IRIS_BORDER_COLOR_POOL_SIZE);
1568    util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_OTHER],
1569                       IRIS_MEMZONE_OTHER_START,
1570                       (1ull << 48) - IRIS_MEMZONE_OTHER_START);
1571
1572    // XXX: driconf
1573    bufmgr->bo_reuse = env_var_as_boolean("bo_reuse", true);
1574
1575    init_cache_buckets(bufmgr);
1576
1577    bufmgr->name_table =
1578       _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1579    bufmgr->handle_table =
1580       _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
1581
1582    return bufmgr;
1583 }