/*
 * Copyright 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <xf86drm.h>
#include "drm-uapi/panfrost_drm.h"

/* Driver-local headers (assumed) defining struct panfrost_device,
 * struct panfrost_bo and the PAN_BO_* flags used below. */
#include "pan_bo.h"
#include "pan_device.h"

#include "os/os_mman.h"

#include "util/u_inlines.h"
#include "util/u_math.h"
/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and scanning it for a suitable entry. A cache eviction is
 * a kernel-level free of a BO and its removal from the bucket. We special case
 * evicting all BOs from the cache, since that's what's helpful in practice and
 * avoids extra logic around the linked list.
 */
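
/* Illustrative bucket layout, assuming the usual bounds of
 * MIN_BO_CACHE_BUCKET = 12 and MAX_BO_CACHE_BUCKET = 22 (the exact values
 * live in the device header and are an assumption here, not a guarantee):
 *
 *   bucket 0:  sizes in [2^12, 2^13) = [4 KiB, 8 KiB)
 *   bucket 1:  sizes in [2^13, 2^14)
 *   ...
 *   bucket 10: sizes >= 2^22 = 4 MiB (all huge BOs share the last bucket)
 */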

static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
                  uint32_t flags)
{
        struct drm_panfrost_create_bo create_bo = { .size = size };
        struct panfrost_bo *bo;
        int ret;

        /* The HEAP and NOEXEC creation flags need kernel driver 1.1+ */
        if (dev->kernel_version->version_major > 1 ||
            dev->kernel_version->version_minor >= 1) {
                if (flags & PAN_BO_GROWABLE)
                        create_bo.flags |= PANFROST_BO_HEAP;
                if (!(flags & PAN_BO_EXECUTE))
                        create_bo.flags |= PANFROST_BO_NOEXEC;
        }

        ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
                return NULL;
        }

        bo = rzalloc(dev->memctx, struct panfrost_bo);
        assert(bo);
        bo->size = create_bo.size;
        bo->gpu = create_bo.offset;
        bo->gem_handle = create_bo.handle;
        bo->flags = flags;
        bo->dev = dev;

        return bo;
}

static void
panfrost_bo_free(struct panfrost_bo *bo)
{
        struct drm_gem_close gem_close = { .handle = bo->gem_handle };
        int ret;

        ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
                assert(0);
        }

        ralloc_free(bo);
}

/* Returns true if the BO is ready, false otherwise.
 * access_type encodes the type of access one wants to ensure is done.
 * Say you want to make sure all writers are done writing: you should pass
 * PAN_BO_ACCESS_WRITE.
 * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
 * PAN_BO_ACCESS_READ would work too, as waiting for readers implies waiting
 * for writers as well, but we want to make things explicit, and waiting only
 * for readers is impossible.
 */
bool
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
                 uint32_t access_type)
{
        struct drm_panfrost_wait_bo req = {
                .handle = bo->gem_handle,
                .timeout_ns = timeout_ns,
        };
        int ret;

        assert(access_type == PAN_BO_ACCESS_WRITE ||
               access_type == PAN_BO_ACCESS_RW);

        /* If the BO has been exported or imported we can't rely on the cached
         * state, we need to call the WAIT_BO ioctl.
         */
        if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
                /* If ->gpu_access is 0, the BO is idle, no need to wait. */
                if (!bo->gpu_access)
                        return true;

                /* If the caller only wants to wait for writers and no
                 * writes are pending, we don't have to wait.
                 */
                if (access_type == PAN_BO_ACCESS_WRITE &&
                    !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
                        return true;
        }

        /* The ioctl returns a value >= 0 when the BO we are waiting for is
         * ready, -1 otherwise.
         */
        ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
        if (ret != -1) {
                /* Set gpu_access to 0 so that the next call to bo_wait()
                 * doesn't have to call the WAIT_BO ioctl.
                 */
                bo->gpu_access = 0;
                return true;
        }

        /* If errno is not ETIMEDOUT or EBUSY, the handle we passed is
         * invalid, which shouldn't happen here.
         */
        assert(errno == ETIMEDOUT || errno == EBUSY);

        return false;
}
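
/* Typical usage, as a sketch (the surrounding submission logic is assumed,
 * not part of this file): block until all writers are done before reading
 * the BO back on the CPU:
 *
 *     if (panfrost_bo_wait(bo, INT64_MAX, PAN_BO_ACCESS_WRITE))
 *             memcpy(staging, bo->cpu, size);
 */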

/* Helper to calculate the bucket index of a BO */

static unsigned
pan_bucket_index(unsigned size)
{
        /* Round down to POT to compute a bucket index */
        unsigned bucket_index = util_logbase2(size);

        /* Clamp the bucket index; all huge allocations will be
         * sorted into the largest bucket */
        bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);

        /* The minimum bucket size must equal the minimum allocation
         * size; the maximum we clamped */
        assert(bucket_index >= MIN_BO_CACHE_BUCKET);
        assert(bucket_index <= MAX_BO_CACHE_BUCKET);

        /* Reindex from 0 */
        return (bucket_index - MIN_BO_CACHE_BUCKET);
}
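
/* Worked example, assuming MIN_BO_CACHE_BUCKET = 12: a 20 KiB (20480 B)
 * request gives util_logbase2(20480) = 14, i.e. the [16 KiB, 32 KiB)
 * bucket, so pan_bucket_index() returns 14 - 12 = 2. */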

static struct list_head *
pan_bucket(struct panfrost_device *dev, unsigned size)
{
        return &dev->bo_cache.buckets[pan_bucket_index(size)];
}

/* Tries to fetch a BO of sufficient size with the appropriate flags from the
 * BO cache. If it succeeds, it returns that BO and removes the BO from the
 * cache. If it fails, it returns NULL signaling the caller to allocate a new
 * BO. */

static struct panfrost_bo *
panfrost_bo_cache_fetch(struct panfrost_device *dev,
                        size_t size, uint32_t flags, bool dontwait)
{
        pthread_mutex_lock(&dev->bo_cache.lock);
        struct list_head *bucket = pan_bucket(dev, size);
        struct panfrost_bo *bo = NULL;

        /* Iterate the bucket looking for something suitable */
        list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
                                 bucket_link) {
                if (entry->size < size || entry->flags != flags)
                        continue;

                /* Skip BOs that are still in use unless we're allowed to
                 * wait for them to become idle. */
                if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
                                      PAN_BO_ACCESS_RW))
                        continue;

                struct drm_panfrost_madvise madv = {
                        .handle = entry->gem_handle,
                        .madv = PANFROST_MADV_WILLNEED,
                };
                int ret;

                /* This one works, splice it out of the cache */
                list_del(&entry->bucket_link);
                list_del(&entry->lru_link);

                /* Tell the kernel we need this BO again; if it was already
                 * purged under memory pressure, free it and keep looking. */
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
                if (!ret && !madv.retained) {
                        panfrost_bo_free(entry);
                        continue;
                }

                bo = entry;
                break;
        }
        pthread_mutex_unlock(&dev->bo_cache.lock);

        return bo;
}

static void
panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
{
        struct timespec time;

        clock_gettime(CLOCK_MONOTONIC, &time);
        list_for_each_entry_safe(struct panfrost_bo, entry,
                                 &dev->bo_cache.lru, lru_link) {
                /* We want all entries that have been used more than 1 sec
                 * ago to be dropped, others can be kept.
                 * Note the <= 2 check and not <= 1. It's here to account for
                 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
                 * That means we might keep entries that are between 1 and 2
                 * seconds old, but we don't really care, as long as unused BOs
                 * are dropped at some point.
                 */
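                /* Concrete illustration (numbers made up): a BO stamped
                 * last_used = 10 at real time 10.9 s and checked at 12.0 s is
                 * only 1.1 s old, yet 12 - 10 = 2 keeps it; by 13.0 s the
                 * delta is 3 and the BO gets dropped. */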
                if (time.tv_sec - entry->last_used <= 2)
                        break;

                list_del(&entry->bucket_link);
                list_del(&entry->lru_link);
                panfrost_bo_free(entry);
        }
}

/* Tries to add a BO to the cache. Returns whether it was
 * successful */

static bool
panfrost_bo_cache_put(struct panfrost_bo *bo)
{
        struct panfrost_device *dev = bo->dev;

        if (bo->flags & PAN_BO_DONT_REUSE)
                return false;

        pthread_mutex_lock(&dev->bo_cache.lock);
        struct list_head *bucket = pan_bucket(dev, bo->size);
        struct drm_panfrost_madvise madv;
        struct timespec time;

        madv.handle = bo->gem_handle;
        madv.madv = PANFROST_MADV_DONTNEED;
        madv.retained = 0;

        /* Tell the kernel it may purge this BO under memory pressure */
        drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);

        /* Add us to the bucket */
        list_addtail(&bo->bucket_link, bucket);

        /* Add us to the LRU list and update the last_used field. */
        list_addtail(&bo->lru_link, &dev->bo_cache.lru);
        clock_gettime(CLOCK_MONOTONIC, &time);
        bo->last_used = time.tv_sec;

        /* Let's do some cleanup in the BO cache while we hold the
         * lock. */
        panfrost_bo_cache_evict_stale_bos(dev);
        pthread_mutex_unlock(&dev->bo_cache.lock);

        return true;
}
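
/* The madvise handshake, summarized (illustrative): cache_put() marks a BO
 * DONTNEED so the kernel may reclaim its pages under memory pressure;
 * cache_fetch() marks it WILLNEED again and, if madv.retained comes back 0,
 * the pages are already gone, so the BO is freed instead of reused. */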

/* Evicts all BOs from the cache. Called during context
 * destroy or during low-memory situations (to free up
 * memory that may be unused by us just sitting in our
 * cache, but still reserved from the perspective of the
 * OS) */

void
panfrost_bo_cache_evict_all(struct panfrost_device *dev)
{
        pthread_mutex_lock(&dev->bo_cache.lock);
        for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
                struct list_head *bucket = &dev->bo_cache.buckets[i];

                list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
                                         bucket_link) {
                        list_del(&entry->bucket_link);
                        list_del(&entry->lru_link);
                        panfrost_bo_free(entry);
                }
        }
        pthread_mutex_unlock(&dev->bo_cache.lock);
}

void
panfrost_bo_mmap(struct panfrost_bo *bo)
{
        struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
        int ret;

        /* Already mapped, nothing to do */
        if (bo->cpu)
                return;

        ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
                assert(0);
        }

        bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                          bo->dev->fd, mmap_bo.offset);
        if (bo->cpu == MAP_FAILED) {
                fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
                assert(0);
        }
}

static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
        if (!bo->cpu)
                return;

        if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
                perror("munmap");
                abort();
        }

        bo->cpu = NULL;
}

struct panfrost_bo *
panfrost_bo_create(struct panfrost_device *dev, size_t size,
                   uint32_t flags)
{
        struct panfrost_bo *bo;

        /* Kernel will fail (confusingly) with EPERM otherwise */
        assert(size > 0);

        /* To maximize BO cache usage, don't allocate tiny BOs */
        size = MAX2(size, 4096);

        /* GROWABLE BOs cannot be mmapped */
        if (flags & PAN_BO_GROWABLE)
                assert(flags & PAN_BO_INVISIBLE);

        /* Before creating a BO, we first want to check the cache but without
         * waiting for BO readiness (BOs in the cache can still be referenced
         * by jobs that are not finished yet).
         * If the cached allocation fails we fall back on fresh BO allocation,
         * and if that fails too, we try one more time to allocate from the
         * cache, but this time we accept to wait.
         */
        bo = panfrost_bo_cache_fetch(dev, size, flags, true);
        if (!bo)
                bo = panfrost_bo_alloc(dev, size, flags);
        if (!bo)
                bo = panfrost_bo_cache_fetch(dev, size, flags, false);

        if (!bo)
                fprintf(stderr, "BO creation failed\n");

        assert(bo);

        /* Only mmap now if we know we need to. For CPU-invisible buffers, we
         * never map since we don't care about their contents; they're purely
         * for GPU-internal use. But we do trace them anyway. */
        if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
                panfrost_bo_mmap(bo);

        p_atomic_set(&bo->refcnt, 1);

        pthread_mutex_lock(&dev->active_bos_lock);
        _mesa_set_add(bo->dev->active_bos, bo);
        pthread_mutex_unlock(&dev->active_bos_lock);

        return bo;
}
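
/* Illustrative lifecycle (the job-submission step is assumed, not defined
 * here): create a scratch BO, let the GPU use it, then drop the reference
 * so the buffer lands in the cache for the next allocation:
 *
 *     struct panfrost_bo *scratch =
 *             panfrost_bo_create(dev, 64 * 1024, PAN_BO_INVISIBLE);
 *     ... submit GPU jobs referencing scratch->gpu ...
 *     panfrost_bo_unreference(scratch);
 */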

void
panfrost_bo_reference(struct panfrost_bo *bo)
{
        if (bo) {
                ASSERTED int count = p_atomic_inc_return(&bo->refcnt);

                /* A resurrected BO (refcnt bumped from 0) would race with
                 * the release path, so this must never happen here. */
                assert(count != 1);
        }
}

void
panfrost_bo_unreference(struct panfrost_bo *bo)
{
        if (!bo)
                return;

        /* Don't return to cache if there are still references */
        if (p_atomic_dec_return(&bo->refcnt))
                return;

        struct panfrost_device *dev = bo->dev;

        pthread_mutex_lock(&dev->active_bos_lock);
        /* Someone might have imported this BO while we were waiting for the
         * lock, let's make sure it's still not referenced before freeing it.
         */
        if (p_atomic_read(&bo->refcnt) == 0) {
                _mesa_set_remove_key(bo->dev->active_bos, bo);

                /* When the reference count goes to zero, we need to cleanup */
                panfrost_bo_munmap(bo);

                /* Rather than freeing the BO now, we'll cache the BO for later
                 * allocations if we're allowed to.
                 */
                if (!panfrost_bo_cache_put(bo))
                        panfrost_bo_free(bo);
        }
        pthread_mutex_unlock(&dev->active_bos_lock);
}

struct panfrost_bo *
panfrost_bo_import(struct panfrost_device *dev, int fd)
{
        struct panfrost_bo *bo, *newbo = rzalloc(dev->memctx, struct panfrost_bo);
        struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
        struct set_entry *entry;
        unsigned gem_handle;
        ASSERTED int ret;

        newbo->dev = dev;

        ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
        assert(!ret);

        newbo->gem_handle = gem_handle;

        pthread_mutex_lock(&dev->active_bos_lock);
        entry = _mesa_set_search_or_add(dev->active_bos, newbo);
        assert(entry);
        bo = (struct panfrost_bo *)entry->key;
        if (newbo == bo) {
                /* First time we see this handle: query its GPU address and
                 * size, then map it. */
                get_bo_offset.handle = gem_handle;
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
                assert(!ret);

                newbo->gpu = (mali_ptr) get_bo_offset.offset;
                newbo->size = lseek(fd, 0, SEEK_END);
                newbo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED;
                assert(newbo->size > 0);
                p_atomic_set(&newbo->refcnt, 1);
                // TODO map and unmap on demand?
                panfrost_bo_mmap(newbo);
        } else {
                ralloc_free(newbo);
                /* bo->refcnt == 0 can happen if the BO
                 * was being released but panfrost_bo_import() acquired the
                 * lock before panfrost_bo_unreference(). In that case, refcnt
                 * is 0 and we can't use panfrost_bo_reference() directly, we
                 * have to re-initialize the refcnt().
                 * Note that panfrost_bo_unreference() checks
                 * refcnt value just after acquiring the lock to
                 * make sure the object is not freed if panfrost_bo_import()
                 * acquired it in the meantime.
                 */
                if (p_atomic_read(&bo->refcnt) == 0)
                        p_atomic_set(&bo->refcnt, 1);
                else
                        panfrost_bo_reference(bo);
        }
        pthread_mutex_unlock(&dev->active_bos_lock);

        return bo;
}

int
panfrost_bo_export(struct panfrost_bo *bo)
{
        struct drm_prime_handle args = {
                .handle = bo->gem_handle,
                .flags = DRM_CLOEXEC,
        };

        int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
        if (ret == -1)
                return -1;

        /* The BO is now visible outside our control: never recycle it */
        bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED;

        return args.fd;
}
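
/* Illustrative dma-buf round trip (a sketch; the second device is assumed,
 * not part of this file): export a BO as a prime fd and import it elsewhere:
 *
 *     int dmabuf = panfrost_bo_export(bo);
 *     if (dmabuf >= 0) {
 *             struct panfrost_bo *shared = panfrost_bo_import(dev2, dmabuf);
 *             ...
 *     }
 */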