From: Alyssa Rosenzweig Date: Mon, 23 Mar 2020 23:36:46 +0000 (-0400) Subject: panfrost: Move pan_bo to root panfrost X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0f65f00a0dc438350454ba247b453cf80271a671;p=mesa.git panfrost: Move pan_bo to root panfrost Now that its Gallium dependencies have been resolved, we can move this all out to root. The only nontrivial change here is keeping the pandecode calls in Gallium-panfrost to avoid creating a circular dependency between encoder/decoder. This could be solved with a third drm folder but this seems less intrusive for now and Roman would probably appreciate if I went longer than 8 hours without breaking the Android build. Signed-off-by: Alyssa Rosenzweig Part-of: --- diff --git a/src/gallium/drivers/panfrost/Makefile.sources b/src/gallium/drivers/panfrost/Makefile.sources index 1c796605d64..c734cd0807c 100644 --- a/src/gallium/drivers/panfrost/Makefile.sources +++ b/src/gallium/drivers/panfrost/Makefile.sources @@ -13,8 +13,6 @@ C_SOURCES := \ pan_blend_shaders.c \ pan_blend_shaders.h \ pan_blit.c \ - pan_bo.c \ - pan_bo.h \ pan_cmdstream.c \ pan_cmdstream.h \ pan_compute.c \ @@ -31,4 +29,3 @@ C_SOURCES := \ pan_screen.c \ pan_screen.h \ pan_sfbd.c \ - pan_util.h diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 92f043ea0a0..5a2d466c9d5 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -30,7 +30,6 @@ files_panfrost = files( 'nir/nir_lower_framebuffer.c', 'pan_context.c', - 'pan_bo.c', 'pan_blit.c', 'pan_job.c', 'pan_allocate.c', diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 63f87dc26e1..5d5a0f33248 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -76,7 +76,7 @@ panfrost_shader_compile(struct panfrost_context *ctx, * that's how I'd do it. */ if (size) { - state->bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE); + state->bo = pan_bo_create(dev, size, PAN_BO_EXECUTE); memcpy(state->bo->cpu, dst, size); state->first_tag = program.first_tag; } else { diff --git a/src/gallium/drivers/panfrost/pan_bo.c b/src/gallium/drivers/panfrost/pan_bo.c deleted file mode 100644 index 62e14a5bafd..00000000000 --- a/src/gallium/drivers/panfrost/pan_bo.c +++ /dev/null @@ -1,531 +0,0 @@ -/* - * Copyright 2019 Collabora, Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors (Collabora): - * Alyssa Rosenzweig - */ -#include -#include -#include -#include -#include -#include "drm-uapi/panfrost_drm.h" - -#include "pan_bo.h" -#include "pan_util.h" -#include "pandecode/decode.h" - -#include "os/os_mman.h" - -#include "util/u_inlines.h" -#include "util/u_math.h" - -/* This file implements a userspace BO cache. Allocating and freeing - * GPU-visible buffers is very expensive, and even the extra kernel roundtrips - * adds more work than we would like at this point. So caching BOs in userspace - * solves both of these problems and does not require kernel updates. - * - * Cached BOs are sorted into a bucket based on rounding their size down to the - * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo - * objects. Putting a BO into the cache is accomplished by adding it to the - * corresponding bucket. Getting a BO from the cache consists of finding the - * appropriate bucket and sorting. A cache eviction is a kernel-level free of a - * BO and removing it from the bucket. We special case evicting all BOs from - * the cache, since that's what helpful in practice and avoids extra logic - * around the linked list. - */ - -static struct panfrost_bo * -panfrost_bo_alloc(struct panfrost_device *dev, size_t size, - uint32_t flags) -{ - struct drm_panfrost_create_bo create_bo = { .size = size }; - struct panfrost_bo *bo; - int ret; - - if (dev->kernel_version->version_major > 1 || - dev->kernel_version->version_minor >= 1) { - if (flags & PAN_BO_GROWABLE) - create_bo.flags |= PANFROST_BO_HEAP; - if (!(flags & PAN_BO_EXECUTE)) - create_bo.flags |= PANFROST_BO_NOEXEC; - } - - ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo); - if (ret) { - DBG("DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n"); - return NULL; - } - - bo = rzalloc(dev->memctx, struct panfrost_bo); - assert(bo); - bo->size = create_bo.size; - bo->gpu = create_bo.offset; - bo->gem_handle = create_bo.handle; - bo->flags = flags; - bo->dev = dev; - return bo; -} - -static void -panfrost_bo_free(struct panfrost_bo *bo) -{ - struct drm_gem_close gem_close = { .handle = bo->gem_handle }; - int ret; - - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close); - if (ret) { - fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n"); - assert(0); - } - - ralloc_free(bo); -} - -/* Returns true if the BO is ready, false otherwise. - * access_type is encoding the type of access one wants to ensure is done. - * Say you want to make sure all writers are done writing, you should pass - * PAN_BO_ACCESS_WRITE. - * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW. - * PAN_BO_ACCESS_READ would work too as waiting for readers implies - * waiting for writers as well, but we want to make things explicit and waiting - * only for readers is impossible. - */ -bool -panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, - uint32_t access_type) -{ - struct drm_panfrost_wait_bo req = { - .handle = bo->gem_handle, - .timeout_ns = timeout_ns, - }; - int ret; - - assert(access_type == PAN_BO_ACCESS_WRITE || - access_type == PAN_BO_ACCESS_RW); - - /* If the BO has been exported or imported we can't rely on the cached - * state, we need to call the WAIT_BO ioctl. 
- */ - if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) { - /* If ->gpu_access is 0, the BO is idle, no need to wait. */ - if (!bo->gpu_access) - return true; - - /* If the caller only wants to wait for writers and no - * writes are pending, we don't have to wait. - */ - if (access_type == PAN_BO_ACCESS_WRITE && - !(bo->gpu_access & PAN_BO_ACCESS_WRITE)) - return true; - } - - /* The ioctl returns >= 0 value when the BO we are waiting for is ready - * -1 otherwise. - */ - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req); - if (ret != -1) { - /* Set gpu_access to 0 so that the next call to bo_wait() - * doesn't have to call the WAIT_BO ioctl. - */ - bo->gpu_access = 0; - return true; - } - - /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed - * is invalid, which shouldn't happen here. - */ - assert(errno == ETIMEDOUT || errno == EBUSY); - return false; -} - -/* Helper to calculate the bucket index of a BO */ - -static unsigned -pan_bucket_index(unsigned size) -{ - /* Round down to POT to compute a bucket index */ - - unsigned bucket_index = util_logbase2(size); - - /* Clamp the bucket index; all huge allocations will be - * sorted into the largest bucket */ - - bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET); - - /* The minimum bucket size must equal the minimum allocation - * size; the maximum we clamped */ - - assert(bucket_index >= MIN_BO_CACHE_BUCKET); - assert(bucket_index <= MAX_BO_CACHE_BUCKET); - - /* Reindex from 0 */ - return (bucket_index - MIN_BO_CACHE_BUCKET); -} - -static struct list_head * -pan_bucket(struct panfrost_device *dev, unsigned size) -{ - return &dev->bo_cache.buckets[pan_bucket_index(size)]; -} - -/* Tries to fetch a BO of sufficient size with the appropriate flags from the - * BO cache. If it succeeds, it returns that BO and removes the BO from the - * cache. If it fails, it returns NULL signaling the caller to allocate a new - * BO. */ - -static struct panfrost_bo * -panfrost_bo_cache_fetch(struct panfrost_device *dev, - size_t size, uint32_t flags, bool dontwait) -{ - pthread_mutex_lock(&dev->bo_cache.lock); - struct list_head *bucket = pan_bucket(dev, size); - struct panfrost_bo *bo = NULL; - - /* Iterate the bucket looking for something suitable */ - list_for_each_entry_safe(struct panfrost_bo, entry, bucket, - bucket_link) { - if (entry->size < size || entry->flags != flags) - continue; - - if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, - PAN_BO_ACCESS_RW)) - continue; - - struct drm_panfrost_madvise madv = { - .handle = entry->gem_handle, - .madv = PANFROST_MADV_WILLNEED, - }; - int ret; - - /* This one works, splice it out of the cache */ - list_del(&entry->bucket_link); - list_del(&entry->lru_link); - - ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); - if (!ret && !madv.retained) { - panfrost_bo_free(entry); - continue; - } - /* Let's go! */ - bo = entry; - break; - } - pthread_mutex_unlock(&dev->bo_cache.lock); - - return bo; -} - -static void -panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev) -{ - struct timespec time; - - clock_gettime(CLOCK_MONOTONIC, &time); - list_for_each_entry_safe(struct panfrost_bo, entry, - &dev->bo_cache.lru, lru_link) { - /* We want all entries that have been used more than 1 sec - * ago to be dropped, others can be kept. - * Note the <= 2 check and not <= 1. It's here to account for - * the fact that we're only testing ->tv_sec, not ->tv_nsec. 
- * That means we might keep entries that are between 1 and 2 - * seconds old, but we don't really care, as long as unused BOs - * are dropped at some point. - */ - if (time.tv_sec - entry->last_used <= 2) - break; - - list_del(&entry->bucket_link); - list_del(&entry->lru_link); - panfrost_bo_free(entry); - } -} - -/* Tries to add a BO to the cache. Returns if it was - * successful */ - -static bool -panfrost_bo_cache_put(struct panfrost_bo *bo) -{ - struct panfrost_device *dev = bo->dev; - - if (bo->flags & PAN_BO_DONT_REUSE) - return false; - - pthread_mutex_lock(&dev->bo_cache.lock); - struct list_head *bucket = pan_bucket(dev, bo->size); - struct drm_panfrost_madvise madv; - struct timespec time; - - madv.handle = bo->gem_handle; - madv.madv = PANFROST_MADV_DONTNEED; - madv.retained = 0; - - drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); - - /* Add us to the bucket */ - list_addtail(&bo->bucket_link, bucket); - - /* Add us to the LRU list and update the last_used field. */ - list_addtail(&bo->lru_link, &dev->bo_cache.lru); - clock_gettime(CLOCK_MONOTONIC, &time); - bo->last_used = time.tv_sec; - - /* Let's do some cleanup in the BO cache while we hold the - * lock. - */ - panfrost_bo_cache_evict_stale_bos(dev); - pthread_mutex_unlock(&dev->bo_cache.lock); - - return true; -} - -/* Evicts all BOs from the cache. Called during context - * destroy or during low-memory situations (to free up - * memory that may be unused by us just sitting in our - * cache, but still reserved from the perspective of the - * OS) */ - -void -panfrost_bo_cache_evict_all( - struct panfrost_device *dev) -{ - pthread_mutex_lock(&dev->bo_cache.lock); - for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) { - struct list_head *bucket = &dev->bo_cache.buckets[i]; - - list_for_each_entry_safe(struct panfrost_bo, entry, bucket, - bucket_link) { - list_del(&entry->bucket_link); - list_del(&entry->lru_link); - panfrost_bo_free(entry); - } - } - pthread_mutex_unlock(&dev->bo_cache.lock); -} - -void -panfrost_bo_mmap(struct panfrost_bo *bo) -{ - struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle }; - int ret; - - if (bo->cpu) - return; - - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); - if (ret) { - fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n"); - assert(0); - } - - bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bo->dev->fd, mmap_bo.offset); - if (bo->cpu == MAP_FAILED) { - fprintf(stderr, "mmap failed: %p %m\n", bo->cpu); - assert(0); - } - - /* Record the mmap if we're tracing */ - if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL); -} - -static void -panfrost_bo_munmap(struct panfrost_bo *bo) -{ - if (!bo->cpu) - return; - - if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) { - perror("munmap"); - abort(); - } - - bo->cpu = NULL; -} - -struct panfrost_bo * -panfrost_bo_create(struct panfrost_device *dev, size_t size, - uint32_t flags) -{ - struct panfrost_bo *bo; - - /* Kernel will fail (confusingly) with EPERM otherwise */ - assert(size > 0); - - /* To maximize BO cache usage, don't allocate tiny BOs */ - size = MAX2(size, 4096); - - /* GROWABLE BOs cannot be mmapped */ - if (flags & PAN_BO_GROWABLE) - assert(flags & PAN_BO_INVISIBLE); - - /* Before creating a BO, we first want to check the cache but without - * waiting for BO readiness (BOs in the cache can still be referenced - * by jobs that are not finished yet). 
- * If the cached allocation fails we fall back on fresh BO allocation, - * and if that fails too, we try one more time to allocate from the - * cache, but this time we accept to wait. - */ - bo = panfrost_bo_cache_fetch(dev, size, flags, true); - if (!bo) - bo = panfrost_bo_alloc(dev, size, flags); - if (!bo) - bo = panfrost_bo_cache_fetch(dev, size, flags, false); - - if (!bo) - fprintf(stderr, "BO creation failed\n"); - - assert(bo); - - /* Only mmap now if we know we need to. For CPU-invisible buffers, we - * never map since we don't care about their contents; they're purely - * for GPU-internal use. But we do trace them anyway. */ - - if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP))) - panfrost_bo_mmap(bo); - else if (flags & PAN_BO_INVISIBLE) { - if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL); - } - - p_atomic_set(&bo->refcnt, 1); - - pthread_mutex_lock(&dev->active_bos_lock); - _mesa_set_add(bo->dev->active_bos, bo); - pthread_mutex_unlock(&dev->active_bos_lock); - - return bo; -} - -void -panfrost_bo_reference(struct panfrost_bo *bo) -{ - if (bo) { - ASSERTED int count = p_atomic_inc_return(&bo->refcnt); - assert(count != 1); - } -} - -void -panfrost_bo_unreference(struct panfrost_bo *bo) -{ - if (!bo) - return; - - /* Don't return to cache if there are still references */ - if (p_atomic_dec_return(&bo->refcnt)) - return; - - struct panfrost_device *dev = bo->dev; - - pthread_mutex_lock(&dev->active_bos_lock); - /* Someone might have imported this BO while we were waiting for the - * lock, let's make sure it's still not referenced before freeing it. - */ - if (p_atomic_read(&bo->refcnt) == 0) { - _mesa_set_remove_key(bo->dev->active_bos, bo); - - /* When the reference count goes to zero, we need to cleanup */ - panfrost_bo_munmap(bo); - - /* Rather than freeing the BO now, we'll cache the BO for later - * allocations if we're allowed to. - */ - if (!panfrost_bo_cache_put(bo)) - panfrost_bo_free(bo); - } - pthread_mutex_unlock(&dev->active_bos_lock); -} - -struct panfrost_bo * -panfrost_bo_import(struct panfrost_device *dev, int fd) -{ - struct panfrost_bo *bo, *newbo = rzalloc(dev->memctx, struct panfrost_bo); - struct drm_panfrost_get_bo_offset get_bo_offset = {0,}; - struct set_entry *entry; - ASSERTED int ret; - unsigned gem_handle; - - newbo->dev = dev; - - ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle); - assert(!ret); - - newbo->gem_handle = gem_handle; - - pthread_mutex_lock(&dev->active_bos_lock); - entry = _mesa_set_search_or_add(dev->active_bos, newbo); - assert(entry); - bo = (struct panfrost_bo *)entry->key; - if (newbo == bo) { - get_bo_offset.handle = gem_handle; - ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset); - assert(!ret); - - newbo->gpu = (mali_ptr) get_bo_offset.offset; - newbo->size = lseek(fd, 0, SEEK_END); - newbo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED; - assert(newbo->size > 0); - p_atomic_set(&newbo->refcnt, 1); - // TODO map and unmap on demand? - panfrost_bo_mmap(newbo); - } else { - ralloc_free(newbo); - /* bo->refcnt != 0 can happen if the BO - * was being released but panfrost_bo_import() acquired the - * lock before panfrost_bo_unreference(). In that case, refcnt - * is 0 and we can't use panfrost_bo_reference() directly, we - * have to re-initialize the refcnt(). - * Note that panfrost_bo_unreference() checks - * refcnt value just after acquiring the lock to - * make sure the object is not freed if panfrost_bo_import() - * acquired it in the meantime. 
- */ - if (p_atomic_read(&bo->refcnt)) - p_atomic_set(&newbo->refcnt, 1); - else - panfrost_bo_reference(bo); - assert(bo->cpu); - } - pthread_mutex_unlock(&dev->active_bos_lock); - - return bo; -} - -int -panfrost_bo_export(struct panfrost_bo *bo) -{ - struct drm_prime_handle args = { - .handle = bo->gem_handle, - .flags = DRM_CLOEXEC, - }; - - int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); - if (ret == -1) - return -1; - - bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED; - return args.fd; -} - diff --git a/src/gallium/drivers/panfrost/pan_bo.h b/src/gallium/drivers/panfrost/pan_bo.h deleted file mode 100644 index fc20ceed1d4..00000000000 --- a/src/gallium/drivers/panfrost/pan_bo.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * © Copyright 2019 Alyssa Rosenzweig - * © Copyright 2019 Collabora, Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __PAN_BO_H__ -#define __PAN_BO_H__ - -#include -#include "util/list.h" -#include "pan_device.h" - -/* Flags for allocated memory */ - -/* This memory region is executable */ -#define PAN_BO_EXECUTE (1 << 0) - -/* This memory region should be lazily allocated and grow-on-page-fault. Must - * be used in conjunction with INVISIBLE */ -#define PAN_BO_GROWABLE (1 << 1) - -/* This memory region should not be mapped to the CPU */ -#define PAN_BO_INVISIBLE (1 << 2) - -/* This memory region will be used for varyings and needs to have the cache - * bits twiddled accordingly */ -#define PAN_BO_COHERENT_LOCAL (1 << 3) - -/* This region may not be used immediately and will not mmap on allocate - * (semantically distinct from INVISIBLE, which cannot never be mmaped) */ -#define PAN_BO_DELAY_MMAP (1 << 4) - -/* Some BOs shouldn't be returned back to the reuse BO cache, use this flag to - * let the BO logic know about this contraint. */ -#define PAN_BO_DONT_REUSE (1 << 5) - -/* BO has been imported */ -#define PAN_BO_IMPORTED (1 << 6) - -/* BO has been exported */ -#define PAN_BO_EXPORTED (1 << 7) - -/* GPU access flags */ - -/* BO is either shared (can be accessed by more than one GPU batch) or private - * (reserved by a specific GPU job). */ -#define PAN_BO_ACCESS_PRIVATE (0 << 0) -#define PAN_BO_ACCESS_SHARED (1 << 0) - -/* BO is being read/written by the GPU */ -#define PAN_BO_ACCESS_READ (1 << 1) -#define PAN_BO_ACCESS_WRITE (1 << 2) -#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE) - -/* BO is accessed by the vertex/tiler job. 
*/ -#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3) - -/* BO is accessed by the fragment job. */ -#define PAN_BO_ACCESS_FRAGMENT (1 << 4) - -struct panfrost_bo { - /* Must be first for casting */ - struct list_head bucket_link; - - /* Used to link the BO to the BO cache LRU list. */ - struct list_head lru_link; - - /* Store the time this BO was use last, so the BO cache logic can evict - * stale BOs. - */ - time_t last_used; - - /* Atomic reference count */ - int32_t refcnt; - - struct panfrost_device *dev; - - /* Mapping for the entire object (all levels) */ - uint8_t *cpu; - - /* GPU address for the object */ - mali_ptr gpu; - - /* Size of all entire trees */ - size_t size; - - int gem_handle; - - uint32_t flags; - - /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending - * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl - * when the BO is idle. - */ - uint32_t gpu_access; -}; - -bool -panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, - uint32_t access_type); -void -panfrost_bo_reference(struct panfrost_bo *bo); -void -panfrost_bo_unreference(struct panfrost_bo *bo); -struct panfrost_bo * -panfrost_bo_create(struct panfrost_device *dev, size_t size, - uint32_t flags); -void -panfrost_bo_mmap(struct panfrost_bo *bo); -struct panfrost_bo * -panfrost_bo_import(struct panfrost_device *dev, int fd); -int -panfrost_bo_export(struct panfrost_bo *bo); -void -panfrost_bo_cache_evict_all(struct panfrost_device *dev); - -#endif /* __PAN_BO_H__ */ diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index a0111a6ca06..2d268270954 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -922,7 +922,7 @@ panfrost_create_sampler_view( template->u.tex.last_layer, type, prsrc->layout); - so->bo = panfrost_bo_create(device, size, 0); + so->bo = pan_bo_create(device, size, 0); panfrost_new_texture( so->bo->cpu, @@ -1180,7 +1180,7 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* Allocate a bo for the query results to be stored */ if (!query->bo) { - query->bo = panfrost_bo_create( + query->bo = pan_bo_create( pan_device(ctx->base.screen), sizeof(unsigned), 0); } diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index eca2944bff0..8634bf74d6e 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -603,12 +603,12 @@ panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size, { struct panfrost_bo *bo; - bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size, + bo = pan_bo_create(pan_device(batch->ctx->base.screen), size, create_flags); panfrost_batch_add_bo(batch, bo, access_flags); /* panfrost_batch_add_bo() has retained a reference and - * panfrost_bo_create() initialize the refcnt to 1, so let's + * pan_bo_create() initialize the refcnt to 1, so let's * unreference the BO here so it gets released when the batch is * destroyed (unless it's retained by someone else in the meantime). 
*/ diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 3929ff176cd..3505f373788 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -47,8 +47,26 @@ #include "pan_resource.h" #include "pan_util.h" #include "pan_tiling.h" +#include "pandecode/decode.h" #include "panfrost-quirks.h" +/* Wrapper around panfrost_bo_create that handles pandecode */ + +struct panfrost_bo * +pan_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags) +{ + struct panfrost_bo *bo = panfrost_bo_create(dev, size, flags); + + if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { + if (flags & PAN_BO_INVISIBLE) + pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL); + else if (!(flags & PAN_BO_DELAY_MMAP)) + pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL); + } + + return bo; +} + void panfrost_resource_reset_damage(struct panfrost_resource *pres) { @@ -406,7 +424,7 @@ panfrost_resource_create_bo(struct panfrost_device *dev, struct panfrost_resourc /* We create a BO immediately but don't bother mapping, since we don't * care to map e.g. FBOs which the CPU probably won't touch */ - pres->bo = panfrost_bo_create(dev, bo_size, PAN_BO_DELAY_MMAP); + pres->bo = pan_bo_create(dev, bo_size, PAN_BO_DELAY_MMAP); } void @@ -562,6 +580,9 @@ panfrost_transfer_map(struct pipe_context *pctx, /* If we haven't already mmaped, now's the time */ panfrost_bo_mmap(bo); + if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) + pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL); + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { /* If the BO is used by one of the pending batches or if it's * not ready yet (still accessed by one of the already flushed @@ -580,7 +601,7 @@ panfrost_transfer_map(struct pipe_context *pctx, * doing to it. */ if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) - newbo = panfrost_bo_create(dev, bo->size, + newbo = pan_bo_create(dev, bo->size, flags); if (newbo) { @@ -862,7 +883,7 @@ panfrost_resource_hint_layout( /* If we grew in size, reallocate the BO */ if (new_size > rsrc->bo->size) { panfrost_bo_unreference(rsrc->bo); - rsrc->bo = panfrost_bo_create(dev, new_size, PAN_BO_DELAY_MMAP); + rsrc->bo = pan_bo_create(dev, new_size, PAN_BO_DELAY_MMAP); } /* TODO: If there are textures bound, regenerate their descriptors */ diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h index a3d78e940e5..0a25e305de0 100644 --- a/src/gallium/drivers/panfrost/pan_resource.h +++ b/src/gallium/drivers/panfrost/pan_resource.h @@ -118,4 +118,8 @@ panfrost_resource_set_damage_region(struct pipe_screen *screen, unsigned int nrects, const struct pipe_box *rects); + +struct panfrost_bo * +pan_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags); + #endif /* PAN_RESOURCE_H */ diff --git a/src/gallium/drivers/panfrost/pan_util.h b/src/gallium/drivers/panfrost/pan_util.h deleted file mode 100644 index 06484ce383b..00000000000 --- a/src/gallium/drivers/panfrost/pan_util.h +++ /dev/null @@ -1,46 +0,0 @@ -/************************************************************************** - * - * Copyright 2019 Collabora, Ltd. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef PAN_UTIL_H -#define PAN_UTIL_H - -#define PAN_DBG_MSGS 0x0001 -#define PAN_DBG_TRACE 0x0002 -#define PAN_DBG_DEQP 0x0004 -#define PAN_DBG_AFBC 0x0008 -#define PAN_DBG_SYNC 0x0010 -#define PAN_DBG_PRECOMPILE 0x0020 -#define PAN_DBG_GLES3 0x0040 - -extern int pan_debug; - -#define DBG(fmt, ...) \ - do { if (pan_debug & PAN_DBG_MSGS) \ - fprintf(stderr, "%s:%d: "fmt, \ - __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) - -#endif /* PAN_UTIL_H */ diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources index ad9a230567e..a92213a21ca 100644 --- a/src/panfrost/Makefile.sources +++ b/src/panfrost/Makefile.sources @@ -17,6 +17,8 @@ bifrost_FILES := \ encoder_FILES := \ encoder/pan_afbc.c \ encoder/pan_attributes.c \ + encoder/pan_bo.c \ + encoder/pan_bo.h \ encoder/pan_device.h \ encoder/pan_encoder.h \ encoder/pan_format.c \ @@ -25,7 +27,8 @@ encoder_FILES := \ encoder/pan_sampler.c \ encoder/pan_tiler.c \ encoder/pan_texture.c \ - encoder/pan_scratch.c + encoder/pan_scratch.c \ + encoder/pan_util.h midgard_FILES := \ midgard/compiler.h \ diff --git a/src/panfrost/encoder/meson.build b/src/panfrost/encoder/meson.build index babf52916ae..de45322c578 100644 --- a/src/panfrost/encoder/meson.build +++ b/src/panfrost/encoder/meson.build @@ -24,6 +24,7 @@ libpanfrost_encoder_files = files( 'pan_afbc.c', 'pan_attributes.c', + 'pan_bo.c', 'pan_format.c', 'pan_invocation.c', 'pan_sampler.c', diff --git a/src/panfrost/encoder/pan_bo.c b/src/panfrost/encoder/pan_bo.c new file mode 100644 index 00000000000..aca4f8f3848 --- /dev/null +++ b/src/panfrost/encoder/pan_bo.c @@ -0,0 +1,521 @@ +/* + * Copyright 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors (Collabora):
+ *   Alyssa Rosenzweig
+ */
+#include
+#include
+#include
+#include
+#include
+#include "drm-uapi/panfrost_drm.h"
+
+#include "pan_bo.h"
+
+#include "os/os_mman.h"
+
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+
+/* This file implements a userspace BO cache. Allocating and freeing
+ * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
+ * add more work than we would like at this point. So caching BOs in userspace
+ * solves both of these problems and does not require kernel updates.
+ *
+ * Cached BOs are sorted into a bucket based on rounding their size down to the
+ * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
+ * objects. Putting a BO into the cache is accomplished by adding it to the
+ * corresponding bucket. Getting a BO from the cache consists of finding the
+ * appropriate bucket and searching it for a suitable entry. A cache eviction
+ * is a kernel-level free of a BO and its removal from the bucket. We special
+ * case evicting all BOs from the cache, since that's what's helpful in
+ * practice and avoids extra logic around the linked list.
+ */
+
+static struct panfrost_bo *
+panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
+                  uint32_t flags)
+{
+        struct drm_panfrost_create_bo create_bo = { .size = size };
+        struct panfrost_bo *bo;
+        int ret;
+
+        if (dev->kernel_version->version_major > 1 ||
+            dev->kernel_version->version_minor >= 1) {
+                if (flags & PAN_BO_GROWABLE)
+                        create_bo.flags |= PANFROST_BO_HEAP;
+                if (!(flags & PAN_BO_EXECUTE))
+                        create_bo.flags |= PANFROST_BO_NOEXEC;
+        }
+
+        ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
+        if (ret) {
+                fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
+                return NULL;
+        }
+
+        bo = rzalloc(dev->memctx, struct panfrost_bo);
+        assert(bo);
+        bo->size = create_bo.size;
+        bo->gpu = create_bo.offset;
+        bo->gem_handle = create_bo.handle;
+        bo->flags = flags;
+        bo->dev = dev;
+        return bo;
+}
+
+static void
+panfrost_bo_free(struct panfrost_bo *bo)
+{
+        struct drm_gem_close gem_close = { .handle = bo->gem_handle };
+        int ret;
+
+        ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
+        if (ret) {
+                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
+                assert(0);
+        }
+
+        ralloc_free(bo);
+}
+
+/* Returns true if the BO is ready, false otherwise.
+ * access_type encodes the type of access one wants to ensure is done.
+ * If you want to make sure all writers are done writing, pass
+ * PAN_BO_ACCESS_WRITE.
+ * If you want to wait for all users, pass PAN_BO_ACCESS_RW.
+ * PAN_BO_ACCESS_READ would work too, as waiting for readers implies
+ * waiting for writers as well, but we want to make things explicit and waiting
+ * only for readers is impossible.
+ */ +bool +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, + uint32_t access_type) +{ + struct drm_panfrost_wait_bo req = { + .handle = bo->gem_handle, + .timeout_ns = timeout_ns, + }; + int ret; + + assert(access_type == PAN_BO_ACCESS_WRITE || + access_type == PAN_BO_ACCESS_RW); + + /* If the BO has been exported or imported we can't rely on the cached + * state, we need to call the WAIT_BO ioctl. + */ + if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) { + /* If ->gpu_access is 0, the BO is idle, no need to wait. */ + if (!bo->gpu_access) + return true; + + /* If the caller only wants to wait for writers and no + * writes are pending, we don't have to wait. + */ + if (access_type == PAN_BO_ACCESS_WRITE && + !(bo->gpu_access & PAN_BO_ACCESS_WRITE)) + return true; + } + + /* The ioctl returns >= 0 value when the BO we are waiting for is ready + * -1 otherwise. + */ + ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req); + if (ret != -1) { + /* Set gpu_access to 0 so that the next call to bo_wait() + * doesn't have to call the WAIT_BO ioctl. + */ + bo->gpu_access = 0; + return true; + } + + /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed + * is invalid, which shouldn't happen here. + */ + assert(errno == ETIMEDOUT || errno == EBUSY); + return false; +} + +/* Helper to calculate the bucket index of a BO */ + +static unsigned +pan_bucket_index(unsigned size) +{ + /* Round down to POT to compute a bucket index */ + + unsigned bucket_index = util_logbase2(size); + + /* Clamp the bucket index; all huge allocations will be + * sorted into the largest bucket */ + + bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET); + + /* The minimum bucket size must equal the minimum allocation + * size; the maximum we clamped */ + + assert(bucket_index >= MIN_BO_CACHE_BUCKET); + assert(bucket_index <= MAX_BO_CACHE_BUCKET); + + /* Reindex from 0 */ + return (bucket_index - MIN_BO_CACHE_BUCKET); +} + +static struct list_head * +pan_bucket(struct panfrost_device *dev, unsigned size) +{ + return &dev->bo_cache.buckets[pan_bucket_index(size)]; +} + +/* Tries to fetch a BO of sufficient size with the appropriate flags from the + * BO cache. If it succeeds, it returns that BO and removes the BO from the + * cache. If it fails, it returns NULL signaling the caller to allocate a new + * BO. */ + +static struct panfrost_bo * +panfrost_bo_cache_fetch(struct panfrost_device *dev, + size_t size, uint32_t flags, bool dontwait) +{ + pthread_mutex_lock(&dev->bo_cache.lock); + struct list_head *bucket = pan_bucket(dev, size); + struct panfrost_bo *bo = NULL; + + /* Iterate the bucket looking for something suitable */ + list_for_each_entry_safe(struct panfrost_bo, entry, bucket, + bucket_link) { + if (entry->size < size || entry->flags != flags) + continue; + + if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, + PAN_BO_ACCESS_RW)) + continue; + + struct drm_panfrost_madvise madv = { + .handle = entry->gem_handle, + .madv = PANFROST_MADV_WILLNEED, + }; + int ret; + + /* This one works, splice it out of the cache */ + list_del(&entry->bucket_link); + list_del(&entry->lru_link); + + ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); + if (!ret && !madv.retained) { + panfrost_bo_free(entry); + continue; + } + /* Let's go! 
*/ + bo = entry; + break; + } + pthread_mutex_unlock(&dev->bo_cache.lock); + + return bo; +} + +static void +panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev) +{ + struct timespec time; + + clock_gettime(CLOCK_MONOTONIC, &time); + list_for_each_entry_safe(struct panfrost_bo, entry, + &dev->bo_cache.lru, lru_link) { + /* We want all entries that have been used more than 1 sec + * ago to be dropped, others can be kept. + * Note the <= 2 check and not <= 1. It's here to account for + * the fact that we're only testing ->tv_sec, not ->tv_nsec. + * That means we might keep entries that are between 1 and 2 + * seconds old, but we don't really care, as long as unused BOs + * are dropped at some point. + */ + if (time.tv_sec - entry->last_used <= 2) + break; + + list_del(&entry->bucket_link); + list_del(&entry->lru_link); + panfrost_bo_free(entry); + } +} + +/* Tries to add a BO to the cache. Returns if it was + * successful */ + +static bool +panfrost_bo_cache_put(struct panfrost_bo *bo) +{ + struct panfrost_device *dev = bo->dev; + + if (bo->flags & PAN_BO_DONT_REUSE) + return false; + + pthread_mutex_lock(&dev->bo_cache.lock); + struct list_head *bucket = pan_bucket(dev, bo->size); + struct drm_panfrost_madvise madv; + struct timespec time; + + madv.handle = bo->gem_handle; + madv.madv = PANFROST_MADV_DONTNEED; + madv.retained = 0; + + drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); + + /* Add us to the bucket */ + list_addtail(&bo->bucket_link, bucket); + + /* Add us to the LRU list and update the last_used field. */ + list_addtail(&bo->lru_link, &dev->bo_cache.lru); + clock_gettime(CLOCK_MONOTONIC, &time); + bo->last_used = time.tv_sec; + + /* Let's do some cleanup in the BO cache while we hold the + * lock. + */ + panfrost_bo_cache_evict_stale_bos(dev); + pthread_mutex_unlock(&dev->bo_cache.lock); + + return true; +} + +/* Evicts all BOs from the cache. 
Called during context + * destroy or during low-memory situations (to free up + * memory that may be unused by us just sitting in our + * cache, but still reserved from the perspective of the + * OS) */ + +void +panfrost_bo_cache_evict_all( + struct panfrost_device *dev) +{ + pthread_mutex_lock(&dev->bo_cache.lock); + for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) { + struct list_head *bucket = &dev->bo_cache.buckets[i]; + + list_for_each_entry_safe(struct panfrost_bo, entry, bucket, + bucket_link) { + list_del(&entry->bucket_link); + list_del(&entry->lru_link); + panfrost_bo_free(entry); + } + } + pthread_mutex_unlock(&dev->bo_cache.lock); +} + +void +panfrost_bo_mmap(struct panfrost_bo *bo) +{ + struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle }; + int ret; + + if (bo->cpu) + return; + + ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); + if (ret) { + fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n"); + assert(0); + } + + bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->dev->fd, mmap_bo.offset); + if (bo->cpu == MAP_FAILED) { + fprintf(stderr, "mmap failed: %p %m\n", bo->cpu); + assert(0); + } +} + +static void +panfrost_bo_munmap(struct panfrost_bo *bo) +{ + if (!bo->cpu) + return; + + if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) { + perror("munmap"); + abort(); + } + + bo->cpu = NULL; +} + +struct panfrost_bo * +panfrost_bo_create(struct panfrost_device *dev, size_t size, + uint32_t flags) +{ + struct panfrost_bo *bo; + + /* Kernel will fail (confusingly) with EPERM otherwise */ + assert(size > 0); + + /* To maximize BO cache usage, don't allocate tiny BOs */ + size = MAX2(size, 4096); + + /* GROWABLE BOs cannot be mmapped */ + if (flags & PAN_BO_GROWABLE) + assert(flags & PAN_BO_INVISIBLE); + + /* Before creating a BO, we first want to check the cache but without + * waiting for BO readiness (BOs in the cache can still be referenced + * by jobs that are not finished yet). + * If the cached allocation fails we fall back on fresh BO allocation, + * and if that fails too, we try one more time to allocate from the + * cache, but this time we accept to wait. + */ + bo = panfrost_bo_cache_fetch(dev, size, flags, true); + if (!bo) + bo = panfrost_bo_alloc(dev, size, flags); + if (!bo) + bo = panfrost_bo_cache_fetch(dev, size, flags, false); + + if (!bo) + fprintf(stderr, "BO creation failed\n"); + + assert(bo); + + /* Only mmap now if we know we need to. For CPU-invisible buffers, we + * never map since we don't care about their contents; they're purely + * for GPU-internal use. But we do trace them anyway. */ + + if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP))) + panfrost_bo_mmap(bo); + + p_atomic_set(&bo->refcnt, 1); + + pthread_mutex_lock(&dev->active_bos_lock); + _mesa_set_add(bo->dev->active_bos, bo); + pthread_mutex_unlock(&dev->active_bos_lock); + + return bo; +} + +void +panfrost_bo_reference(struct panfrost_bo *bo) +{ + if (bo) { + ASSERTED int count = p_atomic_inc_return(&bo->refcnt); + assert(count != 1); + } +} + +void +panfrost_bo_unreference(struct panfrost_bo *bo) +{ + if (!bo) + return; + + /* Don't return to cache if there are still references */ + if (p_atomic_dec_return(&bo->refcnt)) + return; + + struct panfrost_device *dev = bo->dev; + + pthread_mutex_lock(&dev->active_bos_lock); + /* Someone might have imported this BO while we were waiting for the + * lock, let's make sure it's still not referenced before freeing it. 
+ */ + if (p_atomic_read(&bo->refcnt) == 0) { + _mesa_set_remove_key(bo->dev->active_bos, bo); + + /* When the reference count goes to zero, we need to cleanup */ + panfrost_bo_munmap(bo); + + /* Rather than freeing the BO now, we'll cache the BO for later + * allocations if we're allowed to. + */ + if (!panfrost_bo_cache_put(bo)) + panfrost_bo_free(bo); + } + pthread_mutex_unlock(&dev->active_bos_lock); +} + +struct panfrost_bo * +panfrost_bo_import(struct panfrost_device *dev, int fd) +{ + struct panfrost_bo *bo, *newbo = rzalloc(dev->memctx, struct panfrost_bo); + struct drm_panfrost_get_bo_offset get_bo_offset = {0,}; + struct set_entry *entry; + ASSERTED int ret; + unsigned gem_handle; + + newbo->dev = dev; + + ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle); + assert(!ret); + + newbo->gem_handle = gem_handle; + + pthread_mutex_lock(&dev->active_bos_lock); + entry = _mesa_set_search_or_add(dev->active_bos, newbo); + assert(entry); + bo = (struct panfrost_bo *)entry->key; + if (newbo == bo) { + get_bo_offset.handle = gem_handle; + ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset); + assert(!ret); + + newbo->gpu = (mali_ptr) get_bo_offset.offset; + newbo->size = lseek(fd, 0, SEEK_END); + newbo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED; + assert(newbo->size > 0); + p_atomic_set(&newbo->refcnt, 1); + // TODO map and unmap on demand? + panfrost_bo_mmap(newbo); + } else { + ralloc_free(newbo); + /* bo->refcnt != 0 can happen if the BO + * was being released but panfrost_bo_import() acquired the + * lock before panfrost_bo_unreference(). In that case, refcnt + * is 0 and we can't use panfrost_bo_reference() directly, we + * have to re-initialize the refcnt(). + * Note that panfrost_bo_unreference() checks + * refcnt value just after acquiring the lock to + * make sure the object is not freed if panfrost_bo_import() + * acquired it in the meantime. + */ + if (p_atomic_read(&bo->refcnt)) + p_atomic_set(&newbo->refcnt, 1); + else + panfrost_bo_reference(bo); + assert(bo->cpu); + } + pthread_mutex_unlock(&dev->active_bos_lock); + + return bo; +} + +int +panfrost_bo_export(struct panfrost_bo *bo) +{ + struct drm_prime_handle args = { + .handle = bo->gem_handle, + .flags = DRM_CLOEXEC, + }; + + int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED; + return args.fd; +} + diff --git a/src/panfrost/encoder/pan_bo.h b/src/panfrost/encoder/pan_bo.h new file mode 100644 index 00000000000..fc20ceed1d4 --- /dev/null +++ b/src/panfrost/encoder/pan_bo.h @@ -0,0 +1,137 @@ +/* + * © Copyright 2019 Alyssa Rosenzweig + * © Copyright 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __PAN_BO_H__
+#define __PAN_BO_H__
+
+#include
+#include "util/list.h"
+#include "pan_device.h"
+
+/* Flags for allocated memory */
+
+/* This memory region is executable */
+#define PAN_BO_EXECUTE            (1 << 0)
+
+/* This memory region should be lazily allocated and grow-on-page-fault. Must
+ * be used in conjunction with INVISIBLE */
+#define PAN_BO_GROWABLE           (1 << 1)
+
+/* This memory region should not be mapped to the CPU */
+#define PAN_BO_INVISIBLE          (1 << 2)
+
+/* This memory region will be used for varyings and needs to have the cache
+ * bits twiddled accordingly */
+#define PAN_BO_COHERENT_LOCAL     (1 << 3)
+
+/* This region may not be used immediately and will not be mmapped on
+ * allocation (semantically distinct from INVISIBLE, which can never be
+ * mmapped) */
+#define PAN_BO_DELAY_MMAP         (1 << 4)
+
+/* Some BOs shouldn't be returned to the reuse BO cache; use this flag to
+ * let the BO logic know about this constraint. */
+#define PAN_BO_DONT_REUSE         (1 << 5)
+
+/* BO has been imported */
+#define PAN_BO_IMPORTED           (1 << 6)
+
+/* BO has been exported */
+#define PAN_BO_EXPORTED           (1 << 7)
+
+/* GPU access flags */
+
+/* BO is either shared (can be accessed by more than one GPU batch) or private
+ * (reserved by a specific GPU job). */
+#define PAN_BO_ACCESS_PRIVATE         (0 << 0)
+#define PAN_BO_ACCESS_SHARED          (1 << 0)
+
+/* BO is being read/written by the GPU */
+#define PAN_BO_ACCESS_READ            (1 << 1)
+#define PAN_BO_ACCESS_WRITE           (1 << 2)
+#define PAN_BO_ACCESS_RW              (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)
+
+/* BO is accessed by the vertex/tiler job. */
+#define PAN_BO_ACCESS_VERTEX_TILER    (1 << 3)
+
+/* BO is accessed by the fragment job. */
+#define PAN_BO_ACCESS_FRAGMENT        (1 << 4)
+
+struct panfrost_bo {
+        /* Must be first for casting */
+        struct list_head bucket_link;
+
+        /* Used to link the BO to the BO cache LRU list. */
+        struct list_head lru_link;
+
+        /* Stores the time this BO was last used, so the BO cache logic can
+         * evict stale BOs.
+         */
+        time_t last_used;
+
+        /* Atomic reference count */
+        int32_t refcnt;
+
+        struct panfrost_device *dev;
+
+        /* Mapping for the entire object (all levels) */
+        uint8_t *cpu;
+
+        /* GPU address for the object */
+        mali_ptr gpu;
+
+        /* Size of the entire object */
+        size_t size;
+
+        int gem_handle;
+
+        uint32_t flags;
+
+        /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
+         * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
+         * when the BO is idle.
+ */ + uint32_t gpu_access; +}; + +bool +panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, + uint32_t access_type); +void +panfrost_bo_reference(struct panfrost_bo *bo); +void +panfrost_bo_unreference(struct panfrost_bo *bo); +struct panfrost_bo * +panfrost_bo_create(struct panfrost_device *dev, size_t size, + uint32_t flags); +void +panfrost_bo_mmap(struct panfrost_bo *bo); +struct panfrost_bo * +panfrost_bo_import(struct panfrost_device *dev, int fd); +int +panfrost_bo_export(struct panfrost_bo *bo); +void +panfrost_bo_cache_evict_all(struct panfrost_device *dev); + +#endif /* __PAN_BO_H__ */ diff --git a/src/panfrost/encoder/pan_device.h b/src/panfrost/encoder/pan_device.h index 07158b92007..19aa2df35bd 100644 --- a/src/panfrost/encoder/pan_device.h +++ b/src/panfrost/encoder/pan_device.h @@ -34,9 +34,9 @@ #include "util/u_dynarray.h" #include "util/bitset.h" #include "util/set.h" +#include "util/list.h" #include -#include "pan_allocate.h" /* Driver limits */ #define PAN_MAX_CONST_BUFFERS 16 diff --git a/src/panfrost/encoder/pan_util.h b/src/panfrost/encoder/pan_util.h new file mode 100644 index 00000000000..06484ce383b --- /dev/null +++ b/src/panfrost/encoder/pan_util.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2019 Collabora, Ltd. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PAN_UTIL_H +#define PAN_UTIL_H + +#define PAN_DBG_MSGS 0x0001 +#define PAN_DBG_TRACE 0x0002 +#define PAN_DBG_DEQP 0x0004 +#define PAN_DBG_AFBC 0x0008 +#define PAN_DBG_SYNC 0x0010 +#define PAN_DBG_PRECOMPILE 0x0020 +#define PAN_DBG_GLES3 0x0040 + +extern int pan_debug; + +#define DBG(fmt, ...) \ + do { if (pan_debug & PAN_DBG_MSGS) \ + fprintf(stderr, "%s:%d: "fmt, \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) + +#endif /* PAN_UTIL_H */
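
For readers new to the relocated interface, here is a minimal sketch (illustrative only, not part of the commit) of how a non-Gallium consumer could drive the BO API now that it lives in the encoder library. It assumes dev points to a struct panfrost_device that has already been initialized elsewhere (DRM fd, ralloc memctx, BO cache and active_bos set up); device bring-up is outside the scope of this commit, and example_upload is a hypothetical helper name.

    #include <string.h>
    #include "pan_bo.h"

    static void
    example_upload(struct panfrost_device *dev, const void *data, size_t size)
    {
            /* CPU-visible, non-executable allocation. With neither
             * PAN_BO_INVISIBLE nor PAN_BO_DELAY_MMAP set,
             * panfrost_bo_create() mmaps eagerly, so bo->cpu is valid, and
             * the request may be serviced from the userspace BO cache. */
            struct panfrost_bo *bo = panfrost_bo_create(dev, size, 0);

            memcpy(bo->cpu, data, size);

            /* ... point GPU jobs at bo->gpu and submit them here ... */

            /* Dropping the last reference returns the BO to the cache via
             * panfrost_bo_cache_put() rather than freeing it, unless
             * PAN_BO_DONT_REUSE is set. */
            panfrost_bo_unreference(bo);
    }

Note that pandecode tracing no longer happens in the core library: the Gallium-side wrapper pan_bo_create() in pan_resource.c above injects the mmap records, so any other consumer that wants traces must do the same around its own allocations.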