X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Firis%2Firis_bufmgr.c;h=2dc7e3eec4ce45bcd2f49335fdaee6cff05aecfd;hb=12d8a17957a54ae201417b8539c3fa3bdc0761f2;hp=6423136dd4d51a6911e17d2a7c305af2a58365da;hpb=3861d24e236e51b09b2024c7c4dbb36386f359f0;p=mesa.git diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c index 6423136dd4d..2dc7e3eec4c 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.c +++ b/src/gallium/drivers/iris/iris_bufmgr.c @@ -8,17 +8,27 @@ * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * @file iris_bufmgr.c + * + * The Iris buffer manager. + * + * XXX: write better comments + * - BOs + * - Explain BO cache + * - main interface to GEM in the kernel */ #ifdef HAVE_CONFIG_H @@ -41,13 +51,12 @@ #include #include "errno.h" -#ifndef ETIME -#define ETIME ETIMEDOUT -#endif #include "common/gen_clflush.h" -#include "common/gen_debug.h" +#include "dev/gen_debug.h" +#include "common/gen_gem.h" #include "dev/gen_device_info.h" #include "main/macros.h" +#include "util/debug.h" #include "util/macros.h" #include "util/hash_table.h" #include "util/list.h" @@ -81,20 +90,6 @@ #define FILE_DEBUG_FLAG DEBUG_BUFMGR -/** - * Call ioctl, restarting if it is interupted - */ -int -drm_ioctl(int fd, unsigned long request, void *arg) -{ - int ret; - - do { - ret = ioctl(fd, request, arg); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - return ret; -} - static inline int atomic_add_unless(int *v, int add, int unless) { @@ -105,27 +100,20 @@ atomic_add_unless(int *v, int add, int unless) return c == unless; } -/* - * Idea: - * - * Have a bitmap-allocator for each BO cache bucket size. Because bo_alloc - * rounds up allocations to the bucket size anyway, we can make 1 bit in the - * bitmap represent N pages of memory, where N = . - * Allocations and frees always set/unset a single bit. Because ffsll only - * works on uint64_t, use a tree(?) of those. - * - * Nodes contain a starting address and a uint64_t bitmap. (pair-of-uint64_t) - * Bitmap uses 1 for a free block, 0 for in-use. - * - * Bucket contains... - * - * Dynamic array of nodes. 
(pointer, two ints) - */ - -struct vma_bucket_node { - uint64_t start_address; - uint64_t bitmap; -}; +static const char * +memzone_name(enum iris_memory_zone memzone) +{ + const char *names[] = { + [IRIS_MEMZONE_SHADER] = "shader", + [IRIS_MEMZONE_BINDER] = "binder", + [IRIS_MEMZONE_SURFACE] = "surface", + [IRIS_MEMZONE_DYNAMIC] = "dynamic", + [IRIS_MEMZONE_OTHER] = "other", + [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor", + }; + assert(memzone < ARRAY_SIZE(names)); + return names[memzone]; +} struct bo_cache_bucket { /** List of cached BOs. */ @@ -133,9 +121,6 @@ struct bo_cache_bucket { /** Size of this bucket, in bytes. */ uint64_t size; - - /** List of vma_bucket_nodes */ - struct util_dynarray vma_list[IRIS_MEMZONE_COUNT]; }; struct iris_bufmgr { @@ -151,6 +136,12 @@ struct iris_bufmgr { struct hash_table *name_table; struct hash_table *handle_table; + /** + * List of BOs which we've effectively freed, but are hanging on to + * until they're idle before closing and returning the VMA. + */ + struct list_head zombie_list; + struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT]; bool has_llc:1; @@ -179,10 +170,27 @@ key_uint_equal(const void *a, const void *b) } static struct iris_bo * -hash_find_bo(struct hash_table *ht, unsigned int key) +find_and_ref_external_bo(struct hash_table *ht, unsigned int key) { struct hash_entry *entry = _mesa_hash_table_search(ht, &key); - return entry ? (struct iris_bo *) entry->data : NULL; + struct iris_bo *bo = entry ? entry->data : NULL; + + if (bo) { + assert(bo->external); + assert(!bo->reusable); + + /* Being non-reusable, the BO cannot be in the cache lists, but it + * may be in the zombie list if it had reached zero references, but + * we hadn't yet closed it...and then reimported the same BO. If it + * is, then remove it since it's now been resurrected. + */ + if (bo->head.prev || bo->head.next) + list_del(&bo->head); + + iris_bo_reference(bo); + } + + return bo; } /** @@ -225,124 +233,62 @@ bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size) &bufmgr->cache_bucket[index] : NULL; } -static enum iris_memory_zone -memzone_for_address(uint64_t address) +enum iris_memory_zone +iris_memzone_for_address(uint64_t address) { - const uint64_t _4GB = 1ull << 32; + STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START); + STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START); + STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_BINDER_START); + STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START); + STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START); - if (address >= 3 * _4GB) + if (address >= IRIS_MEMZONE_OTHER_START) return IRIS_MEMZONE_OTHER; - if (address >= 2 * _4GB) + if (address == IRIS_BORDER_COLOR_POOL_ADDRESS) + return IRIS_MEMZONE_BORDER_COLOR_POOL; + + if (address > IRIS_MEMZONE_DYNAMIC_START) return IRIS_MEMZONE_DYNAMIC; - if (address >= 1 * _4GB) + if (address >= IRIS_MEMZONE_SURFACE_START) return IRIS_MEMZONE_SURFACE; - return IRIS_MEMZONE_SHADER; -} - -static uint64_t -bucket_vma_alloc(struct iris_bufmgr *bufmgr, - struct bo_cache_bucket *bucket, - enum iris_memory_zone memzone) -{ - struct util_dynarray *vma_list = &bucket->vma_list[memzone]; - struct vma_bucket_node *node; - - if (vma_list->size == 0) { - /* This bucket allocator is out of space - allocate a new block of - * memory from a larger allocator (either another bucket or util_vma). - * - * Set the first bit used, and return the start address. 
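       * (Worked example of the scheme being removed here: with a 4 KB bucket,
       *  each vma_bucket_node covers 64 * 4 KB = 256 KB of address space,
       *  bit i stands for start_address + i * 4096, a set bit marks a free
       *  slot, and ffsll() hands back the lowest free slot in constant time.)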
- */ - node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node)); - node->start_address = - vma_alloc(bufmgr, memzone, 64ull * bucket->size, bucket->size); - node->bitmap = ~1ull; - return node->start_address; - } - - /* Pick any bit from any node - they're all the right size and free. */ - node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node); - int bit = ffsll(node->bitmap) - 1; - assert(bit != -1); - - /* Reserve the memory by clearing the bit. */ - node->bitmap &= ~(1ull << bit); - - /* If this node is now completely full, remove it from the free list. */ - if (node->bitmap == 0ull) { - (void) util_dynarray_pop(vma_list, struct vma_bucket_node); - } - - return node->start_address + bit * bucket->size; -} - -static void -bucket_vma_free(struct bo_cache_bucket *bucket, - uint64_t address, - uint64_t size) -{ - enum iris_memory_zone memzone = memzone_for_address(address); - struct util_dynarray *vma_list = &bucket->vma_list[memzone]; - const uint64_t node_bytes = 64ull * bucket->size; - struct vma_bucket_node *node = NULL; - - uint64_t start = (address / node_bytes) * node_bytes; - int bit = (address - start) / bucket->size; - - util_dynarray_foreach(vma_list, struct vma_bucket_node, cur) { - if (cur->start_address == start) { - node = cur; - break; - } - } - - if (!node) { - node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node)); - node->start_address = start; - node->bitmap = 0ull; - } - - node->bitmap |= 1ull << bit; - - /* The block might be entirely free now, and if so, we could return it - * to the larger allocator. But we may as well hang on to it, in case - * we get more allocations at this block size. - */ -} - -static struct bo_cache_bucket * -get_bucket_allocator(struct iris_bufmgr *bufmgr, uint64_t size) -{ - /* Skip using the bucket allocator for very large sizes, as it allocates - * 64 of them and this can balloon rather quickly. - */ - if (size > 1024 * PAGE_SIZE) - return NULL; - - struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size); + if (address >= IRIS_MEMZONE_BINDER_START) + return IRIS_MEMZONE_BINDER; - if (bucket && bucket->size == size) - return bucket; - - return NULL; + return IRIS_MEMZONE_SHADER; } +/** + * Allocate a section of virtual memory for a buffer, assigning an address. + * + * This uses either the bucket allocator for the given size, or the large + * object allocator (util_vma). + */ static uint64_t vma_alloc(struct iris_bufmgr *bufmgr, enum iris_memory_zone memzone, uint64_t size, uint64_t alignment) { - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); + /* Force alignment to be some number of pages */ + alignment = ALIGN(alignment, PAGE_SIZE); - if (bucket) - return bucket_vma_alloc(bufmgr, bucket, memzone); + if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL) + return IRIS_BORDER_COLOR_POOL_ADDRESS; - return util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, - alignment); + /* The binder handles its own allocations. Return non-zero here. 
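    * (A zero return would look like allocation failure to callers such as
    *  bo_alloc_internal() and iris_bo_create_userptr(), which bail out when
    *  gtt_offset comes back as 0, so the binder zone hands back its fixed
    *  start address as a harmless non-zero placeholder; the binder code
    *  assigns the real offsets itself.)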
*/ + if (memzone == IRIS_MEMZONE_BINDER) + return IRIS_MEMZONE_BINDER_START; + + uint64_t addr = + util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment); + + assert((addr >> 48ull) == 0); + assert((addr % alignment) == 0); + + return gen_canonical_address(addr); } static void @@ -350,14 +296,22 @@ vma_free(struct iris_bufmgr *bufmgr, uint64_t address, uint64_t size) { - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); + if (address == IRIS_BORDER_COLOR_POOL_ADDRESS) + return; - if (bucket) { - bucket_vma_free(bucket, address, size); - } else { - enum iris_memory_zone memzone = memzone_for_address(address); - util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size); - } + /* Un-canonicalize the address. */ + address = gen_48b_address(address); + + if (address == 0ull) + return; + + enum iris_memory_zone memzone = iris_memzone_for_address(address); + + /* The binder handles its own allocations. */ + if (memzone == IRIS_MEMZONE_BINDER) + return; + + util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size); } int @@ -366,7 +320,7 @@ iris_bo_busy(struct iris_bo *bo) struct iris_bufmgr *bufmgr = bo->bufmgr; struct drm_i915_gem_busy busy = { .handle = bo->gem_handle }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); if (ret == 0) { bo->idle = !busy.busy; return busy.busy; @@ -383,29 +337,134 @@ iris_bo_madvise(struct iris_bo *bo, int state) .retained = 1, }; - drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); + gen_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); return madv.retained; } -/* drop the oldest entries that have been purged by the kernel */ -static void -iris_bo_cache_purge_bucket(struct iris_bufmgr *bufmgr, - struct bo_cache_bucket *bucket) +static struct iris_bo * +bo_calloc(void) +{ + struct iris_bo *bo = calloc(1, sizeof(*bo)); + if (bo) { + bo->hash = _mesa_hash_pointer(bo); + } + return bo; +} + +static struct iris_bo * +alloc_bo_from_cache(struct iris_bufmgr *bufmgr, + struct bo_cache_bucket *bucket, + uint32_t alignment, + enum iris_memory_zone memzone, + unsigned flags, + bool match_zone) { - list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { - if (iris_bo_madvise(bo, I915_MADV_DONTNEED)) + if (!bucket) + return NULL; + + struct iris_bo *bo = NULL; + + list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) { + /* Try a little harder to find one that's already in the right memzone */ + if (match_zone && memzone != iris_memzone_for_address(cur->gtt_offset)) + continue; + + /* If the last BO in the cache is busy, there are no idle BOs. Bail, + * either falling back to a non-matching memzone, or if that fails, + * allocating a fresh buffer. + */ + if (iris_bo_busy(cur)) + return NULL; + + list_del(&cur->head); + + /* Tell the kernel we need this BO. If it still exists, we're done! */ + if (iris_bo_madvise(cur, I915_MADV_WILLNEED)) { + bo = cur; break; + } - list_del(&bo->head); + /* This BO was purged, throw it out and keep looking. */ + bo_free(cur); + } + + if (!bo) + return NULL; + + /* If the cached BO isn't in the right memory zone, or the alignment + * isn't sufficient, free the old memory and assign it a new address. + */ + if (memzone != iris_memzone_for_address(bo->gtt_offset) || + bo->gtt_offset % alignment != 0) { + vma_free(bufmgr, bo->gtt_offset, bo->size); + bo->gtt_offset = 0ull; + } + + /* Zero the contents if necessary. 
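    * (The zeroing below is a raw CPU mapping of the buffer followed by a
    *  memset of all bo->size bytes.)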
If this fails, fall back to + * allocating a fresh BO, which will always be zeroed by the kernel. + */ + if (flags & BO_ALLOC_ZEROED) { + void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); + if (map) { + memset(map, 0, bo->size); + } else { + bo_free(bo); + return NULL; + } + } + + return bo; +} + +static struct iris_bo * +alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size) +{ + struct iris_bo *bo = bo_calloc(); + if (!bo) + return NULL; + + struct drm_i915_gem_create create = { .size = bo_size }; + + /* All new BOs we get from the kernel are zeroed, so we don't need to + * worry about that here. + */ + if (gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0) { + free(bo); + return NULL; + } + + bo->gem_handle = create.handle; + bo->bufmgr = bufmgr; + bo->size = bo_size; + bo->idle = true; + bo->tiling_mode = I915_TILING_NONE; + bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo->stride = 0; + + /* Calling set_domain() will allocate pages for the BO outside of the + * struct mutex lock in the kernel, which is more efficient than waiting + * to create them during the first execbuf that uses the BO. + */ + struct drm_i915_gem_set_domain sd = { + .handle = bo->gem_handle, + .read_domains = I915_GEM_DOMAIN_CPU, + .write_domain = 0, + }; + + if (gen_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) { bo_free(bo); + return NULL; } + + return bo; } static struct iris_bo * bo_alloc_internal(struct iris_bufmgr *bufmgr, const char *name, uint64_t size, + uint32_t alignment, enum iris_memory_zone memzone, unsigned flags, uint32_t tiling_mode, @@ -413,143 +472,78 @@ bo_alloc_internal(struct iris_bufmgr *bufmgr, { struct iris_bo *bo; unsigned int page_size = getpagesize(); - int ret; - struct bo_cache_bucket *bucket; - bool alloc_from_cache; - uint64_t bo_size; - bool zeroed = false; - - if (flags & BO_ALLOC_ZEROED) - zeroed = true; - - /* Round the allocated size up to a power of two number of pages. */ - bucket = bucket_for_size(bufmgr, size); + struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size); - /* If we don't have caching at this size, don't actually round the - * allocation up. + /* Round the size up to the bucket size, or if we don't have caching + * at this size, a multiple of the page size. */ - if (bucket == NULL) { - bo_size = size; - if (bo_size < page_size) - bo_size = page_size; - } else { - bo_size = bucket->size; - } + uint64_t bo_size = + bucket ? bucket->size : MAX2(ALIGN(size, page_size), page_size); mtx_lock(&bufmgr->lock); - /* Get a buffer out of the cache if available */ -retry: - alloc_from_cache = false; - if (bucket != NULL && !list_empty(&bucket->head)) { - /* If the last BO in the cache is idle, then reuse it. Otherwise, - * allocate a fresh buffer to avoid stalling. - */ - bo = LIST_ENTRY(struct iris_bo, bucket->head.next, head); - if (!iris_bo_busy(bo)) { - alloc_from_cache = true; - list_del(&bo->head); - } - if (alloc_from_cache) { - if (!iris_bo_madvise(bo, I915_MADV_WILLNEED)) { - bo_free(bo); - iris_bo_cache_purge_bucket(bufmgr, bucket); - goto retry; - } - - if (bo_set_tiling_internal(bo, tiling_mode, stride)) { - bo_free(bo); - goto retry; - } - - if (zeroed) { - void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); - if (!map) { - bo_free(bo); - goto retry; - } - memset(map, 0, bo_size); - } - } - } - - if (alloc_from_cache) { - /* If the cached BO isn't in the right memory zone, free the old - * memory and assign it a new address. 
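       * (For example, a buffer that was cached holding a surface-zone address
       *  but is now being reused for a dynamic-state allocation has its old
       *  VMA released here and picks up a fresh address from the requested
       *  zone further down.)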
- */ - if (memzone != memzone_for_address(bo->gtt_offset)) { - vma_free(bufmgr, bo->gtt_offset, size); - bo->gtt_offset = 0; - } - } else { - bo = calloc(1, sizeof(*bo)); - if (!bo) - goto err; - - bo->size = bo_size; - bo->idle = true; - - struct drm_i915_gem_create create = { .size = bo_size }; - - /* All new BOs we get from the kernel are zeroed, so we don't need to - * worry about that here. - */ - ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create); - if (ret != 0) { - free(bo); - goto err; - } - - bo->gem_handle = create.handle; - - bo->bufmgr = bufmgr; - bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; - - bo->tiling_mode = I915_TILING_NONE; - bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - bo->stride = 0; + /* Get a buffer out of the cache if available. First, we try to find + * one with a matching memory zone so we can avoid reallocating VMA. + */ + bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, flags, true); - if (bo_set_tiling_internal(bo, tiling_mode, stride)) - goto err_free; + /* If that fails, we try for any cached BO, without matching memzone. */ + if (!bo) { + bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, flags, + false); + } - /* Calling set_domain() will allocate pages for the BO outside of the - * struct mutex lock in the kernel, which is more efficient than waiting - * to create them during the first execbuf that uses the BO. - */ - struct drm_i915_gem_set_domain sd = { - .handle = bo->gem_handle, - .read_domains = I915_GEM_DOMAIN_CPU, - .write_domain = 0, - }; + mtx_unlock(&bufmgr->lock); - if (drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) - goto err_free; + if (!bo) { + bo = alloc_fresh_bo(bufmgr, bo_size); + if (!bo) + return NULL; } if (bo->gtt_offset == 0ull) { - bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1); + mtx_lock(&bufmgr->lock); + bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, alignment); + mtx_unlock(&bufmgr->lock); if (bo->gtt_offset == 0ull) goto err_free; } + if (bo_set_tiling_internal(bo, tiling_mode, stride)) + goto err_free; + bo->name = name; p_atomic_set(&bo->refcount, 1); - bo->reusable = true; + bo->reusable = bucket && bufmgr->bo_reuse; bo->cache_coherent = bufmgr->has_llc; bo->index = -1; + bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; - mtx_unlock(&bufmgr->lock); + /* By default, capture all driver-internal buffers like shader kernels, + * surface states, dynamic states, border colors, and so on. 
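    * (EXEC_OBJECT_CAPTURE asks the kernel to snapshot the buffer's contents
    *  into the GPU error state when a hang is detected, so these internal
    *  buffers show up in post-mortem dumps; plain data buffers in
    *  IRIS_MEMZONE_OTHER are left out of the capture.)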
+ */ + if (memzone < IRIS_MEMZONE_OTHER) + bo->kflags |= EXEC_OBJECT_CAPTURE; - DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name, - (unsigned long long) size); + if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) { + struct drm_i915_gem_caching arg = { + .handle = bo->gem_handle, + .caching = 1, + }; + if (gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0) { + bo->cache_coherent = true; + bo->reusable = false; + } + } + + DBG("bo_create: buf %d (%s) (%s memzone) %llub\n", bo->gem_handle, + bo->name, memzone_name(memzone), (unsigned long long) size); return bo; err_free: bo_free(bo); -err: - mtx_unlock(&bufmgr->lock); return NULL; } @@ -559,19 +553,76 @@ iris_bo_alloc(struct iris_bufmgr *bufmgr, uint64_t size, enum iris_memory_zone memzone) { - return bo_alloc_internal(bufmgr, name, size, memzone, + return bo_alloc_internal(bufmgr, name, size, 1, memzone, 0, I915_TILING_NONE, 0); } struct iris_bo * iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr, const char *name, - uint64_t size, enum iris_memory_zone memzone, + uint64_t size, uint32_t alignment, + enum iris_memory_zone memzone, uint32_t tiling_mode, uint32_t pitch, unsigned flags) { - return bo_alloc_internal(bufmgr, name, size, memzone, + return bo_alloc_internal(bufmgr, name, size, alignment, memzone, flags, tiling_mode, pitch); } +struct iris_bo * +iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name, + void *ptr, size_t size, + enum iris_memory_zone memzone) +{ + struct iris_bo *bo; + + bo = bo_calloc(); + if (!bo) + return NULL; + + struct drm_i915_gem_userptr arg = { + .user_ptr = (uintptr_t)ptr, + .user_size = size, + }; + if (gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) + goto err_free; + bo->gem_handle = arg.handle; + + /* Check the buffer for validity before we try and use it in a batch */ + struct drm_i915_gem_set_domain sd = { + .handle = bo->gem_handle, + .read_domains = I915_GEM_DOMAIN_CPU, + }; + if (gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd)) + goto err_close; + + bo->name = name; + bo->size = size; + bo->map_cpu = ptr; + + bo->bufmgr = bufmgr; + bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; + + mtx_lock(&bufmgr->lock); + bo->gtt_offset = vma_alloc(bufmgr, memzone, size, 1); + mtx_unlock(&bufmgr->lock); + + if (bo->gtt_offset == 0ull) + goto err_close; + + p_atomic_set(&bo->refcount, 1); + bo->userptr = true; + bo->cache_coherent = true; + bo->index = -1; + bo->idle = true; + + return bo; + +err_close: + gen_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &bo->gem_handle); +err_free: + free(bo); + return NULL; +} + /** * Returns a iris_bo wrapping the given buffer object handle. * @@ -591,14 +642,12 @@ iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr, * provides a sufficiently fast match. 
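    * (As an illustrative sketch of the flow, not something this change adds:
    *  the exporting side calls iris_bo_flink(bo, &name) and shares the
    *  32-bit name, the importer passes that name in here, and the lookup
    *  just below only falls back to DRM_IOCTL_GEM_OPEN on a miss.)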
*/ mtx_lock(&bufmgr->lock); - bo = hash_find_bo(bufmgr->name_table, handle); - if (bo) { - iris_bo_reference(bo); + bo = find_and_ref_external_bo(bufmgr->name_table, handle); + if (bo) goto out; - } struct drm_gem_open open_arg = { .name = handle }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg); if (ret != 0) { DBG("Couldn't reference %s handle 0x%08x: %s\n", name, handle, strerror(errno)); @@ -609,13 +658,11 @@ iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr, * object from the kernel before by looking through the list * again for a matching gem_handle */ - bo = hash_find_bo(bufmgr->handle_table, open_arg.handle); - if (bo) { - iris_bo_reference(bo); + bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle); + if (bo) goto out; - } - bo = calloc(1, sizeof(*bo)); + bo = bo_calloc(); if (!bo) goto out; @@ -624,19 +671,19 @@ iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr, bo->size = open_arg.size; bo->gtt_offset = 0; bo->bufmgr = bufmgr; - bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; bo->gem_handle = open_arg.handle; bo->name = name; bo->global_name = handle; bo->reusable = false; bo->external = true; + bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1); _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle }; - ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); + ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); if (ret != 0) goto err_unref; @@ -656,23 +703,10 @@ err_unref: } static void -bo_free(struct iris_bo *bo) +bo_close(struct iris_bo *bo) { struct iris_bufmgr *bufmgr = bo->bufmgr; - if (bo->map_cpu) { - VG_NOACCESS(bo->map_cpu, bo->size); - munmap(bo->map_cpu, bo->size); - } - if (bo->map_wc) { - VG_NOACCESS(bo->map_wc, bo->size); - munmap(bo->map_wc, bo->size); - } - if (bo->map_gtt) { - VG_NOACCESS(bo->map_gtt, bo->size); - munmap(bo->map_gtt, bo->size); - } - if (bo->external) { struct hash_entry *entry; @@ -685,18 +719,48 @@ bo_free(struct iris_bo *bo) _mesa_hash_table_remove(bufmgr->handle_table, entry); } - vma_free(bo->bufmgr, bo->gtt_offset, bo->size); - /* Close this object */ struct drm_gem_close close = { .handle = bo->gem_handle }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); if (ret != 0) { DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", bo->gem_handle, bo->name, strerror(errno)); } + + /* Return the VMA for reuse */ + vma_free(bo->bufmgr, bo->gtt_offset, bo->size); + free(bo); } +static void +bo_free(struct iris_bo *bo) +{ + struct iris_bufmgr *bufmgr = bo->bufmgr; + + if (bo->map_cpu && !bo->userptr) { + VG_NOACCESS(bo->map_cpu, bo->size); + munmap(bo->map_cpu, bo->size); + } + if (bo->map_wc) { + VG_NOACCESS(bo->map_wc, bo->size); + munmap(bo->map_wc, bo->size); + } + if (bo->map_gtt) { + VG_NOACCESS(bo->map_gtt, bo->size); + munmap(bo->map_gtt, bo->size); + } + + if (bo->idle) { + bo_close(bo); + } else { + /* Defer closing the GEM BO and returning the VMA for reuse until the + * BO is idle. Just move it to the dead list for now. + */ + list_addtail(&bo->head, &bufmgr->zombie_list); + } +} + /** Frees all cached buffers significantly older than @time. 
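 * (With this change the sweep also walks bufmgr->zombie_list and closes any
 *  freed BOs that were still busy on the GPU at free time but have since
 *  gone idle, returning their VMA for reuse.)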
*/ static void cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time) @@ -719,6 +783,17 @@ cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time) } } + list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) { + /* Stop once we reach a busy BO - all others past this point were + * freed more recently so are likely also busy. + */ + if (!bo->idle && iris_bo_busy(bo)) + break; + + list_del(&bo->head); + bo_close(bo); + } + bufmgr->time = time; } @@ -730,10 +805,11 @@ bo_unreference_final(struct iris_bo *bo, time_t time) DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name); - bucket = bucket_for_size(bufmgr, bo->size); + bucket = NULL; + if (bo->reusable) + bucket = bucket_for_size(bufmgr, bo->size); /* Put the buffer into our internal cache for reuse if we can. */ - if (bufmgr->bo_reuse && bo->reusable && bucket != NULL && - iris_bo_madvise(bo, I915_MADV_DONTNEED)) { + if (bucket && iris_bo_madvise(bo, I915_MADV_DONTNEED)) { bo->free_time = time; bo->name = NULL; @@ -824,9 +900,8 @@ iris_bo_map_cpu(struct pipe_debug_callback *dbg, .handle = bo->gem_handle, .size = bo->size, }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); if (ret != 0) { - ret = -errno; DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); return NULL; @@ -886,9 +961,8 @@ iris_bo_map_wc(struct pipe_debug_callback *dbg, .size = bo->size, .flags = I915_MMAP_WC, }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); if (ret != 0) { - ret = -errno; DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); return NULL; @@ -949,7 +1023,7 @@ iris_bo_map_gtt(struct pipe_debug_callback *dbg, struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle }; /* Get the fake offset back... */ - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg); if (ret != 0) { DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); @@ -1013,8 +1087,11 @@ can_map_cpu(struct iris_bo *bo, unsigned flags) * most drawing while non-persistent mappings are active, we may still use * the GPU for blits or other operations, causing batches to happen at * inconvenient times. + * + * If RAW is set, we expect the caller to be able to handle a WC buffer + * more efficiently than the involuntary clflushes. 
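    * (In effect, a MAP_RAW | MAP_WRITE request on a non-coherent BO ends up
    *  with a write-combining mapping, which is exactly what a caller asking
    *  for the raw backing memory is prepared to handle.)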
*/ - if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC)) + if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC | MAP_RAW)) return false; return !(flags & MAP_WRITE); @@ -1054,30 +1131,6 @@ iris_bo_map(struct pipe_debug_callback *dbg, return map; } -int -iris_bo_subdata(struct iris_bo *bo, uint64_t offset, - uint64_t size, const void *data) -{ - struct iris_bufmgr *bufmgr = bo->bufmgr; - - struct drm_i915_gem_pwrite pwrite = { - .handle = bo->gem_handle, - .offset = offset, - .size = size, - .data_ptr = (uint64_t) (uintptr_t) data, - }; - - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); - if (ret != 0) { - ret = -errno; - DBG("%s:%d: Error writing data to buffer %d: " - "(%"PRIu64" %"PRIu64") %s .\n", - __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno)); - } - - return ret; -} - /** Waits for all GPU rendering with the object to have completed. */ void iris_bo_wait_rendering(struct iris_bo *bo) @@ -1128,8 +1181,8 @@ iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns) .bo_handle = bo->gem_handle, .timeout_ns = timeout_ns, }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); - if (ret == -1) + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + if (ret != 0) return -errno; bo->idle = true; @@ -1151,14 +1204,22 @@ iris_bufmgr_destroy(struct iris_bufmgr *bufmgr) bo_free(bo); } + } - for (int i = 0; i < IRIS_MEMZONE_COUNT; i++) - util_dynarray_fini(&bucket->vma_list[i]); + /* Close any buffer objects on the dead list. */ + list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) { + list_del(&bo->head); + bo_close(bo); } _mesa_hash_table_destroy(bufmgr->name_table, NULL); _mesa_hash_table_destroy(bufmgr->handle_table, NULL); + for (int z = 0; z < IRIS_MEMZONE_COUNT; z++) { + if (z != IRIS_MEMZONE_BINDER) + util_vma_heap_finish(&bufmgr->vma_allocator[z]); + } + free(bufmgr); } @@ -1224,13 +1285,11 @@ iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd) * for named buffers, we must not create two bo's pointing at the same * kernel object */ - bo = hash_find_bo(bufmgr->handle_table, handle); - if (bo) { - iris_bo_reference(bo); + bo = find_and_ref_external_bo(bufmgr->handle_table, handle); + if (bo) goto out; - } - bo = calloc(1, sizeof(*bo)); + bo = bo_calloc(); if (!bo) goto out; @@ -1246,7 +1305,6 @@ iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd) bo->size = ret; bo->bufmgr = bufmgr; - bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; bo->gem_handle = handle; _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); @@ -1254,10 +1312,11 @@ iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd) bo->name = "prime"; bo->reusable = false; bo->external = true; + bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1); struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle }; - if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) + if (gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) goto err; bo->tiling_mode = get_tiling.tiling_mode; @@ -1274,19 +1333,26 @@ err: return NULL; } +static void +iris_bo_make_external_locked(struct iris_bo *bo) +{ + if (!bo->external) { + _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo); + bo->external = true; + } +} + static void iris_bo_make_external(struct iris_bo *bo) { struct iris_bufmgr *bufmgr = bo->bufmgr; - if (!bo->external) { - mtx_lock(&bufmgr->lock); - 
if (!bo->external) { - _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); - bo->external = true; - } - mtx_unlock(&bufmgr->lock); - } + if (bo->external) + return; + + mtx_lock(&bufmgr->lock); + iris_bo_make_external_locked(bo); + mtx_unlock(&bufmgr->lock); } int @@ -1321,12 +1387,12 @@ iris_bo_flink(struct iris_bo *bo, uint32_t *name) if (!bo->global_name) { struct drm_gem_flink flink = { .handle = bo->gem_handle }; - if (drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink)) + if (gen_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink)) return -errno; - iris_bo_make_external(bo); mtx_lock(&bufmgr->lock); if (!bo->global_name) { + iris_bo_make_external_locked(bo); bo->global_name = flink.name; _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); } @@ -1339,19 +1405,6 @@ iris_bo_flink(struct iris_bo *bo, uint32_t *name) return 0; } -/** - * Enables unlimited caching of buffer objects for reuse. - * - * This is potentially very memory expensive, as the cache at each bucket - * size is only bounded by how many buffers of that size we've managed to have - * in flight at once. - */ -void -iris_bufmgr_enable_reuse(struct iris_bufmgr *bufmgr) -{ - bufmgr->bo_reuse = true; -} - static void add_bucket(struct iris_bufmgr *bufmgr, int size) { @@ -1360,8 +1413,6 @@ add_bucket(struct iris_bufmgr *bufmgr, int size) assert(i < ARRAY_SIZE(bufmgr->cache_bucket)); list_inithead(&bufmgr->cache_bucket[i].head); - for (int i = 0; i < IRIS_MEMZONE_COUNT; i++) - util_dynarray_init(&bufmgr->cache_bucket[i].vma_list[i], NULL); bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; @@ -1401,15 +1452,48 @@ uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr) { struct drm_i915_gem_context_create create = { }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); if (ret != 0) { DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno)); return 0; } + /* Upon declaring a GPU hang, the kernel will zap the guilty context + * back to the default logical HW state and attempt to continue on to + * our next submitted batchbuffer. However, our render batches assume + * the previous GPU state is preserved, and only emit commands needed + * to incrementally change that state. In particular, we inherit the + * STATE_BASE_ADDRESS and PIPELINE_SELECT settings, which are critical. + * With default base addresses, our next batches will almost certainly + * cause more GPU hangs, leading to repeated hangs until we're banned + * or the machine is dead. + * + * Here we tell the kernel not to attempt to recover our context but + * immediately (on the next batchbuffer submission) report that the + * context is lost, and we will do the recovery ourselves. Ideally, + * we'll have two lost batches instead of a continual stream of hangs. + */ + struct drm_i915_gem_context_param p = { + .ctx_id = create.ctx_id, + .param = I915_CONTEXT_PARAM_RECOVERABLE, + .value = false, + }; + drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p); + return create.ctx_id; } +static int +iris_hw_context_get_priority(struct iris_bufmgr *bufmgr, uint32_t ctx_id) +{ + struct drm_i915_gem_context_param p = { + .ctx_id = ctx_id, + .param = I915_CONTEXT_PARAM_PRIORITY, + }; + drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p); + return p.value; /* on error, return 0 i.e. 
default priority */ +} + int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr, uint32_t ctx_id, @@ -1423,19 +1507,32 @@ iris_hw_context_set_priority(struct iris_bufmgr *bufmgr, int err; err = 0; - if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p)) + if (gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p)) err = -errno; return err; } +uint32_t +iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id) +{ + uint32_t new_ctx = iris_create_hw_context(bufmgr); + + if (new_ctx) { + int priority = iris_hw_context_get_priority(bufmgr, ctx_id); + iris_hw_context_set_priority(bufmgr, new_ctx, priority); + } + + return new_ctx; +} + void iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id) { struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id }; if (ctx_id != 0 && - drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) { + gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) { fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", strerror(errno)); } @@ -1445,12 +1542,27 @@ int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *result) { struct drm_i915_reg_read reg_read = { .offset = offset }; - int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, ®_read); + int ret = gen_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, ®_read); *result = reg_read.val; return ret; } +static uint64_t +iris_gtt_size(int fd) +{ + /* We use the default (already allocated) context to determine + * the default configuration of the virtual address space. + */ + struct drm_i915_gem_context_param p = { + .param = I915_CONTEXT_PARAM_GTT_SIZE, + }; + if (!gen_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p)) + return p.value; + + return 0; +} + /** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -1458,8 +1570,12 @@ iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *result) * \param fd File descriptor of the opened DRM device. */ struct iris_bufmgr * -iris_bufmgr_init(struct gen_device_info *devinfo, int fd) +iris_bufmgr_init(struct gen_device_info *devinfo, int fd, bool bo_reuse) { + uint64_t gtt_size = iris_gtt_size(fd); + if (gtt_size <= IRIS_MEMZONE_OTHER_START) + return NULL; + struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr)); if (bufmgr == NULL) return NULL; @@ -1480,18 +1596,32 @@ iris_bufmgr_init(struct gen_device_info *devinfo, int fd) return NULL; } + list_inithead(&bufmgr->zombie_list); + bufmgr->has_llc = devinfo->has_llc; + bufmgr->bo_reuse = bo_reuse; + STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull); const uint64_t _4GB = 1ull << 32; + /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */ + const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE; + util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER], - PAGE_SIZE, _4GB); + PAGE_SIZE, _4GB_minus_1 - PAGE_SIZE); util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE], - 1 * _4GB, _4GB); + IRIS_MEMZONE_SURFACE_START, + _4GB_minus_1 - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE); util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC], - 2 * _4GB, _4GB); + IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE, + _4GB_minus_1 - IRIS_BORDER_COLOR_POOL_SIZE); + + /* Leave the last 4GB out of the high vma range, so that no state + * base address + size can overflow 48 bits. 
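    * (Concretely: with a full 48-bit GTT, gtt_size is 1ull << 48 and the
    *  IRIS_MEMZONE_OTHER heap set up below ends at 2^48 - 4GB, so even a
    *  state base address placed at the very top of the heap still has a
    *  full 4GB of room before the 48-bit limit.)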
+ */ util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_OTHER], - 3 * _4GB, (1ull << 48) - 3 * _4GB); + IRIS_MEMZONE_OTHER_START, + (gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START); init_cache_buckets(bufmgr);
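
For reference, a minimal sketch of how the interfaces touched above fit
together on the driver side. This is not code from the patch: devinfo and fd
are assumed to come from normal screen setup, and the buffer name, size, and
memory zone are made-up illustrative values.

   struct iris_bufmgr *bufmgr = iris_bufmgr_init(devinfo, fd, true);

   /* Allocate a buffer in the general-purpose zone. */
   struct iris_bo *bo =
      iris_bo_alloc(bufmgr, "example data", 64 * 1024, IRIS_MEMZONE_OTHER);

   /* Write through a raw mapping, as bo_alloc_internal does for
    * BO_ALLOC_ZEROED requests.
    */
   void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
   if (map)
      memset(map, 0, 64 * 1024);

   /* Dropping the last reference returns a reusable BO to its cache bucket;
    * otherwise it is freed, with the GEM close deferred via the zombie list
    * while the GPU is still using it.
    */
   iris_bo_unreference(bo);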