* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file iris_bufmgr.c
+ *
+ * The Iris buffer manager.
+ *
+ * XXX: write better comments
+ * - BOs
+ * - Explain BO cache
+ * - main interface to GEM in the kernel
*/
#ifdef HAVE_CONFIG_H
#endif
#include "common/gen_clflush.h"
#include "common/gen_debug.h"
+#include "common/gen_gem.h"
#include "dev/gen_device_info.h"
#include "main/macros.h"
#include "util/debug.h"
return c == unless;
}
-/*
- * Idea:
- *
- * Have a bitmap-allocator for each BO cache bucket size. Because bo_alloc
- * rounds up allocations to the bucket size anyway, we can make 1 bit in the
- * bitmap represent N pages of memory, where N = <bucket size / page size>.
- * Allocations and frees always set/unset a single bit. Because ffsll only
- * works on uint64_t, use a tree(?) of those.
+/**
+ * Iris fixed-size bucketing VMA allocator.
*
- * Nodes contain a starting address and a uint64_t bitmap. (pair-of-uint64_t)
- * Bitmap uses 1 for a free block, 0 for in-use.
+ * The BO cache maintains "cache buckets" for buffers of various sizes.
+ * All buffers in a given bucket are identically sized - when allocating,
+ * we always round up to the bucket size. This means that virtually all
+ * allocations are fixed-size; only buffers which are too large to fit in
+ * a bucket can be variably-sized.
*
- * Bucket contains...
+ * We create an allocator for each bucket. Each contains a free-list, where
+ * each node contains a <starting address, 64-bit bitmap> pair. Each bit
+ * represents a bucket-sized block of memory. (At the first level, each
+ * bit corresponds to a page. For the second bucket, bits correspond to
+ * two pages, and so on.) 1 means a block is free, and 0 means it's in-use.
+ * The lowest bit in the bitmap is for the first block.
*
- * Dynamic array of nodes. (pointer, two ints)
+ * This makes allocations cheap - any bit of any node will do. We can pick
+ * the head of the list and use ffs() to find a free block. If there are
+ * none, we allocate 64 blocks from a larger allocator - either a bigger
+ * bucketing allocator, or a fallback top-level allocator for large objects.
*/
-
struct vma_bucket_node {
uint64_t start_address;
uint64_t bitmap;
/** Size of this bucket, in bytes. */
uint64_t size;
- /** List of vma_bucket_nodes */
+ /** List of vma_bucket_nodes. */
struct util_dynarray vma_list[IRIS_MEMZONE_COUNT];
};
static void bo_free(struct iris_bo *bo);
-static uint64_t __vma_alloc(struct iris_bufmgr *bufmgr,
- enum iris_memory_zone memzone,
- uint64_t size, uint64_t alignment);
+static uint64_t vma_alloc(struct iris_bufmgr *bufmgr,
+ enum iris_memory_zone memzone,
+ uint64_t size, uint64_t alignment);
static uint32_t
key_hash_uint(const void *key)
static enum iris_memory_zone
memzone_for_address(uint64_t address)
{
- const uint64_t _4GB = 1ull << 32;
+ STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START);
+ STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START);
+ STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_BINDER_START);
+ STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START);
+ STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
- if (address >= 3 * _4GB)
+ if (address >= IRIS_MEMZONE_OTHER_START)
return IRIS_MEMZONE_OTHER;
- if (address >= 2 * _4GB)
+ if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
+ return IRIS_MEMZONE_BORDER_COLOR_POOL;
+
+ if (address > IRIS_MEMZONE_DYNAMIC_START)
return IRIS_MEMZONE_DYNAMIC;
- if (address > 1 * _4GB)
+ if (address >= IRIS_MEMZONE_SURFACE_START)
return IRIS_MEMZONE_SURFACE;
- /* The binder isn't in any memory zone. */
- if (address == 1 * _4GB)
+ if (address >= IRIS_MEMZONE_BINDER_START)
return IRIS_MEMZONE_BINDER;
return IRIS_MEMZONE_SHADER;
*/
const uint64_t node_size = 64ull * bucket->size;
node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
- node->start_address = __vma_alloc(bufmgr, memzone, node_size, node_size);
+
+ if (unlikely(!node))
+ return 0ull;
+
+ uint64_t addr = vma_alloc(bufmgr, memzone, node_size, node_size);
+ node->start_address = gen_48b_address(addr);
node->bitmap = ~1ull;
return node->start_address;
}
assert((node->bitmap & (1ull << bit)) != 0ull);
node->bitmap &= ~(1ull << bit);
+ uint64_t addr = node->start_address + bit * bucket->size;
+
/* If this node is now completely full, remove it from the free list. */
if (node->bitmap == 0ull) {
(void) util_dynarray_pop(vma_list, struct vma_bucket_node);
}
- return node->start_address + bit * bucket->size;
+ return addr;
}
static void
-bucket_vma_free(struct bo_cache_bucket *bucket,
- uint64_t address,
- uint64_t size)
+bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address)
{
enum iris_memory_zone memzone = memzone_for_address(address);
struct util_dynarray *vma_list = &bucket->vma_list[memzone];
if (!node) {
/* No node - the whole group of 64 blocks must have been in-use. */
node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
+
+ if (unlikely(!node))
+ return; /* bogus, leaks some GPU VMA, but nothing we can do... */
+
node->start_address = start;
node->bitmap = 0ull;
}
}
static struct bo_cache_bucket *
-get_bucket_allocator(struct iris_bufmgr *bufmgr, uint64_t size)
+get_bucket_allocator(struct iris_bufmgr *bufmgr,
+ enum iris_memory_zone memzone,
+ uint64_t size)
{
/* Skip using the bucket allocator for very large sizes, as it allocates
* 64 of them and this can balloon rather quickly.
return NULL;
}
-/** Like vma_alloc, but returns a non-canonicalized address. */
+/**
+ * Allocate a section of virtual memory for a buffer, assigning an address.
+ *
+ * This uses either the bucket allocator for the given size, or the large
+ * object allocator (util_vma).
+ */
static uint64_t
-__vma_alloc(struct iris_bufmgr *bufmgr,
- enum iris_memory_zone memzone,
- uint64_t size,
- uint64_t alignment)
+vma_alloc(struct iris_bufmgr *bufmgr,
+ enum iris_memory_zone memzone,
+ uint64_t size,
+ uint64_t alignment)
{
+ if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
+ return IRIS_BORDER_COLOR_POOL_ADDRESS;
+
+ /* The binder handles its own allocations. Return non-zero here. */
if (memzone == IRIS_MEMZONE_BINDER)
- return IRIS_BINDER_ADDRESS;
+ return IRIS_MEMZONE_BINDER_START;
- struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
+ struct bo_cache_bucket *bucket =
+ get_bucket_allocator(bufmgr, memzone, size);
uint64_t addr;
if (bucket) {
addr = util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size,
alignment);
}
-
+
assert((addr >> 48ull) == 0);
assert((addr % alignment) == 0);
- return addr;
-}
-/**
- * Allocate a section of virtual memory for a buffer, assigning an address.
- *
- * This uses either the bucket allocator for the given size, or the large
- * object allocator (util_vma).
- */
-static uint64_t
-vma_alloc(struct iris_bufmgr *bufmgr,
- enum iris_memory_zone memzone,
- uint64_t size,
- uint64_t alignment)
-{
- uint64_t addr = __vma_alloc(bufmgr, memzone, size, alignment);
-
- /* Canonicalize the address.
- *
- * The Broadwell PRM Vol. 2a, MI_LOAD_REGISTER_MEM::MemoryAddress says:
- *
- * "This field specifies the address of the memory location where the
- * register value specified in the DWord above will read from. The
- * address specifies the DWord location of the data. Range =
- * GraphicsVirtualAddress[63:2] for a DWord register GraphicsAddress
- * [63:48] are ignored by the HW and assumed to be in correct
- * canonical form [63:48] == [47]."
- */
- const int shift = 63 - 47;
- addr = (((int64_t) addr) << shift) >> shift;
-
- return addr;
+ return gen_canonical_address(addr);
}
static void
uint64_t address,
uint64_t size)
{
- if (address == IRIS_BINDER_ADDRESS)
+ if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
return;
- /* Un-canonicalize the address; our allocators expect 0 in the high bits */
- address &= (1ull << 48) - 1;
+ /* Un-canonicalize the address. */
+ address = gen_48b_address(address);
+
+ if (address == 0ull)
+ return;
+
+ enum iris_memory_zone memzone = memzone_for_address(address);
+
+ /* The binder handles its own allocations. */
+ if (memzone == IRIS_MEMZONE_BINDER)
+ return;
- struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
+ struct bo_cache_bucket *bucket =
+ get_bucket_allocator(bufmgr, memzone, size);
if (bucket) {
- bucket_vma_free(bucket, address, size);
+ bucket_vma_free(bucket, address);
} else {
- enum iris_memory_zone memzone = memzone_for_address(address);
util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
}
}
}
}
+/**
+ * Allocate a zero-filled iris_bo and seed its hash field.
+ *
+ * The hash is computed from the object's own address with
+ * _mesa_hash_pointer().  Returns NULL when calloc() fails.
+ */
+static struct iris_bo *
+bo_calloc(void)
+{
+   struct iris_bo *bo = calloc(1, sizeof(*bo));
+
+   if (!bo)
+      return NULL;
+
+   bo->hash = _mesa_hash_pointer(bo);
+   return bo;
+}
+
static struct iris_bo *
bo_alloc_internal(struct iris_bufmgr *bufmgr,
const char *name,
* allocation up.
*/
if (bucket == NULL) {
- bo_size = size;
- if (bo_size < page_size)
- bo_size = page_size;
+ bo_size = MAX2(ALIGN(size, page_size), page_size);
} else {
bo_size = bucket->size;
}
* memory and assign it a new address.
*/
if (memzone != memzone_for_address(bo->gtt_offset)) {
- vma_free(bufmgr, bo->gtt_offset, bo_size);
+ vma_free(bufmgr, bo->gtt_offset, bo->size);
bo->gtt_offset = 0ull;
}
} else {
- bo = calloc(1, sizeof(*bo));
+ bo = bo_calloc();
if (!bo)
goto err;
bo->gem_handle = create.handle;
bo->bufmgr = bufmgr;
- bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
bo->tiling_mode = I915_TILING_NONE;
bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
goto err_free;
}
+ bo->name = name;
+ p_atomic_set(&bo->refcount, 1);
+ bo->reusable = bucket && bufmgr->bo_reuse;
+ bo->cache_coherent = bufmgr->has_llc;
+ bo->index = -1;
+ bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
+
+ /* By default, capture all driver-internal buffers like shader kernels,
+ * surface states, dynamic states, border colors, and so on.
+ */
+ if (memzone < IRIS_MEMZONE_OTHER)
+ bo->kflags |= EXEC_OBJECT_CAPTURE;
+
if (bo->gtt_offset == 0ull) {
bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1);
goto err_free;
}
- bo->name = name;
- p_atomic_set(&bo->refcount, 1);
- bo->reusable = true;
- bo->cache_coherent = bufmgr->has_llc;
- bo->index = -1;
-
mtx_unlock(&bufmgr->lock);
DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
flags, tiling_mode, pitch);
}
+/**
+ * Wrap an existing range of CPU memory in a GEM userptr buffer object.
+ *
+ * bufmgr   - buffer manager whose DRM fd and address space are used
+ * name     - debug name; the pointer is stored in the BO, not copied
+ * ptr      - start of the user memory; also recorded as the BO's CPU map
+ * size     - size of the range in bytes
+ * memzone  - memory zone the GPU virtual address is allocated from
+ *
+ * Returns the new BO with a reference count of 1, or NULL if the BO
+ * allocation, the userptr ioctl, the set-domain validation, or the
+ * virtual address allocation fails.
+ */
+struct iris_bo *
+iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
+                       void *ptr, size_t size,
+                       enum iris_memory_zone memzone)
+{
+   struct drm_gem_close close = { 0, };
+   struct iris_bo *bo;
+
+   bo = bo_calloc();
+   if (!bo)
+      return NULL;
+
+   struct drm_i915_gem_userptr arg = {
+      .user_ptr = (uintptr_t)ptr,
+      .user_size = size,
+   };
+   if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
+      goto err_free;
+   bo->gem_handle = arg.handle;
+
+   /* Check the buffer for validity before we try and use it in a batch */
+   struct drm_i915_gem_set_domain sd = {
+      .handle = bo->gem_handle,
+      .read_domains = I915_GEM_DOMAIN_CPU,
+   };
+   if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
+      goto err_close;
+
+   bo->name = name;
+   bo->size = size;
+   bo->map_cpu = ptr;
+
+   bo->bufmgr = bufmgr;
+   bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
+
+   /* vma_alloc() mutates allocator state stored in the bufmgr, and
+    * bo_alloc_internal() calls it with bufmgr->lock held; take the lock
+    * here as well so concurrent allocations cannot race.
+    */
+   mtx_lock(&bufmgr->lock);
+   bo->gtt_offset = vma_alloc(bufmgr, memzone, size, 1);
+   mtx_unlock(&bufmgr->lock);
+
+   if (bo->gtt_offset == 0ull)
+      goto err_close;
+
+   p_atomic_set(&bo->refcount, 1);
+   bo->userptr = true;
+   bo->cache_coherent = true;
+   bo->index = -1;
+   bo->idle = true;
+
+   return bo;
+
+err_close:
+   /* DRM_IOCTL_GEM_CLOSE expects a struct drm_gem_close, not a pointer to
+    * the bare handle (bo_free() builds the same struct).
+    */
+   close.handle = bo->gem_handle;
+   drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
+err_free:
+   free(bo);
+   return NULL;
+}
+
/**
* Returns a iris_bo wrapping the given buffer object handle.
*
goto out;
}
- bo = calloc(1, sizeof(*bo));
+ bo = bo_calloc();
if (!bo)
goto out;
bo->size = open_arg.size;
bo->gtt_offset = 0;
bo->bufmgr = bufmgr;
- bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
bo->gem_handle = open_arg.handle;
bo->name = name;
bo->global_name = handle;
bo->reusable = false;
bo->external = true;
+ bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
_mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
{
struct iris_bufmgr *bufmgr = bo->bufmgr;
- if (bo->map_cpu) {
+ if (bo->map_cpu && !bo->userptr) {
VG_NOACCESS(bo->map_cpu, bo->size);
munmap(bo->map_cpu, bo->size);
}
_mesa_hash_table_remove(bufmgr->handle_table, entry);
}
- vma_free(bo->bufmgr, bo->gtt_offset, bo->size);
-
/* Close this object */
struct drm_gem_close close = { .handle = bo->gem_handle };
int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
bo->gem_handle, bo->name, strerror(errno));
}
+
+ vma_free(bo->bufmgr, bo->gtt_offset, bo->size);
+
free(bo);
}
DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
- bucket = bucket_for_size(bufmgr, bo->size);
+ bucket = NULL;
+ if (bo->reusable)
+ bucket = bucket_for_size(bufmgr, bo->size);
/* Put the buffer into our internal cache for reuse if we can. */
- if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
- iris_bo_madvise(bo, I915_MADV_DONTNEED)) {
+ if (bucket && iris_bo_madvise(bo, I915_MADV_DONTNEED)) {
bo->free_time = time;
bo->name = NULL;
};
int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
if (ret != 0) {
- ret = -errno;
DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
__FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
return NULL;
};
int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
if (ret != 0) {
- ret = -errno;
DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
__FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
return NULL;
return map;
}
-int
-iris_bo_subdata(struct iris_bo *bo, uint64_t offset,
- uint64_t size, const void *data)
-{
- struct iris_bufmgr *bufmgr = bo->bufmgr;
-
- struct drm_i915_gem_pwrite pwrite = {
- .handle = bo->gem_handle,
- .offset = offset,
- .size = size,
- .data_ptr = (uint64_t) (uintptr_t) data,
- };
-
- int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
- if (ret != 0) {
- ret = -errno;
- DBG("%s:%d: Error writing data to buffer %d: "
- "(%"PRIu64" %"PRIu64") %s .\n",
- __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
- }
-
- return ret;
-}
-
/** Waits for all GPU rendering with the object to have completed. */
void
iris_bo_wait_rendering(struct iris_bo *bo)
.timeout_ns = timeout_ns,
};
int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
- if (ret == -1)
+ if (ret != 0)
return -errno;
bo->idle = true;
bo_free(bo);
}
- for (int i = 0; i < IRIS_MEMZONE_COUNT; i++)
- util_dynarray_fini(&bucket->vma_list[i]);
+ for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
+ util_dynarray_fini(&bucket->vma_list[z]);
}
_mesa_hash_table_destroy(bufmgr->name_table, NULL);
_mesa_hash_table_destroy(bufmgr->handle_table, NULL);
+ for (int z = 0; z < IRIS_MEMZONE_COUNT; z++) {
+ if (z != IRIS_MEMZONE_BINDER)
+ util_vma_heap_finish(&bufmgr->vma_allocator[z]);
+ }
+
free(bufmgr);
}
goto out;
}
- bo = calloc(1, sizeof(*bo));
+ bo = bo_calloc();
if (!bo)
goto out;
bo->size = ret;
bo->bufmgr = bufmgr;
- bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
bo->gem_handle = handle;
_mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
bo->name = "prime";
bo->reusable = false;
bo->external = true;
+ bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
return NULL;
}
+/**
+ * Flag a BO as external and register it in the bufmgr's handle table.
+ *
+ * Does nothing when the BO is already marked external.  This variant
+ * takes no lock itself; iris_bo_make_external() wraps the call in
+ * bufmgr->lock.
+ */
+static void
+iris_bo_make_external_locked(struct iris_bo *bo)
+{
+   if (bo->external)
+      return;
+
+   _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo);
+   bo->external = true;
+}
+
+/**
+ * Mark a BO as external, taking bufmgr->lock around the update.
+ *
+ * Returns immediately when the BO is already flagged external.
+ */
static void
iris_bo_make_external(struct iris_bo *bo)
{
struct iris_bufmgr *bufmgr = bo->bufmgr;
- if (!bo->external) {
- mtx_lock(&bufmgr->lock);
- if (!bo->external) {
- _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
- bo->external = true;
- }
- mtx_unlock(&bufmgr->lock);
- }
+ /* Unlocked early-out; iris_bo_make_external_locked() tests the flag
+  * again once the lock is held.
+  */
+ if (bo->external)
+ return;
+
+ mtx_lock(&bufmgr->lock);
+ iris_bo_make_external_locked(bo);
+ mtx_unlock(&bufmgr->lock);
}
int
if (drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
return -errno;
- iris_bo_make_external(bo);
mtx_lock(&bufmgr->lock);
if (!bo->global_name) {
+ iris_bo_make_external_locked(bo);
bo->global_name = flink.name;
_mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
}
bufmgr->has_llc = devinfo->has_llc;
+ STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
const uint64_t _4GB = 1ull << 32;
util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER],
PAGE_SIZE, _4GB);
util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE],
- 1 * _4GB, _4GB);
+ IRIS_MEMZONE_SURFACE_START,
+ _4GB - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE);
util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC],
- 2 * _4GB, _4GB);
+ IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
+ _4GB - IRIS_BORDER_COLOR_POOL_SIZE);
util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_OTHER],
- 3 * _4GB, (1ull << 48) - 3 * _4GB);
+ IRIS_MEMZONE_OTHER_START,
+ (1ull << 48) - IRIS_MEMZONE_OTHER_START);
// XXX: driconf
bufmgr->bo_reuse = env_var_as_boolean("bo_reuse", true);