#include <stdbool.h>
#include "errno.h"
-#ifndef ETIME
-#define ETIME ETIMEDOUT
-#endif
#include "common/gen_clflush.h"
-#include "common/gen_debug.h"
+#include "dev/gen_debug.h"
#include "common/gen_gem.h"
#include "dev/gen_device_info.h"
#include "libdrm_macros.h"
#include "brw_context.h"
#include "string.h"
-#include "i915_drm.h"
+#include "drm-uapi/i915_drm.h"
#ifdef HAVE_VALGRIND
#include <valgrind.h>
};
struct brw_bufmgr {
+ uint32_t refcount;
+
+ struct list_head link;
+
int fd;
mtx_t lock;
bool has_llc:1;
bool has_mmap_wc:1;
+ bool has_mmap_offset:1;
bool bo_reuse:1;
uint64_t initial_kflags;
};
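+/* Bufmgrs shared across screens, one per DRM device node (matched by
+ * st_rdev in brw_bufmgr_get_for_fd()).  The list is protected by
+ * global_bufmgr_list_mutex.
+ */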
+static mtx_t global_bufmgr_list_mutex = _MTX_INITIALIZER_NP;
+static struct list_head global_bufmgr_list = {
+ .next = &global_bufmgr_list,
+ .prev = &global_bufmgr_list,
+};
+
static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
uint32_t stride);
enum brw_memory_zone memzone,
uint64_t size, uint64_t alignment);
-static uint32_t
-key_hash_uint(const void *key)
-{
- return _mesa_hash_data(key, 4);
-}
-
-static bool
-key_uint_equal(const void *a, const void *b)
-{
- return *((unsigned *) a) == *((unsigned *) b);
-}
-
static struct brw_bo *
hash_find_bo(struct hash_table *ht, unsigned int key)
{
* Set the first bit used, and return the start address.
*/
uint64_t node_size = 64ull * bucket->size;
- node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
+ node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1);
if (unlikely(!node))
return 0ull;
if (!node) {
/* No node - the whole group of 64 blocks must have been in-use. */
- node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
+ node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1);
if (unlikely(!node))
return; /* bogus, leaks some GPU VMA, but nothing we can do... */
/* Without softpin support, we let the kernel assign addresses. */
assert(brw_using_softpin(bufmgr));
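+ /* Force the alignment up to a whole number of pages; the VMA heaps are
+  * managed at page granularity.
+  */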
+ alignment = ALIGN(alignment, PAGE_SIZE);
+
struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
uint64_t addr;
/* Get a buffer out of the cache if available */
retry:
alloc_from_cache = false;
- if (bucket != NULL && !list_empty(&bucket->head)) {
+ if (bucket != NULL && !list_is_empty(&bucket->head)) {
if (busy && !zeroed) {
/* Allocate new render-target BOs from the tail (MRU)
* of the list, as it will likely be hot in the GPU
}
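+/* Map a BO with the legacy DRM_IOCTL_I915_GEM_MMAP ioctl, which hands back
+ * a CPU pointer directly; write-combining vs. write-back caching is chosen
+ * via the flags field.
+ */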
static void *
-brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
+brw_bo_gem_mmap_legacy(struct brw_context *brw, struct brw_bo *bo, bool wc)
+{
+ struct brw_bufmgr *bufmgr = bo->bufmgr;
+
+ struct drm_i915_gem_mmap mmap_arg = {
+ .handle = bo->gem_handle,
+ .size = bo->size,
+ .flags = wc ? I915_MMAP_WC : 0,
+ };
+
+ int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
+ if (ret != 0) {
+ DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
+ __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
+ return NULL;
+ }
+ void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
+
+ return map;
+}
+
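+/* Map a BO through the newer MMAP_OFFSET uAPI: the ioctl returns a fake
+ * offset into the DRM fd, which is then handed to mmap() to obtain a CPU
+ * pointer with the requested caching mode (WB or WC).
+ */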
+static void *
+brw_bo_gem_mmap_offset(struct brw_context *brw, struct brw_bo *bo, bool wc)
+{
+ struct brw_bufmgr *bufmgr = bo->bufmgr;
+
+ struct drm_i915_gem_mmap_offset mmap_arg = {
+ .handle = bo->gem_handle,
+ .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
+ };
+
+ /* Get the fake offset back */
+ int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg);
+ if (ret != 0) {
+ DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
+ __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
+ return NULL;
+ }
+
+ /* And map it */
+ void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ bufmgr->fd, mmap_arg.offset);
+ if (map == MAP_FAILED) {
+ DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
+ __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
+ return NULL;
+ }
+
+ return map;
+}
+
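+/* Prefer the MMAP_OFFSET path when the kernel advertises it; otherwise fall
+ * back to the legacy DRM_IOCTL_I915_GEM_MMAP ioctl.
+ */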
+static void *
+brw_bo_gem_mmap(struct brw_context *brw, struct brw_bo *bo, bool wc)
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
+ if (bufmgr->has_mmap_offset)
+ return brw_bo_gem_mmap_offset(brw, bo, wc);
+ else
+ return brw_bo_gem_mmap_legacy(brw, bo, wc);
+}
+
+static void *
+brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
+{
/* We disallow CPU maps for writing to non-coherent buffers, as the
* CPU map can become invalidated when a batch is flushed out, which
* can happen at unpredictable times. You should use WC maps instead.
if (!bo->map_cpu) {
DBG("brw_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);
- struct drm_i915_gem_mmap mmap_arg = {
- .handle = bo->gem_handle,
- .size = bo->size,
- };
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
- if (ret != 0) {
- ret = -errno;
- DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return NULL;
- }
- void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
+ void *map = brw_bo_gem_mmap(brw, bo, false);
VG_DEFINED(map, bo->size);
if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
if (!bo->map_wc) {
DBG("brw_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);
-
- struct drm_i915_gem_mmap mmap_arg = {
- .handle = bo->gem_handle,
- .size = bo->size,
- .flags = I915_MMAP_WC,
- };
- int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
- if (ret != 0) {
- ret = -errno;
- DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return NULL;
- }
-
- void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
+ void *map = brw_bo_gem_mmap(brw, bo, true);
VG_DEFINED(map, bo->size);
if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
}
void
-brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
+brw_bufmgr_unref(struct brw_bufmgr *bufmgr)
{
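+ /* Drop the reference under the list lock so a racing
+  * brw_bufmgr_get_for_fd() cannot hand out a bufmgr whose refcount has
+  * already reached zero.
+  */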
+ mtx_lock(&global_bufmgr_list_mutex);
+ if (p_atomic_dec_zero(&bufmgr->refcount)) {
+ list_del(&bufmgr->link);
+ } else {
+ bufmgr = NULL;
+ }
+ mtx_unlock(&global_bufmgr_list_mutex);
+
+ if (!bufmgr)
+ return;
+
mtx_destroy(&bufmgr->lock);
/* Free any cached buffer objects we were going to reuse */
}
}
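+ /* The fd is a private dup() made in brw_bufmgr_create(), so close it. */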
+ close(bufmgr->fd);
+ bufmgr->fd = -1;
+
free(bufmgr);
}
return 0;
}
-/**
- * Enables unlimited caching of buffer objects for reuse.
- *
- * This is potentially very memory expensive, as the cache at each bucket
- * size is only bounded by how many buffers of that size we've managed to have
- * in flight at once.
- */
-void
-brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr)
-{
- bufmgr->bo_reuse = true;
-}
-
static void
add_bucket(struct brw_bufmgr *bufmgr, int size)
{
return bufmgr->initial_kflags & EXEC_OBJECT_PINNED;
}
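+/* Take a reference on the bufmgr; paired with brw_bufmgr_unref(). */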
+static struct brw_bufmgr *
+brw_bufmgr_ref(struct brw_bufmgr *bufmgr)
+{
+ p_atomic_inc(&bufmgr->refcount);
+ return bufmgr;
+}
+
/**
* Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
*
* \param fd File descriptor of the opened DRM device.
*/
-struct brw_bufmgr *
-brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
+static struct brw_bufmgr *
+brw_bufmgr_create(struct gen_device_info *devinfo, int fd, bool bo_reuse)
{
struct brw_bufmgr *bufmgr;
* Don't do this! Ensure that each library/bufmgr has its own device
* fd so that its namespace does not clash with another.
*/
- bufmgr->fd = fd;
+ bufmgr->fd = dup(fd);
+ if (bufmgr->fd < 0) {
+ free(bufmgr);
+ return NULL;
+ }
+
+ p_atomic_set(&bufmgr->refcount, 1);
if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
+ close(bufmgr->fd);
free(bufmgr);
return NULL;
}
bufmgr->has_llc = devinfo->has_llc;
bufmgr->has_mmap_wc = gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;
+ bufmgr->bo_reuse = bo_reuse;
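+ /* The MMAP_OFFSET ioctl is advertised starting with MMAP_GTT version 4. */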
+ bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
const uint64_t _4GB = 4ull << 30;
+ /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
+ const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
+
if (devinfo->gen >= 8 && gtt_size > _4GB) {
bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
bufmgr->initial_kflags |= EXEC_OBJECT_PINNED;
util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G],
- PAGE_SIZE, _4GB);
+ PAGE_SIZE, _4GB_minus_1);
+
+ /* Leave the last 4GB out of the high vma range, so that no state
+ * base address + size can overflow 48 bits.
+ */
util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER],
- 1 * _4GB, gtt_size - 1 * _4GB);
+ 1 * _4GB, gtt_size - 2 * _4GB);
} else if (devinfo->gen >= 10) {
/* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
* kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
* might actually mean requiring 4.14.
*/
fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gen10+.\n");
+ close(bufmgr->fd);
free(bufmgr);
return NULL;
}
init_cache_buckets(bufmgr);
bufmgr->name_table =
- _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
+ _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
bufmgr->handle_table =
- _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
+ _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
+
+ return bufmgr;
+}
+
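+/**
+ * Returns a reference to the bufmgr for the DRM device behind @fd, reusing
+ * an existing one when another screen has already opened the same device
+ * node (matched by st_rdev), and creating a new one otherwise.  Release the
+ * reference with brw_bufmgr_unref().
+ */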
+struct brw_bufmgr *
+brw_bufmgr_get_for_fd(struct gen_device_info *devinfo, int fd, bool bo_reuse)
+{
+ struct stat st;
+
+ if (fstat(fd, &st))
+ return NULL;
+
+ struct brw_bufmgr *bufmgr = NULL;
+
+ mtx_lock(&global_bufmgr_list_mutex);
+ list_for_each_entry(struct brw_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
+ struct stat iter_st;
+ if (fstat(iter_bufmgr->fd, &iter_st))
+ continue;
+
+ if (st.st_rdev == iter_st.st_rdev) {
+ assert(iter_bufmgr->bo_reuse == bo_reuse);
+ bufmgr = brw_bufmgr_ref(iter_bufmgr);
+ goto unlock;
+ }
+ }
+
+ bufmgr = brw_bufmgr_create(devinfo, fd, bo_reuse);
+ if (bufmgr)
+    list_addtail(&bufmgr->link, &global_bufmgr_list);
+
+ unlock:
+ mtx_unlock(&global_bufmgr_list_mutex);
return bufmgr;
}
+
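+/* Returns the bufmgr's private (dup'ed) DRM file descriptor. */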
+int
+brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr)
+{
+ return bufmgr->fd;
+}