struct drm_i915_op_arg bo_arg;
};
+struct dri_ttm_bo_bucket_entry {
+ drmBO drm_bo;
+ struct dri_ttm_bo_bucket_entry *next;
+};
+
+struct dri_ttm_bo_bucket {
+ struct dri_ttm_bo_bucket_entry *head;
+ struct dri_ttm_bo_bucket_entry **tail;
+ /**
+ * Limit on the number of entries in this bucket.
+ *
+ * 0 means that caching at this bucket size is disabled.
+ * -1 means that there is no limit to caching at this size.
+ */
+ int max_entries;
+ int num_entries;
+};
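+
+/* For illustration (possible settings, not defaults): with max_entries == -1
+ * a bucket grows without bound; with max_entries == 32 it would keep at most
+ * 32 idle BOs of its size; with max_entries == 0 freed BOs of that size go
+ * straight back to the kernel.
+ */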
+
+/* Arbitrarily chosen: with 16 buckets, the largest BO size we'll cache for
+ * reuse is 1 << 15 pages, or 128MB.
+ */
+#define INTEL_TTM_BO_BUCKETS 16
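+
+/* For reference (derived from dri_ttm_bo_bucket_for_size() below): bucket i
+ * caches BOs of exactly (4096 << i) bytes, so bucket 0 holds 4KB BOs,
+ * bucket 5 holds 128KB BOs, and bucket 15 holds 128MB BOs.
+ */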
typedef struct _dri_bufmgr_ttm {
dri_bufmgr bufmgr;
struct intel_validate_entry *validate_array;
int validate_array_size;
int validate_count;
+
+ /** Array of lists of cached drmBOs of power-of-two sizes */
+ struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
} dri_bufmgr_ttm;
/**
drmFence drm_fence;
} dri_fence_ttm;
+/** Returns the smallest e such that (1 << e) >= n, i.e. ceil(log2(n)). */
+static int
+logbase2(int n)
+{
+ GLint i = 1;
+ GLint log2 = 0;
+
+ while (n > i) {
+ i *= 2;
+ log2++;
+ }
+
+ return log2;
+}
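+
+/* Sanity examples for logbase2() (illustrative only): logbase2(1) == 0,
+ * logbase2(4096) == 12, and logbase2(4097) == 13, since sizes that are not
+ * a power of two round up.
+ */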
+
+static struct dri_ttm_bo_bucket *
+dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
+{
+ int i;
+
+ /* We only do buckets in power-of-two increments */
+ if ((size & (size - 1)) != 0)
+ return NULL;
+
+ /* We should only see sizes rounded to pages. */
+ assert((size % 4096) == 0);
+
+ /* We always allocate in units of pages */
+ i = ffs(size / 4096) - 1;
+ if (i >= INTEL_TTM_BO_BUCKETS)
+ return NULL;
+
+ return &bufmgr_ttm->cache_bucket[i];
+}
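+
+/* Example lookups (illustrative): 4096 -> bucket 0; 8192 -> bucket 1; 12288
+ * (three pages, not a power of two) -> NULL; 1 << 28 (256MB) -> NULL, since
+ * its bucket index (16) is out of range.
+ */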
+
static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
{
int i, j;
int ret;
uint64_t flags;
unsigned int hint;
+ unsigned long alloc_size;
+ struct dri_ttm_bo_bucket *bucket;
+ GLboolean alloc_from_cache = GL_FALSE;
ttm_buf = calloc(1, sizeof(*ttm_buf));
if (!ttm_buf)
/* No hints we want to use. */
hint = 0;
- ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize,
- NULL, flags, hint, &ttm_buf->drm_bo);
- if (ret != 0) {
- free(ttm_buf);
- return NULL;
+ /* Round the allocated size up to a power-of-two number of pages. */
+ alloc_size = 1 << logbase2(size);
+ if (alloc_size < pageSize)
+ alloc_size = pageSize;
+ bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
+
+ /* If we don't have caching at this size, don't actually round the
+ * allocation up.
+ */
+ if (bucket == NULL || bucket->max_entries == 0)
+ alloc_size = size;
+
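+ /* Worked example (illustrative): size == 70000 rounds up to alloc_size ==
+ * 131072 (32 pages, bucket 5); if that bucket's max_entries is 0, we keep
+ * alloc_size == 70000 and skip the cache entirely.
+ */
+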
+ /* Get a buffer out of the cache if available */
+ if (bucket != NULL && bucket->num_entries > 0) {
+ struct dri_ttm_bo_bucket_entry *entry = bucket->head;
+ int busy;
+
+ /* Check if the buffer is still in flight. If not, reuse it. */
+ ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
+ alloc_from_cache = (ret == 0 && busy == 0);
+
+ if (alloc_from_cache) {
+ bucket->head = entry->next;
+ if (entry->next == NULL)
+ bucket->tail = &bucket->head;
+ bucket->num_entries--;
+
+ ttm_buf->drm_bo = entry->drm_bo;
+ free(entry);
+ }
}
- ttm_buf->bo.size = ttm_buf->drm_bo.size;
+
+ if (!alloc_from_cache) {
+ ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
+ NULL, flags, hint, &ttm_buf->drm_bo);
+ if (ret != 0) {
+ free(ttm_buf);
+ return NULL;
+ }
+ }
+
+ ttm_buf->bo.size = size;
ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
ttm_buf->bo.virtual = NULL;
ttm_buf->bo.bufmgr = bufmgr;
return;
if (--ttm_buf->refcount == 0) {
+ struct dri_ttm_bo_bucket *bucket;
+ struct dri_ttm_bo_bucket_entry *entry = NULL;
int ret;
assert(ttm_buf->map_count == 0);
}
}
- ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
- if (ret != 0) {
- fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
- ttm_buf->name, strerror(-ret));
+ bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
+ /* Put the buffer into our internal cache for reuse if we can; a cached
+ * entry keeps its kernel reference until the BO is handed out again or
+ * the cache is torn down.
+ */
+ if (!ttm_buf->shared &&
+ bucket != NULL &&
+ (bucket->max_entries == -1 ||
+ (bucket->max_entries > 0 &&
+ bucket->num_entries < bucket->max_entries)))
+ {
+ entry = calloc(1, sizeof(*entry));
+ }
+
+ /* entry is NULL if caching is not allowed or the calloc failed. */
+ if (entry != NULL) {
+ entry->drm_bo = ttm_buf->drm_bo;
+ entry->next = NULL;
+ *bucket->tail = entry;
+ bucket->tail = &entry->next;
+ bucket->num_entries++;
+ } else {
+ /* Decrement the kernel refcount for the buffer. */
+ ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+ if (ret != 0) {
+ fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
+ ttm_buf->name, strerror(-ret));
+ }
}
+
DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
free(buf);
dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
{
dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+ int i;
free(bufmgr_ttm->validate_array);
+ /* Free any cached buffer objects we were going to reuse */
+ for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+ struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
+ struct dri_ttm_bo_bucket_entry *entry;
+
+ while ((entry = bucket->head) != NULL) {
+ int ret;
+
+ bucket->head = entry->next;
+ if (entry->next == NULL)
+ bucket->tail = &bucket->head;
+ bucket->num_entries--;
+
+ /* Decrement the kernel refcount for the buffer. */
+ ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
+ if (ret != 0) {
+ fprintf(stderr, "drmBOUnreference failed: %s\n",
+ strerror(-ret));
+ }
+
+ free(entry);
+ }
+ }
+
free(bufmgr);
}
bufmgr_ttm->validate_count = 0;
}
+/**
+ * Enables unlimited caching of buffer objects for reuse.
+ *
+ * This is potentially very memory expensive, as the cache at each bucket
+ * size is only bounded by how many buffers of that size we've managed to have
+ * in flight at once.
+ */
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+ dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+ int i;
+
+ for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+ bufmgr_ttm->cache_bucket[i].max_entries = -1;
+ }
+}
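+
+/* A bounded variant (hypothetical, not part of this change) would follow
+ * the same loop but set a finite cap, e.g.
+ * bufmgr_ttm->cache_bucket[i].max_entries = 32, so that each bucket keeps
+ * at most 32 idle BOs.
+ */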
+
/**
* Initializes the TTM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
unsigned int fence_type_flush, int batch_size)
{
dri_bufmgr_ttm *bufmgr_ttm;
+ int i;
bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
bufmgr_ttm->fd = fd;
bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
bufmgr_ttm->bufmgr.debug = GL_FALSE;
+ /* Initialize the linked lists for the BO reuse cache; an empty bucket
+ * has head == NULL and tail pointing back at head.
+ */
+ for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
+ bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
+
return &bufmgr_ttm->bufmgr;
}
ttm_supported = GL_FALSE;
if (!ttm_disable && ttm_supported) {
+ int bo_reuse_mode;
intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
DRM_FENCE_TYPE_EXE,
DRM_FENCE_TYPE_EXE |
BATCH_SZ);
if (intel->bufmgr != NULL)
intel->ttm = GL_TRUE;
+
+ bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
+ switch (bo_reuse_mode) {
+ case DRI_CONF_BO_REUSE_DISABLED:
+ break;
+ case DRI_CONF_BO_REUSE_ALL:
+ intel_ttm_enable_bo_reuse(intel->bufmgr);
+ break;
+ }
}
/* Otherwise, use the classic buffer manager. */
if (intel->bufmgr == NULL) {
intel->width = intelScreen->width;
intel->height = intelScreen->height;
+ driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
+ intel->driScreen->myNum,
+ IS_965(intelScreen->deviceID) ? "i965" : "i915");
if (intelScreen->deviceID == PCI_CHIP_I865_G)
intel->maxBatchSize = 4096;
else
if (!intel_init_bufmgr(intel))
return GL_FALSE;
- driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
- intel->driScreen->myNum,
- IS_965(intelScreen->deviceID) ? "i965" : "i915");
-
ctx->Const.MaxTextureMaxAnisotropy = 2.0;
/* This doesn't yet catch all non-conformant rendering, but it's a