[intel] Add a driconf option to cache freed buffer objects for reuse.

author Eric Anholt <eric@anholt.net>

Wed, 5 Mar 2008 22:14:54 +0000 (14:14 -0800)

committer Eric Anholt <eric@anholt.net>

Thu, 6 Mar 2008 00:29:14 +0000 (16:29 -0800)
author Eric Anholt <eric@anholt.net>
Wed, 5 Mar 2008 22:14:54 +0000 (14:14 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 6 Mar 2008 00:29:14 +0000 (16:29 -0800)
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c

index fb65e66555a506c25bd5861b0e5a8a719304bc97..13455e685d9e4957f7a8ea434c73a2ce230a9594 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
@@ -72,6 +72,28 @@ struct intel_validate_entry {
      struct drm_i915_op_arg bo_arg;
  };
  
+struct dri_ttm_bo_bucket_entry {
+   drmBO drm_bo;
+   struct dri_ttm_bo_bucket_entry *next;
+};
+
+struct dri_ttm_bo_bucket {
+   struct dri_ttm_bo_bucket_entry *head;
+   struct dri_ttm_bo_bucket_entry **tail;
+   /**
+    * Limit on the number of entries in this bucket.
+    *
+    * 0 means that caching at this bucket size is disabled.
+    * -1 means that there is no limit to caching at this size.
+    */
+   int max_entries;
+   int num_entries;
+};
+
+/* Arbitrarily chosen: 16 buckets hold sizes from 1 page up to 1 << 15 pages,
+ * so the largest size we'll cache for reuse is 128MB (with 4KB pages).
+ */
+#define INTEL_TTM_BO_BUCKETS   16
  typedef struct _dri_bufmgr_ttm {
      dri_bufmgr bufmgr;
  
@@ -84,6 +106,9 @@ typedef struct _dri_bufmgr_ttm {
      struct intel_validate_entry *validate_array;
      int validate_array_size;
      int validate_count;
+
+    /** Array of lists of cached drmBOs of power-of-two sizes */
+    struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
  } dri_bufmgr_ttm;
  
  /**
@@ -137,6 +162,41 @@ typedef struct _dri_fence_ttm
      drmFence drm_fence;
  } dri_fence_ttm;
  
+static int
+logbase2(int n)
+{
+   GLint i = 1;
+   GLint log2 = 0;
+
+   while (n > i) {
+      i *= 2;
+      log2++;
+   }
+
+   return log2;
+}
+
+static struct dri_ttm_bo_bucket *
+dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
+{
+    int i;
+
+    /* We only do buckets in power of two increments */
+    if ((size & (size - 1)) != 0)
+       return NULL;
+
+    /* We should only see sizes rounded to pages. */
+    assert((size % 4096) == 0);
+
+    /* We always allocate in units of pages */
+    i = ffs(size / 4096) - 1;
+    if (i >= INTEL_TTM_BO_BUCKETS)
+       return NULL;
+
+    return &bufmgr_ttm->cache_bucket[i];
+}
+
+
  static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
  {
      int i, j;
@@ -338,6 +398,9 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
      int ret;
      uint64_t flags;
      unsigned int hint;
+    unsigned long alloc_size;
+    struct dri_ttm_bo_bucket *bucket;
+    GLboolean alloc_from_cache = GL_FALSE;
  
      ttm_buf = calloc(1, sizeof(*ttm_buf));
      if (!ttm_buf)
@@ -352,13 +415,48 @@ dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
      /* No hints we want to use. */
      hint = 0;
  
-    ret = drmBOCreate(bufmgr_ttm->fd, size, alignment / pageSize,
-                     NULL, flags, hint, &ttm_buf->drm_bo);
-    if (ret != 0) {
-       free(ttm_buf);
-       return NULL;
+    /* Round the allocated size up to a power of two number of pages. */
+    alloc_size = 1 << logbase2(size);
+    if (alloc_size < pageSize)
+       alloc_size = pageSize;
+    bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
+
+    /* If we don't have caching at this size, don't actually round the
+     * allocation up.
+     */
+    if (bucket == NULL || bucket->max_entries == 0)
+       alloc_size = size;
+
+    /* Get a buffer out of the cache if available */
+    if (bucket != NULL && bucket->num_entries > 0) {
+       struct dri_ttm_bo_bucket_entry *entry = bucket->head;
+       int busy;
+
+       /* Check if the buffer is still in flight.  If not, reuse it. */
+       ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
+       alloc_from_cache = (ret == 0 && busy == 0);
+
+       if (alloc_from_cache) {
+           bucket->head = entry->next;
+           if (entry->next == NULL)
+               bucket->tail = &bucket->head;
+           bucket->num_entries--;
+
+           ttm_buf->drm_bo = entry->drm_bo;
+           free(entry);
+       }
      }
-    ttm_buf->bo.size = ttm_buf->drm_bo.size;
+
+    if (!alloc_from_cache) {
+       ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
+                         NULL, flags, hint, &ttm_buf->drm_bo);
+       if (ret != 0) {
+           free(ttm_buf);
+           return NULL;
+       }
+    }
+
+    ttm_buf->bo.size = size;
      ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
      ttm_buf->bo.virtual = NULL;
      ttm_buf->bo.bufmgr = bufmgr;
@@ -450,6 +548,7 @@ dri_ttm_bo_unreference(dri_bo *buf)
         return;
  
      if (--ttm_buf->refcount == 0) {
+       struct dri_ttm_bo_bucket *bucket;
         int ret;
  
         assert(ttm_buf->map_count == 0);
@@ -476,11 +575,32 @@ dri_ttm_bo_unreference(dri_bo *buf)
            }
         }
  
-       ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
-       if (ret != 0) {
-           fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
-                   ttm_buf->name, strerror(-ret));
+       bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
+       /* Put the buffer into our internal cache for reuse if we can. */
+       if (!ttm_buf->shared &&
+           bucket != NULL &&
+           (bucket->max_entries == -1 ||
+            (bucket->max_entries > 0 &&
+             bucket->num_entries < bucket->max_entries)))
+       {
+           struct dri_ttm_bo_bucket_entry *entry;
+
+           entry = calloc(1, sizeof(*entry));
+           entry->drm_bo = ttm_buf->drm_bo;
+
+           entry->next = NULL;
+           *bucket->tail = entry;
+           bucket->tail = &entry->next;
+           bucket->num_entries++;
+       } else {
+           /* Decrement the kernel refcount for the buffer. */
+           ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+           if (ret != 0) {
+              fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
+                      ttm_buf->name, strerror(-ret));
+           }
         }
+
         DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
  
         free(buf);
@@ -657,9 +777,34 @@ static void
  dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
  {
      dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    int i;
  
      free(bufmgr_ttm->validate_array);
  
+    /* Free any cached buffer objects we were going to reuse */
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+       struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
+       struct dri_ttm_bo_bucket_entry *entry;
+
+       while ((entry = bucket->head) != NULL) {
+           int ret;
+
+           bucket->head = entry->next;
+           if (entry->next == NULL)
+               bucket->tail = &bucket->head;
+           bucket->num_entries--;
+
+           /* Decrement the kernel refcount for the buffer. */
+           ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
+           if (ret != 0) {
+              fprintf(stderr, "drmBOUnreference failed: %s\n",
+                      strerror(-ret));
+           }
+
+           free(entry);
+       }
+    }
+
      free(bufmgr);
  }
  
@@ -876,6 +1021,24 @@ dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
      bufmgr_ttm->validate_count = 0;
  }
  
+/**
+ * Enables unlimited caching of buffer objects for reuse.
+ *
+ * This is potentially very memory expensive, as the cache at each bucket
+ * size is only bounded by how many buffers of that size we've managed to have
+ * in flight at once.
+ */
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    int i;
+
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+       bufmgr_ttm->cache_bucket[i].max_entries = -1;
+    }
+}
+
  /**
   * Initializes the TTM buffer manager, which uses the kernel to allocate, map,
   * and manage buffer objects.
@@ -890,6 +1053,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
                       unsigned int fence_type_flush, int batch_size)
  {
      dri_bufmgr_ttm *bufmgr_ttm;
+    int i;
  
      bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
      bufmgr_ttm->fd = fd;
@@ -919,6 +1083,10 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
      bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
      bufmgr_ttm->bufmgr.debug = GL_FALSE;
  
+    /* Initialize the linked lists for BO reuse cache. */
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
+       bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
+
      return &bufmgr_ttm->bufmgr;
  }
  
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h

index 0738839cefbe8fb3140c59274d8c26f2f97cc104..d267a168cd4513c19993032609d3eb8ac99bdf9d 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
@@ -14,4 +14,7 @@ dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
  dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
                                   unsigned int fence_type_flush, int batch_size);
  
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr);
+
  #endif
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c

index d3f0681807e8d475adc456ba635bd2f7fbf844c3..6c8ab1fa1e2c61694ffdd56fb504d14711326386 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -456,6 +456,7 @@ intel_init_bufmgr(struct intel_context *intel)
         ttm_supported = GL_FALSE;
  
     if (!ttm_disable && ttm_supported) {
+      int bo_reuse_mode;
        intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
                                             DRM_FENCE_TYPE_EXE,
                                             DRM_FENCE_TYPE_EXE |
@@ -463,6 +464,15 @@ intel_init_bufmgr(struct intel_context *intel)
                                             BATCH_SZ);
        if (intel->bufmgr != NULL)
          intel->ttm = GL_TRUE;
+
+      bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
+      switch (bo_reuse_mode) {
+      case DRI_CONF_BO_REUSE_DISABLED:
+        break;
+      case DRI_CONF_BO_REUSE_ALL:
+        intel_ttm_enable_bo_reuse(intel->bufmgr);
+        break;
+      }
     }
     /* Otherwise, use the classic buffer manager. */
     if (intel->bufmgr == NULL) {
@@ -548,6 +558,9 @@ intelInitContext(struct intel_context *intel,
     intel->width = intelScreen->width;
     intel->height = intelScreen->height;
  
+   driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
+                       intel->driScreen->myNum,
+                      IS_965(intelScreen->deviceID) ? "i965" : "i915");
     if (intelScreen->deviceID == PCI_CHIP_I865_G)
        intel->maxBatchSize = 4096;
     else
@@ -556,10 +569,6 @@ intelInitContext(struct intel_context *intel,
     if (!intel_init_bufmgr(intel))
        return GL_FALSE;
  
-   driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
-                       intel->driScreen->myNum,
-                      IS_965(intelScreen->deviceID) ? "i965" : "i915");
-
     ctx->Const.MaxTextureMaxAnisotropy = 2.0;
  
     /* This doesn't yet catch all non-conformant rendering, but it's a
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h

index 6c97955b1459d186e190c68fe2de81c317d5349a..809cb6ea5781312cb85197d6fa9b6616d294524b 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -476,6 +476,11 @@ extern void intelInitStateFuncs(struct dd_function_table *functions);
  #define BLENDFACT_INV_CONST_ALPHA      0x0f
  #define BLENDFACT_MASK                 0x0f
  
+enum {
+   DRI_CONF_BO_REUSE_DISABLED,
+   DRI_CONF_BO_REUSE_ALL
+};
+
  extern int intel_translate_shadow_compare_func(GLenum func);
  extern int intel_translate_compare_func(GLenum func);
  extern int intel_translate_stencil_op(GLenum op);
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c

index 8b8eeb77aa3053c3a1fca1c5269850c4557c13ed..1c79cf2cff4bd9711604f0bfdaaa0026ec1ca1f0 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -56,6 +56,15 @@ PUBLIC const char __driConfigOptions[] =
     DRI_CONF_SECTION_PERFORMANCE
        DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+      /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
+       * DRI_CONF_BO_REUSE_ALL
+       */
+      DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1")
+        DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
+           DRI_CONF_ENUM(0, "Disable buffer object reuse")
+           DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
+        DRI_CONF_DESC_END
+      DRI_CONF_OPT_END
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_QUALITY
        DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -66,7 +75,7 @@ PUBLIC const char __driConfigOptions[] =
     DRI_CONF_SECTION_END
  DRI_CONF_END;
  
-const GLuint __driNConfigOptions = 5;
+const GLuint __driNConfigOptions = 6;
  
  #ifdef USE_NEW_INTERFACE
  static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
author	Eric Anholt <eric@anholt.net>
	Wed, 5 Mar 2008 22:14:54 +0000 (14:14 -0800)
committer	Eric Anholt <eric@anholt.net>
	Thu, 6 Mar 2008 00:29:14 +0000 (16:29 -0800)
src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c		patch \| blob \| history
src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h		patch \| blob \| history
src/mesa/drivers/dri/intel/intel_context.c		patch \| blob \| history
src/mesa/drivers/dri/intel/intel_context.h		patch \| blob \| history
src/mesa/drivers/dri/intel/intel_screen.c		patch \| blob \| history