#define FILE_DEBUG_FLAG DEBUG_BUFMGR
+/**
+ * Target sizes of the batch and state buffers. We create the initial
+ * buffers at these sizes, and flush when they're nearly full. If we
+ * underestimate how close we are to the end, and suddenly need more space
+ * in the middle of a draw, we can grow the buffers, and finish the draw.
+ * At that point, we'll be over our target size, so the next operation
+ * should flush. Each time we flush the batch, we recreate both buffers
+ * at the original target size, so they don't grow without bound.
+ */
#define BATCH_SZ (8192*sizeof(uint32_t))
#define STATE_SZ (8192*sizeof(uint32_t))
+/* The kernel assumes batchbuffers are smaller than 256kB. */
+#define MAX_BATCH_SIZE (256 * 1024)
+
+/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
+ * Address, which means that we can't put binding tables beyond 64kB. This
+ * effectively limits the maximum statebuffer size to 64kB.
+ */
+#define MAX_STATE_SIZE (64 * 1024)
+
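/* Illustrative sketch, not part of this patch: the growth policy used in the
 * hunks below is "grow by half, never past the hard cap for that buffer".
 * grow_target_size() is a hypothetical helper shown only to make the sizing
 * arithmetic explicit; the patch open-codes the same MIN2() expression.
 */
static inline unsigned
grow_target_size(unsigned current_size, unsigned max_size)
{
   return MIN2(current_size + current_size / 2, max_size);
}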
static void
intel_batchbuffer_reset(struct intel_batchbuffer *batch,
                        struct intel_screen *screen);
   _mesa_hash_table_destroy(batch->state_batch_sizes, NULL);
}
+static void
+replace_bo_in_reloc_list(struct brw_reloc_list *rlist,
+                         uint32_t old_handle, uint32_t new_handle)
+{
+   for (int i = 0; i < rlist->reloc_count; i++) {
+      if (rlist->relocs[i].target_handle == old_handle)
+         rlist->relocs[i].target_handle = new_handle;
+   }
+}
+
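/* Illustrative sketch, not part of this patch: how a relocation's
 * target_handle is interpreted.  With I915_EXEC_HANDLE_LUT (tracked as
 * batch->use_batch_first), it is an index into the validation list, which
 * grow_buffer() below leaves untouched; only the legacy mode stores a raw
 * GEM handle and therefore needs replace_bo_in_reloc_list() above.
 * reloc_gem_handle() is a hypothetical helper, not an existing function.
 */
static uint32_t
reloc_gem_handle(const struct intel_batchbuffer *batch,
                 const struct drm_i915_gem_relocation_entry *reloc)
{
   return batch->use_batch_first
          ? batch->validation_list[reloc->target_handle].handle
          : reloc->target_handle;
}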
+/**
+ * Grow either the batch or state buffer to a new larger size.
+ *
+ * We can't actually grow buffers, so we allocate a new one, copy over
+ * the existing contents, and update our lists to refer to the new one.
+ *
+ * Note that this is only temporary - each new batch recreates the buffers
+ * at their original target size (BATCH_SZ or STATE_SZ).
+ */
+static void
+grow_buffer(struct brw_context *brw,
+            struct brw_bo **bo_ptr,
+            uint32_t **map_ptr,
+            uint32_t **cpu_map_ptr,
+            unsigned existing_bytes,
+            unsigned new_size)
+{
+   struct intel_batchbuffer *batch = &brw->batch;
+   struct brw_bufmgr *bufmgr = brw->bufmgr;
+
+   uint32_t *old_map = *map_ptr;
+   struct brw_bo *old_bo = *bo_ptr;
+
+   struct brw_bo *new_bo = brw_bo_alloc(bufmgr, old_bo->name, new_size, 4096);
+   uint32_t *new_map;
+
+   perf_debug("Growing %s - ran out of space\n", old_bo->name);
+
+   /* Copy existing data to the new larger buffer */
+   if (*cpu_map_ptr) {
+      *cpu_map_ptr = new_map = realloc(*cpu_map_ptr, new_size);
+   } else {
+      new_map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
+      memcpy(new_map, old_map, existing_bytes);
+   }
+
+   /* Try to put the new BO at the same GTT offset as the old BO (which
+    * we're throwing away, so it doesn't need to be there).
+    *
+    * This guarantees that our relocations continue to work: values we've
+    * already written into the buffer, values we're going to write into the
+    * buffer, and the validation/relocation lists all will match.
+    */
+   new_bo->gtt_offset = old_bo->gtt_offset;
+   new_bo->index = old_bo->index;
+
+   /* Batch/state buffers are per-context, and if we've run out of space,
+    * we must have actually used them before, so...they will be in the list.
+    */
+   assert(old_bo->index < batch->exec_count);
+   assert(batch->exec_bos[old_bo->index] == old_bo);
+
+   /* Update the validation list to use the new BO. */
+   batch->exec_bos[old_bo->index] = new_bo;
+   batch->validation_list[old_bo->index].handle = new_bo->gem_handle;
+   brw_bo_reference(new_bo);
+   brw_bo_unreference(old_bo);
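+   /* old_bo is held twice - once by exec_bos[] (the reference just dropped
+    * above) and once by *bo_ptr, which is released at the end of this
+    * function - so the second unreference below is expected, not a bug.
+    */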
+
+   if (!batch->use_batch_first) {
+      /* We're not using I915_EXEC_HANDLE_LUT, which means we need to go
+       * update the relocation list entries to point at the new BO as well.
+       * (With newer kernels, the "handle" is an offset into the validation
+       * list, which remains unchanged, so we can skip this.)
+       */
+      replace_bo_in_reloc_list(&batch->batch_relocs,
+                               old_bo->gem_handle, new_bo->gem_handle);
+      replace_bo_in_reloc_list(&batch->state_relocs,
+                               old_bo->gem_handle, new_bo->gem_handle);
+   }
+
+   /* Drop the *bo_ptr reference. This should free the old BO. */
+   brw_bo_unreference(old_bo);
+
+   *bo_ptr = new_bo;
+   *map_ptr = new_map;
+}
+
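/* Usage sketch, not part of this patch: draw-time code brackets state
 * emission with brw->no_batch_wrap so that running out of room mid-draw
 * grows the buffers (via grow_buffer()) instead of flushing; the emission
 * calls themselves are elided here.
 */
   brw->no_batch_wrap = true;
   /* ... emit pipeline state and the 3DPRIMITIVE for the draw ... */
   brw->no_batch_wrap = false;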
void
intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
                                enum brw_gpu_ring ring)
   }
   /* For now, flush as if the batch and state buffers still shared a BO */
-   if (USED_BATCH(*batch) * 4 + sz >=
-       BATCH_SZ - batch->reserved_space - batch->state_used)
-      intel_batchbuffer_flush(brw);
+   const unsigned batch_used = USED_BATCH(*batch) * 4;
+   if (batch_used + sz >=
+       BATCH_SZ - batch->reserved_space - batch->state_used) {
+      if (!brw->no_batch_wrap) {
+         intel_batchbuffer_flush(brw);
+      } else {
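+         /* Can't flush mid-draw (brw->no_batch_wrap is set), so grow the
+          * batch by half, capped at the kernel's MAX_BATCH_SIZE limit, and
+          * let the next opportunity flush.
+          */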
+         const unsigned new_size =
+            MIN2(batch->bo->size + batch->bo->size / 2, MAX_BATCH_SIZE);
+         grow_buffer(brw, &batch->bo, &batch->map, &batch->batch_cpu_map,
+                     batch_used, new_size);
+         batch->map_next = (void *) batch->map + batch_used;
+         assert(batch_used + sz <
+                batch->bo->size - batch->reserved_space - batch->state_used);
+      }
+   }
   /* The intel_batchbuffer_flush() calls above might have changed
    * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end.
    */
   int batch_space = batch->reserved_space + USED_BATCH(*batch) * 4;
   if (offset + size >= STATE_SZ - batch_space) {
-      intel_batchbuffer_flush(brw);
-      offset = ALIGN(batch->state_used, alignment);
+      if (!brw->no_batch_wrap) {
+         intel_batchbuffer_flush(brw);
+         offset = ALIGN(batch->state_used, alignment);
+      } else {
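+         /* Same growth policy for the statebuffer, capped at MAX_STATE_SIZE
+          * so that binding table offsets still fit in 16 bits.
+          */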
+         const unsigned new_size =
+            MIN2(batch->state_bo->size + batch->state_bo->size / 2,
+                 MAX_STATE_SIZE);
+         grow_buffer(brw, &batch->state_bo, &batch->state_map,
+                     &batch->state_cpu_map, batch->state_used, new_size);
+         assert(offset + size < batch->state_bo->size - batch_space);
+      }
   }
   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {