brw_dispatch_compute_common(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
- int estimated_buffer_space_needed;
bool fail_next = false;
if (!_mesa_check_conditional_render(ctx))
   return;
brw_predraw_resolve_inputs(brw);
- const int sampler_state_size = 16; /* 16 bytes */
- estimated_buffer_space_needed = 512; /* batchbuffer commands */
- estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT *
- (sampler_state_size +
- sizeof(struct gen5_sampler_default_color)));
- estimated_buffer_space_needed += 1024; /* push constants */
- estimated_buffer_space_needed += 512; /* misc. pad */
-
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives.
+ /* Flush the batch if the batch/state buffers are nearly full. We can
+ * grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, estimated_buffer_space_needed,
- RENDER_RING);
+ intel_batchbuffer_require_space(brw, 600, RENDER_RING);
+ brw_require_statebuffer_space(brw, 2500);
intel_batchbuffer_save_state(brw);
retry:
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
for (i = 0; i < nr_prims; i++) {
- int estimated_max_prim_size;
- const int sampler_state_size = 16;
-
- estimated_max_prim_size = 512; /* batchbuffer commands */
- estimated_max_prim_size += BRW_MAX_TEX_UNIT *
- (sampler_state_size + sizeof(struct gen5_sampler_default_color));
- estimated_max_prim_size += 1024; /* gen6 VS push constants */
- estimated_max_prim_size += 1024; /* gen6 WM push constants */
- estimated_max_prim_size += 512; /* misc. pad */
-
/* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have
* atoms that happen on every draw call.
*/
brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives.
+ /* Flush the batch if the batch/state buffers are nearly full. We can
+ * grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
+ brw_require_statebuffer_space(brw, 2400);
intel_batchbuffer_save_state(brw);
if (brw->num_instances != prims[i].num_instances ||
void brw_print_program_cache(struct brw_context *brw);
/* intel_batchbuffer.c */
+void brw_require_statebuffer_space(struct brw_context *brw, int size);
void *brw_state_batch(struct brw_context *brw,
int size, int alignment, uint32_t *out_offset);
uint32_t brw_state_batch_size(struct brw_context *brw, uint32_t offset);
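The new prototype is the state-side counterpart of intel_batchbuffer_require_space(): paths that emit validated state under brw->no_batch_wrap are expected to budget both buffers up front so neither wraps mid-draw. A minimal sketch of the intended call pattern, using placeholder byte budgets rather than values from this patch:

   intel_batchbuffer_require_space(brw, 1024 /* command estimate */, RENDER_RING);
   brw_require_statebuffer_space(brw, 2048 /* indirect state estimate */);
   intel_batchbuffer_save_state(brw);
   /* ... emit state and primitives with brw->no_batch_wrap set ... */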
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_batch;
struct gl_context *ctx = &brw->ctx;
- const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1920 : 1700;
bool check_aperture_failed_once = false;
/* Flush the sampler and render caches. We definitely need to flush the
brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
retry:
- intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
+ brw_require_statebuffer_space(brw, 600);
intel_batchbuffer_save_state(brw);
brw->no_batch_wrap = true;
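For reference, the old single estimate here was 1920 bytes on gen8+ and 1700 bytes otherwise, charged against the shared budget; it becomes 1400 bytes of batch commands plus 600 bytes of indirect state, each checked against its own buffer. The save/retry bracket around the emission is unchanged; a condensed sketch of it, with the aperture check reduced to a placeholder predicate:

retry:
   intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
   brw_require_statebuffer_space(brw, 600);
   intel_batchbuffer_save_state(brw);
   brw->no_batch_wrap = true;

   /* ... emit the blorp operation ... */

   brw->no_batch_wrap = false;

   if (batch_would_overflow_aperture /* placeholder condition */ &&
       !check_aperture_failed_once) {
      check_aperture_failed_once = true;
      intel_batchbuffer_reset_to_saved(brw);   /* discard the partial emission */
      intel_batchbuffer_flush(brw);            /* start a fresh batch */
      goto retry;
   }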
* should flush. Each time we flush the batch, we recreate both buffers
* at the original target size, so it doesn't grow without bound.
*/
-#define BATCH_SZ (8192*sizeof(uint32_t))
-#define STATE_SZ (8192*sizeof(uint32_t))
+#define BATCH_SZ (20 * 1024)
+#define STATE_SZ (16 * 1024)
/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)
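For scale: the old definitions both worked out to 8192 * sizeof(uint32_t) = 32768 bytes, and the flushing checks still treated the two buffers as sharing a single 32 kB budget (see the BATCH_SZ - batch->state_used and STATE_SZ - batch_space tests removed below). The new targets are 20 kB of commands plus 16 kB of indirect state, and since either buffer can grow, they are flush thresholds rather than hard limits.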
intel_batchbuffer_flush(brw);
}
- /* For now, flush as if the batch and state buffers still shared a BO */
const unsigned batch_used = USED_BATCH(*batch) * 4;
- if (batch_used + sz >= BATCH_SZ - batch->state_used) {
+ if (batch_used + sz >= BATCH_SZ) {
if (!brw->no_batch_wrap) {
intel_batchbuffer_flush(brw);
} else {
grow_buffer(brw, &batch->bo, &batch->map, &batch->batch_cpu_map,
batch_used, new_size);
batch->map_next = (void *) batch->map + batch_used;
- assert(batch_used + sz < batch->bo->size - batch->state_used);
+ assert(batch_used + sz < batch->bo->size);
}
}
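With the split in place, each buffer is measured on its own: the command stream compares USED_BATCH(*batch) * 4 against BATCH_SZ, and the indirect state side compares batch->state_used against STATE_SZ. A batch that is heavy on indirect state no longer forces the command buffer to wrap early, and vice versa.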
return entry ? (uintptr_t) entry->data : 0;
}
+/**
+ * Reserve some space in the statebuffer, or flush.
+ *
+ * This is used to estimate when we're near the end of the batch,
+ * so we can flush early.
+ */
+void
+brw_require_statebuffer_space(struct brw_context *brw, int size)
+{
+ if (brw->batch.state_used + size >= STATE_SZ)
+ intel_batchbuffer_flush(brw);
+}
+
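A note on the asymmetry: brw_require_statebuffer_space() only ever flushes, because it is meant to be called at safe points before no_batch_wrap is set. Once a draw is in flight and no_batch_wrap is set, running out of state space is handled in brw_state_batch() below, which grows the state BO instead of wrapping.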
/**
* Allocates a block of space in the batchbuffer for indirect state.
*/
uint32_t offset = ALIGN(batch->state_used, alignment);
- /* For now, follow the old flushing behavior. */
- int batch_space = USED_BATCH(*batch) * 4;
-
- if (offset + size >= STATE_SZ - batch_space) {
+ if (offset + size >= STATE_SZ) {
if (!brw->no_batch_wrap) {
intel_batchbuffer_flush(brw);
offset = ALIGN(batch->state_used, alignment);
MAX_STATE_SIZE);
grow_buffer(brw, &batch->state_bo, &batch->state_map,
&batch->state_cpu_map, batch->state_used, new_size);
- assert(offset + size < batch->state_bo->size - batch_space);
+ assert(offset + size < batch->state_bo->size);
}
}
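The overall flow with the split buffers, condensed: at a safe point (no_batch_wrap clear), the require_* helpers flush early if either buffer is close to its target size; mid-draw (no_batch_wrap set), running out of room in either buffer grows that BO via grow_buffer() instead, and the next flush recreates both buffers at BATCH_SZ / STATE_SZ, so growth does not accumulate across batches.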