i965: Fix check_aperture calls to cover everything needed for the prim at once.
author Eric Anholt <eric@anholt.net>
Fri, 24 Oct 2008 20:02:21 +0000 (13:02 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 29 Oct 2008 05:52:38 +0000 (22:52 -0700)
Previously, since my check_aperture API change, we would check each piece of
state against the batchbuffer individually, but never all of the state against
the batchbuffer at once.  Besides not being terribly useful for ensuring
success, this probably also increased CPU load by calling check_aperture many
times per primitive.
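
The resulting per-primitive flow in brw_try_draw_prims is, roughly (a
condensed sketch of the code in the diff below; locking, fallback checks,
and the one-time-warning bookkeeping are omitted):

   brw_validate_state(brw);          /* run prepare() atoms, accumulating
                                      * brw->state.validated_bos (which
                                      * includes intel->batch->buf) */
   if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
                                       brw->state.validated_bo_count)) {
      intel_batchbuffer_flush(intel->batch);   /* start a fresh batch */
      brw_validate_state(brw);                 /* re-prepare against it */
      /* if check_aperture still fails, warn once and press on anyway */
   }
   brw_upload_state(brw);            /* emit() the dirty state atoms */
   for (i = 0; i < nr_prims; i++)
      brw_emit_prim(brw, &prim[i]);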

src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_curbe.c
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_queryobj.c
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c

index 474158b484b2111d9b148a5647b4754ae025a56f..e2bc08a6cb74fab25cabaac0ff0acc46c236bb47 100644 (file)
@@ -39,6 +39,7 @@
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_draw.h"
+#include "brw_state.h"
 #include "brw_vs.h"
 #include "intel_tex.h"
 #include "intel_blit.h"
index 1b823ab8ca49dc6458ab1582572cec7b8ef54841..e3904be977fbc3ca52d686bace1d46f0cecd7ca4 100644 (file)
@@ -444,6 +444,19 @@ struct brw_context
       GLuint nr_draw_regions;
       struct intel_region *draw_regions[MAX_DRAW_BUFFERS];
       struct intel_region *depth_region;
+
+      /**
+       * List of buffers accumulated in brw_validate_state to receive
+       * dri_bo_check_aperture treatment before exec, so we can know if we
+       * should flush the batch and try again before emitting primitives.
+       *
+       * This can be a fixed number as we only have a limited number of
+       * objects referenced from the batchbuffer in a primitive emit,
+       * consisting of the vertex buffers, pipelined state pointers,
+       * the CURBE, the depth buffer, and a query BO.
+       */
+      dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+      int validated_bo_count;
    } state;
 
    struct brw_state_pointers attribs;
@@ -678,14 +691,6 @@ void brw_prepare_query_begin(struct brw_context *brw);
 void brw_emit_query_begin(struct brw_context *brw);
 void brw_emit_query_end(struct brw_context *brw);
 
-/*======================================================================
- * brw_state.c
- */
-void brw_validate_state( struct brw_context *brw );
-void brw_init_state( struct brw_context *brw );
-void brw_destroy_state( struct brw_context *brw );
-
-
 /*======================================================================
  * brw_state_dump.c
  */
index 6ffa221d66999fa701f862f58ddf1591a6aa8ccc..c7bac7b0c522b8ae26f51c318d455e24aed75bf3 100644 (file)
@@ -307,6 +307,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
    }
 
+   brw_add_validated_bo(brw, brw->curbe.curbe_bo);
 
    /* Because this provokes an action (ie copy the constants into the
     * URB), it shouldn't be shortcircuited if identical to the
@@ -328,15 +329,6 @@ static void emit_constant_buffer(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
    GLuint sz = brw->curbe.total_size;
-   dri_bo *aper_array[] = {
-      brw->intel.batch->buf,
-      brw->curbe.curbe_bo,
-   };
-
-   if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
-      intel_batchbuffer_flush(intel->batch);
-      return;
-   }
 
    BEGIN_BATCH(2, IGNORE_CLIPRECTS);
    if (sz == 0) {
index 05d6f42ab464404641884a6e86bfe1ba39e653f9..d87b8f8a848cc799db2aaf38be3d1e244783dfd0 100644 (file)
@@ -256,6 +256,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    struct intel_context *intel = intel_context(ctx);
    struct brw_context *brw = brw_context(ctx);
    GLboolean retval = GL_FALSE;
+   GLboolean warn = GL_FALSE;
    GLuint i;
 
    if (ctx->NewState)
@@ -301,8 +302,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
        */
       brw_set_prim(brw, prim[0].mode);
 
-      /* XXX:  Need to separate validate and upload of state.  
-       */
       brw_validate_state( brw );
 
       /* Various fallback checks:
@@ -313,6 +312,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
       if (check_fallbacks( brw, prim, nr_prims ))
         goto out;
 
+      /* Check that we can fit our state in with our existing batchbuffer, or
+       * flush otherwise.
+       */
+      if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+                                         brw->state.validated_bo_count)) {
+        static GLboolean warned;
+        intel_batchbuffer_flush(intel->batch);
+
+        /* Validate the state after we flushed the batch (which would have
+         * changed the set of dirty state).  If we still fail to
+         * check_aperture, warn of what's happening, but attempt to continue
+         * on since it may succeed anyway, and the user would probably rather
+         * see a failure and a warning than a fallback.
+         */
+        brw_validate_state(brw);
+        if (!warned &&
+            dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+                                            brw->state.validated_bo_count)) {
+           warn = GL_TRUE;
+           warned = GL_TRUE;
+        }
+      }
+
+      brw_upload_state(brw);
+
       for (i = 0; i < nr_prims; i++) {
         brw_emit_prim(brw, &prim[i]);
       }
@@ -323,6 +347,10 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
  out:
    UNLOCK_HARDWARE(intel);
 
+   if (warn)
+      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
+             "available aperture space\n");
+
    if (!retval)
       DBG("%s failed\n", __FUNCTION__);
 
index 7b88b5eaa1ebc56926a01072810bbe5b69dec9da..4080c5e32289dc23919f526706242e618615daaa 100644 (file)
@@ -250,10 +250,10 @@ static void get_space( struct brw_context *brw,
       wrap_buffers(brw, size);
    }
 
+   assert(*bo_return == NULL);
    dri_bo_reference(brw->vb.upload.bo);
    *bo_return = brw->vb.upload.bo;
    *offset_return = brw->vb.upload.offset;
-
    brw->vb.upload.offset += size;
 }
 
@@ -359,6 +359,14 @@ static void brw_prepare_vertices(struct brw_context *brw)
         input->offset = (unsigned long)input->glarray->Ptr;
         input->stride = input->glarray->StrideB;
       } else {
+        if (input->bo != NULL) {
+           /* Already-uploaded vertex data is present from a previous
+            * prepare_vertices, but we had to re-validate state due to
+            * check_aperture failing and a new batch being produced.
+            */
+           continue;
+        }
+
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
@@ -417,6 +425,12 @@ static void brw_prepare_vertices(struct brw_context *brw)
    }
 
    brw_prepare_query_begin(brw);
+
+   for (i = 0; i < nr_enabled; i++) {
+      struct brw_vertex_element *input = enabled[i];
+
+      brw_add_validated_bo(brw, input->bo);
+   }
 }
 
 static void brw_emit_vertices(struct brw_context *brw)
@@ -512,7 +526,7 @@ static void brw_prepare_indices(struct brw_context *brw)
    struct intel_context *intel = &brw->intel;
    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
    GLuint ib_size;
-   dri_bo *bo;
+   dri_bo *bo = NULL;
    struct gl_buffer_object *bufferobj;
    GLuint offset;
 
@@ -561,6 +575,8 @@ static void brw_prepare_indices(struct brw_context *brw)
    dri_bo_unreference(brw->ib.bo);
    brw->ib.bo = bo;
    brw->ib.offset = offset;
+
+   brw_add_validated_bo(brw, brw->ib.bo);
 }
 
 static void brw_emit_indices(struct brw_context *brw)
index e347dd224444c29c9eafda32636ad165446a7ff6..5bba8c84ec081d81d4cd59549667ff63feca7636 100644 (file)
@@ -98,6 +98,11 @@ const struct brw_tracked_state brw_drawing_rect = {
    .emit = upload_drawing_rect
 };
 
+static void prepare_binding_table_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->wm.bind_bo);
+}
+
 /**
  * Upload the binding table pointers, which point each stage's array of surface
  * state pointers.
@@ -108,15 +113,6 @@ const struct brw_tracked_state brw_drawing_rect = {
 static void upload_binding_table_pointers(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
-   dri_bo *aper_array[] = {
-      intel->batch->buf,
-      brw->wm.bind_bo,
-   };
-
-   if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
-      intel_batchbuffer_flush(intel->batch);
-      return;
-   }
 
    BEGIN_BATCH(6, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
@@ -136,6 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = {
       .brw = BRW_NEW_BATCH,
       .cache = CACHE_NEW_SURF_BIND,
    },
+   .prepare = prepare_binding_table_pointers,
    .emit = upload_binding_table_pointers,
 };
 
@@ -169,23 +166,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
    brw->state.dirty.brw |= BRW_NEW_PSP;
 }
 
-static void upload_psp_urb_cbs(struct brw_context *brw )
+
+static void prepare_psp_urb_cbs(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
-   dri_bo *aper_array[] = {
-      intel->batch->buf,
-      brw->vs.state_bo,
-      brw->gs.state_bo,
-      brw->clip.state_bo,
-      brw->wm.state_bo,
-      brw->cc.state_bo,
-   };
-
-   if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
-      intel_batchbuffer_flush(intel->batch);
-      return;
-   }
+   brw_add_validated_bo(brw, brw->vs.state_bo);
+   brw_add_validated_bo(brw, brw->gs.state_bo);
+   brw_add_validated_bo(brw, brw->clip.state_bo);
+   brw_add_validated_bo(brw, brw->wm.state_bo);
+   brw_add_validated_bo(brw, brw->cc.state_bo);
+}
 
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
    upload_pipelined_state_pointers(brw);
    brw_upload_urb_fence(brw);
    brw_upload_constant_buffer_state(brw);
@@ -203,9 +195,18 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
                CACHE_NEW_WM_UNIT | 
                CACHE_NEW_CC_UNIT)
    },
+   .prepare = prepare_psp_urb_cbs,
    .emit = upload_psp_urb_cbs,
 };
 
+static void prepare_depthbuffer(struct brw_context *brw)
+{
+   struct intel_region *region = brw->state.depth_region;
+
+   if (region != NULL)
+      brw_add_validated_bo(brw, region->buffer);
+}
+
 static void emit_depthbuffer(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
@@ -227,10 +228,6 @@ static void emit_depthbuffer(struct brw_context *brw)
       ADVANCE_BATCH();
    } else {
       unsigned int format;
-      dri_bo *aper_array[] = {
-        intel->batch->buf,
-        region->buffer
-      };
 
       switch (region->cpp) {
       case 2:
@@ -247,11 +244,6 @@ static void emit_depthbuffer(struct brw_context *brw)
         return;
       }
 
-      if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) {
-        intel_batchbuffer_flush(intel->batch);
-        return;
-      }
-
       BEGIN_BATCH(len, IGNORE_CLIPRECTS);
       OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
       OUT_BATCH(((region->pitch * region->cpp) - 1) |
@@ -280,6 +272,7 @@ const struct brw_tracked_state brw_depthbuffer = {
       .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
       .cache = 0,
    },
+   .prepare = prepare_depthbuffer,
    .emit = emit_depthbuffer,
 };
 
index a1a1353dee7605c354be61842400fe2b601375ac..cb9169e2eef1846088a0ee515e9da37e04920369 100644 (file)
@@ -42,6 +42,7 @@
 #include "main/imports.h"
 
 #include "brw_context.h"
+#include "brw_state.h"
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
 
@@ -163,10 +164,6 @@ void
 brw_prepare_query_begin(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
-   dri_bo *aper_array[] = {
-      intel->batch->buf,
-      brw->query.bo,
-   };
 
    /* Skip if we're not doing any queries. */
    if (is_empty_list(&brw->query.active_head))
@@ -182,8 +179,7 @@ brw_prepare_query_begin(struct brw_context *brw)
       brw->query.index = 0;
    }
 
-   if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
-      intel_batchbuffer_flush(intel->batch);
+   brw_add_validated_bo(brw, brw->query.bo);
 }
 
 /** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
index a27ef1f8bcf1322d510356d69f76764f224b5f3a..bb22c03eeb6663a97251b1348b5b30ab714bd08c 100644 (file)
 
 #include "brw_context.h"
 
+static inline void
+brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+{
+   assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+
+   if (bo != NULL) {
+      dri_bo_reference(bo);
+      brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+   }
+}
 
 const struct brw_tracked_state brw_blend_constant_color;
 const struct brw_tracked_state brw_cc_unit;
@@ -83,6 +93,14 @@ const struct brw_tracked_state brw_drawing_rect;
 const struct brw_tracked_state brw_indices;
 const struct brw_tracked_state brw_vertices;
 
+/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
 /***********************************************************************
  * brw_state_cache.c
  */
index 8d57daf3ab5a09c976cfb3c6789414c2e94649ef..16b0496f4790852425d7d4147e3e743786879adf 100644 (file)
@@ -169,6 +169,18 @@ static void xor_states( struct brw_state_flags *result,
    result->cache = a->cache ^ b->cache;
 }
 
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+   int i;
+
+   /* Clear the last round of validated bos */
+   for (i = 0; i < brw->state.validated_bo_count; i++) {
+      dri_bo_unreference(brw->state.validated_bos[i]);
+      brw->state.validated_bos[i] = NULL;
+   }
+   brw->state.validated_bo_count = 0;
+}
 
 /***********************************************************************
  * Emit all state:
@@ -177,12 +189,15 @@ void brw_validate_state( struct brw_context *brw )
 {
    struct intel_context *intel = &brw->intel;
    struct brw_state_flags *state = &brw->state.dirty;
-   GLuint i, count, pass = 0;
-   dri_bo *last_batch_bo = NULL;
+   GLuint i;
+
+   brw_clear_validated_bos(brw);
 
    state->mesa |= brw->intel.NewGLState;
    brw->intel.NewGLState = 0;
 
+   brw_add_validated_bo(brw, intel->batch->buf);
+
    if (brw->emit_state_always) {
       state->mesa |= ~0;
       state->brw |= ~0;
@@ -208,8 +223,6 @@ void brw_validate_state( struct brw_context *brw )
 
    brw->intel.Fallback = 0;
 
-   count = 0;
-
    /* do prepare stage for all atoms */
    for (i = 0; i < Elements(atoms); i++) {
       const struct brw_tracked_state *atom = brw->state.atoms[i];
@@ -223,19 +236,15 @@ void brw_validate_state( struct brw_context *brw )
         }
       }
    }
+}
 
-   if (brw->intel.Fallback)
-      return;
 
-   /* We're about to try to set up a coherent state in the batchbuffer for
-    * the emission of primitives.  If we exceed the aperture size in any of the
-    * emit() calls, we need to go back to square 1 and try setting up again.
-    */
-got_flushed:
-   dri_bo_unreference(last_batch_bo);
-   last_batch_bo = intel->batch->buf;
-   dri_bo_reference(last_batch_bo);
-   assert(pass++ <= 2);
+void brw_upload_state(struct brw_context *brw)
+{
+   struct brw_state_flags *state = &brw->state.dirty;
+   int i;
+
+   brw_clear_validated_bos(brw);
 
    if (INTEL_DEBUG) {
       /* Debug version which enforces various sanity checks on the
@@ -260,8 +269,6 @@ got_flushed:
         if (check_state(state, &atom->dirty)) {
            if (atom->emit) {
               atom->emit( brw );
-              if (intel->batch->buf != last_batch_bo)
-                 goto got_flushed;
            }
         }
 
@@ -286,15 +293,11 @@ got_flushed:
         if (check_state(state, &atom->dirty)) {
            if (atom->emit) {
               atom->emit( brw );
-              if (intel->batch->buf != last_batch_bo)
-                 goto got_flushed;
            }
         }
       }
    }
 
-   dri_bo_unreference(last_batch_bo);
-
    if (!brw->intel.Fallback)
       memset(state, 0, sizeof(*state));
 }