i965: Remove CACHED_BATCH support altogether.
author Kenneth Graunke <kenneth@whitecape.org>
Mon, 26 Aug 2013 20:11:21 +0000 (13:11 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 17 Jan 2014 21:21:11 +0000 (13:21 -0800)
Using an unoptimized variant of glamor spending 50% of its CPU time in
brw_draw_prims() (and hitting the cache *very* frequently):

    N           Min           Max        Median           Avg        Stddev
x 200         29200         40500         34900         34750     958.43256
+ 200         31000         40300         34700         34622     916.35941
No difference proven at 95.0% confidence

Similarly, no difference on GLB2.7:

    N           Min           Max        Median           Avg        Stddev
x  63          64.1         71.36         70.69     70.113175     1.6782026
+  63          63.6         71.18         70.75     70.223651     1.6044186
No difference proven at 95.0% confidence

v2: Rebase on master (by anholt)
v3: Add a missing BEGIN_BATCH(3) to aa_line_parameters -- CACHED_BATCH
    didn't have the asserts about batchbuffer usage that ADVANCE_BATCH
    does, so we started hitting assertion failures.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_cc.c
src/mesa/drivers/dri/i965/brw_curbe.c
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/intel_batchbuffer.c
src/mesa/drivers/dri/i965/intel_batchbuffer.h

index 840f96027bd8bb3b6108e58b21ec7968cbfb84eb..2f4e9dcb196eda2395d7a45416f93bc75b7c9e24 100644 (file)
@@ -247,7 +247,7 @@ static void upload_blend_constant_color(struct brw_context *brw)
    OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]);
    OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]);
    OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]);
-   CACHED_BATCH();
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_blend_constant_color = {
index 9e556fb6361822d81a566a2b9f1bd5f82feafdb2..bb1c79784388c71b6d47d2acec4477fa5cb945dd 100644 (file)
@@ -161,7 +161,7 @@ void brw_upload_cs_urb_state(struct brw_context *brw)
       assert(brw->urb.nr_cs_entries);
       OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
    }
-   CACHED_BATCH();
+   ADVANCE_BATCH();
 }
 
 static GLfloat fixed_plane[6][4] = {
index 4959a08898f7c1c65a6f2bdde1bac52a3210fab5..5418398afdf6efb5b63e918c12a1908788c23f7d 100644 (file)
@@ -643,7 +643,7 @@ static void brw_emit_vertices(struct brw_context *brw)
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
-      CACHED_BATCH();
+      ADVANCE_BATCH();
       return;
    }
 
@@ -808,7 +808,7 @@ static void brw_emit_vertices(struct brw_context *brw)
       OUT_BATCH(dw1);
    }
 
-   CACHED_BATCH();
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_vertices = {
index 6d1004e2b7b25850f57fcc4865c7de930b432c8c..70dc07111f13cc18ccebf91d7742b327c449d087 100644 (file)
@@ -778,7 +778,7 @@ static void upload_polygon_stipple(struct brw_context *brw)
       for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
    }
-   CACHED_BATCH();
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_polygon_stipple = {
@@ -822,7 +822,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
       OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
    else
       OUT_BATCH(0);
-   CACHED_BATCH();
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_polygon_stipple_offset = {
@@ -852,11 +852,12 @@ static void upload_aa_line_parameters(struct brw_context *brw)
    if (brw->gen == 6)
       intel_emit_post_sync_nonzero_flush(brw);
 
+   BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
    /* use legacy aa line coverage computation */
    OUT_BATCH(0);
    OUT_BATCH(0);
-   CACHED_BATCH();
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_aa_line_parameters = {
@@ -901,7 +902,7 @@ static void upload_line_stipple(struct brw_context *brw)
       OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
    }
 
-   CACHED_BATCH();
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_line_stipple = {
index b193d7901daa45ad60f09177c8860b02b2b75a90..2a724ec38dff1f74a0672ccbdffea9228d3cdc6c 100644 (file)
@@ -432,48 +432,6 @@ intel_batchbuffer_data(struct brw_context *brw,
    brw->batch.used += bytes >> 2;
 }
 
-void
-intel_batchbuffer_cached_advance(struct brw_context *brw)
-{
-   struct cached_batch_item **prev = &brw->batch.cached_items, *item;
-   uint32_t sz = (brw->batch.used - brw->batch.emit) * sizeof(uint32_t);
-   uint32_t *start = brw->batch.map + brw->batch.emit;
-   uint16_t op = *start >> 16;
-
-   while (*prev) {
-      uint32_t *old;
-
-      item = *prev;
-      old = brw->batch.map + item->header;
-      if (op == *old >> 16) {
-        if (item->size == sz && memcmp(old, start, sz) == 0) {
-           if (prev != &brw->batch.cached_items) {
-              *prev = item->next;
-              item->next = brw->batch.cached_items;
-              brw->batch.cached_items = item;
-           }
-           brw->batch.used = brw->batch.emit;
-            assert(brw->batch.used > 0);
-           return;
-        }
-
-        goto emit;
-      }
-      prev = &item->next;
-   }
-
-   item = malloc(sizeof(struct cached_batch_item));
-   if (item == NULL)
-      return;
-
-   item->next = brw->batch.cached_items;
-   brw->batch.cached_items = item;
-
-emit:
-   item->size = sz;
-   item->header = brw->batch.emit;
-}
-
 /**
  * Restriction [DevSNB, DevIVB]:
  *
index 80cd571fe13c696c3f8cf44efe63e4843c719f39..652a45b514606b049938917b2134dc277ca89128 100644 (file)
@@ -163,8 +163,6 @@ intel_batchbuffer_advance(struct brw_context *brw)
 #endif
 }
 
-void intel_batchbuffer_cached_advance(struct brw_context *brw);
-
 #define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING)
 #define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING)
 #define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
@@ -175,7 +173,6 @@ void intel_batchbuffer_cached_advance(struct brw_context *brw);
 } while (0)
 
 #define ADVANCE_BATCH() intel_batchbuffer_advance(brw);
-#define CACHED_BATCH() intel_batchbuffer_cached_advance(brw);
 
 #ifdef __cplusplus
 }