Using an unoptimized variant of glamor spending 50% of its CPU time in
brw_draw_prims() (and hitting the cache *very* frequently):
    N        Min        Max     Median        Avg     Stddev
x 200      29200      40500      34900      34750  958.43256
+ 200      31000      40300      34700      34622  916.35941
No difference proven at 95.0% confidence
Similarly, no difference on GLB2.7:
   N        Min        Max     Median        Avg     Stddev
x 63       64.1      71.36      70.69  70.113175  1.6782026
+ 63       63.6      71.18      70.75  70.223651  1.6044186
No difference proven at 95.0% confidence
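(As a sanity check on ministat's verdict: with n = 200 per sample in the
first run, a pooled two-sample t-statistic recomputed from the table is

    t = (34750 - 34622) / sqrt((958.43^2 + 916.36^2) / 200)
      ≈ 128 / 93.8
      ≈ 1.37,

below the two-sided 95% critical value t(0.975, df=398) ≈ 1.97, so no
difference can be claimed. This is a rough recomputation from the
reported means and stddevs, not an additional measurement.)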
v2: Rebase on master (by anholt)
v3: Add a missing BEGIN_BATCH(3) to aa_line_parameters -- CACHED_BATCH
didn't have the asserts about batchbuffer usage that ADVANCE_BATCH
does, so we started hitting assertion failures.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
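
For context on the v3 note: ADVANCE_BATCH's extra safety comes from a
debug check that the number of dwords actually emitted matches what
BEGIN_BATCH reserved; CACHED_BATCH performed no such check, so
brw_aa_line_parameters could get away without a BEGIN_BATCH until this
conversion. A minimal standalone sketch of that mechanism follows; the
struct layout and the sketch_* names are invented for illustration and
simplified from what intel_batchbuffer.h actually declares.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical, pared-down batch state; the real struct in
     * intel_batchbuffer.h carries a BO, relocations, and more. */
    struct sketch_batch {
       uint32_t map[1024];  /* dword buffer */
       int used;            /* dwords written so far */
       int emit;            /* value of 'used' at the last BEGIN_BATCH */
       int emit_total;      /* dwords the last BEGIN_BATCH reserved */
    };

    /* BEGIN_BATCH(n): record how many dwords the packet promises. */
    static void sketch_begin_batch(struct sketch_batch *batch, int n)
    {
       batch->emit = batch->used;
       batch->emit_total = n;
    }

    /* OUT_BATCH(d): append one dword. */
    static void sketch_out_batch(struct sketch_batch *batch, uint32_t d)
    {
       batch->map[batch->used++] = d;
    }

    /* ADVANCE_BATCH(): verify the promise was kept.  CACHED_BATCH had
     * no equivalent verification, which is how a packet emitted
     * without BEGIN_BATCH went unnoticed before this patch. */
    static void sketch_advance_batch(struct sketch_batch *batch)
    {
       int emitted = batch->used - batch->emit;
       if (emitted != batch->emit_total) {
          fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
                  emitted, batch->emit_total);
          abort();
       }
       batch->emit_total = 0;
    }

    int main(void)
    {
       struct sketch_batch batch = {0};

       /* Correct usage, mirroring the shape of the fixed
        * aa_line_parameters packet: reserve 3 dwords, emit exactly 3.
        * The opcode value here is a placeholder, not the real one. */
       sketch_begin_batch(&batch, 3);
       sketch_out_batch(&batch, 0x1000 << 16 | (3 - 2));
       sketch_out_batch(&batch, 0);
       sketch_out_batch(&batch, 0);
       sketch_advance_batch(&batch);  /* count matches: no abort */

       printf("packet emitted cleanly (%d dwords)\n", batch.used);
       return 0;
    }

Dropping the leading sketch_begin_batch() call (as the pre-v3 patch
effectively did) makes sketch_advance_batch() see 3 dwords against a
reservation of 0 and bail out, which is exactly the assertion failure
the v3 note describes.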
OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]);
OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]);
OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]);
- CACHED_BATCH();
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_blend_constant_color = {
assert(brw->urb.nr_cs_entries);
OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
}
- CACHED_BATCH();
+ ADVANCE_BATCH();
}
static GLfloat fixed_plane[6][4] = {
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
- CACHED_BATCH();
+ ADVANCE_BATCH();
return;
}
OUT_BATCH(dw1);
}
- CACHED_BATCH();
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_vertices = {
for (i = 0; i < 32; i++)
OUT_BATCH(ctx->PolygonStipple[i]);
}
- CACHED_BATCH();
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_polygon_stipple = {
OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
else
OUT_BATCH(0);
- CACHED_BATCH();
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_polygon_stipple_offset = {
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
+ BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
/* use legacy aa line coverage computation */
OUT_BATCH(0);
OUT_BATCH(0);
- CACHED_BATCH();
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_aa_line_parameters = {
OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
}
- CACHED_BATCH();
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_line_stipple = {
brw->batch.used += bytes >> 2;
}
-void
-intel_batchbuffer_cached_advance(struct brw_context *brw)
-{
- struct cached_batch_item **prev = &brw->batch.cached_items, *item;
- uint32_t sz = (brw->batch.used - brw->batch.emit) * sizeof(uint32_t);
- uint32_t *start = brw->batch.map + brw->batch.emit;
- uint16_t op = *start >> 16;
-
- while (*prev) {
- uint32_t *old;
-
- item = *prev;
- old = brw->batch.map + item->header;
- if (op == *old >> 16) {
- if (item->size == sz && memcmp(old, start, sz) == 0) {
- if (prev != &brw->batch.cached_items) {
- *prev = item->next;
- item->next = brw->batch.cached_items;
- brw->batch.cached_items = item;
- }
- brw->batch.used = brw->batch.emit;
- assert(brw->batch.used > 0);
- return;
- }
-
- goto emit;
- }
- prev = &item->next;
- }
-
- item = malloc(sizeof(struct cached_batch_item));
- if (item == NULL)
- return;
-
- item->next = brw->batch.cached_items;
- brw->batch.cached_items = item;
-
-emit:
- item->size = sz;
- item->header = brw->batch.emit;
-}
-
/**
* Restriction [DevSNB, DevIVB]:
*
#endif
}
-void intel_batchbuffer_cached_advance(struct brw_context *brw);
-
#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING)
#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING)
#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
} while (0)
#define ADVANCE_BATCH() intel_batchbuffer_advance(brw);
-#define CACHED_BATCH() intel_batchbuffer_cached_advance(brw);
#ifdef __cplusplus
}