This is the other half of Keith's draw/flush patch.
There are now 5 flush flags to control what's flushed (post-xform vertex
cache, prim cache, vbuf, etc).
The gears slowdown seen with this part of the patch was caused by the cull
stage not being invoked. The stage used to run unconditionally, but is now
gated by 'need_det'. It must also be gated by draw->rasterizer->cull_mode,
because gears uses back-face culling.
draw->convert_wide_points = TRUE;
draw->convert_wide_lines = TRUE;
- draw->prim = ~0; /* != any of PIPE_PRIM_x */
+ draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
draw_vertex_cache_invalidate( draw );
draw_set_mapped_element_buffer( draw, 0, NULL );
void draw_flush( struct draw_context *draw )
{
- if (draw->drawing)
- draw_do_flush( draw, DRAW_FLUSH_DRAW );
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
}
void draw_set_rasterizer_state( struct draw_context *draw,
const struct pipe_rasterizer_state *raster )
{
- draw_flush( draw );
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
draw->rasterizer = raster;
}
void draw_set_rasterize_stage( struct draw_context *draw,
struct draw_stage *stage )
{
- draw_flush( draw );
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
draw->pipeline.rasterize = stage;
}
void draw_set_clip_state( struct draw_context *draw,
const struct pipe_clip_state *clip )
{
- draw_flush( draw );
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
assert(clip->nr <= PIPE_MAX_CLIP_PLANES);
memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
void draw_set_viewport_state( struct draw_context *draw,
const struct pipe_viewport_state *viewport )
{
- draw_flush( draw );
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->viewport = *viewport; /* struct copy */
}
unsigned attr,
const struct pipe_vertex_buffer *buffer)
{
- draw_flush( draw );
-
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
assert(attr < PIPE_ATTRIB_MAX);
draw->vertex_buffer[attr] = *buffer;
}
unsigned attr,
const struct pipe_vertex_element *element)
{
- draw_flush( draw );
-
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
assert(attr < PIPE_ATTRIB_MAX);
draw->vertex_element[attr] = *element;
}
draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer)
{
- draw_flush( draw );
-
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
draw->user.vbuffer[attr] = buffer;
}
draw_set_mapped_constant_buffer(struct draw_context *draw,
const void *buffer)
{
- draw_flush( draw );
-
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
draw->user.constants = buffer;
}
void
draw_convert_wide_points(struct draw_context *draw, boolean enable)
{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->convert_wide_points = enable;
}
void
draw_convert_wide_lines(struct draw_context *draw, boolean enable)
{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->convert_wide_lines = enable;
}
static void draw_prim_queue_flush( struct draw_context *draw )
{
- // struct draw_stage *first = draw->pipeline.first;
unsigned i;
if (0)
fprintf(stdout,"Flushing with %d prims, %d verts\n",
draw->pq.queue_nr, draw->vs.queue_nr);
- /* Make sure all vertices are available/shaded:
- */
- if (draw->vs.queue_nr)
- draw_vertex_shader_queue_flush(draw);
+ if (draw->pq.queue_nr == 0)
+ return;
/* NOTE: we cannot save draw->pipeline.first in a local var because
* draw->pipeline.first is often changed by the first call to tri(),
}
-void draw_do_flush( struct draw_context *draw,
- unsigned flush )
+
+void draw_do_flush( struct draw_context *draw, unsigned flags )
{
- if ((flush & (DRAW_FLUSH_PRIM_QUEUE |
- DRAW_FLUSH_VERTEX_CACHE_INVALIDATE |
- DRAW_FLUSH_DRAW)) &&
- draw->pq.queue_nr)
- {
- draw_prim_queue_flush(draw);
- }
+ if (0)
+ fprintf(stdout,"Flushing with %d verts, %d prims\n",
+ draw->vs.queue_nr,
+ draw->pq.queue_nr );
- if ((flush & (DRAW_FLUSH_VERTEX_CACHE_INVALIDATE |
- DRAW_FLUSH_DRAW)) &&
- draw->drawing)
- {
- draw_vertex_cache_invalidate(draw);
- }
- if ((flush & DRAW_FLUSH_DRAW) &&
- draw->drawing)
- {
- draw->pipeline.first->flush( draw->pipeline.first, ~0 );
- draw->drawing = FALSE;
- draw->prim = ~0;
- draw->pipeline.first = draw->pipeline.validate;
- }
+ if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
+ draw_vertex_shader_queue_flush(draw);
+
+ if (flags >= DRAW_FLUSH_PRIM_QUEUE) {
+ draw_prim_queue_flush(draw);
+ if (flags >= DRAW_FLUSH_VERTEX_CACHE) {
+ draw_vertex_cache_invalidate(draw);
+
+ if (flags >= DRAW_FLUSH_STATE_CHANGE) {
+ draw->pipeline.first->flush( draw->pipeline.first, flags );
+ draw->pipeline.first = draw->pipeline.validate;
+ draw->reduced_prim = ~0;
+ }
+ }
+ }
+ }
}
{
if (!draw_vertex_cache_check_space( draw, nr_verts )) {
// fprintf(stderr, "v");
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE_INVALIDATE );
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE );
}
else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) {
// fprintf(stderr, "p");
* Main entrypoint to draw some number of points/lines/triangles
*/
static void
-draw_prim( struct draw_context *draw, unsigned start, unsigned count )
+draw_prim( struct draw_context *draw,
+ unsigned prim, unsigned start, unsigned count )
{
unsigned i;
// _mesa_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
- switch (draw->prim) {
+ switch (prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < count; i ++) {
do_point( draw,
}
-static void
-draw_set_prim( struct draw_context *draw, unsigned prim )
-{
- assert(prim >= PIPE_PRIM_POINTS);
- assert(prim <= PIPE_PRIM_POLYGON);
-
- if (reduced_prim[prim] != draw->reduced_prim) {
- draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE );
- draw->reduced_prim = reduced_prim[prim];
- }
-
- draw->prim = prim;
-}
-
-
/**
draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count)
{
- if (!draw->drawing) {
- draw->drawing = TRUE;
- }
-
- if (draw->prim != prim) {
- draw_set_prim( draw, prim );
+ if (reduced_prim[prim] != draw->reduced_prim) {
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->reduced_prim = reduced_prim[prim];
}
/* drawing done here: */
- draw_prim(draw, start, count);
+ draw_prim(draw, prim, start, count);
}
struct prim_header * );
void (*flush)( struct draw_stage *,
- unsigned flags );
+ unsigned flags );
void (*reset_stipple_counter)( struct draw_stage * );
boolean convert_wide_points; /**< convert wide points to tris? */
boolean convert_wide_lines; /**< convert wide lines to tris? */
- boolean drawing; /**< do we presently have something queued for drawing? */
- unsigned prim; /**< current prim type: PIPE_PRIM_x */
unsigned reduced_prim;
/** TGSI program interpreter runtime state */
unsigned count );
-#define DRAW_FLUSH_PRIM_QUEUE 0x1
-#define DRAW_FLUSH_VERTEX_CACHE_INVALIDATE 0x2
-#define DRAW_FLUSH_DRAW 0x4
+#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */
+#define DRAW_FLUSH_PRIM_QUEUE 0x2
+#define DRAW_FLUSH_VERTEX_CACHE 0x4
+#define DRAW_FLUSH_STATE_CHANGE 0x8
+#define DRAW_FLUSH_BACKEND 0x10
-void draw_do_flush( struct draw_context *draw,
- unsigned flags );
-
+void draw_do_flush( struct draw_context *draw, unsigned flags );
{
struct draw_context *draw = stage->draw;
struct draw_stage *next = draw->pipeline.rasterize;
+ int need_det = 0;
+ int precalc_flat = 0;
+
+ /* Set the validate's next stage to the rasterize stage, so that it
+ * can be found later if needed for flushing.
+ */
+ stage->next = next;
/*
* NOTE: we build up the pipeline in end-to-start order.
if (draw->rasterizer->line_stipple_enable) {
draw->pipeline.stipple->next = next;
next = draw->pipeline.stipple;
+ precalc_flat = 1; /* only needed for lines really */
}
if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) {
draw->pipeline.unfilled->next = next;
next = draw->pipeline.unfilled;
+ precalc_flat = 1; /* only needed for triangles really */
+ need_det = 1;
}
if (draw->rasterizer->offset_cw ||
draw->rasterizer->offset_ccw) {
draw->pipeline.offset->next = next;
next = draw->pipeline.offset;
+ need_det = 1;
}
if (draw->rasterizer->light_twoside) {
draw->pipeline.twoside->next = next;
next = draw->pipeline.twoside;
+ need_det = 1;
}
/* Always run the cull stage as we calculate determinant there
- * also. Fix this..
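+ * also.
+ *
+ * This can actually be a win as culling out the triangles can lead
+ * to less work emitting vertices, smaller vertex buffers, etc.
+ * It's difficult to say whether this will be true in general.
+ *
+ * NOTE: the stage is no longer unconditional -- it is only linked in
+ * when need_det is set or draw->rasterizer->cull_mode is non-zero.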
*/
- {
+ if (need_det || draw->rasterizer->cull_mode) {
draw->pipeline.cull->next = next;
next = draw->pipeline.cull;
}
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
+ precalc_flat = 1; /* XXX: FIX ME! Only needed for clipped prims */
}
- /* Do software flatshading prior to clipping. XXX: should only do
- * this for clipped primitives, ie it is a part of the clip
- * routine.
- */
- if (draw->rasterizer->flatshade) {
+ if (draw->rasterizer->flatshade && precalc_flat) {
draw->pipeline.flatshade->next = next;
next = draw->pipeline.flatshade;
}
-
+
draw->pipeline.first = next;
- //BP draw->pipeline.first->begin( draw->pipeline.first );
return next;
}
-
static void validate_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct draw_stage *stage = CALLOC_STRUCT(draw_stage);
stage->draw = draw;
-
stage->next = NULL;
stage->point = validate_point;
stage->line = validate_line;
}
-static void
-vbuf_begin( struct draw_stage *stage )
-{
- /* no-op, vbuffer allocated by first point/line/tri */
-}
-
static void
vbuf_flush( struct draw_stage *stage, unsigned flags )
{
-// vbuf_flush_indices( stage );
- /* XXX: Overkill */
- vbuf_flush_vertices( stage );
-
+ vbuf_flush_indices( stage );
+
stage->point = vbuf_first_point;
stage->line = vbuf_first_line;
stage->tri = vbuf_first_tri;
+
+ if (flags & DRAW_FLUSH_BACKEND)
+ vbuf_flush_vertices( stage );
}
static void
vbuf_reset_stipple_counter( struct draw_stage *stage )
{
+ /* XXX: Need to do something here for hardware with linestipple.
+ */
(void) stage;
}
assert(draw->pq.queue_nr == 0);
assert(draw->vs.queue_nr == 0);
assert(draw->vcache.referenced == 0);
-
+ /* XXX memset() here */
+#if 0
for (i = 0; i < Elements( draw->vcache.idx ); i++)
draw->vcache.idx[i] = ~0;
-
+#else
+ memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx));
+#endif
// fprintf(stderr, "x\n");
}
int draw_vertex_cache_check_space( struct draw_context *draw,
- unsigned nr_verts )
+ unsigned nr_verts )
{
if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) {
/* The vs queue is sized so that this can never happen:
draw_bind_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
- draw_flush(draw);
- draw->vertex_shader = dvs;
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->vertex_shader = dvs;
draw->num_vs_outputs = dvs->state->num_outputs;
/* specify the fragment program to interpret/execute */