From 1603a33fb276d7e78a2e872dfa05aa0093d1329a Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 25 Jan 2008 17:21:05 -0700 Subject: [PATCH] gallium: better flush logic in draw module This is the other half of Keith's draw/flush patch. There are now 5 flush flags to control what's flushed (post-xform vertex cache, prim cache, vbuf, etc). The gears slow-down in this part of the patch was due to the cull stage not getting invoked. It was unconditional before, but is now gated by 'need_det'. But it also needs to be gated by draw->rasterizer->cull_mode. Gears uses back-face culling. --- src/mesa/pipe/draw/draw_context.c | 29 ++++----- src/mesa/pipe/draw/draw_prim.c | 85 ++++++++++--------------- src/mesa/pipe/draw/draw_private.h | 16 ++--- src/mesa/pipe/draw/draw_validate.c | 32 ++++++---- src/mesa/pipe/draw/draw_vbuf.c | 17 ++--- src/mesa/pipe/draw/draw_vertex_cache.c | 9 ++- src/mesa/pipe/draw/draw_vertex_shader.c | 4 +- 7 files changed, 89 insertions(+), 103 deletions(-) diff --git a/src/mesa/pipe/draw/draw_context.c b/src/mesa/pipe/draw/draw_context.c index ff23288fa87..e8ca1f035bb 100644 --- a/src/mesa/pipe/draw/draw_context.c +++ b/src/mesa/pipe/draw/draw_context.c @@ -80,7 +80,7 @@ struct draw_context *draw_create( void ) draw->convert_wide_points = TRUE; draw->convert_wide_lines = TRUE; - draw->prim = ~0; /* != any of PIPE_PRIM_x */ + draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ draw_vertex_cache_invalidate( draw ); draw_set_mapped_element_buffer( draw, 0, NULL ); @@ -111,8 +111,7 @@ void draw_destroy( struct draw_context *draw ) void draw_flush( struct draw_context *draw ) { - if (draw->drawing) - draw_do_flush( draw, DRAW_FLUSH_DRAW ); + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); } @@ -124,7 +123,8 @@ void draw_flush( struct draw_context *draw ) void draw_set_rasterizer_state( struct draw_context *draw, const struct pipe_rasterizer_state *raster ) { - draw_flush( draw ); + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->rasterizer = raster; } @@ -137,7 +137,8 @@ void draw_set_rasterizer_state( struct draw_context *draw, void draw_set_rasterize_stage( struct draw_context *draw, struct draw_stage *stage ) { - draw_flush( draw ); + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->pipeline.rasterize = stage; } @@ -148,7 +149,7 @@ void draw_set_rasterize_stage( struct draw_context *draw, void draw_set_clip_state( struct draw_context *draw, const struct pipe_clip_state *clip ) { - draw_flush( draw ); + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); assert(clip->nr <= PIPE_MAX_CLIP_PLANES); memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0])); @@ -162,7 +163,7 @@ void draw_set_clip_state( struct draw_context *draw, void draw_set_viewport_state( struct draw_context *draw, const struct pipe_viewport_state *viewport ) { - draw_flush( draw ); + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->viewport = *viewport; /* struct copy */ } @@ -173,8 +174,7 @@ draw_set_vertex_buffer(struct draw_context *draw, unsigned attr, const struct pipe_vertex_buffer *buffer) { - draw_flush( draw ); - + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); assert(attr < PIPE_ATTRIB_MAX); draw->vertex_buffer[attr] = *buffer; } @@ -185,8 +185,7 @@ draw_set_vertex_element(struct draw_context *draw, unsigned attr, const struct pipe_vertex_element *element) { - draw_flush( draw ); - + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); assert(attr < PIPE_ATTRIB_MAX); draw->vertex_element[attr] = *element; } @@ -199,8 +198,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer) { - draw_flush( draw ); - + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); draw->user.vbuffer[attr] = buffer; } @@ -209,8 +207,7 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer) { - draw_flush( draw ); - + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); draw->user.constants = buffer; } @@ -222,6 +219,7 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, void draw_convert_wide_points(struct draw_context *draw, boolean enable) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->convert_wide_points = enable; } @@ -233,6 +231,7 @@ draw_convert_wide_points(struct draw_context *draw, boolean enable) void draw_convert_wide_lines(struct draw_context *draw, boolean enable) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->convert_wide_lines = enable; } diff --git a/src/mesa/pipe/draw/draw_prim.c b/src/mesa/pipe/draw/draw_prim.c index 5703f5f0b0c..243381aec0c 100644 --- a/src/mesa/pipe/draw/draw_prim.c +++ b/src/mesa/pipe/draw/draw_prim.c @@ -57,17 +57,14 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { static void draw_prim_queue_flush( struct draw_context *draw ) { - // struct draw_stage *first = draw->pipeline.first; unsigned i; if (0) fprintf(stdout,"Flushing with %d prims, %d verts\n", draw->pq.queue_nr, draw->vs.queue_nr); - /* Make sure all vertices are available/shaded: - */ - if (draw->vs.queue_nr) - draw_vertex_shader_queue_flush(draw); + if (draw->pq.queue_nr == 0) + return; /* NOTE: we cannot save draw->pipeline->first in a local var because * draw->pipeline->first is often changed by the first call to tri(), @@ -102,33 +99,32 @@ static void draw_prim_queue_flush( struct draw_context *draw ) } -void draw_do_flush( struct draw_context *draw, - unsigned flush ) + +void draw_do_flush( struct draw_context *draw, unsigned flags ) { - if ((flush & (DRAW_FLUSH_PRIM_QUEUE | - DRAW_FLUSH_VERTEX_CACHE_INVALIDATE | - DRAW_FLUSH_DRAW)) && - draw->pq.queue_nr) - { - draw_prim_queue_flush(draw); - } + if (0) + fprintf(stdout,"Flushing with %d verts, %d prims\n", + draw->vs.queue_nr, + draw->pq.queue_nr ); - if ((flush & (DRAW_FLUSH_VERTEX_CACHE_INVALIDATE | - DRAW_FLUSH_DRAW)) && - draw->drawing) - { - draw_vertex_cache_invalidate(draw); - } - if ((flush & DRAW_FLUSH_DRAW) && - draw->drawing) - { - draw->pipeline.first->flush( draw->pipeline.first, ~0 ); - draw->drawing = FALSE; - draw->prim = ~0; - draw->pipeline.first = draw->pipeline.validate; - } + if (flags >= DRAW_FLUSH_SHADER_QUEUE) { + draw_vertex_shader_queue_flush(draw); + + if (flags >= DRAW_FLUSH_PRIM_QUEUE) { + draw_prim_queue_flush(draw); + if (flags >= DRAW_FLUSH_VERTEX_CACHE) { + draw_vertex_cache_invalidate(draw); + + if (flags >= DRAW_FLUSH_STATE_CHANGE) { + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; + draw->reduced_prim = ~0; + } + } + } + } } @@ -143,7 +139,7 @@ static struct prim_header *get_queued_prim( struct draw_context *draw, { if (!draw_vertex_cache_check_space( draw, nr_verts )) { // fprintf(stderr, "v"); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE_INVALIDATE ); + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE ); } else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) { // fprintf(stderr, "p"); @@ -251,13 +247,14 @@ static void do_quad( struct draw_context *draw, * Main entrypoint to draw some number of points/lines/triangles */ static void -draw_prim( struct draw_context *draw, unsigned start, unsigned count ) +draw_prim( struct draw_context *draw, + unsigned prim, unsigned start, unsigned count ) { unsigned i; // _mesa_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); - switch (draw->prim) { + switch (prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i ++) { do_point( draw, @@ -389,21 +386,6 @@ draw_prim( struct draw_context *draw, unsigned start, unsigned count ) } -static void -draw_set_prim( struct draw_context *draw, unsigned prim ) -{ - assert(prim >= PIPE_PRIM_POINTS); - assert(prim <= PIPE_PRIM_POLYGON); - - if (reduced_prim[prim] != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE ); - draw->reduced_prim = reduced_prim[prim]; - } - - draw->prim = prim; -} - - /** @@ -417,16 +399,13 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - if (!draw->drawing) { - draw->drawing = TRUE; - } - - if (draw->prim != prim) { - draw_set_prim( draw, prim ); + if (reduced_prim[prim] != draw->reduced_prim) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->reduced_prim = reduced_prim[prim]; } /* drawing done here: */ - draw_prim(draw, start, count); + draw_prim(draw, prim, start, count); } diff --git a/src/mesa/pipe/draw/draw_private.h b/src/mesa/pipe/draw/draw_private.h index e393fa5fe2a..1c2e88264fd 100644 --- a/src/mesa/pipe/draw/draw_private.h +++ b/src/mesa/pipe/draw/draw_private.h @@ -111,7 +111,7 @@ struct draw_stage struct prim_header * ); void (*flush)( struct draw_stage *, - unsigned flags ); + unsigned flags ); void (*reset_stipple_counter)( struct draw_stage * ); @@ -191,8 +191,6 @@ struct draw_context boolean convert_wide_points; /**< convert wide points to tris? */ boolean convert_wide_lines; /**< convert side lines to tris? */ - boolean drawing; /**< do we presently have something queued for drawing? */ - unsigned prim; /**< current prim type: PIPE_PRIM_x */ unsigned reduced_prim; /** TGSI program interpreter runtime state */ @@ -278,14 +276,14 @@ extern void draw_vertex_fetch( struct draw_context *draw, unsigned count ); -#define DRAW_FLUSH_PRIM_QUEUE 0x1 -#define DRAW_FLUSH_VERTEX_CACHE_INVALIDATE 0x2 -#define DRAW_FLUSH_DRAW 0x4 +#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ +#define DRAW_FLUSH_PRIM_QUEUE 0x2 +#define DRAW_FLUSH_VERTEX_CACHE 0x4 +#define DRAW_FLUSH_STATE_CHANGE 0x8 +#define DRAW_FLUSH_BACKEND 0x10 -void draw_do_flush( struct draw_context *draw, - unsigned flags ); - +void draw_do_flush( struct draw_context *draw, unsigned flags ); diff --git a/src/mesa/pipe/draw/draw_validate.c b/src/mesa/pipe/draw/draw_validate.c index a626fb1fba5..86d5a5f8142 100644 --- a/src/mesa/pipe/draw/draw_validate.c +++ b/src/mesa/pipe/draw/draw_validate.c @@ -43,6 +43,13 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) { struct draw_context *draw = stage->draw; struct draw_stage *next = draw->pipeline.rasterize; + int need_det = 0; + int precalc_flat = 0; + + /* Set the validate's next stage to the rasterize stage, so that it + * can be found later if needed for flushing. + */ + stage->next = next; /* * NOTE: we build up the pipeline in end-to-start order. @@ -61,29 +68,38 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) if (draw->rasterizer->line_stipple_enable) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; + precalc_flat = 1; /* only needed for lines really */ } if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; next = draw->pipeline.unfilled; + precalc_flat = 1; /* only needed for triangles really */ + need_det = 1; } if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) { draw->pipeline.offset->next = next; next = draw->pipeline.offset; + need_det = 1; } if (draw->rasterizer->light_twoside) { draw->pipeline.twoside->next = next; next = draw->pipeline.twoside; + need_det = 1; } /* Always run the cull stage as we calculate determinant there - * also. Fix this.. + * also. + * + * This can actually be a win as culling out the triangles can lead + * to less work emitting vertices, smaller vertex buffers, etc. + * It's difficult to say whether this will be true in general. */ - { + if (need_det || draw->rasterizer->cull_mode) { draw->pipeline.cull->next = next; next = draw->pipeline.cull; } @@ -94,23 +110,18 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) { draw->pipeline.clip->next = next; next = draw->pipeline.clip; + precalc_flat = 1; /* XXX: FIX ME! Only needed for clipped prims */ } - /* Do software flatshading prior to clipping. XXX: should only do - * this for clipped primitives, ie it is a part of the clip - * routine. - */ - if (draw->rasterizer->flatshade) { + if (draw->rasterizer->flatshade && precalc_flat) { draw->pipeline.flatshade->next = next; next = draw->pipeline.flatshade; } - + draw->pipeline.first = next; - //BP draw->pipeline.first->begin( draw->pipeline.first ); return next; } - static void validate_tri( struct draw_stage *stage, struct prim_header *header ) { @@ -162,7 +173,6 @@ struct draw_stage *draw_validate_stage( struct draw_context *draw ) struct draw_stage *stage = CALLOC_STRUCT(draw_stage); stage->draw = draw; - stage->next = NULL; stage->point = validate_point; stage->line = validate_line; diff --git a/src/mesa/pipe/draw/draw_vbuf.c b/src/mesa/pipe/draw/draw_vbuf.c index d827f51d567..cd0b4fbbb98 100644 --- a/src/mesa/pipe/draw/draw_vbuf.c +++ b/src/mesa/pipe/draw/draw_vbuf.c @@ -387,29 +387,26 @@ vbuf_alloc_vertices( struct draw_stage *stage, } -static void -vbuf_begin( struct draw_stage *stage ) -{ - /* no-op, vbuffer allocated by first point/line/tri */ -} - static void vbuf_flush( struct draw_stage *stage, unsigned flags ) { -// vbuf_flush_indices( stage ); - /* XXX: Overkill */ - vbuf_flush_vertices( stage ); - + vbuf_flush_indices( stage ); + stage->point = vbuf_first_point; stage->line = vbuf_first_line; stage->tri = vbuf_first_tri; + + if (flags & DRAW_FLUSH_BACKEND) + vbuf_flush_vertices( stage ); } static void vbuf_reset_stipple_counter( struct draw_stage *stage ) { + /* XXX: Need to do something here for hardware with linestipple. + */ (void) stage; } diff --git a/src/mesa/pipe/draw/draw_vertex_cache.c b/src/mesa/pipe/draw/draw_vertex_cache.c index 97a40b876ea..b4b4906d70d 100644 --- a/src/mesa/pipe/draw/draw_vertex_cache.c +++ b/src/mesa/pipe/draw/draw_vertex_cache.c @@ -42,10 +42,13 @@ void draw_vertex_cache_invalidate( struct draw_context *draw ) assert(draw->pq.queue_nr == 0); assert(draw->vs.queue_nr == 0); assert(draw->vcache.referenced == 0); - + /* XXX memset() here */ +#if 0 for (i = 0; i < Elements( draw->vcache.idx ); i++) draw->vcache.idx[i] = ~0; - +#else + memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); +#endif // fprintf(stderr, "x\n"); } @@ -148,7 +151,7 @@ void draw_vertex_cache_unreference( struct draw_context *draw ) int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ) + unsigned nr_verts ) { if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) { /* The vs queue is sized so that this can never happen: diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c index 5ca659dbf59..d19b60198dc 100644 --- a/src/mesa/pipe/draw/draw_vertex_shader.c +++ b/src/mesa/pipe/draw/draw_vertex_shader.c @@ -271,9 +271,9 @@ void draw_bind_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs) { - draw_flush(draw); - draw->vertex_shader = dvs; + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->vertex_shader = dvs; draw->num_vs_outputs = dvs->state->num_outputs; /* specify the fragment program to interpret/execute */ -- 2.30.2