From 08203428800554215657f1ebf19d74328103800e Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 25 Jun 2013 23:27:04 +0200 Subject: [PATCH] llvmpipe: rework query logic Previously lp_rast_begin_query commands were always inserted into each bin, and re-issued if the scene was restarted, while lp_rast_end_query commands were executed for each still active query at the end of tile rasterization. Also, the ps_invocations and vis_counter were set to zero when the respective command was encountered. This, however, cannot work for multiple queries of the same type (note that occlusion counter and occlusion predicate, while different types, were also affected). So, change the logic to always set the ps_invocations and vis_counter to zero at the start of tile rasterization, and then use "start" and "end" per-thread query values when encountering the begin/end query commands instead, which should work for multiple queries of the same type. This also means queries do not have to be reissued in a new scene; however, they still need to be finished at the end of tile rasterization, so a list of queries still active at the end of a scene needs to be maintained. Also, while here, don't bin the queries which don't do anything in rasterization. (This change does not actually handle multiple queries of the same type yet, as the list of active queries is just a simple fixed array and setup can still only have one query active per type.) 
Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_query.c | 15 ++-- src/gallium/drivers/llvmpipe/lp_query.h | 3 +- src/gallium/drivers/llvmpipe/lp_rast.c | 56 +++++--------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 9 +-- src/gallium/drivers/llvmpipe/lp_scene.h | 4 + src/gallium/drivers/llvmpipe/lp_setup.c | 81 +++++++++++++-------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 5 ++ 7 files changed, 92 insertions(+), 81 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 1d3edffba94..38d6b84b8d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -120,19 +120,19 @@ llvmpipe_get_query_result(struct pipe_context *pipe, switch (pq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: for (i = 0; i < num_threads; i++) { - *result += pq->count[i]; + *result += pq->end[i]; } break; case PIPE_QUERY_OCCLUSION_PREDICATE: for (i = 0; i < num_threads; i++) { /* safer (still not guaranteed) when there's an overflow */ - vresult->b = vresult->b || pq->count[i]; + vresult->b = vresult->b || pq->end[i]; } break; case PIPE_QUERY_TIMESTAMP: for (i = 0; i < num_threads; i++) { - if (pq->count[i] > *result) { - *result = pq->count[i]; + if (pq->end[i] > *result) { + *result = pq->end[i]; } if (*result == 0) *result = os_time_get_nano(); @@ -170,7 +170,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, (struct pipe_query_data_pipeline_statistics *)vresult; /* only ps_invocations come from binned query */ for (i = 0; i < num_threads; i++) { - pq->stats.ps_invocations += pq->count[i]; + pq->stats.ps_invocations += pq->end[i]; } pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE; *stats = pq->stats; @@ -200,7 +200,8 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) } - memset(pq->count, 0, sizeof(pq->count)); + memset(pq->start, 0, sizeof(pq->start)); + memset(pq->end, 0, sizeof(pq->end)); 
lp_setup_begin_query(llvmpipe->setup, pq); switch (pq->type) { @@ -232,8 +233,6 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) break; case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - /* Both active at same time will still fail all over the place. - * Then again several of each type can be active too... */ llvmpipe->active_occlusion_query++; llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY; break; diff --git a/src/gallium/drivers/llvmpipe/lp_query.h b/src/gallium/drivers/llvmpipe/lp_query.h index e29022ae6ee..62ad5fde188 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.h +++ b/src/gallium/drivers/llvmpipe/lp_query.h @@ -42,7 +42,8 @@ struct llvmpipe_context; struct llvmpipe_query { - uint64_t count[LP_MAX_THREADS]; /* a counter for each thread */ + uint64_t start[LP_MAX_THREADS]; /* start count value for each thread */ + uint64_t end[LP_MAX_THREADS]; /* end count value for each thread */ struct lp_fence *fence; /* fence from last scene this was binned in */ unsigned type; /* PIPE_QUERY_* */ unsigned num_primitives_generated; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 62a82e30788..871cc50fb4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -61,7 +61,6 @@ static void lp_rast_begin( struct lp_rasterizer *rast, struct lp_scene *scene ) { - rast->curr_scene = scene; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -100,6 +99,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? 
task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; + task->thread_data.vis_counter = 0; + task->ps_invocations = 0; + /* reset pointers to color and depth tile(s) */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); task->depth_tile = NULL; @@ -455,10 +457,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, * allocated 4x4 blocks hence need to filter them out here. */ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { - if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) { - /* not very accurate would need a popcount on the mask */ - task->ps_invocations++; - } + /* not very accurate would need a popcount on the mask */ + /* always count this not worth bothering? */ + task->ps_invocations++; + /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, @@ -490,28 +492,18 @@ lp_rast_begin_query(struct lp_rasterizer_task *task, { struct llvmpipe_query *pq = arg.query_obj; - assert(task->query[pq->type] == NULL); - switch (pq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - task->thread_data.vis_counter = 0; + pq->start[task->thread_index] = task->thread_data.vis_counter; break; case PIPE_QUERY_PIPELINE_STATISTICS: - task->ps_invocations = 0; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_TIMESTAMP_DISJOINT: + pq->start[task->thread_index] = task->ps_invocations; break; default: assert(0); break; } - - task->query[pq->type] = pq; } @@ -525,36 +517,26 @@ lp_rast_end_query(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct llvmpipe_query *pq = arg.query_obj; - assert(task->query[pq->type] == pq || - pq->type == PIPE_QUERY_TIMESTAMP || - pq->type == PIPE_QUERY_GPU_FINISHED); switch (pq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - 
pq->count[task->thread_index] += task->thread_data.vis_counter; + pq->end[task->thread_index] += + task->thread_data.vis_counter - pq->start[task->thread_index]; + pq->start[task->thread_index] = 0; break; case PIPE_QUERY_TIMESTAMP: - pq->count[task->thread_index] = os_time_get_nano(); + pq->end[task->thread_index] = os_time_get_nano(); break; case PIPE_QUERY_PIPELINE_STATISTICS: - pq->count[task->thread_index] += task->ps_invocations; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_TIMESTAMP_DISJOINT: - case PIPE_QUERY_GPU_FINISHED: + pq->end[task->thread_index] += + task->ps_invocations - pq->start[task->thread_index]; + pq->start[task->thread_index] = 0; break; default: assert(0); break; } - - if (task->query[pq->type] == pq) { - task->query[pq->type] = NULL; - } } @@ -575,10 +557,8 @@ lp_rast_tile_end(struct lp_rasterizer_task *task) { unsigned i; - for (i = 0; i < PIPE_QUERY_TYPES; ++i) { - if (task->query[i]) { - lp_rast_end_query(task, lp_rast_arg_query(task->query[i])); - } + for (i = 0; i < task->scene->num_active_queries; ++i) { + lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i])); } /* debug */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 30489407cdf..b8bc99c1939 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -99,7 +99,6 @@ struct lp_rasterizer_task /* occlude counter for visible pixels */ struct lp_jit_thread_data thread_data; - struct llvmpipe_query *query[PIPE_QUERY_TYPES]; uint64_t ps_invocations; pipe_semaphore work_ready; @@ -307,10 +306,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, * allocated 4x4 blocks hence need to filter them out here. 
*/ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { - if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) { - /* not very accurate would need a popcount on the mask */ - task->ps_invocations++; - } + /* not very accurate would need a popcount on the mask */ + /* always count this not worth bothering? */ + task->ps_invocations++; + /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); variant->jit_function[RAST_WHOLE]( &state->jit_context, diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 2d63c002ce2..16f69698ed0 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -129,6 +129,10 @@ struct lp_scene { struct pipe_context *pipe; struct lp_fence *fence; + /* The queries still active at end of scene */ + struct llvmpipe_query *active_queries[3]; + unsigned num_active_queries; + /* Framebuffer mappings - valid only between begin_rasterization() * and end_rasterization(). 
*/ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6b644460cd3..d2c53255259 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -155,6 +155,23 @@ lp_setup_rasterize_scene( struct lp_setup_context *setup ) struct lp_scene *scene = setup->scene; struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen); + scene->num_active_queries = 0; + if (setup->active_query[PIPE_QUERY_OCCLUSION_COUNTER]) { + scene->active_queries[scene->num_active_queries] = + setup->active_query[PIPE_QUERY_OCCLUSION_COUNTER]; + scene->num_active_queries++; + } + if (setup->active_query[PIPE_QUERY_OCCLUSION_PREDICATE]) { + scene->active_queries[scene->num_active_queries] = + setup->active_query[PIPE_QUERY_OCCLUSION_PREDICATE]; + scene->num_active_queries++; + } + if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) { + scene->active_queries[scene->num_active_queries] = + setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]; + scene->num_active_queries++; + } + lp_scene_end_binning(scene); lp_fence_reference(&setup->last_fence, scene->fence); @@ -181,7 +198,6 @@ begin_binning( struct lp_setup_context *setup ) struct lp_scene *scene = setup->scene; boolean need_zsload = FALSE; boolean ok; - unsigned i; assert(scene); assert(scene->fence == NULL); @@ -230,16 +246,6 @@ begin_binning( struct lp_setup_context *setup ) } } - for (i = 0; i < PIPE_QUERY_TYPES; ++i) { - if (setup->active_query[i]) { - ok = lp_scene_bin_everywhere( scene, - LP_RAST_OP_BEGIN_QUERY, - lp_rast_arg_query(setup->active_query[i]) ); - if (!ok) - return FALSE; - } - } - setup->clear.flags = 0; setup->clear.zsmask = 0; setup->clear.zsvalue = 0; @@ -1211,18 +1217,20 @@ void lp_setup_begin_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) { - /* init the query to its beginning state */ - assert(setup->active_query[pq->type] == NULL); set_scene_state(setup, SETUP_ACTIVE, "begin_query"); - 
setup->active_query[pq->type] = pq; + if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER || + pq->type == PIPE_QUERY_OCCLUSION_PREDICATE || + pq->type == PIPE_QUERY_PIPELINE_STATISTICS)) + return; - /* XXX: It is possible that a query is created before the scene - * has been created. This means that setup->scene == NULL resulting - * in the query not being binned and thus is ignored. - */ + /* init the query to its beginning state */ + assert(setup->active_query[pq->type] == NULL); + + setup->active_query[pq->type] = pq; + assert(setup->scene); if (setup->scene) { if (!lp_scene_bin_everywhere(setup->scene, LP_RAST_OP_BEGIN_QUERY, @@ -1249,31 +1257,46 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) { set_scene_state(setup, SETUP_ACTIVE, "end_query"); - if (pq->type != PIPE_QUERY_TIMESTAMP && pq->type != PIPE_QUERY_GPU_FINISHED) { + if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER || + pq->type == PIPE_QUERY_OCCLUSION_PREDICATE || + pq->type == PIPE_QUERY_PIPELINE_STATISTICS) { assert(setup->active_query[pq->type] == pq); - setup->active_query[pq->type] = NULL; } - /* Setup will automatically re-issue any query which carried over a - * scene boundary, and the rasterizer automatically "ends" queries - * which are active at the end of a scene, so there is no need to - * retry this commands on failure. - */ + assert(setup->scene); if (setup->scene) { /* pq->fence should be the fence of the *last* scene which * contributed to the query result. 
*/ lp_fence_reference(&pq->fence, setup->scene->fence); - if (!lp_scene_bin_everywhere(setup->scene, - LP_RAST_OP_END_QUERY, - lp_rast_arg_query(pq))) { - lp_setup_flush(setup, NULL, __FUNCTION__); + if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER || + pq->type == PIPE_QUERY_OCCLUSION_PREDICATE || + pq->type == PIPE_QUERY_PIPELINE_STATISTICS || + pq->type == PIPE_QUERY_TIMESTAMP) { + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_END_QUERY, + lp_rast_arg_query(pq))) { + if (!lp_setup_flush_and_restart(setup)) + goto fail; + + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_END_QUERY, + lp_rast_arg_query(pq))) { + goto fail; + } + } } } else { lp_fence_reference(&pq->fence, setup->last_fence); } + +fail: + /* Need to do this now not earlier since it still needs to be marked as + * active when binning it would cause a flush. + */ + setup->active_query[pq->type] = NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index bedd16beee1..62df5df555f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -215,6 +215,11 @@ lp_setup_whole_tile(struct lp_setup_context *setup, if (!scene->fb.zsbuf) { /* * All previous rendering will be overwritten so reset the bin. + * XXX This is wrong wrt to all queries arriving here (timestamp, + * occlusion, ps invocations). Not counting stuff might be ok but it + * will kill the begin/end query commands too which is definitely + * wrong (and at this point we don't even know if there were any + * such commands here). */ lp_scene_bin_reset( scene, tx, ty ); } -- 2.30.2