From 903d00cd32137161c0b57bcba95af51e47d91fa5 Mon Sep 17 00:00:00 2001 From: Bruce Cherniak Date: Tue, 27 Sep 2016 13:27:08 -0500 Subject: [PATCH] swr: Removed stalling SwrWaitForIdle from queries. Previous fundamental change in stats gathering added a temporary SwrWaitForIdle to begin_query and end_query. Code has been reworked to remove stall. Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/swr_context.cpp | 33 ++--- src/gallium/drivers/swr/swr_context.h | 11 +- src/gallium/drivers/swr/swr_query.cpp | 152 ++++++++++-------------- src/gallium/drivers/swr/swr_query.h | 10 +- 4 files changed, 87 insertions(+), 119 deletions(-) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 15e60cddf0a..cbc60e0aa01 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -24,6 +24,7 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" +#include "util/u_atomic.h" extern "C" { #include "util/u_transfer.h" @@ -352,9 +353,9 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats) if (!pDC) return; - struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx; + struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats; - SWR_STATS *pSwrStats = &ctx->stats; + SWR_STATS *pSwrStats = &pqr->core; pSwrStats->DepthPassCount += pStats->DepthPassCount; pSwrStats->PsInvocations += pStats->PsInvocations; @@ -369,22 +370,24 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats) if (!pDC) return; - struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx; + struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats; - SWR_STATS_FE *pSwrStats = &ctx->statsFE; - pSwrStats->IaVertices += pStats->IaVertices; - pSwrStats->IaPrimitives += pStats->IaPrimitives; - pSwrStats->VsInvocations += pStats->VsInvocations; - pSwrStats->HsInvocations += pStats->HsInvocations; - pSwrStats->DsInvocations += pStats->DsInvocations; - pSwrStats->GsInvocations += pStats->GsInvocations; - pSwrStats->CInvocations += pStats->CInvocations; - pSwrStats->CPrimitives += pStats->CPrimitives; - pSwrStats->GsPrimitives += pStats->GsPrimitives; + SWR_STATS_FE *pSwrStats = &pqr->coreFE; + p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices); + p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives); + p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations); + p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations); + p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations); + p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations); + p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations); + p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives); + p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives); for (unsigned i = 0; i < 4; i++) { - pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i]; - pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i]; + p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i], + pStats->SoPrimStorageNeeded[i]); + p_atomic_add(&pSwrStats->SoNumPrimsWritten[i], + pStats->SoNumPrimsWritten[i]); } } diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 6854d697db9..eecfe0d40de 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -92,7 +92,7 @@ struct swr_draw_context { float userClipPlanes[PIPE_MAX_CLIP_PLANES][4]; SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS]; - void *swr_ctx; + void *pStats; }; /* gen_llvm_types FINI */ @@ -159,9 +159,6 @@ struct swr_context { /* SWR private state - draw context */ struct swr_draw_context swrDC; - SWR_STATS stats; - SWR_STATS_FE statsFE; - unsigned dirty; /**< Mask of SWR_NEW_x flags */ }; @@ -172,11 +169,13 @@ swr_context(struct pipe_context *pipe) } static INLINE void -swr_update_draw_context(struct swr_context *ctx) +swr_update_draw_context(struct swr_context *ctx, + struct swr_query_result *pqr = nullptr) { swr_draw_context *pDC = (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); - ctx->swrDC.swr_ctx = ctx; + if (pqr) + ctx->swrDC.pStats = pqr; memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context)); } diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp index c51c529e5f3..8bb0b16f034 100644 --- a/src/gallium/drivers/swr/swr_query.cpp +++ b/src/gallium/drivers/swr/swr_query.cpp @@ -71,48 +71,6 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q) } -static void -swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq) -{ - struct swr_context *ctx = swr_context(pipe); - - assert(pq->result); - struct swr_query_result *result = pq->result; - boolean enable_stats = pq->enable_stats; - - /* A few results don't require the core, so don't involve it */ - switch (pq->type) { - case PIPE_QUERY_TIMESTAMP: - case PIPE_QUERY_TIME_ELAPSED: - result->timestamp = swr_get_timestamp(pipe->screen); - break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: - case PIPE_QUERY_GPU_FINISHED: - /* nothing to do here */ - break; - default: - /* TODO: should fence instead of stalling pipeline */ - SwrWaitForIdle(ctx->swrContext); - memcpy(&result->core, &ctx->stats, sizeof(result->core)); - memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE)); - -#if 0 - if (!pq->fence) { - struct swr_screen *screen = swr_screen(pipe->screen); - swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence); - } - swr_fence_submit(ctx, pq->fence); -#endif - - /* Only change stat collection if there are no active queries */ - if (ctx->active_queries == 0) - SwrEnableStats(ctx->swrContext, enable_stats); - - break; - } -} - - static boolean swr_get_query_result(struct pipe_context *pipe, struct pipe_query *q, @@ -120,8 +78,6 @@ swr_get_query_result(struct pipe_context *pipe, union pipe_query_result *result) { struct swr_query *pq = swr_query(q); - struct swr_query_result *start = &pq->start; - struct swr_query_result *end = &pq->end; unsigned index = pq->index; if (pq->fence) { @@ -132,40 +88,37 @@ swr_get_query_result(struct pipe_context *pipe, swr_fence_reference(pipe->screen, &pq->fence, NULL); } - /* XXX: Need to handle counter rollover */ - + /* All values are reset to 0 at swr_begin_query, except starting timestamp. + * Counters become simply end values. */ switch (pq->type) { /* Booleans */ case PIPE_QUERY_OCCLUSION_PREDICATE: - result->b = end->core.DepthPassCount != start->core.DepthPassCount; + result->b = pq->result.core.DepthPassCount != 0; break; case PIPE_QUERY_GPU_FINISHED: result->b = TRUE; break; /* Counters */ case PIPE_QUERY_OCCLUSION_COUNTER: - result->u64 = end->core.DepthPassCount - start->core.DepthPassCount; + result->u64 = pq->result.core.DepthPassCount; break; case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIME_ELAPSED: - result->u64 = end->timestamp - start->timestamp; + result->u64 = pq->result.timestamp_end - pq->result.timestamp_start; break; case PIPE_QUERY_PRIMITIVES_GENERATED: - result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives; + result->u64 = pq->result.coreFE.IaPrimitives; break; case PIPE_QUERY_PRIMITIVES_EMITTED: - result->u64 = end->coreFE.SoNumPrimsWritten[index] - - start->coreFE.SoNumPrimsWritten[index]; + result->u64 = pq->result.coreFE.SoNumPrimsWritten[index]; break; /* Structures */ case PIPE_QUERY_SO_STATISTICS: { struct pipe_query_data_so_statistics *so_stats = &result->so_statistics; - struct SWR_STATS_FE *start = &pq->start.coreFE; - struct SWR_STATS_FE *end = &pq->end.coreFE; so_stats->num_primitives_written = - end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index]; + pq->result.coreFE.SoNumPrimsWritten[index]; so_stats->primitives_storage_needed = - end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index]; + pq->result.coreFE.SoPrimStorageNeeded[index]; } break; case PIPE_QUERY_TIMESTAMP_DISJOINT: /* os_get_time_nano returns nanoseconds */ @@ -175,29 +128,23 @@ swr_get_query_result(struct pipe_context *pipe, case PIPE_QUERY_PIPELINE_STATISTICS: { struct pipe_query_data_pipeline_statistics *p_stats = &result->pipeline_statistics; - struct SWR_STATS *start = &pq->start.core; - struct SWR_STATS *end = &pq->end.core; - struct SWR_STATS_FE *startFE = &pq->start.coreFE; - struct SWR_STATS_FE *endFE = &pq->end.coreFE; - p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices; - p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives; - p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations; - p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations; - p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives; - p_stats->c_invocations = endFE->CPrimitives - startFE->CPrimitives; - p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives; - p_stats->ps_invocations = end->PsInvocations - start->PsInvocations; - p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations; - p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations; - p_stats->cs_invocations = end->CsInvocations - start->CsInvocations; + p_stats->ia_vertices = pq->result.coreFE.IaVertices; + p_stats->ia_primitives = pq->result.coreFE.IaPrimitives; + p_stats->vs_invocations = pq->result.coreFE.VsInvocations; + p_stats->gs_invocations = pq->result.coreFE.GsInvocations; + p_stats->gs_primitives = pq->result.coreFE.GsPrimitives; + p_stats->c_invocations = pq->result.coreFE.CPrimitives; + p_stats->c_primitives = pq->result.coreFE.CPrimitives; + p_stats->ps_invocations = pq->result.core.PsInvocations; + p_stats->hs_invocations = pq->result.coreFE.HsInvocations; + p_stats->ds_invocations = pq->result.coreFE.DsInvocations; + p_stats->cs_invocations = pq->result.core.CsInvocations; } break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { - struct SWR_STATS_FE *start = &pq->start.coreFE; - struct SWR_STATS_FE *end = &pq->end.coreFE; uint64_t num_primitives_written = - end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index]; + pq->result.coreFE.SoNumPrimsWritten[index]; uint64_t primitives_storage_needed = - end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index]; + pq->result.coreFE.SoPrimStorageNeeded[index]; result->b = num_primitives_written > primitives_storage_needed; } break; @@ -215,21 +162,27 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q) struct swr_context *ctx = swr_context(pipe); struct swr_query *pq = swr_query(q); - assert(!pq->enable_stats && "swr_begin_query: Query is already active!"); - /* Initialize Results */ - memset(&pq->start, 0, sizeof(pq->start)); - memset(&pq->end, 0, sizeof(pq->end)); + memset(&pq->result, 0, sizeof(pq->result)); + switch (pq->type) { + case PIPE_QUERY_TIMESTAMP: + /* nothing to do */ + break; + case PIPE_QUERY_TIME_ELAPSED: + pq->result.timestamp_start = swr_get_timestamp(pipe->screen); + break; + default: + /* Core counters required. Update draw context with location to + * store results. */ + swr_update_draw_context(ctx, &pq->result); - /* Gather start stats and enable SwrCore counters */ - pq->result = &pq->start; - pq->enable_stats = TRUE; - swr_gather_stats(pipe, pq); - ctx->active_queries++; + /* Only change stat collection if there are no active queries */ + if (ctx->active_queries == 0) + SwrEnableStats(ctx->swrContext, TRUE); + break; + } - /* override start timestamp to 0 for TIMESTAMP query */ - if (pq->type == PIPE_QUERY_TIMESTAMP) - pq->start.timestamp = 0; + ctx->active_queries++; return true; } @@ -244,10 +197,27 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query *q) && "swr_end_query, there are no active queries!"); ctx->active_queries--; - /* Gather end stats and disable SwrCore counters */ - pq->result = &pq->end; - pq->enable_stats = FALSE; - swr_gather_stats(pipe, pq); + switch (pq->type) { + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + pq->result.timestamp_end = swr_get_timestamp(pipe->screen); + break; + default: + /* Stats are updated asynchronously, a fence is used to signal + * completion. */ + if (!pq->fence) { + struct swr_screen *screen = swr_screen(pipe->screen); + swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence); + } + swr_fence_submit(ctx, pq->fence); + + /* Only change stat collection if there are no active queries */ + if (ctx->active_queries == 0) + SwrEnableStats(ctx->swrContext, FALSE); + + break; + } + return true; } diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h index 931d687b005..c5160ceb453 100644 --- a/src/gallium/drivers/swr/swr_query.h +++ b/src/gallium/drivers/swr/swr_query.h @@ -30,20 +30,16 @@ struct swr_query_result { SWR_STATS core; SWR_STATS_FE coreFE; - uint64_t timestamp; + uint64_t timestamp_start; + uint64_t timestamp_end; }; struct swr_query { unsigned type; /* PIPE_QUERY_* */ unsigned index; - struct swr_query_result *result; - struct swr_query_result start; - struct swr_query_result end; - + struct swr_query_result result; struct pipe_fence_handle *fence; - - boolean enable_stats; }; extern void swr_query_init(struct pipe_context *pipe); -- 2.30.2