Add a per-draw stats callback to update driver stats.
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
pContext->pfnClearTile = pCreateInfo->pfnClearTile;
pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
+ pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
// pass pointer to bucket manager back to caller
#ifdef KNOB_ENABLE_RDTSC
HANDLE hContext,
SWR_STATS* pStats)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- pDC->FeWork.type = QUERYSTATS;
- pDC->FeWork.pfnWork = ProcessQueryStats;
- pDC->FeWork.desc.queryStats.pStats = pStats;
-
- // cannot execute until all previous draws have completed
- pDC->dependent = true;
-
- //enqueue
- QueueDraw(pContext);
+ SWR_ASSERT(0);
}
//////////////////////////////////////////////////////////////////////////
typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
uint32_t soBufferSlot, uint32_t soWriteOffset);
+//////////////////////////////////////////////////////////////////////////
+/// @brief Callback that lets the driver update its own copy of the stats.
+/// @param hPrivateContext - handle to private data
+/// @param pStats - pointer to draw stats; const, so read-only for the callee
+typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
+ const SWR_STATS* pStats);
+
class BucketManager;
//////////////////////////////////////////////////////////////////////////
PFN_STORE_TILE pfnStoreTile;
PFN_CLEAR_TILE pfnClearTile;
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
// Pointer to rdtsc buckets mgr returned to the caller.
// Only populated when KNOB_ENABLE_RDTSC is set
///@todo Currently assumes only a single FE can do stream output for a draw.
uint32_t SoWriteOffset[4];
bool SoWriteOffsetDirty[4];
+
+ SWR_STATS stats[KNOB_MAX_NUM_THREADS];
};
// Draw Context
PFN_STORE_TILE pfnStoreTile;
PFN_CLEAR_TILE pfnClearTile;
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
// Global Stats
SWR_STATS stats[KNOB_MAX_NUM_THREADS];
void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
void WakeAllThreads(SWR_CONTEXT *pContext);
-#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += count; }
+#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; } // expands to a bare if; reads pDC and workerId from the scope where it is expanded
return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
}
+//////////////////////////////////////////////////////////////////////////
+/// @brief Report a draw's stats to the client through pfnUpdateStats.
+///        Does nothing when no callback is registered or when stats are
+///        disabled in the draw's API state.
+/// @param pContext - context holding the callback and the worker count
+/// @param pDC - draw context whose dynState carries the per-worker stats
+INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+{
+    // No registered callback: nobody to report to.
+    if (pContext->pfnUpdateStats == nullptr)
+    {
+        return;
+    }
+
+    // Stats collection is switched off for this draw.
+    if (!GetApiState(pDC).enableStats)
+    {
+        return;
+    }
+
+    SWR_STATS total{ 0 };
+
+    // Each worker accumulates into its own slot; fold the slots into one
+    // total before handing it to the client.
+    for (uint32_t worker = 0; worker < pContext->NumWorkerThreads; ++worker)
+    {
+        const auto& perWorker = pDC->dynState.stats[worker];
+
+        total.DepthPassCount += perWorker.DepthPassCount;
+        total.IaVertices     += perWorker.IaVertices;
+        total.IaPrimitives   += perWorker.IaPrimitives;
+        total.VsInvocations  += perWorker.VsInvocations;
+        total.HsInvocations  += perWorker.HsInvocations;
+        total.DsInvocations  += perWorker.DsInvocations;
+        total.GsInvocations  += perWorker.GsInvocations;
+        total.PsInvocations  += perWorker.PsInvocations;
+        total.CInvocations   += perWorker.CInvocations;
+        total.CsInvocations  += perWorker.CsInvocations;
+        total.CPrimitives    += perWorker.CPrimitives;
+        total.GsPrimitives   += perWorker.GsPrimitives;
+
+        for (uint32_t so = 0; so < MAX_SO_STREAMS; ++so)
+        {
+            total.SoPrimStorageNeeded[so] += perWorker.SoPrimStorageNeeded[so];
+            total.SoNumPrimsWritten[so]   += perWorker.SoNumPrimsWritten[so];
+        }
+    }
+
+    pContext->pfnUpdateStats(GetPrivateState(pDC), &total);
+}
+
INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
{
+ UpdateClientStats(pContext, pDC);
+
if (pDC->retireCallback.pfnCallbackFunc)
{
pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
ctx->render_cond_cond = condition;
}
+/*
+ * Stats callback: adds the counters in pStats to the running totals kept in
+ * the swr_context that the private draw context points back to.  Returns
+ * without doing anything when the private draw context is null.
+ */
+static void
+swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
+{
+   swr_draw_context *draw = (swr_draw_context *)hPrivateContext;
+   if (!draw)
+      return;
+
+   struct swr_context *owner = (struct swr_context *)draw->swr_ctx;
+   SWR_STATS &totals = owner->stats;
+
+   /* Input assembler and depth test. */
+   totals.IaVertices += pStats->IaVertices;
+   totals.IaPrimitives += pStats->IaPrimitives;
+   totals.DepthPassCount += pStats->DepthPassCount;
+
+   /* Shader stage invocations. */
+   totals.VsInvocations += pStats->VsInvocations;
+   totals.HsInvocations += pStats->HsInvocations;
+   totals.DsInvocations += pStats->DsInvocations;
+   totals.GsInvocations += pStats->GsInvocations;
+   totals.PsInvocations += pStats->PsInvocations;
+   totals.CsInvocations += pStats->CsInvocations;
+
+   /* Clipper and geometry shader primitive counts. */
+   totals.CInvocations += pStats->CInvocations;
+   totals.CPrimitives += pStats->CPrimitives;
+   totals.GsPrimitives += pStats->GsPrimitives;
+
+   /* Per-stream stream-output counters. */
+   for (unsigned so = 0; so < 4; so++) {
+      totals.SoPrimStorageNeeded[so] += pStats->SoPrimStorageNeeded[so];
+      totals.SoNumPrimsWritten[so] += pStats->SoNumPrimsWritten[so];
+   }
+}
+
struct pipe_context *
swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
{
createInfo.pfnLoadTile = swr_LoadHotTile;
createInfo.pfnStoreTile = swr_StoreHotTile;
createInfo.pfnClearTile = swr_StoreHotTileClear;
+ createInfo.pfnUpdateStats = swr_UpdateStats;
ctx->swrContext = SwrCreateContext(&createInfo);
/* Init Load/Store/ClearTiles Tables */
float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
+ void *swr_ctx;
};
/* gen_llvm_types FINI */
/* SWR private state - draw context */
struct swr_draw_context swrDC;
+ SWR_STATS stats;
+
unsigned dirty; /**< Mask of SWR_NEW_x flags */
};
{
swr_draw_context *pDC =
(swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
+ ctx->swrDC.swr_ctx = ctx;
memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));
}
/* nothing to do here */
break;
default:
- /*
- * All other results are collected from SwrCore counters via
- * SwrGetStats. This returns immediately, but results are later filled
- * in by the backend. Fence status is the only indication of
- * completion. */
- SwrGetStats(ctx->swrContext, &result->core);
+ /* TODO: should fence instead of stalling pipeline */
+ SwrWaitForIdle(ctx->swrContext);
+ memcpy(&result->core, &ctx->stats, sizeof(result->core));
+#if 0
if (!pq->fence) {
struct swr_screen *screen = swr_screen(pipe->screen);
swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
}
swr_fence_submit(ctx, pq->fence);
+#endif
/* Only change stat collection if there are no active queries */
if (ctx->active_queries == 0)