swr: [rasterizer core] fundamentally change how stats work
author Tim Rowley <timothy.o.rowley@intel.com>
Fri, 5 Aug 2016 17:17:25 +0000 (11:17 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Wed, 10 Aug 2016 16:08:23 +0000 (11:08 -0500)
Add a per-draw stats callback to update driver stats.

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/threads.cpp
src/gallium/drivers/swr/swr_context.cpp
src/gallium/drivers/swr/swr_context.h
src/gallium/drivers/swr/swr_query.cpp

index 3922606b3a69d04605de00424aa48d11d4ed91f2..5ebefabb75382df5207915135460efe02190237a 100644 (file)
@@ -143,6 +143,7 @@ HANDLE SwrCreateContext(
     pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
     pContext->pfnClearTile = pCreateInfo->pfnClearTile;
     pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
+    pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
 
     // pass pointer to bucket manager back to caller
 #ifdef KNOB_ENABLE_RDTSC
@@ -1519,18 +1520,7 @@ void SwrGetStats(
     HANDLE hContext,
     SWR_STATS* pStats)
 {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
-    pDC->FeWork.type = QUERYSTATS;
-    pDC->FeWork.pfnWork = ProcessQueryStats;
-    pDC->FeWork.desc.queryStats.pStats = pStats;
-
-    // cannot execute until all previous draws have completed
-    pDC->dependent = true;
-
-    //enqueue
-    QueueDraw(pContext);
+    SWR_ASSERT(0);
 }
 
 //////////////////////////////////////////////////////////////////////////
index d7621d548772c6c58cdc418ce48b05903af2968b..9c80526b1e523a444d1f553b3987fab94461cb30 100644 (file)
@@ -88,6 +88,13 @@ typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext,
 typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
     uint32_t soBufferSlot, uint32_t soWriteOffset);
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Callback letting the driver fold a draw's stats into its own copy.
+/// @param hPrivateContext - handle to the draw's private data
+/// @param pStats - per-draw stats summed over workers; valid only during call
+typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
+    const SWR_STATS* pStats);
+
 class BucketManager;
 
 //////////////////////////////////////////////////////////////////////////
@@ -118,6 +125,7 @@ struct SWR_CREATECONTEXT_INFO
     PFN_STORE_TILE pfnStoreTile;
     PFN_CLEAR_TILE pfnClearTile;
     PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+    PFN_UPDATE_STATS pfnUpdateStats;
 
     // Pointer to rdtsc buckets mgr returned to the caller.
     // Only populated when KNOB_ENABLE_RDTSC is set
index 47fea162acc6a7a48cc3b790fcf067949bf6c979..b38ec46ae285c41344dbfc9aaf6bdf624b6181b6 100644 (file)
@@ -372,6 +372,8 @@ struct DRAW_DYNAMIC_STATE
     ///@todo Currently assumes only a single FE can do stream output for a draw.
     uint32_t SoWriteOffset[4];
     bool     SoWriteOffsetDirty[4];
+
+    SWR_STATS stats[KNOB_MAX_NUM_THREADS];
 };
 
 // Draw Context
@@ -480,6 +482,7 @@ struct SWR_CONTEXT
     PFN_STORE_TILE pfnStoreTile;
     PFN_CLEAR_TILE pfnClearTile;
     PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+    PFN_UPDATE_STATS pfnUpdateStats;
 
     // Global Stats
     SWR_STATS stats[KNOB_MAX_NUM_THREADS];
@@ -496,4 +499,4 @@ struct SWR_CONTEXT
 void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
 void WakeAllThreads(SWR_CONTEXT *pContext);
 
-#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += count; }
+#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
index 434c8cb96ee64ce1619a31745dc531eed90dc6b0..fb17af1520318fb5b7c83ddfd7ca0842ad0f685a 100644 (file)
@@ -306,8 +306,48 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastReti
     return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
 }
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Update client stats.
+INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+{
+    if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStats == false))
+    {
+        return;
+    }
+
+    DRAW_DYNAMIC_STATE& dynState = pDC->dynState;
+    SWR_STATS stats{ 0 };
+
+    // Sum up stats across all workers before sending to client.
+    for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
+    {
+        stats.DepthPassCount += dynState.stats[i].DepthPassCount;
+        stats.IaVertices     += dynState.stats[i].IaVertices;
+        stats.IaPrimitives   += dynState.stats[i].IaPrimitives;
+        stats.VsInvocations  += dynState.stats[i].VsInvocations;
+        stats.HsInvocations  += dynState.stats[i].HsInvocations;
+        stats.DsInvocations  += dynState.stats[i].DsInvocations;
+        stats.GsInvocations  += dynState.stats[i].GsInvocations;
+        stats.PsInvocations  += dynState.stats[i].PsInvocations;
+        stats.CInvocations   += dynState.stats[i].CInvocations;
+        stats.CsInvocations  += dynState.stats[i].CsInvocations;
+        stats.CPrimitives    += dynState.stats[i].CPrimitives;
+        stats.GsPrimitives   += dynState.stats[i].GsPrimitives;
+
+        for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
+        {
+            stats.SoPrimStorageNeeded[stream] += dynState.stats[i].SoPrimStorageNeeded[stream];
+            stats.SoNumPrimsWritten[stream]   += dynState.stats[i].SoNumPrimsWritten[stream];
+        }
+    }
+
+    pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
+}
+
 INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
 {
+    UpdateClientStats(pContext, pDC);
+
     if (pDC->retireCallback.pfnCallbackFunc)
     {
         pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
index c8d5cd60d73eb8554521e16228729f72337e2234..53d2b93089b4f0ef013f84f9b1e3f68456dba3b3 100644 (file)
@@ -344,6 +344,36 @@ swr_render_condition(struct pipe_context *pipe,
    ctx->render_cond_cond = condition;
 }
 
+static void
+swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
+{
+   swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
+
+   if (!pDC)
+      return;
+
+   struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+
+   SWR_STATS *pSwrStats = &ctx->stats;
+   pSwrStats->DepthPassCount += pStats->DepthPassCount;
+   pSwrStats->IaVertices += pStats->IaVertices;
+   pSwrStats->IaPrimitives += pStats->IaPrimitives;
+   pSwrStats->VsInvocations += pStats->VsInvocations;
+   pSwrStats->HsInvocations += pStats->HsInvocations;
+   pSwrStats->DsInvocations += pStats->DsInvocations;
+   pSwrStats->GsInvocations += pStats->GsInvocations;
+   pSwrStats->PsInvocations += pStats->PsInvocations;
+   pSwrStats->CsInvocations += pStats->CsInvocations;
+   pSwrStats->CInvocations += pStats->CInvocations;
+   pSwrStats->CPrimitives += pStats->CPrimitives;
+   pSwrStats->GsPrimitives += pStats->GsPrimitives;
+
+   for (unsigned i = 0; i < 4; i++) {
+      pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i];
+      pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i];
+   }
+}
+
 struct pipe_context *
 swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
 {
@@ -358,6 +388,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
    createInfo.pfnLoadTile = swr_LoadHotTile;
    createInfo.pfnStoreTile = swr_StoreHotTile;
    createInfo.pfnClearTile = swr_StoreHotTileClear;
+   createInfo.pfnUpdateStats = swr_UpdateStats;
    ctx->swrContext = SwrCreateContext(&createInfo);
 
    /* Init Load/Store/ClearTiles Tables */
index 75ecae334d57624ea1577d9b3ee96c8e952245a2..4133720cbf03c12dcc7c96251f0050111ce7fc60 100644 (file)
@@ -92,6 +92,7 @@ struct swr_draw_context {
    float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
 
    SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
+   void *swr_ctx;
 };
 
 /* gen_llvm_types FINI */
@@ -157,6 +158,8 @@ struct swr_context {
    /* SWR private state - draw context */
    struct swr_draw_context swrDC;
 
+   SWR_STATS stats;
+
    unsigned dirty; /**< Mask of SWR_NEW_x flags */
 };
 
@@ -171,6 +174,7 @@ swr_update_draw_context(struct swr_context *ctx)
 {
    swr_draw_context *pDC =
       (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
+   ctx->swrDC.swr_ctx = ctx;
    memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));
 }
 
index 5b8f059cdae385f658bc7ecdb5215f9c2831df60..35d0e53fb2375c5d62e8823f39a8be7ba8ecf889 100644 (file)
@@ -91,18 +91,17 @@ swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
       /* nothing to do here */
       break;
    default:
-      /*
-       * All other results are collected from SwrCore counters via
-       * SwrGetStats. This returns immediately, but results are later filled
-       * in by the backend.  Fence status is the only indication of
-       * completion.  */
-      SwrGetStats(ctx->swrContext, &result->core);
+      /* TODO: should fence instead of stalling pipeline */
+      SwrWaitForIdle(ctx->swrContext);
+      memcpy(&result->core, &ctx->stats, sizeof(result->core));
 
+#if 0
       if (!pq->fence) {
          struct swr_screen *screen = swr_screen(pipe->screen);
          swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
       }
       swr_fence_submit(ctx, pq->fence);
+#endif
 
       /* Only change stat collection if there are no active queries */
       if (ctx->active_queries == 0)