swr: [rasterizer archrast] add support for stats files
author Tim Rowley <timothy.o.rowley@intel.com>
Mon, 10 Oct 2016 16:41:33 +0000 (11:41 -0500)
committer Tim Rowley <timothy.o.rowley@intel.com>
Tue, 11 Oct 2016 16:48:23 +0000 (11:48 -0500)
Only stat and counter events are saved to the event files.

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/threads.cpp
src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandlerfile_h.template

index b1c5134caf18067a5a9718db1f25c57a6b2470b9..c29bb884588f31bc02860da7379be71bdc5cccae 100644 (file)
 
 namespace ArchRast
 {
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief File event handler used for stats event files. Derives from
+    ///        EventHandlerFile and defines empty Start/End handlers (below).
+    class EventHandlerStatsFile : public EventHandlerFile
+    {
+    public:
+        EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
+
+        // Start and End events are not wanted in stats event files; the empty bodies leave them unsaved.
+        virtual void handle(Start& event) {}
+        virtual void handle(End& event) {}
+    };
+
     static EventManager* FromHandle(HANDLE hThreadContext)
     {
         return reinterpret_cast<EventManager*>(hThreadContext);
@@ -47,7 +60,7 @@ namespace ArchRast
         uint32_t id = counter.fetch_add(1);
 
         EventManager* pManager = new EventManager();
-        EventHandler* pHandler = new EventHandlerFile(id);
+        EventHandler* pHandler = new EventHandlerStatsFile(id);
 
         if (pManager && pHandler)
         {
index cb0098d3db385aca582495fa4cfe8497afd9ba73..119dbdee437c88120956e2a337d24c3d20d675c5 100644 (file)
@@ -1105,6 +1105,7 @@ void DrawInstanced(
     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
 
     AR_API_BEGIN(APIDraw, pDC->drawId);
+    AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance));
 
     uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
     uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
@@ -1230,7 +1231,7 @@ void DrawIndexedInstance(
     API_STATE* pState = &pDC->pState->state;
 
     AR_API_BEGIN(APIDrawIndexed, pDC->drawId);
-    AR_API_EVENT(DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
+    AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
 
     uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
     uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
@@ -1436,7 +1437,7 @@ void SwrDispatch(
     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
 
     AR_API_BEGIN(APIDispatch, pDC->drawId);
-
+    AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
     pDC->isCompute = true;      // This is a compute context.
 
     COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
@@ -1593,7 +1594,11 @@ void SwrEnableStats(
 void SWR_API SwrEndFrame(
     HANDLE hContext)
 {
-    RDTSC_ENDFRAME();
     SWR_CONTEXT *pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+
+    RDTSC_ENDFRAME();
+    AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId));
+
     pContext->frameCount++;
 }
index 08a4d17821c22d6016ccc1b2051eb4448ab2eb36..6e7495cdc50f14908d4b3542075f5ed58c869286 100644 (file)
@@ -315,7 +315,7 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastReti
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief Update client stats.
-INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
 {
     if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStats == false))
     {
@@ -334,12 +334,13 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
         stats.CsInvocations  += dynState.pStats[i].CsInvocations;
     }
 
+
     pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
 }
 
-INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
 {
-    UpdateClientStats(pContext, pDC);
+    UpdateClientStats(pContext, workerId, pDC);
 
     if (pDC->retireCallback.pfnCallbackFunc)
     {
@@ -350,14 +351,14 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
 }
 
 // inlined-only version
-INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
 {
     int32_t result = InterlockedDecrement((volatile LONG*)&pDC->threadsDone);
     SWR_ASSERT(result >= 0);
 
     if (result == 0)
     {
-        ExecuteCallbacks(pContext, pDC);
+        ExecuteCallbacks(pContext, workerId, pDC);
 
         // Cleanup memory allocations
         pDC->pArena->Reset(true);
@@ -381,10 +382,10 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
 // available to other translation modules
 int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
 {
-    return CompleteDrawContextInl(pContext, pDC);
+    return CompleteDrawContextInl(pContext, 0, pDC);
 }
 
-INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE, uint32_t& drawEnqueued)
+INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE, uint32_t& drawEnqueued)
 {
     // increment our current draw id to the first incomplete draw
     drawEnqueued = GetEnqueuedDraw(pContext);
@@ -402,7 +403,7 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t& curDrawBE,
         if (isWorkComplete)
         {
             curDrawBE++;
-            CompleteDrawContextInl(pContext, pDC);
+            CompleteDrawContextInl(pContext, workerId, pDC);
         }
         else
         {
@@ -442,7 +443,7 @@ bool WorkOnFifoBE(
     // Find the first incomplete draw that has pending work. If no such draw is found then
     // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
     uint32_t drawEnqueued = 0;
-    if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
+    if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
     {
         return false;
     }
@@ -537,7 +538,7 @@ bool WorkOnFifoBE(
                 {
                     // We can increment the current BE and safely move to next draw since we know this draw is complete.
                     curDrawBE++;
-                    CompleteDrawContextInl(pContext, pDC);
+                    CompleteDrawContextInl(pContext, workerId, pDC);
 
                     lastRetiredDraw++;
 
@@ -563,11 +564,20 @@ bool WorkOnFifoBE(
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief Called when FE work is complete for this DC.
-INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
+INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
 {
     if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats)
     {
-        pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &pDC->dynState.statsFE);
+        SWR_STATS_FE& stats = pDC->dynState.statsFE;
+
+        AR_EVENT(FrontendStatsEvent(
+            stats.IaVertices, stats.IaPrimitives, stats.VsInvocations, stats.HsInvocations,
+            stats.DsInvocations, stats.GsInvocations, stats.GsPrimitives, stats.CInvocations, stats.CPrimitives,
+            stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3],
+            stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3]
+        ));
+
+        pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
     }
 
     if (pContext->pfnUpdateSoWriteOffset)
@@ -598,7 +608,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
         DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
         if (pDC->isCompute || pDC->doneFE || pDC->FeLock)
         {
-            CompleteDrawContextInl(pContext, pDC);
+            CompleteDrawContextInl(pContext, workerId, pDC);
             curDrawFE++;
         }
         else
@@ -621,7 +631,7 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
                 // successfully grabbed the DC, now run the FE
                 pDC->FeWork.pfnWork(pContext, pDC, workerId, &pDC->FeWork.desc);
 
-                CompleteDrawFE(pContext, pDC);
+                CompleteDrawFE(pContext, workerId, pDC);
             }
         }
         curDraw++;
@@ -641,7 +651,7 @@ void WorkOnCompute(
     uint32_t& curDrawBE)
 {
     uint32_t drawEnqueued = 0;
-    if (FindFirstIncompleteDraw(pContext, curDrawBE, drawEnqueued) == false)
+    if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
     {
         return;
     }
index 6264442b0899aac1b4d669212c9107590b6cad73..2e3b5c32464d75c8ef89177a0177189593e3272d 100644 (file)
@@ -32,6 +32,7 @@
 #include "common/os.h"
 #include "${event_header}"
 #include <fstream>
+#include <sstream>
 
 namespace ArchRast
 {
@@ -44,11 +45,19 @@ namespace ArchRast
         EventHandlerFile(uint32_t id)
         {
 #if defined(_WIN32)
+            DWORD pid = GetCurrentProcessId();
+            TCHAR procname[MAX_PATH];
+            GetModuleFileName(NULL, procname, MAX_PATH);
+            const char* pBaseName = strrchr(procname, '\\');
+            std::stringstream outDir;
+            outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
+            CreateDirectory(outDir.str().c_str(), NULL);
+
             char buf[255];
             // There could be multiple threads creating thread pools. We
             // want to make sure they are uniquly identified by adding in
             // the creator's thread id into the filename.
-            sprintf(buf, "\\ar_event%d_%d.bin", GetCurrentThreadId(), id);
+            sprintf(buf, "%s\\ar_event%d_%d.bin", outDir.str().c_str(), GetCurrentThreadId(), id);
             mFilename = std::string(buf);
 #else
             SWR_ASSERT(0);