swr/rast: Add shader stats infrastructure (WIP)
author George Kyriazis <george.kyriazis@intel.com>
Fri, 6 Apr 2018 20:48:00 +0000 (15:48 -0500)
committer George Kyriazis <george.kyriazis@intel.com>
Wed, 18 Apr 2018 15:51:38 +0000 (10:51 -0500)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/archrast/events.proto
src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
src/gallium/drivers/swr/rasterizer/jitter/builder.h

index 21846737b56c1c5405951732298f52062a34b588..871db793de9b342e81e2b5fa00b02d656799ce13 100644 (file)
@@ -61,7 +61,7 @@ namespace ArchRast
         //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
     };
 
-    struct GSInfo
+    struct GSStateInfo
     {
         uint32_t inputPrimCount;
         uint32_t primGeneratedCount;
@@ -155,7 +155,7 @@ namespace ArchRast
             mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
 
             //earlyZ test single and multi sample
-            mDSCombined.earlyZTestPassCount  += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
             mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
 
             //earlyStencil test single and multi sample
@@ -257,11 +257,51 @@ namespace ArchRast
             mClipper.trivialAcceptCount += _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
         }
 
+        struct ShaderStats // Per-shader-stage counters; one instance per entry of mShaderStats[NUM_SHADER_TYPES].
+        {
+            uint32_t numInstExecuted = 0; // Running instruction count; defaults to 0 because mShaderStats, unlike the other stats members, is declared without "= {}" and is incremented with += before its first reset.
+        };
+
+        virtual void Handle(const VSStats& event) // Adds the instruction count carried by a VSStats event to the vertex-shader accumulator.
+        {
+            mShaderStats[SHADER_VERTEX].numInstExecuted += event.data.numInstExecuted; // mNeedFlush is not touched: this total is emitted as VSInfo and cleared by the handler that emits GSPrimsGen/GSVertsInput, not by FlushDraw.
+        }
+
+        virtual void Handle(const GSStats& event) // Adds the instruction count carried by a GSStats event to the geometry-shader accumulator.
+        {
+            mShaderStats[SHADER_GEOMETRY].numInstExecuted += event.data.numInstExecuted; // mNeedFlush is not touched: this total is emitted as GSInfo and cleared by the handler that emits GSPrimsGen/GSVertsInput, not by FlushDraw.
+        }
+
+        virtual void Handle(const DSStats& event) // Adds the instruction count carried by a DSStats event to the domain-shader accumulator.
+        {
+            mShaderStats[SHADER_DOMAIN].numInstExecuted += event.data.numInstExecuted; // mNeedFlush is not touched: this total is emitted as DSInfo and cleared by the handler that emits GSPrimsGen/GSVertsInput, not by FlushDraw.
+        }
+
+        virtual void Handle(const HSStats& event) // Adds the instruction count carried by an HSStats event to the hull-shader accumulator.
+        {
+            mShaderStats[SHADER_HULL].numInstExecuted += event.data.numInstExecuted; // mNeedFlush is not touched: this total is emitted as HSInfo and cleared by the handler that emits GSPrimsGen/GSVertsInput, not by FlushDraw.
+        }
+
+        virtual void Handle(const PSStats& event) // Adds the instruction count carried by a PSStats event to the pixel-shader accumulator.
+        {
+            mShaderStats[SHADER_PIXEL].numInstExecuted += event.data.numInstExecuted; // Emitted as PSInfo and cleared in FlushDraw.
+            mNeedFlush = true; // FlushDraw returns immediately while this is false, so mark that there is cached data to write.
+        }
+
+        virtual void Handle(const CSStats& event) // Adds the instruction count carried by a CSStats event to the compute-shader accumulator.
+        {
+            mShaderStats[SHADER_COMPUTE].numInstExecuted += event.data.numInstExecuted; // Emitted as CSInfo and cleared in FlushDraw.
+            mNeedFlush = true; // FlushDraw returns immediately while this is false, so mark that there is cached data to write.
+        }
+
         // Flush cached events for this draw
         virtual void FlushDraw(uint32_t drawId)
         {
             if (mNeedFlush == false) return;
 
+            EventHandlerFile::Handle(PSInfo(drawId, mShaderStats[SHADER_PIXEL].numInstExecuted));
+            EventHandlerFile::Handle(CSInfo(drawId, mShaderStats[SHADER_COMPUTE].numInstExecuted));
+
             //singleSample
             EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
             EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
@@ -297,7 +337,7 @@ namespace ArchRast
 
             // Primitive Culling
             EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
-            
+
             mDSSingleSample = {};
             mDSSampleRate = {};
             mDSCombined = {};
@@ -307,6 +347,10 @@ namespace ArchRast
             rastStats = {};
             mCullStats = {};
             mAlphaStats = {};
+
+            mShaderStats[SHADER_PIXEL] = {};
+            mShaderStats[SHADER_COMPUTE] = {};
+
             mNeedFlush = false;
         }
 
@@ -323,6 +367,16 @@ namespace ArchRast
             EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
             EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
 
+            EventHandlerFile::Handle(VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
+            EventHandlerFile::Handle(HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
+            EventHandlerFile::Handle(DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
+            EventHandlerFile::Handle(GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
+
+            mShaderStats[SHADER_VERTEX] = {};
+            mShaderStats[SHADER_HULL] = {};
+            mShaderStats[SHADER_DOMAIN] = {};
+            mShaderStats[SHADER_GEOMETRY] = {};
+
             //Reset Internal Counters
             mClipper = {};
             mTS = {};
@@ -369,11 +423,13 @@ namespace ArchRast
         DepthStencilStats mDSOmZ = {};
         CStats mClipper = {};
         TEStats mTS = {};
-        GSInfo mGS = {};
+        GSStateInfo mGS = {};
         RastStats rastStats = {};
         CullStats mCullStats = {};
         AlphaStats mAlphaStats = {};
 
+        ShaderStats mShaderStats[NUM_SHADER_TYPES];
+
     };
 
     static EventManager* FromHandle(HANDLE hThreadContext)
index f924b57dac428bb7dd76c327761309fbe6927b92..32bd81f895d1ca4c13ed2418f5432bce17840109 100644 (file)
@@ -115,36 +115,6 @@ event FrontendStatsEvent
     uint64_t SoNumPrimsWritten3;
 };
 
-event VSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event HSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event DSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event GSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event PSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event CSStats
-{
-    uint32_t numInstExecuted;
-};
-
 event BackendStatsEvent
 {
     uint32_t drawId;
@@ -351,3 +321,38 @@ event AlphaEvent
     uint32_t alphaBlendCount;
 };
 
+event VSInfo
+{
+    uint32_t drawId; // Draw id supplied by the emitting stats handler.
+    uint32_t numInstExecuted; // Vertex-shader instruction total accumulated from VSStats events before this event is emitted.
+};
+
+event HSInfo
+{
+    uint32_t drawId; // Draw id supplied by the emitting stats handler.
+    uint32_t numInstExecuted; // Hull-shader instruction total accumulated from HSStats events before this event is emitted.
+};
+
+event DSInfo
+{
+    uint32_t drawId; // Draw id supplied by the emitting stats handler.
+    uint32_t numInstExecuted; // Domain-shader instruction total accumulated from DSStats events before this event is emitted.
+};
+
+event GSInfo
+{
+    uint32_t drawId; // Draw id supplied by the emitting stats handler.
+    uint32_t numInstExecuted; // Geometry-shader instruction total accumulated from GSStats events before this event is emitted.
+};
+
+event PSInfo
+{
+    uint32_t drawId; // Draw id passed to FlushDraw, which emits this event.
+    uint32_t numInstExecuted; // Pixel-shader instruction total accumulated from PSStats events since the last flush.
+};
+
+event CSInfo
+{
+    uint32_t drawId; // Draw id passed to FlushDraw, which emits this event.
+    uint32_t numInstExecuted; // Compute-shader instruction total accumulated from CSStats events since the last flush.
+};
\ No newline at end of file
index 37593befbca2fd0ef086984e47c0554338a99444..f5cfb470a14dc1bf2bc9228525eb608cc143dbf4 100644 (file)
@@ -165,3 +165,33 @@ event DrawIndexedInstancedEvent
     uint32_t soTopology;
     uint32_t splitId; // Split draw count or id.
 };
+
+event VSStats
+{
+    uint32_t numInstExecuted; // Added to the SHADER_VERTEX accumulator by the archrast stats handler.
+};
+
+event HSStats
+{
+    uint32_t numInstExecuted; // Added to the SHADER_HULL accumulator by the archrast stats handler.
+};
+
+event DSStats
+{
+    uint32_t numInstExecuted; // Added to the SHADER_DOMAIN accumulator by the archrast stats handler.
+};
+
+event GSStats
+{
+    uint32_t numInstExecuted; // Added to the SHADER_GEOMETRY accumulator by the archrast stats handler.
+};
+
+event PSStats
+{
+    uint32_t numInstExecuted; // Added to the SHADER_PIXEL accumulator by the archrast stats handler, which also marks the draw as needing a flush.
+};
+
+event CSStats
+{
+    uint32_t numInstExecuted; // Added to the SHADER_COMPUTE accumulator by the archrast stats handler, which also marks the draw as needing a flush.
+};
\ No newline at end of file
index 27a32bc3e3956f44af89e8e3f6622d0dd40529e4..e2ad1e8b035490d4a173701c34484f2038f1a252 100644 (file)
 
 namespace SwrJit
 {
+    ///@todo Move this to a better place
+    enum SHADER_STATS_COUNTER_TYPE // Identifiers for shader statistics counters, explicitly numbered 0 through 17 with no gaps.
+    {
+        STATS_INST_EXECUTED = 0, // NOTE(review): presumably pairs with the numInstExecuted field of the *Stats events; confirm against the JIT code that uses this enum.
+        STATS_SAMPLE_EXECUTED = 1,
+        STATS_SAMPLE_L_EXECUTED = 2,
+        STATS_SAMPLE_B_EXECUTED = 3,
+        STATS_SAMPLE_C_EXECUTED = 4,
+        STATS_SAMPLE_C_LZ_EXECUTED = 5,
+        STATS_SAMPLE_C_D_EXECUTED = 6,
+        STATS_LOD_EXECUTED = 7,
+        STATS_GATHER4_EXECUTED = 8,
+        STATS_GATHER4_C_EXECUTED = 9,
+        STATS_GATHER4_C_PO_EXECUTED = 10,
+        STATS_GATHER4_C_PO_C_EXECUTED = 11,
+        STATS_LOAD_RAW_UAV = 12,
+        STATS_LOAD_RAW_RESOURCE = 13,
+        STATS_STORE_RAW_UAV = 14,
+        STATS_STORE_TGSM = 15,
+        STATS_DISCARD = 16,
+        STATS_BARRIER = 17, // Last enumerator; no count/sentinel value is defined here.
+    };
+
     using namespace llvm;
     struct Builder
     {