swr/rast: Add some archrast counters
author George Kyriazis <george.kyriazis@intel.com>
Thu, 15 Mar 2018 20:58:10 +0000 (15:58 -0500)
committer George Kyriazis <george.kyriazis@intel.com>
Wed, 18 Apr 2018 15:51:38 +0000 (10:51 -0500)
Hook up archrast counters for shader stats: instructions executed.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/archrast/events.proto
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/backend_impl.h
src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.cpp

index 12dfc0e337271fb75e9c4b5999a34edaf88efc98..21846737b56c1c5405951732298f52062a34b588 100644 (file)
@@ -61,7 +61,7 @@ namespace ArchRast
         //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
     };
 
-    struct GSStats
+    struct GSInfo
     {
         uint32_t inputPrimCount;
         uint32_t primGeneratedCount;
@@ -369,7 +369,7 @@ namespace ArchRast
         DepthStencilStats mDSOmZ = {};
         CStats mClipper = {};
         TEStats mTS = {};
-        GSStats mGS = {};
+        GSInfo mGS = {};
         RastStats rastStats = {};
         CullStats mCullStats = {};
         AlphaStats mAlphaStats = {};
index deb0373bf5a71d13f2846e82a95811ee21da5f8e..f924b57dac428bb7dd76c327761309fbe6927b92 100644 (file)
@@ -115,6 +115,36 @@ event FrontendStatsEvent
     uint64_t SoNumPrimsWritten3;
 };
 
+event VSStats
+{
+    uint32_t numInstExecuted;
+};
+
+event HSStats
+{
+    uint32_t numInstExecuted;
+};
+
+event DSStats
+{
+    uint32_t numInstExecuted;
+};
+
+event GSStats
+{
+    uint32_t numInstExecuted;
+};
+
+event PSStats
+{
+    uint32_t numInstExecuted;
+};
+
+event CSStats
+{
+    uint32_t numInstExecuted;
+};
+
 event BackendStatsEvent
 {
     uint32_t drawId;
index ccc7150283d55f25e37e60c200b4277f31faa313..1e0769ae57422c9cbb73e3721bc4df2df05c068e 100644 (file)
@@ -81,6 +81,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
     state.pfnCsFunc(GetPrivateState(pDC), &csContext);
 
     UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
+    AR_EVENT(CSStats(csContext.stats.numInstExecuted));
 
     RDTSC_END(BEDispatch, 1);
 }
index dd349a1d3c8c5f350a2df015244be3634dccdf68..20b2ec58287e19b2f4ddcb19a70f578aefd510d5 100644 (file)
@@ -968,6 +968,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
-            UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
             RDTSC_END(BEPixelShader, 0);
 
+            // update stats (PsInvocations is counted only here, not before RDTSC_END)
+            UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
+            AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+
             // update active lanes to remove any discarded or oMask'd pixels
             activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
             if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
index 4982025a78126e006a413719a995a7c58f429863..c7c6c533e37f770c80d13ee39daeb363e8bd8ff8 100644 (file)
@@ -163,10 +163,13 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
 
                     // execute pixel shader
                     RDTSC_BEGIN(BEPixelShader, pDC->drawId);
-                    UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
                     state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
                     RDTSC_END(BEPixelShader, 0);
 
+                    // update stats
+                    UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
+                    AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+
                     vCoverageMask = _simd_castsi_ps(psContext.activeMask);
 
                     // late-Z
index 452fba13edf1f45ecd936dc4468b45ddc0abffe8..26d5a75bd12308d07d5426edcfe2132b3b7b3de9 100644 (file)
@@ -146,10 +146,13 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
 
                 // execute pixel shader
                 RDTSC_BEGIN(BEPixelShader, pDC->drawId);
-                UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
                 state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
                 RDTSC_END(BEPixelShader, 0);
 
+                // update stats
+                UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
+                AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+
                 vCoverageMask = _simd_castsi_ps(psContext.activeMask);
 
                 // late-Z
index c2be5d7bd11c6d5975f1677cc6d691d7c9efeb28..25d1073885d6c4c1a8f7f477365a92ce02a86b7a 100644 (file)
@@ -851,6 +851,7 @@ static void GeometryShaderStage(
 
         // execute the geometry shader
         state.pfnGsFunc(GetPrivateState(pDC), &gsContext);
+        AR_EVENT(GSStats(gsContext.stats.numInstExecuted));
 
         for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
         {
@@ -1253,6 +1254,7 @@ static void TessellationStages(
     RDTSC_END(FEHullShader, 0);
 
     UPDATE_STAT_FE(HsInvocations, numPrims);
+    AR_EVENT(HSStats(hsContext.stats.numInstExecuted));
 
     const uint32_t* pPrimId = (const uint32_t*)&primID;
 
@@ -1316,6 +1318,8 @@ static void TessellationStages(
             state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
             RDTSC_END(FEDomainShader, 0);
 
+            AR_EVENT(DSStats(dsContext.stats.numInstExecuted));
+
             dsInvocations += KNOB_SIMD_WIDTH;
         }
         UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints);
@@ -1793,12 +1797,15 @@ void ProcessDraw(
                     RDTSC_BEGIN(FEVertexShader, pDC->drawId);
 #if USE_SIMD16_VS
                     state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
+                    AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
 #else
                     state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
+                    AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
 
                     if ((i + KNOB_SIMD_WIDTH) < endVertex)  // 1/2 of KNOB_SIMD16_WIDTH
                     {
                         state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi);
+                        AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
                     }
 #endif
                     RDTSC_END(FEVertexShader, 0);
@@ -2016,6 +2023,7 @@ void ProcessDraw(
                     RDTSC_END(FEVertexShader, 0);
 
                     UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
+                    AR_EVENT(VSStats(vsContext.stats.numInstExecuted));
                 }
             }