swr/rast: Add some archrast stats
authorGeorge Kyriazis <george.kyriazis@intel.com>
Wed, 7 Mar 2018 01:32:53 +0000 (19:32 -0600)
committerGeorge Kyriazis <george.kyriazis@intel.com>
Wed, 18 Apr 2018 15:51:38 +0000 (10:51 -0500)
Add stats for degenerate and backfacing primitive counts

Wire up archrast stats for alpha blend and alpha test: the blend jitter
stores per-call flags in the blend context, and on return the backend emits
an archrast event that increments the matching counters.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/archrast/events.proto
src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
src/gallium/drivers/swr/rasterizer/core/backend_impl.h
src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
src/gallium/drivers/swr/rasterizer/core/binner.cpp
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp

index 1f87dbabcd0cdb5a82bc446c0602c03051c3d01b..12dfc0e337271fb75e9c4b5999a34edaf88efc98 100644 (file)
@@ -73,6 +73,18 @@ namespace ArchRast
         uint32_t rasterTiles = 0;
     };
 
+    struct CullStats // Primitive cull counters: accumulated from CullInfoEvent, reported through CullEvent.
+    {
+        uint32_t degeneratePrimCount = 0; // valid primitives whose bit is set in degeneratePrimMask
+        uint32_t backfacePrimCount = 0; // valid primitives whose bit is set in backfacePrimMask
+    };
+
+    struct AlphaStats // Alpha counters: accumulated from AlphaInfoEvent, reported through AlphaEvent.
+    {
+        uint32_t alphaTestCount = 0; // running sum of AlphaInfoEvent::alphaTestEnable
+        uint32_t alphaBlendCount = 0; // running sum of AlphaInfoEvent::alphaBlendEnable
+    };
+
     //////////////////////////////////////////////////////////////////////////
     /// @brief Event handler that handles API thread events. This is shared
     ///        between the API and its caller (e.g. driver shim) but typically
@@ -280,7 +292,12 @@ namespace ArchRast
             // Rasterized Subspans
             EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
 
-            //Reset Internal Counters
+            // Alpha Subspans
+            EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
+
+            // Primitive Culling
+            EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
+            
             mDSSingleSample = {};
             mDSSampleRate = {};
             mDSCombined = {};
@@ -288,6 +305,8 @@ namespace ArchRast
             mDSNullPS = {};
 
             rastStats = {};
+            mCullStats = {};
+            mAlphaStats = {};
             mNeedFlush = false;
         }
 
@@ -327,6 +346,18 @@ namespace ArchRast
             rastStats.rasterTiles += event.data.rasterTiles;
         }
 
+        virtual void Handle(const CullInfoEvent& event) // Folds one CullInfoEvent's masks into mCullStats.
+        {
+            mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask)); // v ^ (v & ~m) == v & m: counts valid prims flagged degenerate; fits 32 bits because validMask is uint32_t
+            mCullStats.backfacePrimCount   += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask)); // same identity: counts valid prims flagged backfacing
+        }
+
+        virtual void Handle(const AlphaInfoEvent& event) // Folds one AlphaInfoEvent's flags into mAlphaStats.
+        {
+            mAlphaStats.alphaTestCount  += event.data.alphaTestEnable; // adds the raw flag value carried by the event
+            mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable; // adds the raw flag value carried by the event
+        }
+
     protected:
         bool mNeedFlush;
         // Per draw stats
@@ -340,6 +371,8 @@ namespace ArchRast
         TEStats mTS = {};
         GSStats mGS = {};
         RastStats rastStats = {};
+        CullStats mCullStats = {};
+        AlphaStats mAlphaStats = {};
 
     };
 
index 7d9a68d502ea32c0c2f95a68f48758a2e8c75b91..deb0373bf5a71d13f2846e82a95811ee21da5f8e 100644 (file)
@@ -180,6 +180,7 @@ event LateStencilSampleRate
     uint64_t failCount;
 };
 
+// Total Early-Z counts, SingleSample and SampleRate
 event EarlyZ
 {
     uint32_t drawId;
@@ -187,6 +188,7 @@ event EarlyZ
     uint64_t failCount;
 };     
 
+// Total LateZ counts, SingleSample and SampleRate
 event LateZ
 {
     uint32_t drawId;
@@ -194,6 +196,7 @@ event LateZ
     uint64_t failCount;
 };
 
+// Total EarlyStencil counts, SingleSample and SampleRate
 event EarlyStencil
 {
     uint32_t drawId; 
@@ -201,6 +204,7 @@ event EarlyStencil
     uint64_t failCount;
 };
 
+// Total LateStencil counts, SingleSample and SampleRate
 event LateStencil
 {
     uint32_t drawId; 
@@ -302,3 +306,18 @@ event ClipperEvent
     uint32_t trivialAcceptCount;
     uint32_t mustClipCount;
 };
+
+event CullEvent // Cull totals emitted from the handler's CullStats counters
+{
+    uint32_t drawId;
+    uint64_t backfacePrimCount; // filled from CullStats::backfacePrimCount
+    uint64_t degeneratePrimCount; // filled from CullStats::degeneratePrimCount
+};
+
+event AlphaEvent // Alpha totals emitted from the handler's AlphaStats counters
+{
+    uint32_t drawId;
+    uint32_t alphaTestCount; // filled from AlphaStats::alphaTestCount
+    uint32_t alphaBlendCount; // filled from AlphaStats::alphaBlendCount
+};
+
index f0a93107e5e0c9fdb11a6ee8b7086052e397b9ca..37593befbca2fd0ef086984e47c0554338a99444 100644 (file)
@@ -120,6 +120,21 @@ event ClipInfoEvent
     uint32_t clipMask;
 };
 
+event CullInfoEvent // Private event raised by BinTrianglesImpl with its culling masks
+{
+    uint32_t drawId;
+    uint64_t degeneratePrimMask; // binner passes cullZeroAreaMask here
+    uint64_t backfacePrimMask; // binner passes cullTris here
+    uint32_t validMask; // binner passes origTriMask here
+};
+
+event AlphaInfoEvent // Private event raised by the output merger after blending
+{
+    uint32_t drawId;
+    uint32_t alphaTestEnable; // output merger passes blendContext.isAlphaTested here
+    uint32_t alphaBlendEnable; // output merger passes blendContext.isAlphaBlended here
+};
+
 event DrawInstancedEvent
 {
     uint32_t drawId;
index 8c539e31dc635157138927e7994d243d62672f40..dd349a1d3c8c5f350a2df015244be3634dccdf68 100644 (file)
@@ -709,8 +709,8 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P
 }
 
 // Merge Output to 4x2 SIMD Tile Format
-INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
-    const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask)
+INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+    const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId)
 {
     // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
     const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -747,6 +747,9 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
             }
         }
 
+        // Track alpha events
+        AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
         // final write mask 
         simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
 
@@ -777,8 +780,8 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
 
 #if USE_8x2_TILE_BACKEND
 // Merge Output to 8x2 SIMD16 Tile Format
-INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
-    const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset)
+INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+    const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId)
 {
     // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
     uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -836,6 +839,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
             }
         }
 
+        // Track alpha events
+        AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
         // final write mask 
         simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
 
@@ -1003,9 +1009,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                 
                 // broadcast the results of the PS to all passing pixels
 #if USE_8x2_TILE_BACKEND
-                OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset);
+                OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
 #else // USE_8x2_TILE_BACKEND
-                OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask);
+                OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId);
 #endif // USE_8x2_TILE_BACKEND
 
                 if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
index 16418f7dc87ee83d6a14e44cf9f460eee4ad6821..4982025a78126e006a413719a995a7c58f429863 100644 (file)
@@ -196,9 +196,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                     // output merger
                     RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
 #if USE_8x2_TILE_BACKEND
-                    OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+                    OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
 #else
-                    OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+                    OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
 #endif
 
                     // do final depth write after all pixel kills
index 4cc1ed5266b1c9d211f0161805bf1ae1c99253bc..452fba13edf1f45ecd936dc4468b45ddc0abffe8 100644 (file)
@@ -181,9 +181,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                 // output merger
                 RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
 #if USE_8x2_TILE_BACKEND
-                OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+                OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
 #else
-                OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+                OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId, workerId);
 #endif
 
                 // do final depth write after all pixel kills
index c9a37cb17aea0263802c1dc2ffc2aa77da072d03..d31fd37095d21b8282fef7a2e82e22e3669cd9d3 100644 (file)
@@ -773,6 +773,8 @@ void SIMDCALL BinTrianglesImpl(
         RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
     }
 
+    AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask));
+
     /// Note: these variable initializations must stay above any 'goto endBenTriangles'
     // compute per tri backface
     uint32_t frontFaceMask = frontWindingTris;
index 8c26ec60a2a9fd3e5f0ecafb11ad684ba2af4383..22acbe054a19db25e1dd61197a51329e935eb053 100644 (file)
@@ -887,6 +887,8 @@ struct SWR_BLEND_CONTEXT
     simdvector*             result;
     simdscalari*            oMask;
     simdscalari*            pMask;
+    uint32_t                isAlphaTested;
+    uint32_t                isAlphaBlended;
 };
 
 //////////////////////////////////////////////////////////////////////////
index 6b7efbfb6d67ddf2988e0e475fb7de7e3cb64d24..912a88fd00d530754a8a893558d848f610a5b503 100644 (file)
@@ -557,6 +557,8 @@ struct BlendJit : public Builder
         ppoMask->setName("ppoMask");
         Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask });
         ppMask->setName("pMask");
+        Value* AlphaTest1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+        AlphaTest1->setName("AlphaTest1"); // name the new load itself; ppMask keeps its "pMask" name
 
         static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
         Value* dst[4];
@@ -590,12 +592,22 @@ struct BlendJit : public Builder
         // alpha test
         if (state.desc.alphaTestEnable)
         {
+            // Gather for archrast stats
+            STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
             AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
         }
+        else
+        {
+            // Gather for archrast stats
+            STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+        }
 
         // color blend
         if (state.blendState.blendEnable)
         {
+            // Gather for archrast stats
+            STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+
             // clamp sources
             Clamp(state.format, src);
             Clamp(state.format, src1);
@@ -647,6 +659,11 @@ struct BlendJit : public Builder
                 STORE(result[i], pResult, { 0, i });
             }
         }
+        else
+        {
+            // Gather for archrast stats
+            STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+        }
         
         if(state.blendState.logicOpEnable)
         {