From 0f6fef96329314262cf03fe91734b0ce9cce3fa0 Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Tue, 6 Mar 2018 19:32:53 -0600 Subject: [PATCH] swr/rast: Add some archrast stats Add stats for degenerate and backfacing primitive counts. Wire archrast stats for alpha blend and alpha test: pass a value to the jitter and, upon return, have an archrast event increment a counter. Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/archrast/archrast.cpp | 35 ++++++++++++++++++- .../swr/rasterizer/archrast/events.proto | 19 ++++++++++ .../rasterizer/archrast/events_private.proto | 15 ++++++++ .../swr/rasterizer/core/backend_impl.h | 18 ++++++---- .../swr/rasterizer/core/backend_sample.cpp | 4 +-- .../rasterizer/core/backend_singlesample.cpp | 4 +-- .../drivers/swr/rasterizer/core/binner.cpp | 2 ++ .../drivers/swr/rasterizer/core/state.h | 2 ++ .../swr/rasterizer/jitter/blend_jit.cpp | 17 +++++++++ 9 files changed, 105 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index 1f87dbabcd0..12dfc0e3372 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -73,6 +73,18 @@ namespace ArchRast uint32_t rasterTiles = 0; }; + struct CullStats + { + uint32_t degeneratePrimCount = 0; + uint32_t backfacePrimCount = 0; + }; + + struct AlphaStats + { + uint32_t alphaTestCount = 0; + uint32_t alphaBlendCount = 0; + }; + ////////////////////////////////////////////////////////////////////////// /// @brief Event handler that handles API thread events. This is shared /// between the API and its caller (e.g. 
driver shim) but typically @@ -280,7 +292,12 @@ namespace ArchRast // Rasterized Subspans EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles)); - //Reset Internal Counters + // Alpha Subspans + EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount)); + + // Primitive Culling + EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount)); + mDSSingleSample = {}; mDSSampleRate = {}; mDSCombined = {}; @@ -288,6 +305,8 @@ namespace ArchRast mDSNullPS = {}; rastStats = {}; + mCullStats = {}; + mAlphaStats = {}; mNeedFlush = false; } @@ -327,6 +346,18 @@ namespace ArchRast rastStats.rasterTiles += event.data.rasterTiles; } + virtual void Handle(const CullInfoEvent& event) + { + mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask)); + mCullStats.backfacePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask)); + } + + virtual void Handle(const AlphaInfoEvent& event) + { + mAlphaStats.alphaTestCount += event.data.alphaTestEnable; + mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable; + } + protected: bool mNeedFlush; // Per draw stats @@ -340,6 +371,8 @@ namespace ArchRast TEStats mTS = {}; GSStats mGS = {}; RastStats rastStats = {}; + CullStats mCullStats = {}; + AlphaStats mAlphaStats = {}; }; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index 7d9a68d502e..deb0373bf5a 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -180,6 +180,7 @@ event LateStencilSampleRate uint64_t failCount; }; +// Total Early-Z counts, SingleSample and SampleRate event EarlyZ { uint32_t drawId; @@ -187,6 +188,7 @@ event EarlyZ uint64_t failCount; }; +// Total LateZ counts, SingleSample and 
SampleRate event LateZ { uint32_t drawId; @@ -194,6 +196,7 @@ event LateZ uint64_t failCount; }; +// Total EarlyStencil counts, SingleSample and SampleRate event EarlyStencil { uint32_t drawId; @@ -201,6 +204,7 @@ event EarlyStencil uint64_t failCount; }; +// Total LateStencil counts, SingleSample and SampleRate event LateStencil { uint32_t drawId; @@ -302,3 +306,18 @@ event ClipperEvent uint32_t trivialAcceptCount; uint32_t mustClipCount; }; + +event CullEvent +{ + uint32_t drawId; + uint64_t backfacePrimCount; + uint64_t degeneratePrimCount; +}; + +event AlphaEvent +{ + uint32_t drawId; + uint32_t alphaTestCount; + uint32_t alphaBlendCount; +}; + diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index f0a93107e5e..37593befbca 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -120,6 +120,21 @@ event ClipInfoEvent uint32_t clipMask; }; +event CullInfoEvent +{ + uint32_t drawId; + uint64_t degeneratePrimMask; + uint64_t backfacePrimMask; + uint32_t validMask; +}; + +event AlphaInfoEvent +{ + uint32_t drawId; + uint32_t alphaTestEnable; + uint32_t alphaBlendEnable; +}; + event DrawInstancedEvent { uint32_t drawId; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 8c539e31dc6..dd349a1d3c8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -709,8 +709,8 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P } // Merge Output to 4x2 SIMD Tile Format -INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar 
&coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask) +INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, + const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -747,6 +747,9 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW } } + // Track alpha events + AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended)); + // final write mask simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask)); @@ -777,8 +780,8 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW #if USE_8x2_TILE_BACKEND // Merge Output to 8x2 SIMD16 Tile Format -INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset) +INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, + const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -836,6 +839,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, 
uint8_t* (&pColorBase)[SW } } + // Track alpha events + AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended)); + // final write mask simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask)); @@ -1003,9 +1009,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // broadcast the results of the PS to all passing pixels #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else // USE_8x2_TILE_BACKEND - OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId); #endif // USE_8x2_TILE_BACKEND if(!state.psState.forceEarlyZ && !T::bForcedSampleCount) diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index 16418f7dc87..4982025a781 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -196,9 +196,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // output merger RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, 
&state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else - OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId); #endif // do final depth write after all pixel kills diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 4cc1ed5266b..452fba13edf 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -181,9 +181,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // output merger RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else - OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId); #endif // do final depth write after all pixel kills diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index c9a37cb17ae..d31fd37095d 100644 --- 
a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -773,6 +773,8 @@ void SIMDCALL BinTrianglesImpl( RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0); } + AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask)); + /// Note: these variable initializations must stay above any 'goto endBenTriangles' // compute per tri backface uint32_t frontFaceMask = frontWindingTris; diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 8c26ec60a2a..22acbe054a1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -887,6 +887,8 @@ struct SWR_BLEND_CONTEXT simdvector* result; simdscalari* oMask; simdscalari* pMask; + uint32_t isAlphaTested; + uint32_t isAlphaBlended; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp index 6b7efbfb6d6..912a88fd00d 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -557,6 +557,8 @@ struct BlendJit : public Builder ppoMask->setName("ppoMask"); Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask }); ppMask->setName("pMask"); + Value* AlphaTest1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + AlphaTest1->setName("AlphaTest1"); static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format"); Value* dst[4]; @@ -590,12 +592,22 @@ struct BlendJit : public Builder // alpha test if (state.desc.alphaTestEnable) { + // Gather for archrast stats + STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested }); AlphaTest(state, pBlendState, pSrc0Alpha, ppMask); } + else + { + // Gather for archrast stats + STORE(C(0), pBlendContext, 
{ 0, SWR_BLEND_CONTEXT_isAlphaTested }); + } // color blend if (state.blendState.blendEnable) { + // Gather for archrast stats + STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + // clamp sources Clamp(state.format, src); Clamp(state.format, src1); @@ -647,6 +659,11 @@ struct BlendJit : public Builder STORE(result[i], pResult, { 0, i }); } } + else + { + // Gather for archrast stats + STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + } if(state.blendState.logicOpEnable) { -- 2.30.2