From 7a5054aa1cc4db0e495af0eee327967886dcd4f7 Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Thu, 15 Mar 2018 15:58:10 -0500 Subject: [PATCH] swr/rast: Add some archrast counters Hook up archrast counters for shader stats: instructions executed. Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/archrast/archrast.cpp | 4 +-- .../swr/rasterizer/archrast/events.proto | 30 +++++++++++++++++++ .../drivers/swr/rasterizer/core/backend.cpp | 1 + .../swr/rasterizer/core/backend_impl.h | 4 +++ .../swr/rasterizer/core/backend_sample.cpp | 5 +++- .../rasterizer/core/backend_singlesample.cpp | 5 +++- .../drivers/swr/rasterizer/core/frontend.cpp | 8 +++++ 7 files changed, 53 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index 12dfc0e3372..21846737b56 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -61,7 +61,7 @@ namespace ArchRast //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine. }; - struct GSStats + struct GSInfo { uint32_t inputPrimCount; uint32_t primGeneratedCount; @@ -369,7 +369,7 @@ namespace ArchRast DepthStencilStats mDSOmZ = {}; CStats mClipper = {}; TEStats mTS = {}; - GSStats mGS = {}; + GSInfo mGS = {}; RastStats rastStats = {}; CullStats mCullStats = {}; AlphaStats mAlphaStats = {}; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index deb0373bf5a..f924b57dac4 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -115,6 +115,36 @@ event FrontendStatsEvent uint64_t SoNumPrimsWritten3; }; +event VSStats +{ + uint32_t numInstExecuted; +}; + +event HSStats +{ + uint32_t numInstExecuted; +}; + +event DSStats +{ + uint32_t numInstExecuted; +}; + +event GSStats +{ + uint32_t numInstExecuted; +}; + +event PSStats +{ + uint32_t numInstExecuted; +}; + +event CSStats +{ + uint32_t numInstExecuted; +}; + event BackendStatsEvent { uint32_t drawId; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index ccc7150283d..1e0769ae574 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -81,6 +81,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup state.pfnCsFunc(GetPrivateState(pDC), &csContext); UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup); + AR_EVENT(CSStats(csContext.stats.numInstExecuted)); RDTSC_END(BEDispatch, 1); } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index dd349a1d3c8..20b2ec58287 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -968,6 +968,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); RDTSC_END(BEPixelShader, 0); + // update stats + UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); + AR_EVENT(PSStats(psContext.stats.numInstExecuted)); + // update active lanes to remove any discarded or oMask'd pixels activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si()))); if(!_simd_movemask_ps(activeLanes)) { goto Endtile; }; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index 4982025a781..c7c6c533e37 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -163,10 +163,13 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // execute pixel shader RDTSC_BEGIN(BEPixelShader, pDC->drawId); - UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); RDTSC_END(BEPixelShader, 0); + // update stats + UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); + AR_EVENT(PSStats(psContext.stats.numInstExecuted)); + vCoverageMask = _simd_castsi_ps(psContext.activeMask); // late-Z diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 452fba13edf..26d5a75bd12 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -146,10 +146,13 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // execute pixel shader RDTSC_BEGIN(BEPixelShader, pDC->drawId); - UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); RDTSC_END(BEPixelShader, 0); + // update stats + UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); + AR_EVENT(PSStats(psContext.stats.numInstExecuted)); + vCoverageMask = _simd_castsi_ps(psContext.activeMask); // late-Z diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index c2be5d7bd11..25d1073885d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -851,6 +851,7 @@ static void GeometryShaderStage( // execute the geometry shader state.pfnGsFunc(GetPrivateState(pDC), &gsContext); + AR_EVENT(GSStats(gsContext.stats.numInstExecuted)); for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { @@ -1253,6 +1254,7 @@ static void TessellationStages( RDTSC_END(FEHullShader, 0); UPDATE_STAT_FE(HsInvocations, numPrims); + AR_EVENT(HSStats(hsContext.stats.numInstExecuted)); const uint32_t* pPrimId = (const uint32_t*)&primID; @@ -1316,6 +1318,8 @@ static void TessellationStages( state.pfnDsFunc(GetPrivateState(pDC), &dsContext); RDTSC_END(FEDomainShader, 0); + AR_EVENT(DSStats(dsContext.stats.numInstExecuted)); + dsInvocations += KNOB_SIMD_WIDTH; } UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints); @@ -1793,12 +1797,15 @@ void ProcessDraw( RDTSC_BEGIN(FEVertexShader, pDC->drawId); #if USE_SIMD16_VS state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); + AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); #else state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); + AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH { state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi); + AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted)); } #endif RDTSC_END(FEVertexShader, 0); @@ -2016,6 +2023,7 @@ void ProcessDraw( RDTSC_END(FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); + AR_EVENT(VSStats(vsContext.stats.numInstExecuted)); } } -- 2.30.2