From: Alok Hota Date: Thu, 7 Jun 2018 18:14:48 +0000 (-0500) Subject: swr/rast: update SWR rasterizer shader stats X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6e5eb4ead62131db3d6570fe8c0d0aa67f0149a9;p=mesa.git swr/rast: update SWR rasterizer shader stats Primarily refactoring internal stats types Reviewed-by: Bruce Cherniak --- diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index ceb06ae471f..a454fc12701 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -339,40 +339,57 @@ namespace ArchRast _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask); } - struct ShaderStats + void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate) { - uint32_t numInstExecuted; - }; + pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted; + pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted; + pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted; + pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted; + pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted; + pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted; + pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted; + pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed; + pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted; + pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted; + pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted; + pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted; + } virtual void Handle(const VSStats& event) { - mShaderStats[SHADER_VERTEX].numInstExecuted += event.data.numInstExecuted; + SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; + UpdateStats(&mShaderStats[SHADER_VERTEX], pStats); } virtual void Handle(const GSStats& event) { - mShaderStats[SHADER_GEOMETRY].numInstExecuted += event.data.numInstExecuted; + SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; + UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats); } virtual void Handle(const DSStats& event) { - mShaderStats[SHADER_DOMAIN].numInstExecuted += event.data.numInstExecuted; + SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; + UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats); } virtual void Handle(const HSStats& event) { - mShaderStats[SHADER_HULL].numInstExecuted += event.data.numInstExecuted; + SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; + UpdateStats(&mShaderStats[SHADER_HULL], pStats); } virtual void Handle(const PSStats& event) { - mShaderStats[SHADER_PIXEL].numInstExecuted += event.data.numInstExecuted; + SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; + UpdateStats(&mShaderStats[SHADER_PIXEL], pStats); mNeedFlush = true; } virtual void Handle(const CSStats& event) { - mShaderStats[SHADER_COMPUTE].numInstExecuted += event.data.numInstExecuted; + SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats; + UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats); mNeedFlush = true; } @@ -382,8 +399,32 @@ namespace ArchRast if (mNeedFlush == false) return; - EventHandlerFile::Handle(PSInfo(drawId, mShaderStats[SHADER_PIXEL].numInstExecuted)); - EventHandlerFile::Handle(CSInfo(drawId, mShaderStats[SHADER_COMPUTE].numInstExecuted)); + EventHandlerFile::Handle(PSInfo(drawId, + mShaderStats[SHADER_PIXEL].numInstExecuted, + mShaderStats[SHADER_PIXEL].numSampleExecuted, + mShaderStats[SHADER_PIXEL].numSampleLExecuted, + mShaderStats[SHADER_PIXEL].numSampleBExecuted, + mShaderStats[SHADER_PIXEL].numSampleCExecuted, + mShaderStats[SHADER_PIXEL].numSampleCLZExecuted, + mShaderStats[SHADER_PIXEL].numSampleCDExecuted, + mShaderStats[SHADER_PIXEL].numGather4Executed, + mShaderStats[SHADER_PIXEL].numGather4CExecuted, + mShaderStats[SHADER_PIXEL].numGather4CPOExecuted, + mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted, + mShaderStats[SHADER_PIXEL].numLodExecuted)); + EventHandlerFile::Handle(CSInfo(drawId, + mShaderStats[SHADER_COMPUTE].numInstExecuted, + mShaderStats[SHADER_COMPUTE].numSampleExecuted, + mShaderStats[SHADER_COMPUTE].numSampleLExecuted, + mShaderStats[SHADER_COMPUTE].numSampleBExecuted, + mShaderStats[SHADER_COMPUTE].numSampleCExecuted, + mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted, + mShaderStats[SHADER_COMPUTE].numSampleCDExecuted, + mShaderStats[SHADER_COMPUTE].numGather4Executed, + mShaderStats[SHADER_COMPUTE].numGather4CExecuted, + mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted, + mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted, + mShaderStats[SHADER_COMPUTE].numLodExecuted)); // singleSample EventHandlerFile::Handle(EarlyZSingleSample( @@ -480,14 +521,58 @@ namespace ArchRast EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount)); EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput)); - EventHandlerFile::Handle( - VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted)); - EventHandlerFile::Handle( - HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted)); - EventHandlerFile::Handle( - DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted)); - EventHandlerFile::Handle( - GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted)); + EventHandlerFile::Handle(VSInfo(event.data.drawId, + mShaderStats[SHADER_VERTEX].numInstExecuted, + mShaderStats[SHADER_VERTEX].numSampleExecuted, + mShaderStats[SHADER_VERTEX].numSampleLExecuted, + mShaderStats[SHADER_VERTEX].numSampleBExecuted, + mShaderStats[SHADER_VERTEX].numSampleCExecuted, + mShaderStats[SHADER_VERTEX].numSampleCLZExecuted, + mShaderStats[SHADER_VERTEX].numSampleCDExecuted, + mShaderStats[SHADER_VERTEX].numGather4Executed, + mShaderStats[SHADER_VERTEX].numGather4CExecuted, + mShaderStats[SHADER_VERTEX].numGather4CPOExecuted, + mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted, + mShaderStats[SHADER_VERTEX].numLodExecuted)); + EventHandlerFile::Handle(HSInfo(event.data.drawId, + mShaderStats[SHADER_HULL].numInstExecuted, + mShaderStats[SHADER_HULL].numSampleExecuted, + mShaderStats[SHADER_HULL].numSampleLExecuted, + mShaderStats[SHADER_HULL].numSampleBExecuted, + mShaderStats[SHADER_HULL].numSampleCExecuted, + mShaderStats[SHADER_HULL].numSampleCLZExecuted, + mShaderStats[SHADER_HULL].numSampleCDExecuted, + mShaderStats[SHADER_HULL].numGather4Executed, + mShaderStats[SHADER_HULL].numGather4CExecuted, + mShaderStats[SHADER_HULL].numGather4CPOExecuted, + mShaderStats[SHADER_HULL].numGather4CPOCExecuted, + mShaderStats[SHADER_HULL].numLodExecuted)); + EventHandlerFile::Handle(DSInfo(event.data.drawId, + mShaderStats[SHADER_DOMAIN].numInstExecuted, + mShaderStats[SHADER_DOMAIN].numSampleExecuted, + mShaderStats[SHADER_DOMAIN].numSampleLExecuted, + mShaderStats[SHADER_DOMAIN].numSampleBExecuted, + mShaderStats[SHADER_DOMAIN].numSampleCExecuted, + mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted, + mShaderStats[SHADER_DOMAIN].numSampleCDExecuted, + mShaderStats[SHADER_DOMAIN].numGather4Executed, + mShaderStats[SHADER_DOMAIN].numGather4CExecuted, + mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted, + mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted, + mShaderStats[SHADER_DOMAIN].numLodExecuted)); + EventHandlerFile::Handle(GSInfo(event.data.drawId, + mShaderStats[SHADER_GEOMETRY].numInstExecuted, + mShaderStats[SHADER_GEOMETRY].numSampleExecuted, + mShaderStats[SHADER_GEOMETRY].numSampleLExecuted, + mShaderStats[SHADER_GEOMETRY].numSampleBExecuted, + mShaderStats[SHADER_GEOMETRY].numSampleCExecuted, + mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted, + mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted, + mShaderStats[SHADER_GEOMETRY].numGather4Executed, + mShaderStats[SHADER_GEOMETRY].numGather4CExecuted, + mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted, + mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted, + mShaderStats[SHADER_GEOMETRY].numLodExecuted)); mShaderStats[SHADER_VERTEX] = {}; mShaderStats[SHADER_HULL] = {}; @@ -544,7 +629,7 @@ namespace ArchRast CullStats mCullStats = {}; AlphaStats mAlphaStats = {}; - ShaderStats mShaderStats[NUM_SHADER_TYPES]; + SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES]; }; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index 32bd81f895d..fdf39ee34e6 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -325,34 +325,101 @@ event VSInfo { uint32_t drawId; uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; event HSInfo { uint32_t drawId; uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; event DSInfo { uint32_t drawId; uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; event GSInfo { uint32_t drawId; uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; + }; event PSInfo { uint32_t drawId; uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; event CSInfo { uint32_t drawId; uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; \ No newline at end of file diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index f5cfb470a14..b49d4bf8de1 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -168,30 +168,30 @@ event DrawIndexedInstancedEvent event VSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; event HSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; event DSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; event GSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; event PSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; event CSStats { - uint32_t numInstExecuted; + HANDLE hStats; // SWR_SHADER_STATS }; \ No newline at end of file diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 8f8dbcf7884..883475cb753 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -88,7 +88,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, &csContext); UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup); - AR_EVENT(CSStats(csContext.stats.numInstExecuted)); + AR_EVENT(CSStats((HANDLE)&csContext.stats)); RDTSC_END(BEDispatch, 1); } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 1798dad7bc5..d556c549704 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -1212,7 +1212,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC, // update stats UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); - AR_EVENT(PSStats(psContext.stats.numInstExecuted)); + AR_EVENT(PSStats((HANDLE)&psContext.stats)); // update active lanes to remove any discarded or oMask'd pixels activeLanes = _simd_castsi_ps(_simd_and_si( diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index a1a1185bcfb..ff09cc6caa7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -207,7 +207,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC, // update stats UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); - AR_EVENT(PSStats(psContext.stats.numInstExecuted)); + AR_EVENT(PSStats((HANDLE)&psContext.stats)); vCoverageMask = _simd_castsi_ps(psContext.activeMask); diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 2efb01f95db..1c065ab14bf 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -188,7 +188,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC, // update stats UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); - AR_EVENT(PSStats(psContext.stats.numInstExecuted)); + AR_EVENT(PSStats((HANDLE)&psContext.stats)); vCoverageMask = _simd_castsi_ps(psContext.activeMask); diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index b0d9f05b91b..b510feadee9 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -888,7 +888,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC, // execute the geometry shader state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext); - AR_EVENT(GSStats(gsContext.stats.numInstExecuted)); + AR_EVENT(GSStats((HANDLE)&gsContext.stats)); for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) { @@ -1375,7 +1375,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC, RDTSC_END(FEHullShader, 0); UPDATE_STAT_FE(HsInvocations, numPrims); - AR_EVENT(HSStats(hsContext.stats.numInstExecuted)); + AR_EVENT(HSStats((HANDLE)&hsContext.stats)); const uint32_t* pPrimId = (const uint32_t*)&primID; @@ -1443,7 +1443,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC, state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext); RDTSC_END(FEDomainShader, 0); - AR_EVENT(DSStats(dsContext.stats.numInstExecuted)); + AR_EVENT(DSStats((HANDLE)&dsContext.stats)); dsInvocations += KNOB_SIMD_WIDTH; } @@ -1950,15 +1950,15 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo RDTSC_BEGIN(FEVertexShader, pDC->drawId); #if USE_SIMD16_VS state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo); - AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); + AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats)); #else state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo); - AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); + AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats)); if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH { state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi); - AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted)); + AR_EVENT(VSStats((HANDLE)&vsContext_hi.stats)); } #endif RDTSC_END(FEVertexShader, 0); @@ -2214,7 +2214,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo RDTSC_END(FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); - AR_EVENT(VSStats(vsContext.stats.numInstExecuted)); + AR_EVENT(VSStats((HANDLE)&vsContext.stats)); } } diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 0b42a457945..bdf4f6e2e8f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -221,7 +221,18 @@ struct SIMDVERTEX_T ///////////////////////////////////////////////////////////////////////// struct SWR_SHADER_STATS { - uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86. + uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86. + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h index 0ce8d025b5c..d252482a1d9 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h @@ -55,6 +55,9 @@ namespace SwrJit STATS_STORE_TGSM = 15, STATS_DISCARD = 16, STATS_BARRIER = 17, + + // ------------------ + STATS_TOTAL_COUNTERS }; using namespace llvm;