pContext->pfnClearTile = pCreateInfo->pfnClearTile;
pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
+ pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
// pass pointer to bucket manager back to caller
#ifdef KNOB_ENABLE_RDTSC
typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
const SWR_STATS* pStats);
+//////////////////////////////////////////////////////////////////////////
+/// @brief Callback to allow driver to update their copy of FE stats.
+/// @note It's optimal to have a separate callback for FE stats since
+/// there is only one FE thread per DC. This means we do not have
+/// to sum up the stats across all of the workers.
+/// @param hPrivateContext - handle to private data
+/// @param pStats - pointer to FE stats
+typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext,
+ const SWR_STATS_FE* pStats);
+
class BucketManager;
//////////////////////////////////////////////////////////////////////////
uint32_t privateStateSize;
// Callback functions
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Pointer to rdtsc buckets mgr returned to the caller.
// Only populated when KNOB_ENABLE_RDTSC is set
// update global pipeline stat
SWR_CONTEXT* pContext = this->pDC->pContext;
- UPDATE_STAT(CPrimitives, numClippedPrims);
+ UPDATE_STAT_FE(CPrimitives, numClippedPrims);
}
// execute the clipper stage
// update clipper invocations pipeline stat
SWR_CONTEXT* pContext = this->pDC->pContext;
uint32_t numInvoc = _mm_popcnt_u32(primMask);
- UPDATE_STAT(CInvocations, numInvoc);
+ UPDATE_STAT_FE(CInvocations, numInvoc);
ComputeClipCodes(prim);
{
// update CPrimitives pipeline state
SWR_CONTEXT* pContext = this->pDC->pContext;
- UPDATE_STAT(CPrimitives, _mm_popcnt_u32(validMask));
+ UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
uint32_t SoWriteOffset[4];
bool SoWriteOffsetDirty[4];
- SWR_STATS stats[KNOB_MAX_NUM_THREADS];
+ SWR_STATS_FE statsFE; // Only one FE thread per DC.
+ SWR_STATS stats[KNOB_MAX_NUM_THREADS];
};
// Draw Context
HotTileMgr *pHotTileMgr;
// Callback functions, passed in at create context time
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Global Stats
SWR_STATS stats[KNOB_MAX_NUM_THREADS];
void WakeAllThreads(SWR_CONTEXT *pContext);
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
+#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
}
}
- UPDATE_STAT(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
- UPDATE_STAT(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
+ UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
+ UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
RDTSC_STOP(FEStreamout, 1, 0);
}
}
// update GS pipeline stats
- UPDATE_STAT(GsInvocations, numInputPrims * pState->instanceCount);
- UPDATE_STAT(GsPrimitives, totalPrimsGenerated);
+ UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
+ UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
RDTSC_STOP(FEGeometryShader, 1, 0);
}
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
RDTSC_STOP(FEHullShader, 0, 0);
- UPDATE_STAT(HsInvocations, numPrims);
+ UPDATE_STAT_FE(HsInvocations, numPrims);
const uint32_t* pPrimId = (const uint32_t*)&primID;
dsInvocations += KNOB_SIMD_WIDTH;
}
- UPDATE_STAT(DsInvocations, tsData.NumDomainPoints);
+ UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints);
PA_TESS tessPa(
pDC,
*pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask));
}
- UPDATE_STAT(IaVertices, GetNumInvocations(i, endVertex));
+ UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_FETCH)
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
RDTSC_STOP(FEVertexShader, 0, 0);
- UPDATE_STAT(VsInvocations, GetNumInvocations(i, endVertex));
+ UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
}
}
{
if (assemble)
{
- UPDATE_STAT(IaPrimitives, pa.NumPrims());
+ UPDATE_STAT_FE(IaPrimitives, pa.NumPrims());
if (HasTessellationT::value)
{
uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
// Pipeline Stats
+ uint64_t PsInvocations; // Number of Pixel Shader invocations
+ uint64_t CsInvocations; // Number of Compute Shader invocations
+
+};
+
+//////////////////////////////////////////////////////////////////////////
+/// SWR_STATS_FE
+///
+/// @brief All statistics generated by FE.
+/////////////////////////////////////////////////////////////////////////
+struct SWR_STATS_FE
+{
uint64_t IaVertices; // Number of Fetch Shader vertices
uint64_t IaPrimitives; // Number of PA primitives.
uint64_t VsInvocations; // Number of Vertex Shader invocations
uint64_t HsInvocations; // Number of Hull Shader invocations
uint64_t DsInvocations; // Number of Domain Shader invocations
uint64_t GsInvocations; // Number of Geometry Shader invocations
- uint64_t PsInvocations; // Number of Pixel Shader invocations
- uint64_t CsInvocations; // Number of Compute Shader invocations
+ uint64_t GsPrimitives; // Number of prims GS outputs.
uint64_t CInvocations; // Number of clipper invocations
uint64_t CPrimitives; // Number of clipper primitives.
- uint64_t GsPrimitives; // Number of prims GS outputs.
// Streamout Stats
uint64_t SoPrimStorageNeeded[4];
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
stats.DepthPassCount += dynState.stats[i].DepthPassCount;
- stats.IaVertices += dynState.stats[i].IaVertices;
- stats.IaPrimitives += dynState.stats[i].IaPrimitives;
- stats.VsInvocations += dynState.stats[i].VsInvocations;
- stats.HsInvocations += dynState.stats[i].HsInvocations;
- stats.DsInvocations += dynState.stats[i].DsInvocations;
- stats.GsInvocations += dynState.stats[i].GsInvocations;
+
stats.PsInvocations += dynState.stats[i].PsInvocations;
- stats.CInvocations += dynState.stats[i].CInvocations;
stats.CsInvocations += dynState.stats[i].CsInvocations;
- stats.CPrimitives += dynState.stats[i].CPrimitives;
- stats.GsPrimitives += dynState.stats[i].GsPrimitives;
-
- for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
- {
- stats.SoPrimStorageNeeded[stream] += dynState.stats[i].SoPrimStorageNeeded[stream];
- stats.SoNumPrimsWritten[stream] += dynState.stats[i].SoNumPrimsWritten[stream];
- }
}
pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
{
_ReadWriteBarrier();
+ if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats)
+ { // callback may be unset; FE stats are a single per-DC struct, so they are passed through without per-worker summation
+ pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &pDC->dynState.statsFE);
+ }
+
if (pContext->pfnUpdateSoWriteOffset)
{
for (uint32_t i = 0; i < MAX_SO_BUFFERS; ++i)
struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
SWR_STATS *pSwrStats = &ctx->stats;
+
pSwrStats->DepthPassCount += pStats->DepthPassCount;
+ pSwrStats->PsInvocations += pStats->PsInvocations;
+ pSwrStats->CsInvocations += pStats->CsInvocations;
+}
+
+static void
+swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
+{
+ swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
+
+ if (!pDC)
+ return;
+
+ struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+
+ SWR_STATS_FE *pSwrStats = &ctx->statsFE;
pSwrStats->IaVertices += pStats->IaVertices;
pSwrStats->IaPrimitives += pStats->IaPrimitives;
pSwrStats->VsInvocations += pStats->VsInvocations;
pSwrStats->HsInvocations += pStats->HsInvocations;
pSwrStats->DsInvocations += pStats->DsInvocations;
pSwrStats->GsInvocations += pStats->GsInvocations;
- pSwrStats->PsInvocations += pStats->PsInvocations;
- pSwrStats->CsInvocations += pStats->CsInvocations;
pSwrStats->CInvocations += pStats->CInvocations;
pSwrStats->CPrimitives += pStats->CPrimitives;
pSwrStats->GsPrimitives += pStats->GsPrimitives;
createInfo.pfnStoreTile = swr_StoreHotTile;
createInfo.pfnClearTile = swr_StoreHotTileClear;
createInfo.pfnUpdateStats = swr_UpdateStats;
+ createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
ctx->swrContext = SwrCreateContext(&createInfo);
/* Init Load/Store/ClearTiles Tables */
struct swr_draw_context swrDC;
SWR_STATS stats;
+ SWR_STATS_FE statsFE;
unsigned dirty; /**< Mask of SWR_NEW_x flags */
};
/* TODO: should fence instead of stalling pipeline */
SwrWaitForIdle(ctx->swrContext);
memcpy(&result->core, &ctx->stats, sizeof(result->core));
+ memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE));
#if 0
if (!pq->fence) {
result->u64 = end->timestamp - start->timestamp;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
- result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
+ result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- result->u64 = end->core.SoNumPrimsWritten[index]
- - start->core.SoNumPrimsWritten[index];
+ result->u64 = end->coreFE.SoNumPrimsWritten[index]
+ - start->coreFE.SoNumPrimsWritten[index];
break;
/* Structures */
case PIPE_QUERY_SO_STATISTICS: {
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
- struct SWR_STATS *start = &pq->start.core;
- struct SWR_STATS *end = &pq->end.core;
+ struct SWR_STATS_FE *start = &pq->start.coreFE;
+ struct SWR_STATS_FE *end = &pq->end.coreFE;
so_stats->num_primitives_written =
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
so_stats->primitives_storage_needed =
&result->pipeline_statistics;
struct SWR_STATS *start = &pq->start.core;
struct SWR_STATS *end = &pq->end.core;
- p_stats->ia_vertices = end->IaVertices - start->IaVertices;
- p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
- p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
- p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
- p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
- p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
- p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
+ struct SWR_STATS_FE *startFE = &pq->start.coreFE;
+ struct SWR_STATS_FE *endFE = &pq->end.coreFE;
+ p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices;
+ p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives;
+ p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations;
+ p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations;
+ p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives;
+ // CInvocations is accumulated separately from CPrimitives; report it here instead of duplicating c_primitives.
+ p_stats->c_invocations = endFE->CInvocations - startFE->CInvocations;
+ p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives;
p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
- p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
- p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
+ p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations;
+ p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations;
p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
} break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
- struct SWR_STATS *start = &pq->start.core;
- struct SWR_STATS *end = &pq->end.core;
+ struct SWR_STATS_FE *start = &pq->start.coreFE;
+ struct SWR_STATS_FE *end = &pq->end.coreFE;
uint64_t num_primitives_written =
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
uint64_t primitives_storage_needed =
struct swr_query_result {
SWR_STATS core;
+ SWR_STATS_FE coreFE;
uint64_t timestamp;
};