uint32_t rasterTiles = 0;
};
+ struct CullStats // Per-draw primitive culling counters; reset to {} together with the other per-draw stats
+ {
+ uint32_t degeneratePrimCount = 0; // valid primitives flagged in CullInfoEvent::degeneratePrimMask, summed across events
+ uint32_t backfacePrimCount = 0; // valid primitives flagged in CullInfoEvent::backfacePrimMask, summed across events
+ };
+
+ struct AlphaStats // Per-draw alpha counters; reset to {} together with the other per-draw stats
+ {
+ uint32_t alphaTestCount = 0; // running sum of AlphaInfoEvent::alphaTestEnable values
+ uint32_t alphaBlendCount = 0; // running sum of AlphaInfoEvent::alphaBlendEnable values
+ };
+
//////////////////////////////////////////////////////////////////////////
/// @brief Event handler that handles API thread events. This is shared
/// between the API and its caller (e.g. driver shim) but typically
// Rasterized Subspans
EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
- //Reset Internal Counters
+ // Alpha Subspans
+ EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
+
+ // Primitive Culling
+ EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
+
mDSSingleSample = {};
mDSSampleRate = {};
mDSCombined = {};
mDSNullPS = {};
rastStats = {};
+ mCullStats = {};
+ mAlphaStats = {};
mNeedFlush = false;
}
rastStats.rasterTiles += event.data.rasterTiles;
}
+ virtual void Handle(const CullInfoEvent& event) // Accumulates per-draw culled-primitive counts from one CullInfoEvent
+ {
+ // validMask ^ (validMask & ~mask) is identical to validMask & mask: the valid primitives that are also set in mask.
+ // validMask is 32 bits wide, so the AND result always fits in 32 bits and the explicit narrowing cast is lossless.
+ mCullStats.degeneratePrimCount += _mm_popcnt_u32(static_cast<uint32_t>(event.data.validMask & event.data.degeneratePrimMask));
+ mCullStats.backfacePrimCount += _mm_popcnt_u32(static_cast<uint32_t>(event.data.validMask & event.data.backfacePrimMask));
+ }
+
+ virtual void Handle(const AlphaInfoEvent& event) // Folds one AlphaInfoEvent into the per-draw alpha counters
+ {
+ const auto& alphaInfo = event.data;
+ // Each enable field is added as-is, so the counters are running sums of the reported values.
+ mAlphaStats.alphaTestCount += alphaInfo.alphaTestEnable;
+ mAlphaStats.alphaBlendCount += alphaInfo.alphaBlendEnable;
+ }
+
protected:
bool mNeedFlush;
// Per draw stats
TEStats mTS = {};
GSStats mGS = {};
RastStats rastStats = {};
+ CullStats mCullStats = {};
+ AlphaStats mAlphaStats = {};
};
uint64_t failCount;
};
+// Total Early-Z counts, SingleSample and SampleRate
event EarlyZ
{
uint32_t drawId;
uint64_t failCount;
};
+// Total LateZ counts, SingleSample and SampleRate
event LateZ
{
uint32_t drawId;
uint64_t failCount;
};
+// Total EarlyStencil counts, SingleSample and SampleRate
event EarlyStencil
{
uint32_t drawId;
uint64_t failCount;
};
+// Total LateStencil counts, SingleSample and SampleRate
event LateStencil
{
uint32_t drawId;
uint32_t trivialAcceptCount;
uint32_t mustClipCount;
};
+
+event CullEvent // Per-draw culling totals, emitted from the accumulated CullStats counters
+{
+ uint32_t drawId;
+ uint64_t backfacePrimCount; // NOTE(review): the CullStats counter feeding this field is uint32_t - confirm the intended width
+ uint64_t degeneratePrimCount; // NOTE(review): same width mismatch as backfacePrimCount
+};
+
+event AlphaEvent // Per-draw alpha totals, emitted from the accumulated AlphaStats counters
+{
+ uint32_t drawId;
+ uint32_t alphaTestCount; // sum of AlphaInfoEvent alphaTestEnable values seen for the draw
+ uint32_t alphaBlendCount; // sum of AlphaInfoEvent alphaBlendEnable values seen for the draw
+};
+
uint32_t clipMask;
};
+event CullInfoEvent // Culling masks for one batch of primitives; one bit per primitive (the handler popcounts them)
+{
+ uint32_t drawId;
+ uint64_t degeneratePrimMask; // the emit site passes cullZeroAreaMask here
+ uint64_t backfacePrimMask; // the emit site passes cullTris here
+ uint32_t validMask; // the emit site passes origTriMask here; the handler counts only bits also set in this mask
+};
+
+event AlphaInfoEvent // Alpha state report, emitted by the output merger after blending
+{
+ uint32_t drawId;
+ uint32_t alphaTestEnable; // taken from blendContext.isAlphaTested; the blend JIT stores 1 or 0 there
+ uint32_t alphaBlendEnable; // taken from blendContext.isAlphaBlended; the blend JIT stores 1 or 0 there
+};
+
event DrawInstancedEvent
{
uint32_t drawId;
}
// Merge Output to 4x2 SIMD Tile Format
-INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask)
+INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+ const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
}
}
+ // Track alpha events
+ AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
// final write mask
simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
#if USE_8x2_TILE_BACKEND
// Merge Output to 8x2 SIMD16 Tile Format
-INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset)
+INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+ const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
}
}
+ // Track alpha events
+ AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
// final write mask
simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
// broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset);
+ OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
#else // USE_8x2_TILE_BACKEND
- OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask);
+ OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId);
#endif // USE_8x2_TILE_BACKEND
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+ OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
#else
- OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+ OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
#endif
// do final depth write after all pixel kills
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+ OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
#else
- OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+ OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
#endif
// do final depth write after all pixel kills
RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
}
+ AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask));
+
/// Note: these variable initializations must stay above any 'goto endBenTriangles'
// compute per tri backface
uint32_t frontFaceMask = frontWindingTris;
simdvector* result;
simdscalari* oMask;
simdscalari* pMask;
+ uint32_t isAlphaTested;
+ uint32_t isAlphaBlended;
};
//////////////////////////////////////////////////////////////////////////
ppoMask->setName("ppoMask");
Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask });
ppMask->setName("pMask");
+ Value* AlphaTest1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); // NOTE(review): named "AlphaTest1" but loads the isAlphaBlended field - confirm the intended field
+ AlphaTest1->setName("AlphaTest1"); // name the value loaded above; ppMask keeps the "pMask" name it was just given
static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
Value* dst[4];
// alpha test
if (state.desc.alphaTestEnable)
{
+ // Gather for archrast stats
+ STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
}
+ else
+ {
+ // Gather for archrast stats
+ STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+ }
// color blend
if (state.blendState.blendEnable)
{
+ // Gather for archrast stats
+ STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+
// clamp sources
Clamp(state.format, src);
Clamp(state.format, src1);
STORE(result[i], pResult, { 0, i });
}
}
+ else
+ {
+ // Gather for archrast stats
+ STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+ }
if(state.blendState.logicOpEnable)
{