}
else
{
- AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
+ RDTSC_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
WakeAllThreads(pContext);
- AR_API_END(APIDrawWakeAllThreads, 1);
+ RDTSC_END(APIDrawWakeAllThreads, 1);
}
// Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
/// @brief Returns the context's current draw context (pContext->pCurDrawContext).
/// The call is timed under the APIGetDrawContext bucket.
/// @param pContext     context whose pCurDrawContext is inspected and returned.
/// @param isSplitDraw  asserted to be false when pCurDrawContext is null.
/// @return pContext->pCurDrawContext.
/// NOTE(review): only the assert is visible inside the null branch in this excerpt;
/// the code that obtains a new draw context from the ring is presumably elided - confirm.
DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
{
- AR_API_BEGIN(APIGetDrawContext, 0);
+ RDTSC_BEGIN(APIGetDrawContext, 0);
// If current draw context is null then need to obtain a new draw context to use from ring.
if (pContext->pCurDrawContext == nullptr)
{
SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
}
- AR_API_END(APIGetDrawContext, 0);
+ RDTSC_END(APIGetDrawContext, 0);
return pContext->pCurDrawContext;
}
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- AR_API_BEGIN(APISync, 0);
+ RDTSC_BEGIN(APISync, 0);
pDC->FeWork.type = SYNC;
pDC->FeWork.pfnWork = ProcessSync;
//enqueue
QueueDraw(pContext);
- AR_API_END(APISync, 1);
+ RDTSC_END(APISync, 1);
}
/// @brief Busy-waits until pContext->dcRing.IsEmpty() returns true.
/// The wait is timed under the APIWaitForIdle bucket.
/// @param hContext  handle resolved to an SWR_CONTEXT through GetContext().
void SwrStallBE(HANDLE hContext)
{
SWR_CONTEXT *pContext = GetContext(hContext);
- AR_API_BEGIN(APIWaitForIdle, 0);
+ RDTSC_BEGIN(APIWaitForIdle, 0);
// Spin, issuing _mm_pause() each iteration, until dcRing reports empty.
while (!pContext->dcRing.IsEmpty())
{
_mm_pause();
}
- AR_API_END(APIWaitForIdle, 1);
+ RDTSC_END(APIWaitForIdle, 1);
}
/// @brief Busy-waits while pContext->drawsOutstandingFE is greater than zero.
/// The wait is timed under the APIWaitForIdle bucket (the same bucket id that
/// SwrStallBE uses).
/// @param hContext  handle resolved to an SWR_CONTEXT through GetContext().
void SwrWaitForIdleFE(HANDLE hContext)
{
SWR_CONTEXT *pContext = GetContext(hContext);
- AR_API_BEGIN(APIWaitForIdle, 0);
+ RDTSC_BEGIN(APIWaitForIdle, 0);
// Spin, issuing _mm_pause() each iteration, until the outstanding-FE counter reaches zero.
while (pContext->drawsOutstandingFE > 0)
{
_mm_pause();
}
- AR_API_END(APIWaitForIdle, 1);
+ RDTSC_END(APIWaitForIdle, 1);
}
void SwrSetVertexBuffers(
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- AR_API_BEGIN(APIDraw, pDC->drawId);
+ RDTSC_BEGIN(APIDraw, pDC->drawId);
AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance));
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
- AR_API_END(APIDraw, numVertices * numInstances);
+ RDTSC_END(APIDraw, numVertices * numInstances);
}
//////////////////////////////////////////////////////////////////////////
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
API_STATE* pState = &pDC->pState->state;
- AR_API_BEGIN(APIDrawIndexed, pDC->drawId);
+ RDTSC_BEGIN(APIDrawIndexed, pDC->drawId);
AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
- AR_API_END(APIDrawIndexed, numIndices * numInstances);
+ RDTSC_END(APIDrawIndexed, numIndices * numInstances);
}
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- AR_API_BEGIN(APIDispatch, pDC->drawId);
+ RDTSC_BEGIN(APIDispatch, pDC->drawId);
AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
pDC->isCompute = true; // This is a compute context.
pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);
QueueDispatch(pContext);
- AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
+ RDTSC_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
}
// Deswizzles, converts and stores current contents of the hot tiles to surface
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- AR_API_BEGIN(APIStoreTiles, pDC->drawId);
+ RDTSC_BEGIN(APIStoreTiles, pDC->drawId);
pDC->FeWork.type = STORETILES;
pDC->FeWork.pfnWork = ProcessStoreTiles;
AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId));
- AR_API_END(APIStoreTiles, 1);
+ RDTSC_END(APIStoreTiles, 1);
}
//////////////////////////////////////////////////////////////////////////
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- AR_API_BEGIN(APIClearRenderTarget, pDC->drawId);
+ RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId);
pDC->FeWork.type = CLEAR;
pDC->FeWork.pfnWork = ProcessClear;
// enqueue draw
QueueDraw(pContext);
- AR_API_END(APIClearRenderTarget, 1);
+ RDTSC_END(APIClearRenderTarget, 1);
}
//////////////////////////////////////////////////////////////////////////
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(BEDispatch, pDC->drawId);
+ RDTSC_BEGIN(BEDispatch, pDC->drawId);
const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
SWR_ASSERT(pTaskData != nullptr);
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
- AR_END(BEDispatch, 1);
+ RDTSC_END(BEDispatch, 1);
}
//////////////////////////////////////////////////////////////////////////
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(BEStoreTiles, pDC->drawId);
+ RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
SWR_FORMAT srcFormat;
switch (attachment)
}
}
}
- AR_END(BEStoreTiles, 1);
+ RDTSC_END(BEStoreTiles, 1);
}
void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(BENullBackend, pDC->drawId);
+ RDTSC_BEGIN(BENullBackend, pDC->drawId);
///@todo: handle center multisample pattern
- AR_BEGIN(BESetup, pDC->drawId);
+ RDTSC_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
SWR_PS_CONTEXT psContext;
// skip SetupPixelShaderContext(&psContext, ...); // not needed here
- AR_END(BESetup, 0);
+ RDTSC_END(BESetup, 0);
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
}
- AR_BEGIN(BEBarycentric, pDC->drawId);
+ RDTSC_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- AR_END(BEBarycentric, 0);
+ RDTSC_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
simdscalar stencilPassMask = vCoverageMask;
- AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
+ RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
- AR_END(BEEarlyDepthTest, 0);
+ RDTSC_END(BEEarlyDepthTest, 0);
uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy);
}
- AR_END(BENullBackend, 0);
+ RDTSC_END(BENullBackend, 0);
}
PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {};
SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
- AR_BEGIN(BEClear, pDC->drawId);
+ RDTSC_BEGIN(BEClear, pDC->drawId);
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
{
pHotTile->state = HOTTILE_CLEAR;
}
- AR_END(BEClear, 1);
+ RDTSC_END(BEClear, 1);
}
else
{
// Legacy clear
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
- AR_BEGIN(BEClear, pDC->drawId);
+ RDTSC_BEGIN(BEClear, pDC->drawId);
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
{
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
}
- AR_END(BEClear, 1);
+ RDTSC_END(BEClear, 1);
}
}
vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
}
- AR_BEGIN(BEBarycentric, pDC->drawId);
+ RDTSC_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
}
- AR_END(BEBarycentric, 0);
+ RDTSC_END(BEBarycentric, 0);
///@todo: perspective correct vs non-perspective correct clipping?
// if clip distances are enabled, we need to interpolate for each sample
// ZTest for this sample
///@todo Need to uncomment out this bucket.
- //AR_BEGIN(BEDepthBucket, pDC->drawId);
+ //RDTSC_BEGIN(BEDepthBucket, pDC->drawId);
depthPassMask[sample] = vCoverageMask[sample];
stencilPassMask[sample] = vCoverageMask[sample];
depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
vZ[sample], pDepthSample, vCoverageMask[sample],
pStencilSample, &stencilPassMask[sample]);
- //AR_END(BEDepthBucket, 0);
+ //RDTSC_END(BEDepthBucket, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(BEPixelRateBackend, pDC->drawId);
- AR_BEGIN(BESetup, pDC->drawId);
+ RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId);
+ RDTSC_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
uint8_t *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
- AR_END(BESetup, 0);
+ RDTSC_END(BESetup, 0);
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.backendState.clipDistanceMask);
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
- AR_BEGIN(BEBarycentric, pDC->drawId);
+ RDTSC_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
- AR_END(BEBarycentric, 0);
+ RDTSC_END(BEBarycentric, 0);
if(T::bForcedSampleCount)
{
if(state.psState.usesSourceDepth)
{
- AR_BEGIN(BEBarycentric, pDC->drawId);
+ RDTSC_BEGIN(BEBarycentric, pDC->drawId);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- AR_END(BEBarycentric, 0);
+ RDTSC_END(BEBarycentric, 0);
}
// pixels that are currently active
psContext.oMask = T::MultisampleT::FullSampleMask();
// execute pixel shader
- AR_BEGIN(BEPixelShader, pDC->drawId);
+ RDTSC_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
- AR_END(BEPixelShader, 0);
+ RDTSC_END(BEPixelShader, 0);
// update active lanes to remove any discarded or oMask'd pixels
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
// loop over all samples, broadcasting the results of the PS to all passing pixels
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++)
{
- AR_BEGIN(BEOutputMerger, pDC->drawId);
+ RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
simdscalar coverageMask, depthMask;
if(!_simd_movemask_ps(depthMask))
{
// stencil should already have been written in early/lateZ tests
- AR_END(BEOutputMerger, 0);
+ RDTSC_END(BEOutputMerger, 0);
continue;
}
}
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
- AR_END(BEOutputMerger, 0);
+ RDTSC_END(BEOutputMerger, 0);
}
Endtile:
- AR_BEGIN(BEEndTile, pDC->drawId);
+ RDTSC_BEGIN(BEEndTile, pDC->drawId);
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
- AR_END(BEEndTile, 0);
+ RDTSC_END(BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
- AR_END(BEPixelRateBackend, 0);
+ RDTSC_END(BEPixelRateBackend, 0);
}
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(BESampleRateBackend, pDC->drawId);
- AR_BEGIN(BESetup, pDC->drawId);
+ RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
+ RDTSC_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
uint8_t *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
- AR_END(BESetup, 0);
+ RDTSC_END(BESetup, 0);
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
- AR_BEGIN(BEBarycentric, pDC->drawId);
+ RDTSC_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
- AR_END(BEBarycentric, 0);
+ RDTSC_END(BEBarycentric, 0);
for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
{
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
}
- AR_BEGIN(BEBarycentric, pDC->drawId);
+ RDTSC_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- AR_END(BEBarycentric, 0);
+ RDTSC_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
// Early-Z?
if (T::bCanEarlyZ)
{
- AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
+ RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
- AR_END(BEEarlyDepthTest, 0);
+ RDTSC_END(BEEarlyDepthTest, 0);
// early-exit if no samples passed depth or earlyZ is forced on.
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
- AR_BEGIN(BEPixelShader, pDC->drawId);
+ RDTSC_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- AR_END(BEPixelShader, 0);
+ RDTSC_END(BEPixelShader, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
// late-Z
if (!T::bCanEarlyZ)
{
- AR_BEGIN(BELateDepthTest, pDC->drawId);
+ RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
- AR_END(BELateDepthTest, 0);
+ RDTSC_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
UPDATE_STAT_BE(DepthPassCount, statCount);
// output merger
- AR_BEGIN(BEOutputMerger, pDC->drawId);
+ RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
#else
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
}
- AR_END(BEOutputMerger, 0);
+ RDTSC_END(BEOutputMerger, 0);
}
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
Endtile:
ATTR_UNUSED;
- AR_BEGIN(BEEndTile, pDC->drawId);
+ RDTSC_BEGIN(BEEndTile, pDC->drawId);
if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
- AR_END(BEEndTile, 0);
+ RDTSC_END(BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
- AR_END(BESampleRateBackend, 0);
+ RDTSC_END(BESampleRateBackend, 0);
}
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(BESingleSampleBackend, pDC->drawId);
- AR_BEGIN(BESetup, pDC->drawId);
+ RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
+ RDTSC_BEGIN(BESetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
uint8_t *pDepthBuffer, *pStencilBuffer;
SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
- AR_END(BESetup, 1);
+ RDTSC_END(BESetup, 1);
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
- AR_BEGIN(BEBarycentric, pDC->drawId);
+ RDTSC_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- AR_END(BEBarycentric, 1);
+ RDTSC_END(BEBarycentric, 1);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
// Early-Z?
if (T::bCanEarlyZ)
{
- AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
+ RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
- AR_END(BEEarlyDepthTest, 0);
+ RDTSC_END(BEEarlyDepthTest, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
- AR_BEGIN(BEPixelShader, pDC->drawId);
+ RDTSC_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- AR_END(BEPixelShader, 0);
+ RDTSC_END(BEPixelShader, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
// late-Z
if (!T::bCanEarlyZ)
{
- AR_BEGIN(BELateDepthTest, pDC->drawId);
+ RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
- AR_END(BELateDepthTest, 0);
+ RDTSC_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
UPDATE_STAT_BE(DepthPassCount, statCount);
// output merger
- AR_BEGIN(BEOutputMerger, pDC->drawId);
+ RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
#else
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
}
- AR_END(BEOutputMerger, 0);
+ RDTSC_END(BEOutputMerger, 0);
}
Endtile:
- AR_BEGIN(BEEndTile, pDC->drawId);
+ RDTSC_BEGIN(BEEndTile, pDC->drawId);
work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
- AR_END(BEEndTile, 0);
+ RDTSC_END(BEEndTile, 0);
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
- AR_END(BESingleSampleBackend, 0);
+ RDTSC_END(BESingleSampleBackend, 0);
}
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
SWR_CONTEXT *pContext = pDC->pContext;
const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
- AR_BEGIN(FEBinTriangles, pDC->drawId);
+ RDTSC_BEGIN(FEBinTriangles, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
if (!triMask)
{
- AR_END(FEBinTriangles, 1);
+ RDTSC_END(FEBinTriangles, 1);
return;
}
}
BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
- AR_END(FEBinTriangles, 1);
+ RDTSC_END(FEBinTriangles, 1);
return;
}
else if (rastState.fillMode == SWR_FILLMODE_POINT)
BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
- AR_END(FEBinTriangles, 1);
+ RDTSC_END(FEBinTriangles, 1);
return;
}
triMask &= ~(1 << triIndex);
}
- AR_END(FEBinTriangles, 1);
+ RDTSC_END(FEBinTriangles, 1);
}
template <typename CT>
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEBinPoints, pDC->drawId);
+ RDTSC_BEGIN(FEBinPoints, pDC->drawId);
typename SIMD_T::Vec4 &primVerts = prim[0];
}
}
- AR_END(FEBinPoints, 1);
+ RDTSC_END(FEBinPoints, 1);
}
//////////////////////////////////////////////////////////////////////////
SWR_CONTEXT *pContext = pDC->pContext;
const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
- AR_BEGIN(FEBinLines, pDC->drawId);
+ RDTSC_BEGIN(FEBinLines, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
const SWR_RASTSTATE &rastState = state.rastState;
endBinLines:
- AR_END(FEBinLines, 1);
+ RDTSC_END(FEBinLines, 1);
}
//////////////////////////////////////////////////////////////////////////
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEClipTriangles, pDC->drawId);
+ RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<SIMD256, 3> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- AR_END(FEClipTriangles, 1);
+ RDTSC_END(FEClipTriangles, 1);
}
/// @brief Runs the clipper stage for line primitives.
/// Constructs a Clipper<SIMD256, 2> from workerId and pDC and forwards all
/// primitive arguments to its ExecuteStage(). The call is timed under the
/// FEClipLines bucket.
/// @param pDC          draw context; also supplies pContext and the draw id.
/// @param pa           primitive-assembly state passed through to ExecuteStage().
/// @param workerId     worker id passed to the Clipper constructor.
/// @param prims        primitive vertex data passed through to ExecuteStage().
/// @param primMask     passed through to ExecuteStage().
/// @param primId       passed through to ExecuteStage().
/// @param viewportIdx  passed through to ExecuteStage().
/// @param rtIdx        passed through to ExecuteStage().
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEClipLines, pDC->drawId);
+ RDTSC_BEGIN(FEClipLines, pDC->drawId);
Clipper<SIMD256, 2> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- AR_END(FEClipLines, 1);
+ RDTSC_END(FEClipLines, 1);
}
/// @brief Runs the clipper stage for point primitives.
/// Constructs a Clipper<SIMD256, 1> from workerId and pDC and forwards all
/// primitive arguments to its ExecuteStage(). The call is timed under the
/// FEClipPoints bucket.
/// @param pDC          draw context; also supplies pContext and the draw id.
/// @param pa           primitive-assembly state passed through to ExecuteStage().
/// @param workerId     worker id passed to the Clipper constructor.
/// @param prims        primitive vertex data passed through to ExecuteStage().
/// @param primMask     passed through to ExecuteStage().
/// @param primId       passed through to ExecuteStage().
/// @param viewportIdx  passed through to ExecuteStage().
/// @param rtIdx        passed through to ExecuteStage().
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEClipPoints, pDC->drawId);
+ RDTSC_BEGIN(FEClipPoints, pDC->drawId);
Clipper<SIMD256, 1> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- AR_END(FEClipPoints, 1);
+ RDTSC_END(FEClipPoints, 1);
}
#if USE_SIMD16_FRONTEND
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEClipTriangles, pDC->drawId);
+ RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
enum { VERTS_PER_PRIM = 3 };
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- AR_END(FEClipTriangles, 1);
+ RDTSC_END(FEClipTriangles, 1);
}
/// @brief simd16 variant of the line clipper entry point.
/// Clears pa.useAlternateOffset and calls clipper.ExecuteStage() with the
/// primitive arguments. The call is timed under the FEClipLines bucket.
/// NOTE(review): the declaration of `clipper` is not visible in this excerpt;
/// presumably it is parameterized on VERTS_PER_PRIM - confirm against the full file.
void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEClipLines, pDC->drawId);
+ RDTSC_BEGIN(FEClipLines, pDC->drawId);
// A line has two vertices.
enum { VERTS_PER_PRIM = 2 };
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- AR_END(FEClipLines, 1);
+ RDTSC_END(FEClipLines, 1);
}
/// @brief simd16 variant of the point clipper entry point.
/// Clears pa.useAlternateOffset and calls clipper.ExecuteStage() with the
/// primitive arguments. The call is timed under the FEClipPoints bucket.
/// NOTE(review): the declaration of `clipper` is not visible in this excerpt;
/// presumably it is parameterized on VERTS_PER_PRIM - confirm against the full file.
void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEClipPoints, pDC->drawId);
+ RDTSC_BEGIN(FEClipPoints, pDC->drawId);
// A point has one vertex.
enum { VERTS_PER_PRIM = 1 };
pa.useAlternateOffset = false;
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- AR_END(FEClipPoints, 1);
+ RDTSC_END(FEClipPoints, 1);
}
#endif
if (clipMask)
{
- AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
+ RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx);
- AR_END(FEGuardbandClip, 1);
+ RDTSC_END(FEGuardbandClip, 1);
}
else if (validMask)
{
// Context selectors used by the AR_* macros below: worker threads index
// pArContext by workerId, the API thread uses the slot at index NumWorkerThreads.
// Both expand to expressions that require `pContext` (and `workerId` for the
// worker variant) to be in scope at the point of use.
#define AR_WORKER_CTX pContext->pArContext[workerId]
#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads]
// Timing-bucket macros. With KNOB_ENABLE_RDTSC defined they forward to
// RDTSC_START / RDTSC_STOP; otherwise they expand to nothing.
// RDTSC_BEGIN ignores its second argument in both configurations.
// NOTE(review): the second parameter of RDTSC_BEGIN is named `drawid` in the
// enabled branch and `count` in the disabled branch; harmless, but inconsistent.
+#ifdef KNOB_ENABLE_RDTSC
+#define RDTSC_BEGIN(type, drawid) RDTSC_START(type)
+#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0)
+#else
+#define RDTSC_BEGIN(type, count)
+#define RDTSC_END(type, count)
+#endif
+
// Event/flush macros: dispatch to ArchRast when KNOB_ENABLE_AR is defined,
// otherwise expand to nothing. The begin/end variants are removed by this change.
#ifdef KNOB_ENABLE_AR
- #define _AR_BEGIN(ctx, type, id) ArchRast::Dispatch(ctx, ArchRast::Start(ArchRast::type, id))
- #define _AR_END(ctx, type, count) ArchRast::Dispatch(ctx, ArchRast::End(ArchRast::type, count))
#define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event)
#define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id)
#else
- #ifdef KNOB_ENABLE_RDTSC
- #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type)
- #define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0)
- #else
- #define _AR_BEGIN(ctx, type, id) (void)ctx
- #define _AR_END(ctx, type, id)
- #endif
#define _AR_EVENT(ctx, event)
#define _AR_FLUSH(ctx, id)
#endif
// Use these macros for api thread.
-#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id)
-#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count)
#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event)
// Use these macros for worker threads.
-#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
-#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)
#define AR_FLUSH(id) _AR_FLUSH(AR_WORKER_CTX, id)
uint32_t workerId,
void *pUserData)
{
- AR_BEGIN(FEProcessStoreTiles, pDC->drawId);
+ RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId);
MacroTileMgr *pTileMgr = pDC->pTileMgr;
STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
}
}
- AR_END(FEProcessStoreTiles, 0);
+ RDTSC_END(FEProcessStoreTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
uint32_t workerId,
void *pUserData)
{
- AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
+ RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
}
}
- AR_END(FEProcessInvalidateTiles, 0);
+ RDTSC_END(FEProcessInvalidateTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEStreamout, pDC->drawId);
+ RDTSC_BEGIN(FEStreamout, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_STREAMOUT_STATE &soState = state.soState;
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
- AR_END(FEStreamout, 1);
+ RDTSC_END(FEStreamout, 1);
}
#if USE_SIMD16_FRONTEND
{
SWR_CONTEXT *pContext = pDC->pContext;
- AR_BEGIN(FEGeometryShader, pDC->drawId);
+ RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_GS_STATE* pState = &state.gsState;
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims));
- AR_END(FEGeometryShader, 1);
+ RDTSC_END(FEGeometryShader, 1);
}
//////////////////////////////////////////////////////////////////////////
hsContext.mask = GenerateMask(numPrims);
// Run the HS
- AR_BEGIN(FEHullShader, pDC->drawId);
+ RDTSC_BEGIN(FEHullShader, pDC->drawId);
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
- AR_END(FEHullShader, 0);
+ RDTSC_END(FEHullShader, 0);
UPDATE_STAT_FE(HsInvocations, numPrims);
{
// Run Tessellator
SWR_TS_TESSELLATED_DATA tsData = { 0 };
- AR_BEGIN(FETessellation, pDC->drawId);
+ RDTSC_BEGIN(FETessellation, pDC->drawId);
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
AR_EVENT(TessPrimCount(1));
- AR_END(FETessellation, 0);
+ RDTSC_END(FETessellation, 0);
if (tsData.NumPrimitives == 0)
{
{
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
- AR_BEGIN(FEDomainShader, pDC->drawId);
+ RDTSC_BEGIN(FEDomainShader, pDC->drawId);
state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
- AR_END(FEDomainShader, 0);
+ RDTSC_END(FEDomainShader, 0);
dsInvocations += KNOB_SIMD_WIDTH;
}
#else
simdvector prim[3]; // Only deal with triangles, lines, or points
#endif
- AR_BEGIN(FEPAAssemble, pDC->drawId);
+ RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
bool assemble =
#if USE_SIMD16_FRONTEND
tessPa.Assemble(VERTEX_POSITION_SLOT, prim_simd16);
#else
tessPa.Assemble(VERTEX_POSITION_SLOT, prim);
#endif
- AR_END(FEPAAssemble, 1);
+ RDTSC_END(FEPAAssemble, 1);
SWR_ASSERT(assemble);
SWR_ASSERT(pfnClipFunc);
}
#endif
- AR_BEGIN(FEProcessDraw, pDC->drawId);
+ RDTSC_BEGIN(FEProcessDraw, pDC->drawId);
DRAW_WORK& work = *(DRAW_WORK*)pUserData;
const API_STATE& state = GetApiState(pDC);
if (i < endVertex)
{
// 1. Execute FS/VS for a single SIMD.
- AR_BEGIN(FEFetchShader, pDC->drawId);
+ RDTSC_BEGIN(FEFetchShader, pDC->drawId);
#if USE_SIMD16_SHADERS
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin);
#else
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_hi, vin_hi);
}
#endif
- AR_END(FEFetchShader, 0);
+ RDTSC_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
#if USE_SIMD16_SHADERS
if (!KNOB_TOSS_FETCH)
#endif
{
- AR_BEGIN(FEVertexShader, pDC->drawId);
+ RDTSC_BEGIN(FEVertexShader, pDC->drawId);
#if USE_SIMD16_VS
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
#else
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi);
}
#endif
- AR_END(FEVertexShader, 0);
+ RDTSC_END(FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
}
{
// 1. Execute FS/VS for a single SIMD.
- AR_BEGIN(FEFetchShader, pDC->drawId);
+ RDTSC_BEGIN(FEFetchShader, pDC->drawId);
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo, vout);
- AR_END(FEFetchShader, 0);
+ RDTSC_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
vsContext.VertexID = fetchInfo.VertexID;
if (!KNOB_TOSS_FETCH)
#endif
{
- AR_BEGIN(FEVertexShader, pDC->drawId);
+ RDTSC_BEGIN(FEVertexShader, pDC->drawId);
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
- AR_END(FEVertexShader, 0);
+ RDTSC_END(FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
}
{
simdvector prim[MAX_NUM_VERTS_PER_PRIM];
// PaAssemble returns false if there is not enough verts to assemble.
- AR_BEGIN(FEPAAssemble, pDC->drawId);
+ RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim);
- AR_END(FEPAAssemble, 1);
+ RDTSC_END(FEPAAssemble, 1);
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_FETCH)
#endif
- AR_END(FEProcessDraw, numPrims * work.numInstances);
+ RDTSC_END(FEProcessDraw, numPrims * work.numInstances);
}
struct FEDrawChooser
#endif
// bloat line to two tris and call the triangle rasterizer twice
- AR_BEGIN(BERasterizeLine, pDC->drawId);
+ RDTSC_BEGIN(BERasterizeLine, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
const SWR_RASTSTATE &rastState = state.rastState;
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
- AR_END(BERasterizeLine, 1);
+ // Close the BERasterizeLine bucket opened above; this replaces AR_END, so it must be RDTSC_END, not a second RDTSC_BEGIN.
+ RDTSC_END(BERasterizeLine, 1);
}
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
- AR_BEGIN(BEPixelBackend, pDC->drawId);
+ RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
- AR_END(BEPixelBackend, 0);
+ RDTSC_END(BEPixelBackend, 0);
}
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
}
// not trivial accept or reject, must rasterize full tile
- AR_BEGIN(BERasterizePartial, pDC->drawId);
+ RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges);
- AR_END(BERasterizePartial, 0);
+ RDTSC_END(BERasterizePartial, 0);
}
};
return;
}
#endif
- AR_BEGIN(BERasterizeTriangle, pDC->drawId);
- AR_BEGIN(BETriangleSetup, pDC->drawId);
+ RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId);
+ RDTSC_BEGIN(BETriangleSetup, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
const SWR_RASTSTATE &rastState = state.rastState;
SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
- AR_END(BETriangleSetup, 0);
+ RDTSC_END(BETriangleSetup, 0);
// update triangle desc
uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
if (numTilesX == 0 || numTilesY == 0)
{
RDTSC_EVENT(BEEmptyTriangle, 1, 0);
- AR_END(BERasterizeTriangle, 1);
+ RDTSC_END(BERasterizeTriangle, 1);
return;
}
- AR_BEGIN(BEStepSetup, pDC->drawId);
+ RDTSC_BEGIN(BEStepSetup, pDC->drawId);
// Step to pixel center of top-left pixel of the triangle bbox
// Align intersect bbox (top/left) to raster tile's (top/left).
}
}
- AR_END(BEStepSetup, 0);
+ RDTSC_END(BEStepSetup, 0);
uint32_t tY = minTileY;
uint32_t tX = minTileX;
}
// not trivial accept or reject, must rasterize full tile
- AR_BEGIN(BERasterizePartial, pDC->drawId);
+ RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
- AR_END(BERasterizePartial, 0);
+ RDTSC_END(BERasterizePartial, 0);
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage);
}
- AR_BEGIN(BEPixelBackend, pDC->drawId);
+ RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers);
- AR_END(BEPixelBackend, 0);
+ RDTSC_END(BEPixelBackend, 0);
}
// step to the next tile in X
StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow);
}
- AR_END(BERasterizeTriangle, 1);
+ RDTSC_END(BERasterizeTriangle, 1);
}
// Get pointers to hot tile memory for color RT, depth, stencil
{
BE_WORK *pWork;
- AR_BEGIN(WorkerFoundWork, pDC->drawId);
+ RDTSC_BEGIN(WorkerFoundWork, pDC->drawId);
uint32_t numWorkItems = tile->getNumQueued();
SWR_ASSERT(numWorkItems);
pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
tile->dequeue();
}
- AR_END(WorkerFoundWork, numWorkItems);
+ RDTSC_END(WorkerFoundWork, numWorkItems);
_ReadWriteBarrier();
if (IsBEThread)
{
- AR_BEGIN(WorkerWorkOnFifoBE, 0);
+ RDTSC_BEGIN(WorkerWorkOnFifoBE, 0);
bShutdown |= WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
- AR_END(WorkerWorkOnFifoBE, 0);
+ RDTSC_END(WorkerWorkOnFifoBE, 0);
WorkOnCompute(pContext, workerId, curDrawBE);
}
if (pHotTile->state == HOTTILE_INVALID)
{
- AR_BEGIN(BELoadTiles, pDC->drawId);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- AR_END(BELoadTiles, 0);
+ RDTSC_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- AR_BEGIN(BELoadTiles, pDC->drawId);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearColorHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- AR_END(BELoadTiles, 0);
+ RDTSC_END(BELoadTiles, 0);
}
colorHottileEnableMask &= ~(1 << rtSlot);
}
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- AR_BEGIN(BELoadTiles, pDC->drawId);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- AR_END(BELoadTiles, 0);
+ RDTSC_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- AR_BEGIN(BELoadTiles, pDC->drawId);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearDepthHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- AR_END(BELoadTiles, 0);
+ RDTSC_END(BELoadTiles, 0);
}
}
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- AR_BEGIN(BELoadTiles, pDC->drawId);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- AR_END(BELoadTiles, 0);
+ RDTSC_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- AR_BEGIN(BELoadTiles, pDC->drawId);
+ RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearStencilHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- AR_END(BELoadTiles, 0);
+ RDTSC_END(BELoadTiles, 0);
}
}
}