Switch all RDTSC_START/STOP macros to use AR_BEGIN/END macros.
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
#include "common/simdintrin.h"
#include "common/os.h"
-#include "archrast/archrast.h"
-
static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
void SetupDefaultState(SWR_CONTEXT *pContext);
}
else
{
- RDTSC_START(APIDrawWakeAllThreads);
+ AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
WakeAllThreads(pContext);
- RDTSC_STOP(APIDrawWakeAllThreads, 1, 0);
+ AR_API_END(APIDrawWakeAllThreads, 1);
}
// Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
{
- RDTSC_START(APIGetDrawContext);
+ AR_API_BEGIN(APIGetDrawContext, 0);
// If current draw context is null then need to obtain a new draw context to use from ring.
if (pContext->pCurDrawContext == nullptr)
{
SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
}
- RDTSC_STOP(APIGetDrawContext, 0, 0);
+ AR_API_END(APIGetDrawContext, 0);
return pContext->pCurDrawContext;
}
void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3)
{
- RDTSC_START(APISync);
-
SWR_ASSERT(pfnFunc != nullptr);
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APISync, 0);
+
pDC->FeWork.type = SYNC;
pDC->FeWork.pfnWork = ProcessSync;
//enqueue
QueueDraw(pContext);
- RDTSC_STOP(APISync, 1, 0);
+ AR_API_END(APISync, 1);
}
void SwrWaitForIdle(HANDLE hContext)
{
SWR_CONTEXT *pContext = GetContext(hContext);
- RDTSC_START(APIWaitForIdle);
+ AR_API_BEGIN(APIWaitForIdle, 0);
while (!pContext->dcRing.IsEmpty())
{
_mm_pause();
}
- RDTSC_STOP(APIWaitForIdle, 1, 0);
+ AR_API_END(APIWaitForIdle, 1);
}
void SwrWaitForIdleFE(HANDLE hContext)
{
SWR_CONTEXT *pContext = GetContext(hContext);
- RDTSC_START(APIWaitForIdle);
+ AR_API_BEGIN(APIWaitForIdle, 0);
while (pContext->drawsOutstandingFE > 0)
{
_mm_pause();
}
- RDTSC_STOP(APIWaitForIdle, 1, 0);
+ AR_API_END(APIWaitForIdle, 1);
}
void SwrSetVertexBuffers(
return;
}
- RDTSC_START(APIDraw);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIDraw, pDC->drawId);
+
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
uint32_t remainingVerts = numVertices;
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
- RDTSC_STOP(APIDraw, numVertices * numInstances, 0);
+ AR_API_END(APIDraw, numVertices * numInstances);
}
//////////////////////////////////////////////////////////////////////////
return;
}
- RDTSC_START(APIDrawIndexed);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
API_STATE* pState = &pDC->pState->state;
- AR_BEGIN(AR_API_CTX, APIDrawIndexed, pDC->drawId);
- AR_EVENT(AR_API_CTX, DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
+ AR_API_BEGIN(APIDrawIndexed, pDC->drawId);
+ AR_API_EVENT(DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
- AR_END(AR_API_CTX, APIDrawIndexed, numIndices * numInstances);
- RDTSC_STOP(APIDrawIndexed, numIndices * numInstances, 0);
+ AR_API_END(APIDrawIndexed, numIndices * numInstances);
}
return;
}
- RDTSC_START(APIDispatch);
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIDispatch, pDC->drawId);
+
pDC->isCompute = true; // This is a compute context.
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
pDC->pDispatch->initialize(totalThreadGroups, pTaskData);
QueueDispatch(pContext);
- RDTSC_STOP(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ, 0);
+ AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
}
// Deswizzles, converts and stores current contents of the hot tiles to surface
return;
}
- RDTSC_START(APIStoreTiles);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIStoreTiles, pDC->drawId);
+
pDC->FeWork.type = STORETILES;
pDC->FeWork.pfnWork = ProcessStoreTiles;
pDC->FeWork.desc.storeTiles.attachment = attachment;
//enqueue
QueueDraw(pContext);
- RDTSC_STOP(APIStoreTiles, 0, 0);
+ AR_API_END(APIStoreTiles, 1);
}
//////////////////////////////////////////////////////////////////////////
return;
}
- RDTSC_START(APIClearRenderTarget);
-
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ AR_API_BEGIN(APIClearRenderTarget, pDC->drawId);
+
CLEAR_FLAGS flags;
flags.bits = 0;
flags.mask = clearMask;
// enqueue draw
QueueDraw(pContext);
- RDTSC_STOP(APIClearRenderTarget, 0, pDC->drawId);
+ AR_API_END(APIClearRenderTarget, 1);
}
//////////////////////////////////////////////////////////////////////////
/// @param threadGroupId - the linear index for the thread group within the dispatch.
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
{
- RDTSC_START(BEDispatch);
-
SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(BEDispatch, pDC->drawId);
+
const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
SWR_ASSERT(pTaskData != nullptr);
UPDATE_STAT(CsInvocations, state.totalThreadsInGroup);
- RDTSC_STOP(BEDispatch, 1, 0);
+ AR_END(BEDispatch, 1);
}
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
if (KNOB_FAST_CLEAR)
{
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
- SWR_CONTEXT *pContext = pDC->pContext;
SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
uint32_t numSamples = GetNumSamples(sampleCount);
SWR_ASSERT(pClear->flags.bits != 0); // shouldn't be here without a reason.
- RDTSC_START(BEClear);
+ AR_BEGIN(BEClear, pDC->drawId);
if (pClear->flags.mask & SWR_CLEAR_COLOR)
{
pHotTile->state = HOTTILE_CLEAR;
}
- RDTSC_STOP(BEClear, 0, 0);
+ AR_END(BEClear, 1);
}
else
{
// Legacy clear
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
- RDTSC_START(BEClear);
+ AR_BEGIN(BEClear, pDC->drawId);
if (pClear->flags.mask & SWR_CLEAR_COLOR)
{
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect);
}
- RDTSC_STOP(BEClear, 0, 0);
+ AR_END(BEClear, 1);
}
}
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
{
- RDTSC_START(BEStoreTiles);
STORE_TILES_DESC *pDesc = (STORE_TILES_DESC*)pData;
SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(BEStoreTiles, pDC->drawId);
+
#ifdef KNOB_ENABLE_RDTSC
uint32_t numTiles = 0;
#endif
pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
}
}
- RDTSC_STOP(BEStoreTiles, numTiles, pDC->drawId);
+ AR_END(BEStoreTiles, numTiles);
}
template<typename T>
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BESingleSampleBackend);
- RDTSC_START(BESetup);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BESingleSampleBackend, pDC->drawId);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
pColorBase[rt] = renderBuffers.pColor[rt];
}
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 1);
SWR_PS_CONTEXT psContext;
psContext.pAttribs = work.pAttribs;
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, pBlendState->sampleMask);
}
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
// for 1x case, centroid is pixel center
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 1);
simdmask clipCoverageMask = coverageMask & MASK;
// interpolate user clip distance if available
// Early-Z?
if(T::bCanEarlyZ)
{
- RDTSC_START(BEEarlyDepthTest);
+ AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
- RDTSC_STOP(BEEarlyDepthTest, 0, 0);
+ AR_END(BEEarlyDepthTest, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
- RDTSC_START(BEPixelShader);
+ AR_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- RDTSC_STOP(BEPixelShader, 0, 0);
+ AR_END(BEPixelShader, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
// late-Z
if(!T::bCanEarlyZ)
{
- RDTSC_START(BELateDepthTest);
+ AR_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
- RDTSC_STOP(BELateDepthTest, 0, 0);
+ AR_END(BELateDepthTest, 0);
if(!_simd_movemask_ps(depthPassMask))
{
UPDATE_STAT(DepthPassCount, statCount);
// output merger
- RDTSC_START(BEOutputMerger);
+ AR_BEGIN(BEOutputMerger, pDC->drawId);
OutputMerger(psContext, pColorBase, 0, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
// do final depth write after all pixel kills
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
}
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
}
Endtile:
- RDTSC_START(BEEndTile);
+ AR_BEGIN(BEEndTile, pDC->drawId);
coverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
{
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
- RDTSC_STOP(BEEndTile, 0, 0);
+ AR_END(BEEndTile, 0);
}
}
- RDTSC_STOP(BESingleSampleBackend, 0, 0);
+ AR_END(BESingleSampleBackend, 0);
}
template<typename T>
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BESampleRateBackend);
- RDTSC_START(BESetup);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BESampleRateBackend, pDC->drawId);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
pColorBase[rt] = renderBuffers.pColor[rt];
}
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 0);
SWR_PS_CONTEXT psContext;
psContext.pAttribs = work.pAttribs;
// pixel center
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
if(T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
if(T::bCentroidPos)
{
///@todo: don't need to generate input coverage 2x if input coverage and centroid
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
if(T::bIsStandardPattern)
{
CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
psContext.vY.centroid = _simd_add_ps(psContext.vY.UL, _simd_set1_ps(0.5f));
}
CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
}
else
{
simdmask coverageMask = work.coverageMask[sample] & MASK;
if (coverageMask)
{
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (rastState.clipDistanceMask)
// Early-Z?
if (T::bCanEarlyZ)
{
- RDTSC_START(BEEarlyDepthTest);
+ AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- RDTSC_STOP(BEEarlyDepthTest, 0, 0);
+ AR_END(BEEarlyDepthTest, 0);
// early-exit if no samples passed depth or earlyZ is forced on.
if (pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
- RDTSC_START(BEPixelShader);
+ AR_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- RDTSC_STOP(BEPixelShader, 0, 0);
+ AR_END(BEPixelShader, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
// late-Z
if (!T::bCanEarlyZ)
{
- RDTSC_START(BELateDepthTest);
+ AR_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- RDTSC_STOP(BELateDepthTest, 0, 0);
+ AR_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
UPDATE_STAT(DepthPassCount, statCount);
// output merger
- RDTSC_START(BEOutputMerger);
+ AR_BEGIN(BEOutputMerger, pDC->drawId);
OutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
// do final depth write after all pixel kills
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
}
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
}
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
- RDTSC_START(BEEndTile);
+ AR_BEGIN(BEEndTile, pDC->drawId);
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
{
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
- RDTSC_STOP(BEEndTile, 0, 0);
+ AR_END(BEEndTile, 0);
}
}
- RDTSC_STOP(BESampleRateBackend, 0, 0);
+ AR_END(BESampleRateBackend, 0);
}
template<typename T>
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BEPixelRateBackend);
- RDTSC_START(BESetup);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BEPixelRateBackend, pDC->drawId);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
pColorBase[rt] = renderBuffers.pColor[rt];
}
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 0);
SWR_PS_CONTEXT psContext;
psContext.pAttribs = work.pAttribs;
psContext.sampleIndex = 0;
- PixelRateZTestLoop<T> PixelRateZTest(pDC, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask);
+ PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask);
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
// set pixel center positions
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
if(T::bCentroidPos)
{
///@todo: don't need to generate input coverage 2x if input coverage and centroid
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
if(T::bIsStandardPattern)
{
CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
}
CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
}
else
{
if(pPSState->usesSourceDepth)
{
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
}
// pixels that are currently active
psContext.oMask = T::MultisampleT::FullSampleMask();
// execute pixel shader
- RDTSC_START(BEPixelShader);
+ AR_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
- RDTSC_STOP(BEPixelShader, 0, 0);
+ AR_END(BEPixelShader, 0);
// update active lanes to remove any discarded or oMask'd pixels
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
// loop over all samples, broadcasting the results of the PS to all passing pixels
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(pBlendState->sampleCount); sample++)
{
- RDTSC_START(BEOutputMerger);
+ AR_BEGIN(BEOutputMerger, pDC->drawId);
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
simdscalar coverageMask, depthMask;
if(!_simd_movemask_ps(depthMask))
{
// stencil should already have been written in early/lateZ tests
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
continue;
}
}
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ AR_END(BEOutputMerger, 0);
}
Endtile:
- RDTSC_START(BEEndTile);
+ AR_BEGIN(BEEndTile, pDC->drawId);
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
{
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
- RDTSC_STOP(BEEndTile, 0, 0);
+ AR_END(BEEndTile, 0);
}
}
- RDTSC_STOP(BEPixelRateBackend, 0, 0);
+ AR_END(BEPixelRateBackend, 0);
}
// optimized backend flow with NULL PS
template<uint32_t sampleCountT>
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- RDTSC_START(BENullBackend);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(BENullBackend, pDC->drawId);
///@todo: handle center multisample pattern
typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
- RDTSC_START(BESetup);
+ AR_BEGIN(BESetup, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
- RDTSC_STOP(BESetup, 0, 0);
+ AR_END(BESetup, 0);
SWR_PS_CONTEXT psContext;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
simdmask coverageMask = work.coverageMask[sample] & MASK;
if (coverageMask)
{
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (rastState.clipDistanceMask)
uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
- RDTSC_START(BEEarlyDepthTest);
+ AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
- RDTSC_STOP(BEEarlyDepthTest, 0, 0);
+ AR_END(BEEarlyDepthTest, 0);
uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
pStencilBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
}
}
- RDTSC_STOP(BENullBackend, 0, 0);
+ AR_END(BENullBackend, 0);
}
void InitClearTilesTable()
template<typename T>
struct PixelRateZTestLoop
{
- PixelRateZTestLoop(DRAW_CONTEXT *DC, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
+ PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
uint8_t*& depthBase, uint8_t*& stencilBase, const uint8_t ClipDistanceMask) :
- work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
+ pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
clipDistanceMask(ClipDistanceMask), pDepthBase(depthBase), pStencilBase(stencilBase) {};
INLINE
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
uint32_t statCount = 0;
simdscalar anyDepthSamplePassed = _simd_setzero_ps();
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
continue;
}
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
}
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
///@todo: perspective correct vs non-perspective correct clipping?
// if clip distances are enabled, we need to interpolate for each sample
uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
// ZTest for this sample
- RDTSC_START(BEDepthBucket);
+ ///@todo Need to re-enable (uncomment) this bucket.
+ //AR_BEGIN(BEDepthBucket, pDC->drawId);
depthPassMask[sample] = vCoverageMask[sample];
stencilPassMask[sample] = vCoverageMask[sample];
depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
vZ[sample], pDepthSample, vCoverageMask[sample],
pStencilSample, &stencilPassMask[sample]);
- RDTSC_STOP(BEDepthBucket, 0, 0);
+ //AR_END(BEDepthBucket, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
private:
// functor inputs
+ DRAW_CONTEXT* pDC;
+ uint32_t workerId;
+
const SWR_TRIANGLE_DESC& work;
const BarycentricCoeffs& coeffs;
const API_STATE& state;
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
- RDTSC_START(FEClipTriangles);
+ SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<3> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
- RDTSC_STOP(FEClipTriangles, 1, 0);
+ AR_END(FEClipTriangles, 1);
}
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
- RDTSC_START(FEClipLines);
+ SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(FEClipLines, pDC->drawId);
Clipper<2> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
- RDTSC_STOP(FEClipLines, 1, 0);
+ AR_END(FEClipLines, 1);
}
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
- RDTSC_START(FEClipPoints);
+ SWR_CONTEXT *pContext = pDC->pContext;
+ AR_BEGIN(FEClipPoints, pDC->drawId);
Clipper<1> clipper(workerId, pDC);
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
- RDTSC_STOP(FEClipPoints, 1, 0);
+ AR_END(FEClipPoints, 1);
}
// execute the clipper stage
void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
{
+ SWR_ASSERT(pa.pDC != nullptr);
+
+ SWR_CONTEXT *pContext = pa.pDC->pContext;
+
// set up binner based on PA state
PFN_PROCESS_PRIMS pfnBinner;
switch (pa.binTopology)
if (clipMask)
{
- RDTSC_START(FEGuardbandClip);
+ AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
- RDTSC_STOP(FEGuardbandClip, 1, 0);
+ AR_END(FEGuardbandClip, 1);
}
else if (validMask)
{
#include "common/simdintrin.h"
#include "core/threads.h"
#include "ringbuffer.h"
+#include "archrast/archrast.h"
// x.8 fixed point precision values
#define FIXED_POINT_SHIFT 8
#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
// ArchRast instrumentation framework
-#ifdef KNOB_ENABLE_AR
-#define AR_WORKER_CTX pDC->pContext->pArContext[workerId]
-#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
+#define AR_WORKER_CTX pContext->pArContext[workerId]
+#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads]
-#define AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id))
-#define AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count))
-#define AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event)
+#ifdef KNOB_ENABLE_AR
+ #define _AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id))
+ #define _AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count))
+ #define _AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event)
#else
-#define AR_BEGIN(ctx, type, id)
-#define AR_END(ctx, type, id)
-#define AR_EVENT(ctx, event)
-#endif
\ No newline at end of file
+ #ifdef KNOB_ENABLE_RDTSC
+ #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type)
+ #define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0)
+ #else
+ #define _AR_BEGIN(ctx, type, id) (void)ctx
+ #define _AR_END(ctx, type, id)
+ #endif
+ #define _AR_EVENT(ctx, event)
+#endif
+
+// Use these macros for api thread.
+#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id)
+#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count)
+#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event)
+
+// Use these macros for worker threads.
+#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
+#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
+#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)
uint32_t workerId,
void *pUserData)
{
- RDTSC_START(FEProcessStoreTiles);
+ AR_BEGIN(FEProcessStoreTiles, pDC->drawId);
MacroTileMgr *pTileMgr = pDC->pTileMgr;
STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
}
}
- RDTSC_STOP(FEProcessStoreTiles, 0, pDC->drawId);
+ AR_END(FEProcessStoreTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
uint32_t workerId,
void *pUserData)
{
- RDTSC_START(FEProcessInvalidateTiles);
+ AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
}
}
- RDTSC_STOP(FEProcessInvalidateTiles, 0, pDC->drawId);
+ AR_END(FEProcessInvalidateTiles, 0);
}
//////////////////////////////////////////////////////////////////////////
uint32_t* pPrimData,
uint32_t streamIndex)
{
- RDTSC_START(FEStreamout);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEStreamout, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_STREAMOUT_STATE &soState = state.soState;
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
- RDTSC_STOP(FEStreamout, 1, 0);
+ AR_END(FEStreamout, 1);
}
//////////////////////////////////////////////////////////////////////////
uint32_t* pSoPrimData,
simdscalari primID)
{
- RDTSC_START(FEGeometryShader);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEGeometryShader, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_GS_STATE* pState = &state.gsState;
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
- RDTSC_STOP(FEGeometryShader, 1, 0);
+ AR_END(FEGeometryShader, 1);
}
//////////////////////////////////////////////////////////////////////////
uint32_t* pSoPrimData,
simdscalari primID)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
const SWR_TS_STATE& tsState = state.tsState;
hsContext.mask = GenerateMask(numPrims);
// Run the HS
- RDTSC_START(FEHullShader);
+ AR_BEGIN(FEHullShader, pDC->drawId);
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
- RDTSC_STOP(FEHullShader, 0, 0);
+ AR_END(FEHullShader, 0);
UPDATE_STAT_FE(HsInvocations, numPrims);
{
// Run Tessellator
SWR_TS_TESSELLATED_DATA tsData = { 0 };
- RDTSC_START(FETessellation);
+ AR_BEGIN(FETessellation, pDC->drawId);
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
- RDTSC_STOP(FETessellation, 0, 0);
+ AR_END(FETessellation, 0);
if (tsData.NumPrimitives == 0)
{
{
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
- RDTSC_START(FEDomainShader);
+ AR_BEGIN(FEDomainShader, pDC->drawId);
state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
- RDTSC_STOP(FEDomainShader, 0, 0);
+ AR_END(FEDomainShader, 0);
dsInvocations += KNOB_SIMD_WIDTH;
}
if (HasRastT::value)
{
simdvector prim[3]; // Only deal with triangles, lines, or points
- RDTSC_START(FEPAAssemble);
+ AR_BEGIN(FEPAAssemble, pDC->drawId);
#if SWR_ENABLE_ASSERTS
bool assemble =
#endif
tessPa.Assemble(VERTEX_POSITION_SLOT, prim);
- RDTSC_STOP(FEPAAssemble, 1, 0);
+ AR_END(FEPAAssemble, 1);
SWR_ASSERT(assemble);
SWR_ASSERT(pfnClipFunc);
}
#endif
- RDTSC_START(FEProcessDraw);
+ AR_BEGIN(FEProcessDraw, pDC->drawId);
DRAW_WORK& work = *(DRAW_WORK*)pUserData;
const API_STATE& state = GetApiState(pDC);
{
// 1. Execute FS/VS for a single SIMD.
- RDTSC_START(FEFetchShader);
+ AR_BEGIN(FEFetchShader, pDC->drawId);
state.pfnFetchFunc(fetchInfo, vin);
- RDTSC_STOP(FEFetchShader, 0, 0);
+ AR_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
vsContext.VertexID = fetchInfo.VertexID;
if (!KNOB_TOSS_FETCH)
#endif
{
- RDTSC_START(FEVertexShader);
+ AR_BEGIN(FEVertexShader, pDC->drawId);
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
- RDTSC_STOP(FEVertexShader, 0, 0);
+ AR_END(FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
}
{
simdvector prim[MAX_NUM_VERTS_PER_PRIM];
// PaAssemble returns false if there is not enough verts to assemble.
- RDTSC_START(FEPAAssemble);
+ AR_BEGIN(FEPAAssemble, pDC->drawId);
bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim);
- RDTSC_STOP(FEPAAssemble, 1, 0);
+ AR_END(FEPAAssemble, 1);
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_FETCH)
pa.Reset();
}
- RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId);
+ AR_END(FEProcessDraw, numPrims * work.numInstances);
}
struct FEDrawChooser
simdscalari primID,
simdscalari viewportIdx)
{
- RDTSC_START(FEBinTriangles);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEBinTriangles, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
}
endBinTriangles:
- RDTSC_STOP(FEBinTriangles, 1, 0);
+ AR_END(FEBinTriangles, 1);
}
struct FEBinTrianglesChooser
simdscalari primID,
simdscalari viewportIdx)
{
- RDTSC_START(FEBinPoints);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEBinPoints, pDC->drawId);
simdvector& primVerts = prim[0];
}
}
-
-
-
- RDTSC_STOP(FEBinPoints, 1, 0);
+ AR_END(FEBinPoints, 1);
}
//////////////////////////////////////////////////////////////////////////
simdscalari primID,
simdscalari viewportIdx)
{
- RDTSC_START(FEBinLines);
+ SWR_CONTEXT *pContext = pDC->pContext;
+
+ AR_BEGIN(FEBinLines, pDC->drawId);
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
endBinLines:
- RDTSC_STOP(FEBinLines, 1, 0);
+ AR_END(FEBinLines, 1);
}
/// @brief Primary template for GenerateSVInnerCoverage. Its constructor takes
/// only unnamed parameters and has an empty body, so constructing it does nothing.
/// The <RT, AllEdgesValidT, InnerConservativeCoverageT> specialization below
/// supplies a constructor that writes innerCoverageMask.
template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
struct GenerateSVInnerCoverage
{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, EDGE*, double*, uint64_t &){};
+ INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t &){};
};
//////////////////////////////////////////////////////////////////////////
template <typename RT>
struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask)
+ INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, uint32_t workerId, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
double startQuadEdgesAdj[RT::NumEdgesT::value];
for(uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
}
// not trivial accept or reject, must rasterize full tile
- RDTSC_START(BERasterizePartial);
+ AR_BEGIN(BERasterizePartial, pDC->drawId);
innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges);
- RDTSC_STOP(BERasterizePartial, 0, 0);
+ AR_END(BERasterizePartial, 0);
}
};
template <typename RT>
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
return;
}
#endif
- RDTSC_START(BERasterizeTriangle);
+ AR_BEGIN(BERasterizeTriangle, pDC->drawId);
+ AR_BEGIN(BETriangleSetup, pDC->drawId);
- RDTSC_START(BETriangleSetup);
const API_STATE &state = GetApiState(pDC);
const SWR_RASTSTATE &rastState = state.rastState;
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
- RDTSC_STOP(BETriangleSetup, 0, pDC->drawId);
+ AR_END(BETriangleSetup, 0);
// update triangle desc
uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
if (numTilesX == 0 || numTilesY == 0)
{
RDTSC_EVENT(BEEmptyTriangle, 1, 0);
- RDTSC_STOP(BERasterizeTriangle, 1, 0);
+ AR_END(BERasterizeTriangle, 1);
return;
}
- RDTSC_START(BEStepSetup);
+ AR_BEGIN(BEStepSetup, pDC->drawId);
// Step to pixel center of top-left pixel of the triangle bbox
// Align intersect bbox (top/left) to raster tile's (top/left).
}
}
- RDTSC_STOP(BEStepSetup, 0, pDC->drawId);
+ AR_END(BEStepSetup, 0);
uint32_t tY = minTileY;
uint32_t tX = minTileX;
}
// not trivial accept or reject, must rasterize full tile
- RDTSC_START(BERasterizePartial);
+ AR_BEGIN(BERasterizePartial, pDC->drawId);
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
- RDTSC_STOP(BERasterizePartial, 0, 0);
+ AR_END(BERasterizePartial, 0);
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
// Output SV InnerCoverage, if needed
- GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
+ GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
}
}
else
UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage);
}
- RDTSC_START(BEPixelBackend);
+ AR_BEGIN(BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers);
- RDTSC_STOP(BEPixelBackend, 0, 0);
+ AR_END(BEPixelBackend, 0);
}
// step to the next tile in X
StepRasterTileY<RT>(state.psState.numRenderTargets, renderBuffers, currentRenderBufferRow);
}
- RDTSC_STOP(BERasterizeTriangle, 1, 0);
+ AR_END(BERasterizeTriangle, 1);
}
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
{
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
- RDTSC_START(BEPixelBackend);
+ AR_BEGIN(BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
- RDTSC_STOP(BEPixelBackend, 0, 0);
+ AR_END(BEPixelBackend, 0);
}
// Get pointers to hot tile memory for color RT, depth, stencil
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData);
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
#endif
// bloat line to two tris and call the triangle rasterizer twice
- RDTSC_START(BERasterizeLine);
+ AR_BEGIN(BERasterizeLine, pDC->drawId);
const API_STATE &state = GetApiState(pDC);
const SWR_RASTSTATE &rastState = state.rastState;
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
- RDTSC_STOP(BERasterizeLine, 1, 0);
+ AR_END(BERasterizeLine, 1);
}
struct RasterizerChooser
{
BE_WORK *pWork;
- RDTSC_START(WorkerFoundWork);
+ AR_BEGIN(WorkerFoundWork, pDC->drawId);
uint32_t numWorkItems = tile->getNumQueued();
SWR_ASSERT(numWorkItems);
SWR_ASSERT(pWork);
if (pWork->type == DRAW)
{
- pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, tileID);
+ pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, workerId, tileID);
}
while ((pWork = tile->peek()) != nullptr)
pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
tile->dequeue();
}
- RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId);
+ AR_END(WorkerFoundWork, numWorkItems);
_ReadWriteBarrier();
break;
}
- RDTSC_START(WorkerWaitForThreadEvent);
+ AR_BEGIN(WorkerWaitForThreadEvent, 0);
pContext->FifosNotEmpty.wait(lock);
lock.unlock();
- RDTSC_STOP(WorkerWaitForThreadEvent, 0, 0);
+ AR_END(WorkerWaitForThreadEvent, 0);
if (pContext->threadPool.inThreadShutdown)
{
if (IsBEThread)
{
- RDTSC_START(WorkerWorkOnFifoBE);
+ AR_BEGIN(WorkerWorkOnFifoBE, 0);
WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
- RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);
+ AR_END(WorkerWorkOnFifoBE, 0);
WorkOnCompute(pContext, workerId, curDrawBE);
}
/// to avoid unnecessary setup every triangle
/// @todo support deferred clear
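/// @param pContext - pointer to the SWR context.
/// @param pDC - pointer to the draw context.
/// @param workerId - id of the calling worker thread.
/// @param macroID - id of the macrotile whose hot tiles are initialized.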
-void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID)
+void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
{
const API_STATE& state = GetApiState(pDC);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearColorHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
colorHottileEnableMask &= ~(1 << rtSlot);
}
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearDepthHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
}
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
{
- RDTSC_START(BELoadTiles);
+ AR_BEGIN(BELoadTiles, pDC->drawId);
// Clear the tile.
ClearStencilHotTile(pHotTile);
pHotTile->state = HOTTILE_DIRTY;
- RDTSC_STOP(BELoadTiles, 0, 0);
+ AR_END(BELoadTiles, 0);
}
}
}
}
}
- void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID);
+ void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
uint32_t renderTargetArrayIndex = 0);