From e3f92bb7af720e9d774cfa46bd908a06991a0f85 Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Tue, 30 Jan 2018 17:29:48 -0600 Subject: [PATCH] swr/rast: Separate RDTSC code from archrast Renamed rdtsc defines more appropriately Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/api.cpp | 40 ++++++------- .../drivers/swr/rasterizer/core/backend.cpp | 24 ++++---- .../swr/rasterizer/core/backend_clear.cpp | 8 +-- .../swr/rasterizer/core/backend_impl.h | 38 ++++++------- .../swr/rasterizer/core/backend_sample.cpp | 36 ++++++------ .../rasterizer/core/backend_singlesample.cpp | 32 +++++------ .../drivers/swr/rasterizer/core/binner.cpp | 18 +++--- .../drivers/swr/rasterizer/core/clip.cpp | 24 ++++---- .../drivers/swr/rasterizer/core/clip.h | 4 +- .../drivers/swr/rasterizer/core/context.h | 21 +++---- .../drivers/swr/rasterizer/core/frontend.cpp | 56 +++++++++---------- .../swr/rasterizer/core/rasterizer.cpp | 8 +-- .../swr/rasterizer/core/rasterizer_impl.h | 26 ++++----- .../drivers/swr/rasterizer/core/threads.cpp | 8 +-- .../drivers/swr/rasterizer/core/tilemgr.cpp | 24 ++++---- 15 files changed, 181 insertions(+), 186 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 09b482dcc09..5e27e4d1f0b 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -256,9 +256,9 @@ void QueueWork(SWR_CONTEXT *pContext) } else { - AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId); + RDTSC_BEGIN(APIDrawWakeAllThreads, pDC->drawId); WakeAllThreads(pContext); - AR_API_END(APIDrawWakeAllThreads, 1); + RDTSC_END(APIDrawWakeAllThreads, 1); } // Set current draw context to NULL so that next state call forces a new draw context to be created and populated. 
@@ -278,7 +278,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext) DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) { - AR_API_BEGIN(APIGetDrawContext, 0); + RDTSC_BEGIN(APIGetDrawContext, 0); // If current draw context is null then need to obtain a new draw context to use from ring. if (pContext->pCurDrawContext == nullptr) { @@ -367,7 +367,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC"); } - AR_API_END(APIGetDrawContext, 0); + RDTSC_END(APIGetDrawContext, 0); return pContext->pCurDrawContext; } @@ -477,7 +477,7 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - AR_API_BEGIN(APISync, 0); + RDTSC_BEGIN(APISync, 0); pDC->FeWork.type = SYNC; pDC->FeWork.pfnWork = ProcessSync; @@ -493,7 +493,7 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint //enqueue QueueDraw(pContext); - AR_API_END(APISync, 1); + RDTSC_END(APISync, 1); } void SwrStallBE(HANDLE hContext) @@ -508,28 +508,28 @@ void SwrWaitForIdle(HANDLE hContext) { SWR_CONTEXT *pContext = GetContext(hContext); - AR_API_BEGIN(APIWaitForIdle, 0); + RDTSC_BEGIN(APIWaitForIdle, 0); while (!pContext->dcRing.IsEmpty()) { _mm_pause(); } - AR_API_END(APIWaitForIdle, 1); + RDTSC_END(APIWaitForIdle, 1); } void SwrWaitForIdleFE(HANDLE hContext) { SWR_CONTEXT *pContext = GetContext(hContext); - AR_API_BEGIN(APIWaitForIdle, 0); + RDTSC_BEGIN(APIWaitForIdle, 0); while (pContext->drawsOutstandingFE > 0) { _mm_pause(); } - AR_API_END(APIWaitForIdle, 1); + RDTSC_END(APIWaitForIdle, 1); } void SwrSetVertexBuffers( @@ -1167,7 +1167,7 @@ void DrawInstanced( SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - AR_API_BEGIN(APIDraw, pDC->drawId); + RDTSC_BEGIN(APIDraw, pDC->drawId); 
AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance)); uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology); @@ -1230,7 +1230,7 @@ void DrawInstanced( pDC = GetDrawContext(pContext); pDC->pState->state.rastState.cullMode = oldCullMode; - AR_API_END(APIDraw, numVertices * numInstances); + RDTSC_END(APIDraw, numVertices * numInstances); } ////////////////////////////////////////////////////////////////////////// @@ -1295,7 +1295,7 @@ void DrawIndexedInstance( DRAW_CONTEXT* pDC = GetDrawContext(pContext); API_STATE* pState = &pDC->pState->state; - AR_API_BEGIN(APIDrawIndexed, pDC->drawId); + RDTSC_BEGIN(APIDrawIndexed, pDC->drawId); AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance)); uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology); @@ -1376,7 +1376,7 @@ void DrawIndexedInstance( pDC = GetDrawContext(pContext); pDC->pState->state.rastState.cullMode = oldCullMode; - AR_API_END(APIDrawIndexed, numIndices * numInstances); + RDTSC_END(APIDrawIndexed, numIndices * numInstances); } @@ -1508,7 +1508,7 @@ void SwrDispatch( SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - AR_API_BEGIN(APIDispatch, pDC->drawId); + RDTSC_BEGIN(APIDispatch, pDC->drawId); AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ)); pDC->isCompute = true; // This is a compute context. 
@@ -1524,7 +1524,7 @@ void SwrDispatch( pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE); QueueDispatch(pContext); - AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ); + RDTSC_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ); } // Deswizzles, converts and stores current contents of the hot tiles to surface @@ -1543,7 +1543,7 @@ void SWR_API SwrStoreTiles( SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - AR_API_BEGIN(APIStoreTiles, pDC->drawId); + RDTSC_BEGIN(APIStoreTiles, pDC->drawId); pDC->FeWork.type = STORETILES; pDC->FeWork.pfnWork = ProcessStoreTiles; @@ -1557,7 +1557,7 @@ void SWR_API SwrStoreTiles( AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId)); - AR_API_END(APIStoreTiles, 1); + RDTSC_END(APIStoreTiles, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1586,7 +1586,7 @@ void SWR_API SwrClearRenderTarget( SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - AR_API_BEGIN(APIClearRenderTarget, pDC->drawId); + RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId); pDC->FeWork.type = CLEAR; pDC->FeWork.pfnWork = ProcessClear; @@ -1604,7 +1604,7 @@ void SWR_API SwrClearRenderTarget( // enqueue draw QueueDraw(pContext); - AR_API_END(APIClearRenderTarget, 1); + RDTSC_END(APIClearRenderTarget, 1); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 6282e87f318..5878361be58 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -48,7 +48,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(BEDispatch, pDC->drawId); + RDTSC_BEGIN(BEDispatch, pDC->drawId); const 
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData(); SWR_ASSERT(pTaskData != nullptr); @@ -82,7 +82,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup); - AR_END(BEDispatch, 1); + RDTSC_END(BEDispatch, 1); } ////////////////////////////////////////////////////////////////////////// @@ -107,7 +107,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(BEStoreTiles, pDC->drawId); + RDTSC_BEGIN(BEStoreTiles, pDC->drawId); SWR_FORMAT srcFormat; switch (attachment) @@ -159,7 +159,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile } } } - AR_END(BEStoreTiles, 1); + RDTSC_END(BEStoreTiles, 1); } void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData) @@ -201,9 +201,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(BENullBackend, pDC->drawId); + RDTSC_BEGIN(BENullBackend, pDC->drawId); ///@todo: handle center multisample pattern - AR_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(BESetup, pDC->drawId); const API_STATE &state = GetApiState(pDC); @@ -216,7 +216,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_PS_CONTEXT psContext; // skip SetupPixelShaderContext(&psContext, ...); // not needed here - AR_END(BESetup, 0); + RDTSC_END(BESetup, 0); simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast(y))); @@ -257,7 +257,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz); } - AR_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample)); @@ -269,7 +269,7 
@@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - AR_END(BEBarycentric, 0); + RDTSC_END(BEBarycentric, 0); // interpolate user clip distance if available if (state.backendState.clipDistanceMask) @@ -280,13 +280,13 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, simdscalar vCoverageMask = _simd_vmask_ps(coverageMask); simdscalar stencilPassMask = vCoverageMask; - AR_BEGIN(BEEarlyDepthTest, pDC->drawId); + RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); - AR_END(BEEarlyDepthTest, 0); + RDTSC_END(BEEarlyDepthTest, 0); uint32_t statMask = _simd_movemask_ps(depthPassMask); uint32_t statCount = _mm_popcnt_u32(statMask); @@ -307,7 +307,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy); } - AR_END(BENullBackend, 0); + RDTSC_END(BENullBackend, 0); } PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {}; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp index 0ef54e266d7..baaa7e61f75 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp @@ -181,7 +181,7 @@ void ProcessClearBE(DRAW_CONTEXT 
*pDC, uint32_t workerId, uint32_t macroTile, vo SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason. - AR_BEGIN(BEClear, pDC->drawId); + RDTSC_BEGIN(BEClear, pDC->drawId); if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) { @@ -217,13 +217,13 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo pHotTile->state = HOTTILE_CLEAR; } - AR_END(BEClear, 1); + RDTSC_END(BEClear, 1); } else { // Legacy clear CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData; - AR_BEGIN(BEClear, pDC->drawId); + RDTSC_BEGIN(BEClear, pDC->drawId); if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR) { @@ -265,7 +265,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); } - AR_END(BEClear, 1); + RDTSC_END(BEClear, 1); } } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 593082bd7de..b62ff03bffc 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -600,7 +600,7 @@ struct PixelRateZTestLoop vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz))); } - AR_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample)); @@ -622,7 +622,7 @@ struct PixelRateZTestLoop vZ[sample] = state.pfnQuantizeDepth(vZ[sample]); } - AR_END(BEBarycentric, 0); + RDTSC_END(BEBarycentric, 0); ///@todo: perspective correct vs non-perspective correct clipping? // if clip distances are enabled, we need to interpolate for each sample @@ -635,13 +635,13 @@ struct PixelRateZTestLoop // ZTest for this sample ///@todo Need to uncomment out this bucket. 
- //AR_BEGIN(BEDepthBucket, pDC->drawId); + //RDTSC_BEGIN(BEDepthBucket, pDC->drawId); depthPassMask[sample] = vCoverageMask[sample]; stencilPassMask[sample] = vCoverageMask[sample]; depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, vZ[sample], pDepthSample, vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]); - //AR_END(BEDepthBucket, 0); + //RDTSC_END(BEDepthBucket, 0); // early-exit if no pixels passed depth or earlyZ is forced on if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample])) @@ -869,8 +869,8 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(BEPixelRateBackend, pDC->drawId); - AR_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId); + RDTSC_BEGIN(BESetup, pDC->drawId); const API_STATE &state = GetApiState(pDC); @@ -884,7 +884,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t uint8_t *pDepthBuffer, *pStencilBuffer; SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers); - AR_END(BESetup, 0); + RDTSC_END(BESetup, 0); PixelRateZTestLoop PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.backendState.clipDistanceMask); @@ -916,13 +916,13 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t generateInputCoverage(pCoverageMask, psContext.inputMask, state.blendState.sampleMask); } - AR_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); CalcCentroid(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask); - AR_END(BEBarycentric, 0); + RDTSC_END(BEBarycentric, 0); if(T::bForcedSampleCount) { @@ -944,11 +944,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t if(state.psState.usesSourceDepth) 
{ - AR_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(BEBarycentric, pDC->drawId); // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - AR_END(BEBarycentric, 0); + RDTSC_END(BEBarycentric, 0); } // pixels that are currently active @@ -956,10 +956,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t psContext.oMask = T::MultisampleT::FullSampleMask(); // execute pixel shader - AR_BEGIN(BEPixelShader, pDC->drawId); + RDTSC_BEGIN(BEPixelShader, pDC->drawId); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); - AR_END(BEPixelShader, 0); + RDTSC_END(BEPixelShader, 0); // update active lanes to remove any discarded or oMask'd pixels activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si()))); @@ -980,7 +980,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // loop over all samples, broadcasting the results of the PS to all passing pixels for(uint32_t sample = 0; sample < GetNumOMSamples(state.blendState.sampleCount); sample++) { - AR_BEGIN(BEOutputMerger, pDC->drawId); + RDTSC_BEGIN(BEOutputMerger, pDC->drawId); // center pattern does a single coverage/depth/stencil test, standard pattern tests all samples uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 
0 : sample; simdscalar coverageMask, depthMask; @@ -995,7 +995,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t if(!_simd_movemask_ps(depthMask)) { // stencil should already have been written in early/lateZ tests - AR_END(BEOutputMerger, 0); + RDTSC_END(BEOutputMerger, 0); continue; } } @@ -1015,10 +1015,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum], pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]); } - AR_END(BEOutputMerger, 0); + RDTSC_END(BEOutputMerger, 0); } Endtile: - AR_BEGIN(BEEndTile, pDC->drawId); + RDTSC_BEGIN(BEEndTile, pDC->drawId); for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++) { @@ -1054,7 +1054,7 @@ Endtile: pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits::bpp) / 8; pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits::bpp) / 8; - AR_END(BEEndTile, 0); + RDTSC_END(BEEndTile, 0); psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); @@ -1064,7 +1064,7 @@ Endtile: psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } - AR_END(BEPixelRateBackend, 0); + RDTSC_END(BEPixelRateBackend, 0); } templatepContext; - AR_BEGIN(BESampleRateBackend, pDC->drawId); - AR_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(BESampleRateBackend, pDC->drawId); + RDTSC_BEGIN(BESetup, pDC->drawId); const API_STATE &state = GetApiState(pDC); @@ -57,7 +57,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ uint8_t *pDepthBuffer, *pStencilBuffer; SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers); - AR_END(BESetup, 0); + RDTSC_END(BESetup, 0); psContext.vY.UL = _simd_add_ps(vULOffsetsY, 
_simd_set1_ps(static_cast(y))); psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast(y))); @@ -83,13 +83,13 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ generateInputCoverage(pCoverageMask, psContext.inputMask, state.blendState.sampleMask); } - AR_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); CalcCentroid(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask); - AR_END(BEBarycentric, 0); + RDTSC_END(BEBarycentric, 0); for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++) { @@ -113,7 +113,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz); } - AR_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(BEBarycentric, pDC->drawId); // calculate per sample positions psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample)); @@ -125,7 +125,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - AR_END(BEBarycentric, 0); + RDTSC_END(BEBarycentric, 0); // interpolate user clip distance if available if (state.backendState.clipDistanceMask) @@ -140,11 +140,11 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // Early-Z? 
if (T::bCanEarlyZ) { - AR_BEGIN(BEEarlyDepthTest, pDC->drawId); + RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - AR_END(BEEarlyDepthTest, 0); + RDTSC_END(BEEarlyDepthTest, 0); // early-exit if no samples passed depth or earlyZ is forced on. if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask)) @@ -164,21 +164,21 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ psContext.activeMask = _simd_castps_si(vCoverageMask); // execute pixel shader - AR_BEGIN(BEPixelShader, pDC->drawId); + RDTSC_BEGIN(BEPixelShader, pDC->drawId); UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); - AR_END(BEPixelShader, 0); + RDTSC_END(BEPixelShader, 0); vCoverageMask = _simd_castsi_ps(psContext.activeMask); // late-Z if (!T::bCanEarlyZ) { - AR_BEGIN(BELateDepthTest, pDC->drawId); + RDTSC_BEGIN(BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - AR_END(BELateDepthTest, 0); + RDTSC_END(BELateDepthTest, 0); if (!_simd_movemask_ps(depthPassMask)) { @@ -196,7 +196,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ UPDATE_STAT_BE(DepthPassCount, statCount); // output merger - AR_BEGIN(BEOutputMerger, pDC->drawId); + RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND OutputMerger8x2(psContext, 
psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); #else @@ -209,7 +209,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); } - AR_END(BEOutputMerger, 0); + RDTSC_END(BEOutputMerger, 0); } work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); } @@ -217,7 +217,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ Endtile: ATTR_UNUSED; - AR_BEGIN(BEEndTile, pDC->drawId); + RDTSC_BEGIN(BEEndTile, pDC->drawId); if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) { @@ -247,7 +247,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits::bpp) / 8; pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits::bpp) / 8; - AR_END(BEEndTile, 0); + RDTSC_END(BEEndTile, 0); psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); @@ -257,7 +257,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } - AR_END(BESampleRateBackend, 0); + RDTSC_END(BESampleRateBackend, 0); } // Recursive template used to auto-nest conditionals. 
Converts dynamic enum function diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 686b97912cc..57338afe0cc 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -42,8 +42,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(BESingleSampleBackend, pDC->drawId); - AR_BEGIN(BESetup, pDC->drawId); + RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId); + RDTSC_BEGIN(BESetup, pDC->drawId); const API_STATE &state = GetApiState(pDC); @@ -57,7 +57,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 uint8_t *pDepthBuffer, *pStencilBuffer; SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers); - AR_END(BESetup, 1); + RDTSC_END(BESetup, 1); psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast(y))); psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast(y))); @@ -99,7 +99,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 generateInputCoverage(pCoverageMask, psContext.inputMask, state.blendState.sampleMask); } - AR_BEGIN(BEBarycentric, pDC->drawId); + RDTSC_BEGIN(BEBarycentric, pDC->drawId); CalcPixelBarycentrics(coeffs, psContext); @@ -109,7 +109,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); - AR_END(BEBarycentric, 1); + RDTSC_END(BEBarycentric, 1); // interpolate user clip distance if available if (state.backendState.clipDistanceMask) @@ -124,11 +124,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // 
Early-Z? if (T::bCanEarlyZ) { - AR_BEGIN(BEEarlyDepthTest, pDC->drawId); + RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask); AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - AR_END(BEEarlyDepthTest, 0); + RDTSC_END(BEEarlyDepthTest, 0); // early-exit if no pixels passed depth or earlyZ is forced on if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask)) @@ -147,21 +147,21 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 psContext.activeMask = _simd_castps_si(vCoverageMask); // execute pixel shader - AR_BEGIN(BEPixelShader, pDC->drawId); + RDTSC_BEGIN(BEPixelShader, pDC->drawId); UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask))); state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); - AR_END(BEPixelShader, 0); + RDTSC_END(BEPixelShader, 0); vCoverageMask = _simd_castsi_ps(psContext.activeMask); // late-Z if (!T::bCanEarlyZ) { - AR_BEGIN(BELateDepthTest, pDC->drawId); + RDTSC_BEGIN(BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask); AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask))); - AR_END(BELateDepthTest, 0); + RDTSC_END(BELateDepthTest, 0); if (!_simd_movemask_ps(depthPassMask)) { @@ -181,7 +181,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 UPDATE_STAT_BE(DepthPassCount, statCount); // output merger - AR_BEGIN(BEOutputMerger, pDC->drawId); + RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND 
OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); #else @@ -194,11 +194,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask); } - AR_END(BEOutputMerger, 0); + RDTSC_END(BEOutputMerger, 0); } Endtile: - AR_BEGIN(BEEndTile, pDC->drawId); + RDTSC_BEGIN(BEEndTile, pDC->drawId); work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM); if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) @@ -229,7 +229,7 @@ Endtile: pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits::bpp) / 8; pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits::bpp) / 8; - AR_END(BEEndTile, 0); + RDTSC_END(BEEndTile, 0); psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx); psContext.vX.center = _simd_add_ps(psContext.vX.center, dx); @@ -239,7 +239,7 @@ Endtile: psContext.vY.center = _simd_add_ps(psContext.vY.center, dy); } - AR_END(BESingleSampleBackend, 0); + RDTSC_END(BESingleSampleBackend, 0); } // Recursive template used to auto-nest conditionals. 
Converts dynamic enum function diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index e2f32641f30..986ecc6a734 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -651,7 +651,7 @@ void SIMDCALL BinTrianglesImpl( SWR_CONTEXT *pContext = pDC->pContext; const uint32_t *aRTAI = reinterpret_cast(&rtIdx); - AR_BEGIN(FEBinTriangles, pDC->drawId); + RDTSC_BEGIN(FEBinTriangles, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_RASTSTATE& rastState = state.rastState; @@ -958,7 +958,7 @@ void SIMDCALL BinTrianglesImpl( if (!triMask) { - AR_END(FEBinTriangles, 1); + RDTSC_END(FEBinTriangles, 1); return; } } @@ -998,7 +998,7 @@ endBinTriangles: BinPostSetupLinesImpl(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx); - AR_END(FEBinTriangles, 1); + RDTSC_END(FEBinTriangles, 1); return; } else if (rastState.fillMode == SWR_FILLMODE_POINT) @@ -1008,7 +1008,7 @@ endBinTriangles: BinPostSetupPointsImpl(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx); BinPostSetupPointsImpl(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx); - AR_END(FEBinTriangles, 1); + RDTSC_END(FEBinTriangles, 1); return; } @@ -1114,7 +1114,7 @@ endBinTriangles: triMask &= ~(1 << triIndex); } - AR_END(FEBinTriangles, 1); + RDTSC_END(FEBinTriangles, 1); } template @@ -1197,7 +1197,7 @@ void BinPostSetupPointsImpl( { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEBinPoints, pDC->drawId); + RDTSC_BEGIN(FEBinPoints, pDC->drawId); typename SIMD_T::Vec4 &primVerts = prim[0]; @@ -1480,7 +1480,7 @@ void BinPostSetupPointsImpl( } } - AR_END(FEBinPoints, 1); + RDTSC_END(FEBinPoints, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1608,7 +1608,7 @@ void BinPostSetupLinesImpl( SWR_CONTEXT *pContext = pDC->pContext; const uint32_t *aRTAI = reinterpret_cast(&rtIdx); - 
AR_BEGIN(FEBinLines, pDC->drawId); + RDTSC_BEGIN(FEBinLines, pDC->drawId); const API_STATE &state = GetApiState(pDC); const SWR_RASTSTATE &rastState = state.rastState; @@ -1789,7 +1789,7 @@ void BinPostSetupLinesImpl( endBinLines: - AR_END(FEBinLines, 1); + RDTSC_END(FEBinLines, 1); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp index 72058029b03..22d89bc5996 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp @@ -164,30 +164,30 @@ void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvecto simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEClipTriangles, pDC->drawId); + RDTSC_BEGIN(FEClipTriangles, pDC->drawId); Clipper clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - AR_END(FEClipTriangles, 1); + RDTSC_END(FEClipTriangles, 1); } void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEClipLines, pDC->drawId); + RDTSC_BEGIN(FEClipLines, pDC->drawId); Clipper clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - AR_END(FEClipLines, 1); + RDTSC_END(FEClipLines, 1); } void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEClipPoints, pDC->drawId); + RDTSC_BEGIN(FEClipPoints, pDC->drawId); Clipper clipper(workerId, pDC); clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - 
AR_END(FEClipPoints, 1); + RDTSC_END(FEClipPoints, 1); } #if USE_SIMD16_FRONTEND @@ -195,7 +195,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEClipTriangles, pDC->drawId); + RDTSC_BEGIN(FEClipTriangles, pDC->drawId); enum { VERTS_PER_PRIM = 3 }; @@ -204,14 +204,14 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor pa.useAlternateOffset = false; clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - AR_END(FEClipTriangles, 1); + RDTSC_END(FEClipTriangles, 1); } void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEClipLines, pDC->drawId); + RDTSC_BEGIN(FEClipLines, pDC->drawId); enum { VERTS_PER_PRIM = 2 }; @@ -220,14 +220,14 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI pa.useAlternateOffset = false; clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - AR_END(FEClipLines, 1); + RDTSC_END(FEClipLines, 1); } void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx) { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEClipPoints, pDC->drawId); + RDTSC_BEGIN(FEClipPoints, pDC->drawId); enum { VERTS_PER_PRIM = 1 }; @@ -236,7 +236,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker pa.useAlternateOffset = false; clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx); - AR_END(FEClipPoints, 1); + RDTSC_END(FEClipPoints, 1); } #endif diff --git 
a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index 592c9bfa73e..cda40f14f22 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -719,11 +719,11 @@ public: if (clipMask) { - AR_BEGIN(FEGuardbandClip, pa.pDC->drawId); + RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId); // we have to clip tris, execute the clipper, which will also // call the binner ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx); - AR_END(FEGuardbandClip, 1); + RDTSC_END(FEGuardbandClip, 1); } else if (validMask) { diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 6a63838eb5d..5bae53f3806 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -526,30 +526,25 @@ struct SWR_CONTEXT #define AR_WORKER_CTX pContext->pArContext[workerId] #define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads] +#ifdef KNOB_ENABLE_RDTSC +#define RDTSC_BEGIN(type, drawid) RDTSC_START(type) +#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0) +#else +#define RDTSC_BEGIN(type, count) +#define RDTSC_END(type, count) +#endif + #ifdef KNOB_ENABLE_AR - #define _AR_BEGIN(ctx, type, id) ArchRast::Dispatch(ctx, ArchRast::Start(ArchRast::type, id)) - #define _AR_END(ctx, type, count) ArchRast::Dispatch(ctx, ArchRast::End(ArchRast::type, count)) #define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event) #define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id) #else - #ifdef KNOB_ENABLE_RDTSC - #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type) - #define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0) - #else - #define _AR_BEGIN(ctx, type, id) (void)ctx - #define _AR_END(ctx, type, id) - #endif #define _AR_EVENT(ctx, event) #define _AR_FLUSH(ctx, id) #endif // Use these macros for api thread. 
-#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id) -#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count) #define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event) // Use these macros for worker threads. -#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id) -#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count) #define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event) #define AR_FLUSH(id) _AR_FLUSH(AR_WORKER_CTX, id) diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 66c4b74942e..a9b1372d9ab 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -150,7 +150,7 @@ void ProcessStoreTiles( uint32_t workerId, void *pUserData) { - AR_BEGIN(FEProcessStoreTiles, pDC->drawId); + RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId); MacroTileMgr *pTileMgr = pDC->pTileMgr; STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData; @@ -175,7 +175,7 @@ void ProcessStoreTiles( } } - AR_END(FEProcessStoreTiles, 0); + RDTSC_END(FEProcessStoreTiles, 0); } ////////////////////////////////////////////////////////////////////////// @@ -191,7 +191,7 @@ void ProcessDiscardInvalidateTiles( uint32_t workerId, void *pUserData) { - AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId); + RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId); DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; MacroTileMgr *pTileMgr = pDC->pTileMgr; @@ -230,7 +230,7 @@ void ProcessDiscardInvalidateTiles( } } - AR_END(FEProcessInvalidateTiles, 0); + RDTSC_END(FEProcessInvalidateTiles, 0); } ////////////////////////////////////////////////////////////////////////// @@ -507,7 +507,7 @@ static void StreamOut( { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEStreamout, pDC->drawId); + RDTSC_BEGIN(FEStreamout, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_STREAMOUT_STATE &soState = 
state.soState; @@ -582,7 +582,7 @@ static void StreamOut( UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded); UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); - AR_END(FEStreamout, 1); + RDTSC_END(FEStreamout, 1); } #if USE_SIMD16_FRONTEND @@ -801,7 +801,7 @@ static void GeometryShaderStage( { SWR_CONTEXT *pContext = pDC->pContext; - AR_BEGIN(FEGeometryShader, pDC->drawId); + RDTSC_BEGIN(FEGeometryShader, pDC->drawId); const API_STATE& state = GetApiState(pDC); const SWR_GS_STATE* pState = &state.gsState; @@ -1073,7 +1073,7 @@ static void GeometryShaderStage( UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount); UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated); AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims)); - AR_END(FEGeometryShader, 1); + RDTSC_END(FEGeometryShader, 1); } ////////////////////////////////////////////////////////////////////////// @@ -1253,9 +1253,9 @@ static void TessellationStages( hsContext.mask = GenerateMask(numPrims); // Run the HS - AR_BEGIN(FEHullShader, pDC->drawId); + RDTSC_BEGIN(FEHullShader, pDC->drawId); state.pfnHsFunc(GetPrivateState(pDC), &hsContext); - AR_END(FEHullShader, 0); + RDTSC_END(FEHullShader, 0); UPDATE_STAT_FE(HsInvocations, numPrims); @@ -1265,10 +1265,10 @@ static void TessellationStages( { // Run Tessellator SWR_TS_TESSELLATED_DATA tsData = { 0 }; - AR_BEGIN(FETessellation, pDC->drawId); + RDTSC_BEGIN(FETessellation, pDC->drawId); TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData); AR_EVENT(TessPrimCount(1)); - AR_END(FETessellation, 0); + RDTSC_END(FETessellation, 0); if (tsData.NumPrimitives == 0) { @@ -1317,9 +1317,9 @@ static void TessellationStages( { dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations); - AR_BEGIN(FEDomainShader, pDC->drawId); + RDTSC_BEGIN(FEDomainShader, pDC->drawId); state.pfnDsFunc(GetPrivateState(pDC), &dsContext); - AR_END(FEDomainShader, 0); + 
RDTSC_END(FEDomainShader, 0); dsInvocations += KNOB_SIMD_WIDTH; } @@ -1390,14 +1390,14 @@ static void TessellationStages( #else simdvector prim[3]; // Only deal with triangles, lines, or points #endif - AR_BEGIN(FEPAAssemble, pDC->drawId); + RDTSC_BEGIN(FEPAAssemble, pDC->drawId); bool assemble = #if USE_SIMD16_FRONTEND tessPa.Assemble(VERTEX_POSITION_SLOT, prim_simd16); #else tessPa.Assemble(VERTEX_POSITION_SLOT, prim); #endif - AR_END(FEPAAssemble, 1); + RDTSC_END(FEPAAssemble, 1); SWR_ASSERT(assemble); SWR_ASSERT(pfnClipFunc); @@ -1520,7 +1520,7 @@ void ProcessDraw( } #endif - AR_BEGIN(FEProcessDraw, pDC->drawId); + RDTSC_BEGIN(FEProcessDraw, pDC->drawId); DRAW_WORK& work = *(DRAW_WORK*)pUserData; const API_STATE& state = GetApiState(pDC); @@ -1725,7 +1725,7 @@ void ProcessDraw( if (i < endVertex) { // 1. Execute FS/VS for a single SIMD. - AR_BEGIN(FEFetchShader, pDC->drawId); + RDTSC_BEGIN(FEFetchShader, pDC->drawId); #if USE_SIMD16_SHADERS state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin); #else @@ -1736,7 +1736,7 @@ void ProcessDraw( state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_hi, vin_hi); } #endif - AR_END(FEFetchShader, 0); + RDTSC_END(FEFetchShader, 0); // forward fetch generated vertex IDs to the vertex shader #if USE_SIMD16_SHADERS @@ -1780,7 +1780,7 @@ void ProcessDraw( if (!KNOB_TOSS_FETCH) #endif { - AR_BEGIN(FEVertexShader, pDC->drawId); + RDTSC_BEGIN(FEVertexShader, pDC->drawId); #if USE_SIMD16_VS state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); #else @@ -1791,7 +1791,7 @@ void ProcessDraw( state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi); } #endif - AR_END(FEVertexShader, 0); + RDTSC_END(FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); } @@ -1979,9 +1979,9 @@ void ProcessDraw( { // 1. Execute FS/VS for a single SIMD. 
- AR_BEGIN(FEFetchShader, pDC->drawId); + RDTSC_BEGIN(FEFetchShader, pDC->drawId); state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo, vout); - AR_END(FEFetchShader, 0); + RDTSC_END(FEFetchShader, 0); // forward fetch generated vertex IDs to the vertex shader vsContext.VertexID = fetchInfo.VertexID; @@ -2001,9 +2001,9 @@ void ProcessDraw( if (!KNOB_TOSS_FETCH) #endif { - AR_BEGIN(FEVertexShader, pDC->drawId); + RDTSC_BEGIN(FEVertexShader, pDC->drawId); state.pfnVertexFunc(GetPrivateState(pDC), &vsContext); - AR_END(FEVertexShader, 0); + RDTSC_END(FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); } @@ -2014,9 +2014,9 @@ void ProcessDraw( { simdvector prim[MAX_NUM_VERTS_PER_PRIM]; // PaAssemble returns false if there is not enough verts to assemble. - AR_BEGIN(FEPAAssemble, pDC->drawId); + RDTSC_BEGIN(FEPAAssemble, pDC->drawId); bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim); - AR_END(FEPAAssemble, 1); + RDTSC_END(FEPAAssemble, 1); #if KNOB_ENABLE_TOSS_POINTS if (!KNOB_TOSS_FETCH) @@ -2104,7 +2104,7 @@ void ProcessDraw( #endif - AR_END(FEProcessDraw, numPrims * work.numInstances); + RDTSC_END(FEProcessDraw, numPrims * work.numInstances); } struct FEDrawChooser diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index ae1e9c779ba..6c5f17d8ec8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -53,7 +53,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi #endif // bloat line to two tris and call the triangle rasterizer twice - AR_BEGIN(BERasterizeLine, pDC->drawId); + RDTSC_BEGIN(BERasterizeLine, pDC->drawId); const API_STATE &state = GetApiState(pDC); const SWR_RASTSTATE &rastState = state.rastState; @@ -246,7 +246,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi pfnTriRast(pDC, workerId, macroTile, 
(void*)&newWorkDesc); } - AR_END(BERasterizeLine, 1); + RDTSC_END(BERasterizeLine, 1); } void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) @@ -308,9 +308,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT, renderBuffers, triDesc.triFlags.renderTargetArrayIndex); - AR_BEGIN(BEPixelBackend, pDC->drawId); + RDTSC_BEGIN(BEPixelBackend, pDC->drawId); backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers); - AR_END(BEPixelBackend, 0); + RDTSC_END(BEPixelBackend, 0); } void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h index 081e4dd67d7..6dba1b66a75 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h @@ -781,9 +781,9 @@ struct GenerateSVInnerCoverage } // not trivial accept or reject, must rasterize full tile - AR_BEGIN(BERasterizePartial, pDC->drawId); + RDTSC_BEGIN(BERasterizePartial, pDC->drawId); innerCoverageMask = rasterizePartialTile(pDC, startQuadEdgesAdj, pRastEdges); - AR_END(BERasterizePartial, 0); + RDTSC_END(BERasterizePartial, 0); } }; @@ -847,8 +847,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, return; } #endif - AR_BEGIN(BERasterizeTriangle, pDC->drawId); - AR_BEGIN(BETriangleSetup, pDC->drawId); + RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId); + RDTSC_BEGIN(BETriangleSetup, pDC->drawId); const API_STATE &state = GetApiState(pDC); const SWR_RASTSTATE &rastState = state.rastState; @@ -1014,7 +1014,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, SWR_ASSERT(intersect.xmin <= intersect.xmax &&
intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0); - AR_END(BETriangleSetup, 0); + RDTSC_END(BETriangleSetup, 0); // update triangle desc uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); @@ -1027,11 +1027,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, if (numTilesX == 0 || numTilesY == 0) { RDTSC_EVENT(BEEmptyTriangle, 1, 0); - AR_END(BERasterizeTriangle, 1); + RDTSC_END(BERasterizeTriangle, 1); return; } - AR_BEGIN(BEStepSetup, pDC->drawId); + RDTSC_BEGIN(BEStepSetup, pDC->drawId); // Step to pixel center of top-left pixel of the triangle bbox // Align intersect bbox (top/left) to raster tile's (top/left). @@ -1140,7 +1140,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, } } - AR_END(BEStepSetup, 0); + RDTSC_END(BEStepSetup, 0); uint32_t tY = minTileY; uint32_t tX = minTileX; @@ -1233,9 +1233,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, } // not trivial accept or reject, must rasterize full tile - AR_BEGIN(BERasterizePartial, pDC->drawId); + RDTSC_BEGIN(BERasterizePartial, pDC->drawId); triDesc.coverageMask[sampleNum] = rasterizePartialTile(pDC, startQuadEdges, rastEdges); - AR_END(BERasterizePartial, 0); + RDTSC_END(BERasterizePartial, 0); triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum]; @@ -1271,9 +1271,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage); } - AR_BEGIN(BEPixelBackend, pDC->drawId); + RDTSC_BEGIN(BEPixelBackend, pDC->drawId); backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers); - AR_END(BEPixelBackend, 0); + RDTSC_END(BEPixelBackend, 0); } // step to the next tile in X @@ -1292,7 +1292,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t 
workerId, uint32_t macroTile, StepRasterTileY(state.colorHottileEnable, renderBuffers, currentRenderBufferRow); } - AR_END(BERasterizeTriangle, 1); + RDTSC_END(BERasterizeTriangle, 1); } // Get pointers to hot tile memory for color RT, depth, stencil diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index d684ffe7278..4d79168d2dc 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -541,7 +541,7 @@ bool WorkOnFifoBE( { BE_WORK *pWork; - AR_BEGIN(WorkerFoundWork, pDC->drawId); + RDTSC_BEGIN(WorkerFoundWork, pDC->drawId); uint32_t numWorkItems = tile->getNumQueued(); SWR_ASSERT(numWorkItems); @@ -562,7 +562,7 @@ bool WorkOnFifoBE( pWork->pfnWork(pDC, workerId, tileID, &pWork->desc); tile->dequeue(); } - AR_END(WorkerFoundWork, numWorkItems); + RDTSC_END(WorkerFoundWork, numWorkItems); _ReadWriteBarrier(); @@ -849,9 +849,9 @@ DWORD workerThreadMain(LPVOID pData) if (IsBEThread) { - AR_BEGIN(WorkerWorkOnFifoBE, 0); + RDTSC_BEGIN(WorkerWorkOnFifoBE, 0); bShutdown |= WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask); - AR_END(WorkerWorkOnFifoBE, 0); + RDTSC_END(WorkerWorkOnFifoBE, 0); WorkOnCompute(pContext, workerId, curDrawBE); } diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp index 3ade6e4333e..f4686703291 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp @@ -396,19 +396,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui if (pHotTile->state == HOTTILE_INVALID) { - AR_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, 
(SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - AR_END(BELoadTiles, 0); + RDTSC_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - AR_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. ClearColorHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - AR_END(BELoadTiles, 0); + RDTSC_END(BELoadTiles, 0); } colorHottileEnableMask &= ~(1 << rtSlot); } @@ -419,19 +419,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { - AR_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - AR_END(BELoadTiles, 0); + RDTSC_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - AR_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. 
ClearDepthHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - AR_END(BELoadTiles, 0); + RDTSC_END(BELoadTiles, 0); } } @@ -441,19 +441,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { - AR_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; - AR_END(BELoadTiles, 0); + RDTSC_END(BELoadTiles, 0); } else if (pHotTile->state == HOTTILE_CLEAR) { - AR_BEGIN(BELoadTiles, pDC->drawId); + RDTSC_BEGIN(BELoadTiles, pDC->drawId); // Clear the tile. ClearStencilHotTile(pHotTile); pHotTile->state = HOTTILE_DIRTY; - AR_END(BELoadTiles, 0); + RDTSC_END(BELoadTiles, 0); } } } -- 2.30.2