#include <smmintrin.h>
-#include "rdtsc_core.h"
#include "backend.h"
#include "depthstencil.h"
#include "tilemgr.h"
#include <algorithm>
-const __m128 vTileOffsetsX = {0.5, KNOB_TILE_X_DIM - 0.5, 0.5, KNOB_TILE_X_DIM - 0.5};
-const __m128 vTileOffsetsY = {0.5, 0.5, KNOB_TILE_Y_DIM - 0.5, KNOB_TILE_Y_DIM - 0.5};
-
-/// @todo move to common lib
-#define MASKTOVEC(i3,i2,i1,i0) {-i0,-i1,-i2,-i3}
-static const __m128 gMaskToVec[] = {
- MASKTOVEC(0,0,0,0),
- MASKTOVEC(0,0,0,1),
- MASKTOVEC(0,0,1,0),
- MASKTOVEC(0,0,1,1),
- MASKTOVEC(0,1,0,0),
- MASKTOVEC(0,1,0,1),
- MASKTOVEC(0,1,1,0),
- MASKTOVEC(0,1,1,1),
- MASKTOVEC(1,0,0,0),
- MASKTOVEC(1,0,0,1),
- MASKTOVEC(1,0,1,0),
- MASKTOVEC(1,0,1,1),
- MASKTOVEC(1,1,0,0),
- MASKTOVEC(1,1,0,1),
- MASKTOVEC(1,1,1,0),
- MASKTOVEC(1,1,1,1),
-};
-
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4]);
static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
/// @param pDC - pointer to draw context (dispatch).
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId)
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
{
RDTSC_START(BEDispatch);
SWR_ASSERT(pTaskData != nullptr);
// Ensure spill fill memory has been allocated.
- if (pDC->pSpillFill[workerId] == nullptr)
+ size_t spillFillSize = pDC->pState->state.totalSpillFillSize;
+ if (spillFillSize && pSpillFillBuffer == nullptr)
{
- ///@todo Add state which indicates the spill fill size.
- pDC->pSpillFill[workerId] = (uint8_t*)pDC->pArena->AllocAlignedSync(4096 * 1024, sizeof(float) * 8);
+ pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
}
const API_STATE& state = GetApiState(pDC);
csContext.dispatchDims[1] = pTaskData->threadGroupCountY;
csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
csContext.pTGSM = pContext->pScratch[workerId];
- csContext.pSpillFillBuffer = pDC->pSpillFill[workerId];
+ csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
}
template<SWR_FORMAT format>
-void ClearRasterTile(BYTE *pTileBuffer, simdvector &value)
+void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
{
auto lambda = [&](int comp)
{
/// @todo clear data should come in as RGBA32_FLOAT
DWORD clearData[4];
float clearFloat[4];
- clearFloat[0] = ((BYTE*)(&pClear->clearRTColor))[0] / 255.0f;
- clearFloat[1] = ((BYTE*)(&pClear->clearRTColor))[1] / 255.0f;
- clearFloat[2] = ((BYTE*)(&pClear->clearRTColor))[2] / 255.0f;
- clearFloat[3] = ((BYTE*)(&pClear->clearRTColor))[3] / 255.0f;
+ clearFloat[0] = ((uint8_t*)(&pClear->clearRTColor))[0] / 255.0f;
+ clearFloat[1] = ((uint8_t*)(&pClear->clearRTColor))[1] / 255.0f;
+ clearFloat[2] = ((uint8_t*)(&pClear->clearRTColor))[2] / 255.0f;
+ clearFloat[3] = ((uint8_t*)(&pClear->clearRTColor))[3] / 255.0f;
clearData[0] = *(DWORD*)&clearFloat[0];
clearData[1] = *(DWORD*)&clearFloat[1];
clearData[2] = *(DWORD*)&clearFloat[2];
}
-void ProcessInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
+void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
{
- INVALIDATE_TILES_DESC *pDesc = (INVALIDATE_TILES_DESC*)pData;
+ DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData;
SWR_CONTEXT *pContext = pDC->pContext;
+ const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
+
for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i)
{
if (pDesc->attachmentMask & (1 << i))
{
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i, false);
+ HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(
+ pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i, pDesc->createNewTiles, numSamples);
if (pHotTile)
{
- pHotTile->state = HOTTILE_INVALID;
+ pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
}
}
}
return _simd_movemask_ps(vClipMask);
}
-template<bool perspMask>
-INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
-{
- if(perspMask)
- {
- // evaluate I,J
- psContext.vI.center = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
- psContext.vJ.center = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.center, psContext.vY.center);
- psContext.vI.center = _simd_mul_ps(psContext.vI.center, coeffs.vRecipDet);
- psContext.vJ.center = _simd_mul_ps(psContext.vJ.center, coeffs.vRecipDet);
-
- // interpolate 1/w
- psContext.vOneOverW.center = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.center, psContext.vJ.center);
- }
-}
-
-template<bool perspMask>
-INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
-{
- if(perspMask)
- {
- // evaluate I,J
- psContext.vI.sample = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
- psContext.vJ.sample = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.sample, psContext.vY.sample);
- psContext.vI.sample = _simd_mul_ps(psContext.vI.sample, coeffs.vRecipDet);
- psContext.vJ.sample = _simd_mul_ps(psContext.vJ.sample, coeffs.vRecipDet);
-
- // interpolate 1/w
- psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample);
- }
-}
-
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Centroid behaves exactly as follows :
-// (1) If all samples in the primitive are covered, the attribute is evaluated at the pixel center (even if the sample pattern does not happen to
-// have a sample location there).
-// (2) Else the attribute is evaluated at the first covered sample, in increasing order of sample index, where sample coverage is after ANDing the
-// coverage with the SampleMask Rasterizer State.
-// (3) If no samples are covered, such as on helper pixels executed off the bounds of a primitive to fill out 2x2 pixel stamps, the attribute is
-// evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the pixel, then the first sample covered by the
-// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-template<SWR_MULTISAMPLE_COUNT sampleCount, bool bForcedSampleCount>
-INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const uint64_t *const coverageMask, const uint32_t sampleMask,
- const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
-{
- uint32_t inputMask[KNOB_SIMD_WIDTH];
-
- generateInputCoverage<sampleCount, 1, bForcedSampleCount>(coverageMask, inputMask, sampleMask);
-
- // Case (2) - partially covered pixel
-
- // scan for first covered sample per pixel in the 4x2 span
- unsigned long sampleNum[KNOB_SIMD_WIDTH];
- (inputMask[0] > 0) ? (_BitScanForward(&sampleNum[0], inputMask[0])) : (sampleNum[0] = 0);
- (inputMask[1] > 0) ? (_BitScanForward(&sampleNum[1], inputMask[1])) : (sampleNum[1] = 0);
- (inputMask[2] > 0) ? (_BitScanForward(&sampleNum[2], inputMask[2])) : (sampleNum[2] = 0);
- (inputMask[3] > 0) ? (_BitScanForward(&sampleNum[3], inputMask[3])) : (sampleNum[3] = 0);
- (inputMask[4] > 0) ? (_BitScanForward(&sampleNum[4], inputMask[4])) : (sampleNum[4] = 0);
- (inputMask[5] > 0) ? (_BitScanForward(&sampleNum[5], inputMask[5])) : (sampleNum[5] = 0);
- (inputMask[6] > 0) ? (_BitScanForward(&sampleNum[6], inputMask[6])) : (sampleNum[6] = 0);
- (inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
-
- // look up and set the sample offsets from UL pixel corner for first covered sample
- __m256 vXSample = _mm256_set_ps(MultisampleTraits<sampleCount>::X(sampleNum[7]),
- MultisampleTraits<sampleCount>::X(sampleNum[6]),
- MultisampleTraits<sampleCount>::X(sampleNum[5]),
- MultisampleTraits<sampleCount>::X(sampleNum[4]),
- MultisampleTraits<sampleCount>::X(sampleNum[3]),
- MultisampleTraits<sampleCount>::X(sampleNum[2]),
- MultisampleTraits<sampleCount>::X(sampleNum[1]),
- MultisampleTraits<sampleCount>::X(sampleNum[0]));
-
- __m256 vYSample = _mm256_set_ps(MultisampleTraits<sampleCount>::Y(sampleNum[7]),
- MultisampleTraits<sampleCount>::Y(sampleNum[6]),
- MultisampleTraits<sampleCount>::Y(sampleNum[5]),
- MultisampleTraits<sampleCount>::Y(sampleNum[4]),
- MultisampleTraits<sampleCount>::Y(sampleNum[3]),
- MultisampleTraits<sampleCount>::Y(sampleNum[2]),
- MultisampleTraits<sampleCount>::Y(sampleNum[1]),
- MultisampleTraits<sampleCount>::Y(sampleNum[0]));
- // add sample offset to UL pixel corner
- vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
- vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
-
- // Case (1) and case (3b) - All samples covered or not covered with full SampleMask
- static const __m256i vFullyCoveredMask = MultisampleTraits<sampleCount>::FullSampleMask();
- __m256i vInputCoveragei = _mm256_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]);
- __m256i vAllSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vFullyCoveredMask);
-
- static const __m256i vZero = _simd_setzero_si();
- const __m256i vSampleMask = _simd_and_si(_simd_set1_epi32(sampleMask), vFullyCoveredMask);
- __m256i vNoSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vZero);
- __m256i vIsFullSampleMask = _simd_cmpeq_epi32(vSampleMask, vFullyCoveredMask);
- __m256i vCase3b = _simd_and_si(vNoSamplesCovered, vIsFullSampleMask);
-
- __m256i vEvalAtCenter = _simd_or_si(vAllSamplesCovered, vCase3b);
-
- // set the centroid position based on results from above
- psContext.vX.centroid = _simd_blendv_ps(vXSample, psContext.vX.center, _simd_castsi_ps(vEvalAtCenter));
- psContext.vY.centroid = _simd_blendv_ps(vYSample, psContext.vY.center, _simd_castsi_ps(vEvalAtCenter));
-
- // Case (3a) No samples covered and partial sample mask
- __m256i vSomeSampleMaskSamples = _simd_cmplt_epi32(vSampleMask, vFullyCoveredMask);
- // sample mask should never be all 0's for this case, but handle it anyways
- unsigned long firstCoveredSampleMaskSample = 0;
- (sampleMask > 0) ? (_BitScanForward(&firstCoveredSampleMaskSample, sampleMask)) : (firstCoveredSampleMaskSample = 0);
-
- __m256i vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
-
- vXSample = _simd_set1_ps(MultisampleTraits<sampleCount>::X(firstCoveredSampleMaskSample));
- vYSample = _simd_set1_ps(MultisampleTraits<sampleCount>::Y(firstCoveredSampleMaskSample));
-
- // blend in case 3a pixel locations
- psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
- psContext.vY.centroid = _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
-}
-
-template<uint32_t sampleCount, uint32_t persp, uint32_t standardPattern, uint32_t forcedMultisampleCount>
-INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext,
- const uint64_t *const coverageMask, const uint32_t sampleMask,
- const simdscalar vXSamplePosUL, const simdscalar vYSamplePosUL)
-{
- static const bool bPersp = (bool)persp;
- static const bool bIsStandardPattern = (bool)standardPattern;
- static const bool bForcedMultisampleCount = (bool)forcedMultisampleCount;
-
- // calculate centroid positions
- if(bPersp)
- {
- if(bIsStandardPattern)
- {
- ///@ todo: don't need to generate input coverage 2x if input coverage and centroid
- CalcCentroidPos<(SWR_MULTISAMPLE_COUNT)sampleCount, bForcedMultisampleCount>(psContext, coverageMask, sampleMask, vXSamplePosUL, vYSamplePosUL);
- }
- else
- {
- static const __m256 pixelCenter = _simd_set1_ps(0.5f);
- psContext.vX.centroid = _simd_add_ps(vXSamplePosUL, pixelCenter);
- psContext.vY.centroid = _simd_add_ps(vYSamplePosUL, pixelCenter);
- }
- // evaluate I,J
- psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
- psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
- psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
- psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
-
- // interpolate 1/w
- psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
- }
-}
-
-template<uint32_t NumRT, uint32_t sampleCountT>
-void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask)
-{
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
- uint32_t rasterTileColorOffset = MultisampleTraits<sampleCount>::RasterTileColorOffset(sample);
- simdvector blendOut;
-
- for(uint32_t rt = 0; rt < NumRT; ++rt)
- {
- uint8_t *pColorSample;
- if(sampleCount == SWR_MULTISAMPLE_1X)
- {
- pColorSample = pColorBase[rt];
- }
- else
- {
- pColorSample = pColorBase[rt] + rasterTileColorOffset;
- }
-
- const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
- // pfnBlendFunc may not update all channels. Initialize with PS output.
- /// TODO: move this into the blend JIT.
- blendOut = psContext.shaded[rt];
-
- // Blend outputs and update coverage mask for alpha test
- if(pfnBlendFunc[rt] != nullptr)
- {
- pfnBlendFunc[rt](
- pBlendState,
- psContext.shaded[rt],
- psContext.shaded[1],
- sample,
- pColorSample,
- blendOut,
- &psContext.oMask,
- (simdscalari*)&coverageMask);
- }
-
- // final write mask
- simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
-
- ///@todo can only use maskstore fast path if bpc is 32. Assuming hot tile is RGBA32_FLOAT.
- static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
-
- const uint32_t simd = KNOB_SIMD_WIDTH * sizeof(float);
-
- // store with color mask
- if(!pRTBlend->writeDisableRed)
- {
- _simd_maskstore_ps((float*)pColorSample, outputMask, blendOut.x);
- }
- if(!pRTBlend->writeDisableGreen)
- {
- _simd_maskstore_ps((float*)(pColorSample + simd), outputMask, blendOut.y);
- }
- if(!pRTBlend->writeDisableBlue)
- {
- _simd_maskstore_ps((float*)(pColorSample + simd * 2), outputMask, blendOut.z);
- }
- if(!pRTBlend->writeDisableAlpha)
- {
- _simd_maskstore_ps((float*)(pColorSample + simd * 3), outputMask, blendOut.w);
- }
- }
-}
-
-template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
+template<typename T>
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
+ RDTSC_START(BESingleSampleBackend);
RDTSC_START(BESetup);
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- static const bool bInputCoverage = (bool)inputCoverage;
- static const bool bCentroidPos = (bool)centroidPos;
SWR_CONTEXT *pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
const SWR_RASTSTATE& rastState = state.rastState;
const SWR_PS_STATE *pPSState = &state.psState;
const SWR_BLEND_STATE *pBlendState = &state.blendState;
- const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
uint64_t coverageMask = work.coverageMask[0];
// broadcast scalars
psContext.J = work.J;
psContext.recipDet = work.recipDet;
psContext.pRecipW = work.pRecipW;
- psContext.pSamplePosX = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosX;
- psContext.pSamplePosY = (const float*)&MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosY;
+ psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
+ psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
- if(bInputCoverage)
- {
- generateInputCoverage<SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
- }
-
if(coverageMask & MASK)
{
- RDTSC_START(BEBarycentric);
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps((float)xx));
// pixel center
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
- backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
+ if(T::bInputCoverage)
+ {
+ generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
+ }
+
+ RDTSC_START(BEBarycentric);
+ CalcPixelBarycentrics(coeffs, psContext);
- if(bCentroidPos)
+ if(T::bCentroidPos)
{
// for 1x case, centroid is pixel center
psContext.vX.centroid = psContext.vX.center;
psContext.vOneOverW.centroid = psContext.vOneOverW.center;
}
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
simdmask clipCoverageMask = coverageMask & MASK;
-
// interpolate user clip distance if available
if(rastState.clipDistanceMask)
{
simdscalar stencilPassMask = vCoverageMask;
// Early-Z?
- if(CanEarlyZ(pPSState))
+ if(T::bCanEarlyZ)
{
RDTSC_START(BEEarlyDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
// late-Z
- if(!CanEarlyZ(pPSState))
+ if(!T::bCanEarlyZ)
{
RDTSC_START(BELateDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
RDTSC_STOP(BELateDepthTest, 0, 0);
// output merger
RDTSC_START(BEOutputMerger);
- backendFuncs.pfnOutputMerger(psContext, pColorBase, 0, pBlendState, state.pfnBlendFunc,
- vCoverageMask, depthPassMask);
+ OutputMerger(psContext, pColorBase, 0, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
// do final depth write after all pixel kills
if (!pPSState->forceEarlyZ)
RDTSC_STOP(BEEndTile, 0, 0);
}
}
+ RDTSC_STOP(BESingleSampleBackend, 0, 0);
}
-template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
+template<typename T>
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
- static const bool bInputCoverage = (bool)inputCoverage;
- static const bool bCentroidPos = (bool)centroidPos;
-
+ RDTSC_START(BESampleRateBackend);
RDTSC_START(BESetup);
SWR_CONTEXT *pContext = pDC->pContext;
const SWR_RASTSTATE& rastState = state.rastState;
const SWR_PS_STATE *pPSState = &state.psState;
const SWR_BLEND_STATE *pBlendState = &state.blendState;
- const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
// broadcast scalars
BarycentricCoeffs coeffs;
psContext.I = work.I;
psContext.J = work.J;
psContext.recipDet = work.recipDet;
- psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX;
- psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY;
- const uint32_t numSamples = MultisampleTraits<sampleCount>::numSamples;
+ psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
+ psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
RDTSC_START(BEBarycentric);
- backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
+ CalcPixelBarycentrics(coeffs, psContext);
RDTSC_STOP(BEBarycentric, 0, 0);
- if(bInputCoverage)
+ if(T::bInputCoverage)
{
- generateInputCoverage<sampleCount, SWR_MSAA_STANDARD_PATTERN, false>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
+ generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
}
- if(bCentroidPos)
+ if(T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
RDTSC_START(BEBarycentric);
- backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
+ CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
RDTSC_STOP(BEBarycentric, 0, 0);
}
- for(uint32_t sample = 0; sample < numSamples; sample++)
+ for(uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
{
- if (work.coverageMask[sample] & MASK)
+ simdmask coverageMask = work.coverageMask[sample] & MASK;
+ if (coverageMask)
{
RDTSC_START(BEBarycentric);
-
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample));
-
- simdmask coverageMask = work.coverageMask[sample] & MASK;
- simdscalar vCoverageMask = vMask(coverageMask);
+ psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
+ psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
- backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
+ CalcSampleBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
-
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
// interpolate user clip distance if available
coverageMask &= ~ComputeUserClipMask(rastState.clipDistanceMask, work.pUserClipBuffer,
psContext.vI.sample, psContext.vJ.sample);
}
-
+
+ simdscalar vCoverageMask = vMask(coverageMask);
simdscalar depthPassMask = vCoverageMask;
simdscalar stencilPassMask = vCoverageMask;
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
+ uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
+ uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
// Early-Z?
- if (CanEarlyZ(pPSState))
+ if (T::bCanEarlyZ)
{
RDTSC_START(BEEarlyDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
- //// late-Z
- if (!CanEarlyZ(pPSState))
+ // late-Z
+ if (!T::bCanEarlyZ)
{
RDTSC_START(BELateDepthTest);
- depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
RDTSC_STOP(BELateDepthTest, 0, 0);
// output merger
RDTSC_START(BEOutputMerger);
- backendFuncs.pfnOutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc,
- vCoverageMask, depthPassMask);
+ OutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
// do final depth write after all pixel kills
if (!pPSState->forceEarlyZ)
RDTSC_STOP(BEEndTile, 0, 0);
}
}
+ RDTSC_STOP(BESampleRateBackend, 0, 0);
}
-template<uint32_t sampleCountT, uint32_t samplePattern, uint32_t inputCoverage, uint32_t centroidPos, uint32_t forcedSampleCount>
+template<typename T>
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
- static const bool bIsStandardPattern = (bool)samplePattern;
- static const bool bInputCoverage = (bool)inputCoverage;
- static const bool bCentroidPos = (bool)centroidPos;
- static const bool bForcedSampleCount = (bool)forcedSampleCount;
-
+ RDTSC_START(BEPixelRateBackend);
RDTSC_START(BESetup);
SWR_CONTEXT *pContext = pDC->pContext;
const SWR_RASTSTATE& rastState = state.rastState;
const SWR_PS_STATE *pPSState = &state.psState;
const SWR_BLEND_STATE *pBlendState = &state.blendState;
- const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
// broadcast scalars
BarycentricCoeffs coeffs;
psContext.I = work.I;
psContext.J = work.J;
psContext.recipDet = work.recipDet;
- psContext.pSamplePosX = (const float*)&MultisampleTraits<sampleCount>::samplePosX;
- psContext.pSamplePosY = (const float*)&MultisampleTraits<sampleCount>::samplePosY;
+ psContext.pSamplePosX = (const float*)&T::MultisampleT::samplePosX;
+ psContext.pSamplePosY = (const float*)&T::MultisampleT::samplePosY;
psContext.sampleIndex = 0;
-
- uint32_t numCoverageSamples;
- if(bIsStandardPattern)
- {
- numCoverageSamples = MultisampleTraits<sampleCount>::numSamples;
- }
- else
- {
- numCoverageSamples = 1;
- }
-
- uint32_t numOMSamples;
- // RT has to be single sample if we're in forcedMSAA mode
- if(bForcedSampleCount && (sampleCount > SWR_MULTISAMPLE_1X))
- {
- numOMSamples = 1;
- }
- // unless we're forced to single sample, in which case we run the OM at the sample count of the RT
- else if(bForcedSampleCount && (sampleCount == SWR_MULTISAMPLE_1X))
- {
- numOMSamples = GetNumSamples(pBlendState->sampleCount);
- }
- // else we're in normal MSAA mode and rasterizer and OM are running at the same sample count
- else
- {
- numOMSamples = MultisampleTraits<sampleCount>::numSamples;
- }
+ PixelRateZTestLoop<T> PixelRateZTest(pDC, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask);
+
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps((float)yy));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps((float)yy));
for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
- simdscalar vZ[MultisampleTraits<sampleCount>::numSamples];
+ if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
+
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps((float)xx));
// set pixel center positions
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
- if (bInputCoverage)
+ RDTSC_START(BEBarycentric);
+ CalcPixelBarycentrics(coeffs, psContext);
+ RDTSC_STOP(BEBarycentric, 0, 0);
+
+ if (T::bInputCoverage)
{
- generateInputCoverage<sampleCount, bIsStandardPattern, bForcedSampleCount>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
+ generateInputCoverage<T>(&work.coverageMask[0], psContext.inputMask, pBlendState->sampleMask);
}
- if(bCentroidPos)
+ if(T::bCentroidPos)
{
///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
RDTSC_START(BEBarycentric);
- backendFuncs.pfnCalcCentroidBarycentrics(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
+ CalcCentroidBarycentrics<T>(coeffs, psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
RDTSC_STOP(BEBarycentric, 0, 0);
}
- // if oDepth written to, or there is a potential to discard any samples, we need to
- // run the PS early, then interp or broadcast Z and test
- if(pPSState->writesODepth || pPSState->killsPixel)
+ simdscalar activeLanes;
+ if(T::bForcedSampleCount)
{
- RDTSC_START(BEBarycentric);
- backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
-
- // interpolate z
- psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
- RDTSC_STOP(BEBarycentric, 0, 0);
+ // candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
+ const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(pBlendState->sampleMask), _simd_setzero_si()));
+ activeLanes = _simd_and_ps(vMask(work.anyCoveredSamples & MASK), vSampleMask);
+ }
- // execute pixel shader
- RDTSC_START(BEPixelShader);
- state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- RDTSC_STOP(BEPixelShader, 0, 0);
+ // Early-Z?
+ if(T::bCanEarlyZ && !T::bForcedSampleCount)
+ {
+ activeLanes = _simd_setzero_ps();
+ uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
+ UPDATE_STAT(DepthPassCount, depthPassCount);
}
- else
+ // if we can't do early z, set the active mask to any samples covered in the current simd
+ else if(!T::bCanEarlyZ && !T::bForcedSampleCount)
{
- psContext.activeMask = _simd_set1_epi32(-1);
+ activeLanes = vMask(work.anyCoveredSamples & MASK);
}
- // need to declare enough space for all samples
- simdscalar vCoverageMask[MultisampleTraits<sampleCount>::numSamples];
- simdscalar depthPassMask[MultisampleTraits<sampleCount>::numSamples];
- simdscalar stencilPassMask[MultisampleTraits<sampleCount>::numSamples];
- simdscalar anyDepthSamplePassed = _simd_setzero_ps();
- simdscalar anyStencilSamplePassed = _simd_setzero_ps();
- for(uint32_t sample = 0; sample < numCoverageSamples; sample++)
+ // if we have no covered samples that passed depth at this point, go to next tile
+ if(!_simd_movemask_ps(activeLanes))
{
- vCoverageMask[sample] = vMask(work.coverageMask[sample] & MASK);
-
- // pull mask back out for any discards and and with coverage
- vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_castsi_ps(psContext.activeMask));
-
- if (!_simd_movemask_ps(vCoverageMask[sample]))
- {
- vCoverageMask[sample] = depthPassMask[sample] = stencilPassMask[sample] = _simd_setzero_ps();
- continue;
- }
-
- if(bForcedSampleCount)
- {
- // candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
- const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(pBlendState->sampleMask), _simd_setzero_si()));
- anyDepthSamplePassed = _simd_or_ps(anyDepthSamplePassed, _simd_and_ps(vCoverageMask[sample], vSampleMask));
- continue;
- }
-
- depthPassMask[sample] = vCoverageMask[sample];
-
- // if oDepth isn't written to, we need to interpolate Z for each sample
- // if clip distances are enabled, we need to interpolate for each sample
- if(!pPSState->writesODepth || rastState.clipDistanceMask)
- {
- RDTSC_START(BEBarycentric);
- if(bIsStandardPattern)
- {
- // calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, MultisampleTraits<sampleCount>::vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, MultisampleTraits<sampleCount>::vY(sample));
- }
- else
- {
- psContext.vX.sample = psContext.vX.center;
- psContext.vY.sample = psContext.vY.center;
- }
-
- // calc I & J per sample
- backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
-
- // interpolate z
- if (!pPSState->writesODepth)
- {
- vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
- }
-
- ///@todo: perspective correct vs non-perspective correct clipping?
- // interpolate clip distances
- if (rastState.clipDistanceMask)
- {
- uint8_t clipMask = ComputeUserClipMask(rastState.clipDistanceMask, work.pUserClipBuffer,
- psContext.vI.sample, psContext.vJ.sample);
- vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], vMask(~clipMask));
- }
- RDTSC_STOP(BEBarycentric, 0, 0);
- }
- // else 'broadcast' and test psContext.vZ written from the PS each sample
- else
- {
- vZ[sample] = psContext.vZ;
- }
-
- // offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
-
- // ZTest for this sample
- RDTSC_START(BEEarlyDepthTest);
- stencilPassMask[sample] = vCoverageMask[sample];
- depthPassMask[sample] = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
- vZ[sample], pDepthSample, vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]);
- RDTSC_STOP(BEEarlyDepthTest, 0, 0);
-
- anyDepthSamplePassed = _simd_or_ps(anyDepthSamplePassed, depthPassMask[sample]);
- anyStencilSamplePassed = _simd_or_ps(anyStencilSamplePassed, stencilPassMask[sample]);
- uint32_t statMask = _simd_movemask_ps(depthPassMask[sample]);
- uint32_t statCount = _mm_popcnt_u32(statMask);
- UPDATE_STAT(DepthPassCount, statCount);
+ goto Endtile;
}
- // if we didn't have to execute the PS early, and at least 1 sample passed the depth test, run the PS
- if(!pPSState->writesODepth && !pPSState->killsPixel && _simd_movemask_ps(anyDepthSamplePassed))
+ if(pPSState->usesSourceDepth)
{
RDTSC_START(BEBarycentric);
- backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
+ }
- // execute pixel shader
- RDTSC_START(BEPixelShader);
- state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
- RDTSC_STOP(BEPixelShader, 0, 0);
+ // pixels that are currently active
+ psContext.activeMask = _simd_castps_si(activeLanes);
+ psContext.oMask = T::MultisampleT::FullSampleMask();
+
+ // execute pixel shader
+ RDTSC_START(BEPixelShader);
+ state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
+ UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
+ RDTSC_STOP(BEPixelShader, 0, 0);
+
+ // update active lanes to remove any discarded or oMask'd pixels
+ activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
+ if(!_simd_movemask_ps(activeLanes))
+ {
+ goto Endtile;
+ }
+
+ // late-Z
+ if(!T::bCanEarlyZ && !T::bForcedSampleCount)
+ {
+ uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
+ UPDATE_STAT(DepthPassCount, depthPassCount);
}
- ///@todo: make sure this works for kill pixel
- else if(!_simd_movemask_ps(anyStencilSamplePassed))
+
+ // if we have no covered samples that passed depth at this point, skip OM and go to next tile
+ if(!_simd_movemask_ps(activeLanes))
{
goto Endtile;
}
+ // output merger
// loop over all samples, broadcasting the results of the PS to all passing pixels
- for(uint32_t sample = 0; sample < numOMSamples; sample++)
+ for(uint32_t sample = 0; sample < GetNumOMSamples<T>(pBlendState->sampleCount); sample++)
{
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t * pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
-
- // output merger
RDTSC_START(BEOutputMerger);
-
- // skip if none of the pixels for this sample passed
- simdscalar coverageMaskSample;
- simdscalar depthMaskSample;
- simdscalar stencilMaskSample;
- simdscalar vInterpolatedZ;
-
- // forcedSampleCount outputs to any pixels with covered samples not masked off by SampleMask
- // depth test is disabled, so just set the z val to 0.
- if(bForcedSampleCount)
- {
- coverageMaskSample = depthMaskSample = anyDepthSamplePassed;
- vInterpolatedZ = _simd_setzero_ps();
- }
- else if(bIsStandardPattern)
+ // center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
+ uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
+ simdscalar coverageMask, depthMask;
+ if(T::bForcedSampleCount)
{
- if(!_simd_movemask_ps(depthPassMask[sample]))
- {
- depthPassMask[sample] = _simd_setzero_ps();
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample], pDepthSample, depthPassMask[sample],
- vCoverageMask[sample], pStencilSample, stencilPassMask[sample]);
- continue;
- }
- coverageMaskSample = vCoverageMask[sample];
- depthMaskSample = depthPassMask[sample];
- stencilMaskSample = stencilPassMask[sample];
- vInterpolatedZ = vZ[sample];
+ coverageMask = depthMask = activeLanes;
}
else
{
- // center pattern only needs to use a single depth test as all samples are at the same position
- if(!_simd_movemask_ps(depthPassMask[0]))
+ coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
+ depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
+ if(!_simd_movemask_ps(depthMask))
{
- depthPassMask[0] = _simd_setzero_ps();
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, vZ[0], pDepthSample, depthPassMask[0],
- vCoverageMask[0], pStencilSample, stencilPassMask[0]);
+ // stencil should already have been written in early/lateZ tests
+ RDTSC_STOP(BEOutputMerger, 0, 0);
continue;
}
- coverageMaskSample = (vCoverageMask[0]);
- depthMaskSample = depthPassMask[0];
- stencilMaskSample = stencilPassMask[0];
- vInterpolatedZ = vZ[0];
}
+
+ // broadcast the results of the PS to all passing pixels
+ OutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc, coverageMask, depthMask, pPSState->numRenderTargets);
- // output merger
- RDTSC_START(BEOutputMerger);
- backendFuncs.pfnOutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc,
- coverageMaskSample, depthMaskSample);
+ if(!pPSState->forceEarlyZ && !T::bForcedSampleCount)
+ {
+ uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
+ uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, vInterpolatedZ, pDepthSample, depthMaskSample,
- coverageMaskSample, pStencilSample, stencilMaskSample);
- RDTSC_STOP(BEOutputMerger, 0, 0);
+ DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
+ pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
+ }
+ RDTSC_STOP(BEOutputMerger, 0, 0);
}
-
Endtile:
RDTSC_START(BEEndTile);
- for(uint32_t sample = 0; sample < numCoverageSamples; sample++)
+ for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
+ work.anyCoveredSamples >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
pDepthBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
pStencilBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
RDTSC_STOP(BEEndTile, 0, 0);
}
}
+ RDTSC_STOP(BEPixelRateBackend, 0, 0);
}
// optimized backend flow with NULL PS
template<uint32_t sampleCountT>
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
+ RDTSC_START(BENullBackend);
+ ///@todo: handle center multisample pattern
+ typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
RDTSC_START(BESetup);
- static const SWR_MULTISAMPLE_COUNT sampleCount = (SWR_MULTISAMPLE_COUNT)sampleCountT;
SWR_CONTEXT *pContext = pDC->pContext;
const API_STATE& state = GetApiState(pDC);
- const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
+ const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
// broadcast scalars
BarycentricCoeffs coeffs;
coeffs.vRecipDet = _simd_broadcast_ss(&work.recipDet);
- BYTE *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
+ uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
RDTSC_STOP(BESetup, 0, 0);
while (_BitScanForward(&sample, sampleMask))
{
sampleMask &= ~(1 << sample);
- if (work.coverageMask[sample] & MASK)
+ simdmask coverageMask = work.coverageMask[sample] & MASK;
+ if (coverageMask)
{
RDTSC_START(BEBarycentric);
// calculate per sample positions
- psContext.vX.sample = _simd_add_ps(vXSamplePosUL, MultisampleTraits<sampleCount>::vX(sample));
- psContext.vY.sample = _simd_add_ps(vYSamplePosUL, MultisampleTraits<sampleCount>::vY(sample));
+ psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
+ psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
- backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
+ CalcSampleBarycentrics(coeffs, psContext);
- // interpolate z
+ // interpolate and quantize z
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+ psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_STOP(BEBarycentric, 0, 0);
- simdscalar vCoverageMask = vMask(work.coverageMask[sample] & MASK);
+ // interpolate user clip distance if available
+ if (rastState.clipDistanceMask)
+ {
+ coverageMask &= ~ComputeUserClipMask(rastState.clipDistanceMask, work.pUserClipBuffer,
+ psContext.vI.sample, psContext.vJ.sample);
+ }
+
+ simdscalar vCoverageMask = vMask(coverageMask);
simdscalar stencilPassMask = vCoverageMask;
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBase + MultisampleTraits<sampleCount>::RasterTileDepthOffset(sample);
- uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
+ uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
+ uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
RDTSC_START(BEEarlyDepthTest);
- simdscalar depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+ simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
pStencilBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
}
}
+ RDTSC_STOP(BENullBackend, 0, 0);
}
void InitClearTilesTable()
}
PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_MAX];
-PFN_BACKEND_FUNC gBackendSingleSample[2][2] = {};
-PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][SWR_INPUT_COVERAGE_MAX][2][2] = {};
-PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX][2] = {};
-PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS+1][SWR_MULTISAMPLE_TYPE_MAX] = {};
-PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2] = {};
-PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2] = {};
-PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2] = {};
+PFN_BACKEND_FUNC gBackendSingleSample[2] // input coverage
+ [2] // centroid
+ [2] // canEarlyZ
+ = {};
+PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX]
+ [SWR_MSAA_SAMPLE_PATTERN_MAX]
+ [SWR_INPUT_COVERAGE_MAX]
+ [2] // centroid
+ [2] // forcedSampleCount
+ [2] // canEarlyZ
+ = {};
+PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX]
+ [2] // centroid
+ [2] // canEarlyZ
+ = {};
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
// arguments to static template arguments.
template <uint32_t... ArgsT>
-struct OMChooser
-{
- // Last Arg Terminator
- static PFN_OUTPUT_MERGER GetFunc(SWR_MULTISAMPLE_COUNT tArg)
- {
- switch(tArg)
- {
- case SWR_MULTISAMPLE_1X: return OutputMerger<ArgsT..., SWR_MULTISAMPLE_1X>; break;
- case SWR_MULTISAMPLE_2X: return OutputMerger<ArgsT..., SWR_MULTISAMPLE_2X>; break;
- case SWR_MULTISAMPLE_4X: return OutputMerger<ArgsT..., SWR_MULTISAMPLE_4X>; break;
- case SWR_MULTISAMPLE_8X: return OutputMerger<ArgsT..., SWR_MULTISAMPLE_8X>; break;
- case SWR_MULTISAMPLE_16X: return OutputMerger<ArgsT..., SWR_MULTISAMPLE_16X>; break;
- default:
- SWR_ASSERT(0 && "Invalid sample count\n");
- return nullptr;
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_OUTPUT_MERGER GetFunc(uint32_t tArg, TArgsT... remainingArgs)
- {
- switch(tArg)
- {
- case 0: return OMChooser<ArgsT..., 0>::GetFunc(remainingArgs...); break;
- case 1: return OMChooser<ArgsT..., 1>::GetFunc(remainingArgs...); break;
- case 2: return OMChooser<ArgsT..., 2>::GetFunc(remainingArgs...); break;
- case 3: return OMChooser<ArgsT..., 3>::GetFunc(remainingArgs...); break;
- case 4: return OMChooser<ArgsT..., 4>::GetFunc(remainingArgs...); break;
- case 5: return OMChooser<ArgsT..., 5>::GetFunc(remainingArgs...); break;
- case 6: return OMChooser<ArgsT..., 6>::GetFunc(remainingArgs...); break;
- case 7: return OMChooser<ArgsT..., 7>::GetFunc(remainingArgs...); break;
- case 8: return OMChooser<ArgsT..., 8>::GetFunc(remainingArgs...); break;
- default:
- SWR_ASSERT(0 && "Invalid RT index\n");
- return nullptr;
- break;
- }
- }
-};
-
-// Recursive template used to auto-nest conditionals. Converts dynamic enum function
-// arguments to static template arguments.
-template <uint32_t... ArgsT>
-struct BECentroidBarycentricChooser
+struct BEChooser
{
-
// Last Arg Terminator
- template <typename... TArgsT>
- static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg)
- {
- if(tArg > 0)
- {
- return CalcCentroidBarycentrics<ArgsT..., 1>;
- }
-
- return CalcCentroidBarycentrics<ArgsT..., 0>;
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
+ static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
{
switch(tArg)
{
- case SWR_MULTISAMPLE_1X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_2X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_4X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_8X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_16X: return BECentroidBarycentricChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
+ case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break;
+ case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<SwrBackendTraits<ArgsT...>>; break;
+ case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
default:
- SWR_ASSERT(0 && "Invalid sample count\n");
+ SWR_ASSERT(0 && "Invalid backend func\n");
return nullptr;
break;
}
// Recursively parse args
template <typename... TArgsT>
- static PFN_CALC_CENTROID_BARYCENTRICS GetFunc(uint32_t tArg, TArgsT... remainingArgs)
- {
- if(tArg > 0)
- {
- return BECentroidBarycentricChooser<ArgsT..., 1>::GetFunc(remainingArgs...);
- }
-
- return BECentroidBarycentricChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
- }
-};
-
-// Recursive template used to auto-nest conditionals. Converts dynamic enum function
-// arguments to static template arguments.
-template <uint32_t... ArgsT>
-struct BEChooser
-{
- // Last Arg Terminator
- static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
+ static PFN_BACKEND_FUNC GetFunc(SWR_MSAA_SAMPLE_PATTERN tArg, TArgsT... remainingArgs)
{
switch(tArg)
{
- case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<ArgsT...>; break;
- case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<ArgsT...>; break;
- case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<ArgsT...>; break;
+ case SWR_MSAA_CENTER_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_CENTER_PATTERN>::GetFunc(remainingArgs...); break;
+ case SWR_MSAA_STANDARD_PATTERN: return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...); break;
default:
- SWR_ASSERT(0 && "Invalid backend func\n");
- return nullptr;
- break;
+ SWR_ASSERT(0 && "Invalid sample pattern\n");
+ return BEChooser<ArgsT..., SWR_MSAA_STANDARD_PATTERN>::GetFunc(remainingArgs...);
+ break;
}
}
-
// Recursively parse args
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
case SWR_MULTISAMPLE_8X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
case SWR_MULTISAMPLE_16X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
default:
- SWR_ASSERT(0 && "Invalid sample count\n");
- return nullptr;
- break;
+ SWR_ASSERT(0 && "Invalid sample count\n");
+ return BEChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+ break;
}
}
// Recursively parse args
template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(uint32_t tArg, TArgsT... remainingArgs)
+ static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
{
- if(tArg > 0)
+ if(tArg == true)
{
return BEChooser<ArgsT..., 1>::GetFunc(remainingArgs...);
}
- return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
+ return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
}
};
-template <uint32_t numRenderTargets, SWR_MULTISAMPLE_COUNT numSampleRates>
-void InitBackendOMFuncTable(PFN_OUTPUT_MERGER (&table)[numRenderTargets][numSampleRates])
+void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[2][2][2])
{
- for(uint32_t rtNum = SWR_ATTACHMENT_COLOR0; rtNum < numRenderTargets; rtNum++)
+ for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < SWR_INPUT_COVERAGE_MAX; inputCoverage++)
{
- for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < numSampleRates; sampleCount++)
+ for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
{
- table[rtNum][sampleCount] =
- OMChooser<>::GetFunc((SWR_RENDERTARGET_ATTACHMENT)rtNum, (SWR_MULTISAMPLE_COUNT)sampleCount);
+ for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
+ {
+ table[inputCoverage][isCentroid][canEarlyZ] =
+ BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL),
+ (isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
+ }
}
}
}
-template <SWR_MULTISAMPLE_COUNT numSampleRates>
-void InitBackendBarycentricsTables(PFN_CALC_PIXEL_BARYCENTRICS (&pixelTable)[2],
- PFN_CALC_SAMPLE_BARYCENTRICS (&sampleTable)[2],
- PFN_CALC_CENTROID_BARYCENTRICS (¢roidTable)[numSampleRates][2][2][2])
+void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][SWR_INPUT_COVERAGE_MAX]
+ [2][2][2])
{
- pixelTable[0] = CalcPixelBarycentrics<0>;
- pixelTable[1] = CalcPixelBarycentrics<1>;
-
- sampleTable[0] = CalcSampleBarycentrics<0>;
- sampleTable[1] = CalcSampleBarycentrics<1>;
-
- for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < numSampleRates; sampleCount++)
+ for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_MAX; sampleCount++)
{
- for(uint32_t baryMask = 0; baryMask < 2; baryMask++)
+ for(uint32_t samplePattern = SWR_MSAA_CENTER_PATTERN; samplePattern < SWR_MSAA_SAMPLE_PATTERN_MAX; samplePattern++)
{
- for(uint32_t patternNum = 0; patternNum < 2; patternNum++)
+ for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < SWR_INPUT_COVERAGE_MAX; inputCoverage++)
{
- for(uint32_t forcedSampleEnable = 0; forcedSampleEnable < 2; forcedSampleEnable++)
+ for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
{
- centroidTable[sampleCount][baryMask][patternNum][forcedSampleEnable]=
- BECentroidBarycentricChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, baryMask, patternNum, forcedSampleEnable);
+ for(uint32_t forcedSampleCount = 0; forcedSampleCount < 2; forcedSampleCount++)
+ {
+ for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
+ {
+ table[sampleCount][samplePattern][inputCoverage][isCentroid][forcedSampleCount][canEarlyZ] =
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL),
+ (isCentroid > 0), (forcedSampleCount > 0), (canEarlyZ > 0), SWR_BACKEND_MSAA_PIXEL_RATE);
+ }
+ }
}
}
}
}
}
-void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[2][2])
+void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX][2][2])
{
- gBackendSingleSample[0][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
- gBackendSingleSample[0][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NONE, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
- gBackendSingleSample[1][0] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
- gBackendSingleSample[1][1] = BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, SWR_INPUT_COVERAGE_NORMAL, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_SINGLE_SAMPLE);
-}
-
-template <SWR_MULTISAMPLE_COUNT numSampleRates, SWR_MSAA_SAMPLE_PATTERN numSamplePatterns, SWR_INPUT_COVERAGE numCoverageModes>
-void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numSamplePatterns][numCoverageModes][2][2])
-{
- for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < numSampleRates; sampleCount++)
+ for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_MAX; sampleCount++)
{
- for(uint32_t samplePattern = SWR_MSAA_CENTER_PATTERN; samplePattern < numSamplePatterns; samplePattern++)
+ for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < SWR_INPUT_COVERAGE_MAX; inputCoverage++)
{
- for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < numCoverageModes; inputCoverage++)
+ for(uint32_t centroid = 0; centroid < 2; centroid++)
{
- for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
+ for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
- table[sampleCount][samplePattern][inputCoverage][isCentroid][0] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE);
- table[sampleCount][samplePattern][inputCoverage][isCentroid][1] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, samplePattern, inputCoverage, isCentroid, 1, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_PIXEL_RATE);
+ table[sampleCount][inputCoverage][centroid][canEarlyZ] =
+ BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL),
+ (centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
}
}
}
}
}
-template <uint32_t numSampleRates, uint32_t numCoverageModes>
-void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[numSampleRates][numCoverageModes][2])
-{
- for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < numSampleRates; sampleCount++)
- {
- for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < numCoverageModes; inputCoverage++)
- {
- table[sampleCount][inputCoverage][0] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 0, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
- table[sampleCount][inputCoverage][1] =
- BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, inputCoverage, 1, 0, (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
- }
- }
-}
-
void InitBackendFuncTables()
{
- InitBackendSampleFuncTable(gBackendSingleSample);
- InitBackendPixelFuncTable<(SWR_MULTISAMPLE_COUNT)SWR_MULTISAMPLE_TYPE_MAX, SWR_MSAA_SAMPLE_PATTERN_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendPixelRateTable);
- InitBackendSampleFuncTable<SWR_MULTISAMPLE_TYPE_MAX, SWR_INPUT_COVERAGE_MAX>(gBackendSampleRateTable);
- InitBackendOMFuncTable<SWR_NUM_RENDERTARGETS+1, SWR_MULTISAMPLE_TYPE_MAX>(gBackendOutputMergerTable);
- InitBackendBarycentricsTables<(SWR_MULTISAMPLE_COUNT)(SWR_MULTISAMPLE_TYPE_MAX)>(gPixelBarycentricTable, gSampleBarycentricTable, gCentroidBarycentricTable);
+ InitBackendSingleFuncTable(gBackendSingleSample);
+ InitBackendPixelFuncTable(gBackendPixelRateTable);
+ InitBackendSampleFuncTable(gBackendSampleRateTable);
gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;
gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS < SWR_MULTISAMPLE_2X > ;