void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
-void ProcessQueryStatsBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
-void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
+void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
+void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers);
void InitClearTilesTable();
simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar vI, simdscalar vJ);
INLINE generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask)
{
- unsigned long index;
uint32_t simdCoverage = (coverageMask[0] & MASK);
static const uint32_t FullCoverageMask = (1 << T::MultisampleT::numSamples) - 1;
- while(_BitScanForward(&index, simdCoverage))
+ for(int i = 0; i < KNOB_SIMD_WIDTH; i++)
{
- // set all samples to covered
- inputMask[index] = FullCoverageMask;
+ // set all samples to covered if conservative coverage mask is set for that pixel
+ inputMask[i] = (((1 << i) & simdCoverage) > 0) ? FullCoverageMask : 0;
}
}
};
psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
}
+INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar z, float minz, float maxz)
+{
+ const simdscalar minzMask = _simd_cmpge_ps(z, _simd_set1_ps(minz));
+ const simdscalar maxzMask = _simd_cmple_ps(z, _simd_set1_ps(maxz));
+
+ return _simd_movemask_ps(_simd_and_ps(minzMask, maxzMask));
+}
+
template<typename T>
INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
{
template<typename T>
struct PixelRateZTestLoop
{
- PixelRateZTestLoop(DRAW_CONTEXT *DC, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
+ PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
uint8_t*& depthBase, uint8_t*& stencilBase, const uint8_t ClipDistanceMask) :
- work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
+ pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
clipDistanceMask(ClipDistanceMask), pDepthBase(depthBase), pStencilBase(stencilBase) {};
INLINE
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
{
+ SWR_CONTEXT *pContext = pDC->pContext;
+
uint32_t statCount = 0;
simdscalar anyDepthSamplePassed = _simd_setzero_ps();
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
continue;
}
- RDTSC_START(BEBarycentric);
+ AR_BEGIN(BEBarycentric, pDC->drawId);
// calculate per sample positions
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
}
- RDTSC_STOP(BEBarycentric, 0, 0);
+ AR_END(BEBarycentric, 0);
///@todo: perspective correct vs non-perspective correct clipping?
// if clip distances are enabled, we need to interpolate for each sample
uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
+ if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
+ {
+ static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+
+ const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+
+ const float minz = state.depthBoundsState.depthBoundsTestMinValue;
+ const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
+
+ vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], vMask(CalcDepthBoundsAcceptMask(z, minz, maxz)));
+ }
+
// ZTest for this sample
- RDTSC_START(BEDepthBucket);
+ ///@todo Need to uncomment out this bucket.
+ //AR_BEGIN(BEDepthBucket, pDC->drawId);
depthPassMask[sample] = vCoverageMask[sample];
stencilPassMask[sample] = vCoverageMask[sample];
- depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, vZ[sample], pDepthSample,
- vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]);
- RDTSC_STOP(BEDepthBucket, 0, 0);
+ depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
+ vZ[sample], pDepthSample, vCoverageMask[sample],
+ pStencilSample, &stencilPassMask[sample]);
+ //AR_END(BEDepthBucket, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
{
- DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
pDepthSample, depthPassMask[sample], vCoverageMask[sample], pStencilSample, stencilPassMask[sample]);
if(!_simd_movemask_ps(depthPassMask[sample]))
private:
// functor inputs
+ DRAW_CONTEXT* pDC;
+ uint32_t workerId;
+
const SWR_TRIANGLE_DESC& work;
const BarycentricCoeffs& coeffs;
const API_STATE& state;