From e62b6d2f0fb9435a11f139b4c9da583152be8a35 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Wed, 21 Dec 2016 18:01:56 -0600 Subject: [PATCH] swr: [rasterizer core] fix SIMD16 output merger Honor the colorHottileEnable mask when accessing colorBuffer pointers. Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/backend.cpp | 20 +++++++++---------- .../drivers/swr/rasterizer/core/backend.h | 18 +++++++++++------ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 71655f1072a..a53b51ff2d0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -541,7 +541,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 AR_BEGIN(BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask); - AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); + AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); AR_END(BEEarlyDepthTest, 0); // early-exit if no pixels passed depth or earlyZ is forced on @@ -574,7 +574,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 AR_BEGIN(BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask); - AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); + AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); AR_END(BELateDepthTest, 0); if (!_simd_movemask_ps(depthPassMask)) @@ -593,7 +593,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // output merger AR_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, useAlternateOffset); + OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset); #else OutputMerger(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets); #endif @@ -752,7 +752,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ AR_BEGIN(BEEarlyDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); - AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); + AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); AR_END(BEEarlyDepthTest, 0); // early-exit if no samples passed depth or earlyZ is forced on. @@ -786,7 +786,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ AR_BEGIN(BELateDepthTest, pDC->drawId); depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); - AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); + AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); AR_END(BELateDepthTest, 0); if (!_simd_movemask_ps(depthPassMask)) @@ -807,7 +807,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // output merger AR_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, useAlternateOffset); + OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset); #else OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets); #endif @@ -935,7 +935,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t { uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest); UPDATE_STAT_BE(DepthPassCount, depthPassCount); - AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes))); + AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes))); } // if we have no covered samples that passed depth at this point, go to next tile @@ -969,7 +969,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t { uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest); UPDATE_STAT_BE(DepthPassCount, depthPassCount); - AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes))); + AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes))); } // if we have no covered samples that passed depth at this point, skip OM and go to next tile @@ -1001,7 +1001,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // broadcast the results of the PS to all passing pixels #if USE_8x2_TILE_BACKEND - OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, useAlternateOffset); + OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset); #else OutputMerger(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets); #endif @@ -1148,7 +1148,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, AR_BEGIN(BEEarlyDepthTest, pDC->drawId); simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); - AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); + AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask))); DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); AR_END(BEEarlyDepthTest, 0); diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h index 48cfce2bb85..de37b1a789d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend.h @@ -691,6 +691,7 @@ INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTE psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample); } +// Merge Output to 4x2 SIMD Tile Format INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT) { @@ -751,8 +752,9 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N } #if USE_8x2_TILE_BACKEND +// Merge Output to 8x2 SIMD16 Tile Format INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT, bool useAlternateOffset) + const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT, const uint32_t colorBufferEnableMask, bool useAlternateOffset) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -765,7 +767,8 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N simdvector blendSrc; simdvector blendOut; - for (uint32_t rt = 0; rt < NumRT; ++rt) + uint32_t colorBufferBit = 1; + for (uint32_t rt = 0; rt < NumRT; rt += 1, colorBufferBit <<= 1) { simdscalar *pColorSample = reinterpret_cast(pColorBase[rt] + rasterTileColorOffset); @@ -774,10 +777,13 @@ INLINE void OutputMerger(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_N /// TODO: move this into the blend JIT. blendOut = psContext.shaded[rt]; - blendSrc[0] = pColorSample[0]; - blendSrc[1] = pColorSample[2]; - blendSrc[2] = pColorSample[4]; - blendSrc[3] = pColorSample[6]; + if (colorBufferBit & colorBufferEnableMask) + { + blendSrc[0] = pColorSample[0]; + blendSrc[1] = pColorSample[2]; + blendSrc[2] = pColorSample[4]; + blendSrc[3] = pColorSample[6]; + } // Blend outputs and update coverage mask for alpha test if (pfnBlendFunc[rt] != nullptr) -- 2.30.2