swr: [rasterizer core] Quantize depth to depth buffer precision prior to depth test...
author Tim Rowley <timothy.o.rowley@intel.com>
Fri, 25 Mar 2016 23:24:45 +0000 (17:24 -0600)
committer Tim Rowley <timothy.o.rowley@intel.com>
Tue, 12 Apr 2016 16:52:05 +0000 (11:52 -0500)
Fixes z-fighting issues.

Acked-by: Brian Paul <brianp@vmware.com>
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/depthstencil.h
src/gallium/drivers/swr/rasterizer/core/state.h

index 442cdd420f4caa617dfbaa1e428362862c7cce5d..9e13ee142a5f7332707191fabfc5f759e4d73a93 100644 (file)
@@ -33,6 +33,7 @@
 #include "core/api.h"
 #include "core/backend.h"
 #include "core/context.h"
+#include "core/depthstencil.h"
 #include "core/frontend.h"
 #include "core/rasterizer.h"
 #include "core/rdtsc_core.h"
@@ -884,6 +885,25 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
                  !pState->state.blendState.renderTarget[rt].writeDisableBlue) ? (1 << rt) : 0;
         }
     }
+
+    // Setup depth quantization function
+    if (pState->state.depthHottileEnable)
+    {
+        switch (pState->state.rastState.depthFormat)
+        {
+        case R32_FLOAT_X8X24_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT_X8X24_TYPELESS > ; break;
+        case R32_FLOAT: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; break;
+        case R24_UNORM_X8_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R24_UNORM_X8_TYPELESS > ; break;
+        case R16_UNORM: pState->state.pfnQuantizeDepth = QuantizeDepth < R16_UNORM > ; break;
+        default: SWR_ASSERT(false, "Unsupported depth format for depth quantiztion.");
+            pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+        }
+    }
+    else
+    {
+        // set up pass-through quantize if depth isn't enabled
+        pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+    }
 }
 
 //////////////////////////////////////////////////////////////////////////
index ad0a5a070325311b6197e2c0ceb07798561704ae..842ea326e68229490c409c1a7984a5328428b8d3 100644 (file)
@@ -772,8 +772,10 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                     psContext.vOneOverW.centroid = psContext.vOneOverW.center;
                 }
 
-                // interpolate z
+                // interpolate and quantize z
                 psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+                psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
+
                 RDTSC_STOP(BEBarycentric, 0, 0);
 
                 simdmask clipCoverageMask = coverageMask & MASK;
@@ -793,7 +795,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                 if(CanEarlyZ(pPSState))
                 {
                     RDTSC_START(BEEarlyDepthTest);
-                    depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+                    depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
                                                         psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
                     RDTSC_STOP(BEEarlyDepthTest, 0, 0);
 
@@ -825,7 +827,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                 if(!CanEarlyZ(pPSState))
                 {
                     RDTSC_START(BELateDepthTest);
-                    depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+                    depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
                                                         psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
                     RDTSC_STOP(BELateDepthTest, 0, 0);
 
@@ -977,8 +979,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
 
                     backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
 
-                    // interpolate z
+                    // interpolate and quantize z
                     psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+                    psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
 
                     RDTSC_STOP(BEBarycentric, 0, 0);
 
@@ -1000,7 +1003,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                     if (CanEarlyZ(pPSState))
                     {
                         RDTSC_START(BEEarlyDepthTest);
-                        depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+                        depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
                                               psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
                         RDTSC_STOP(BEEarlyDepthTest, 0, 0);
 
@@ -1033,7 +1036,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                     if (!CanEarlyZ(pPSState))
                     {
                         RDTSC_START(BELateDepthTest);
-                        depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+                        depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
                                               psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
                         RDTSC_STOP(BELateDepthTest, 0, 0);
 
@@ -1200,8 +1203,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                 RDTSC_START(BEBarycentric);
                 backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
 
-                // interpolate z
+                // interpolate and quantize z
                 psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+                psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
                 RDTSC_STOP(BEBarycentric, 0, 0);
 
                 // execute pixel shader
@@ -1263,10 +1267,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                     // calc I & J per sample
                     backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
 
-                    // interpolate z
+                    // interpolate and quantize z
                     if (!pPSState->writesODepth)
                     {
                         vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+                        vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
                     }
                     
                     ///@todo: perspective correct vs non-perspective correct clipping?
@@ -1292,7 +1297,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                 // ZTest for this sample
                 RDTSC_START(BEEarlyDepthTest);
                 stencilPassMask[sample] = vCoverageMask[sample];
-                depthPassMask[sample] = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+                depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing,
                                         vZ[sample], pDepthSample, vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]);
                 RDTSC_STOP(BEEarlyDepthTest, 0, 0);
 
@@ -1308,8 +1313,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
             {
                 RDTSC_START(BEBarycentric);
                 backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext);
-                // interpolate z
+                // interpolate and quantize z
                 psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+                psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
                 RDTSC_STOP(BEBarycentric, 0, 0);
 
                 // execute pixel shader
@@ -1463,8 +1469,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
 
                     backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext);
 
-                    // interpolate z
+                    // interpolate and quantize z
                     psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+                    psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
 
                     RDTSC_STOP(BEBarycentric, 0, 0);
 
@@ -1483,7 +1490,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                     uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample);
 
                     RDTSC_START(BEEarlyDepthTest);
-                    simdscalar depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing,
+                    simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing,
                         psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
                     DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
                         pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
index 39f23372a18e04e52795913d83370ff4f42a304b..27abe437718486c8291dcbb90916be7bdadd13b2 100644 (file)
@@ -308,6 +308,8 @@ OSALIGNLINE(struct) API_STATE
         uint32_t depthHottileEnable: 1;
         uint32_t stencilHottileEnable : 1;
     };
+
+    PFN_QUANTIZE_DEPTH      pfnQuantizeDepth;
 };
 
 class MacroTileMgr;
index 2cc9d4054acde43858634ab291470457972c3a17..7b55580bf0ae689e7074945da1f3485b740e1acf 100644 (file)
@@ -80,14 +80,52 @@ void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simds
 }
 
 
+template<SWR_FORMAT depthFormatT> // depthFormatT: format whose type and bit count drive the quantization below
+simdscalar QuantizeDepth(simdscalar depth) // returns depth unchanged for float formats, rounded to a unorm step otherwise
+{
+    SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0); // queried with index 0 (presumably the depth component; confirm against FormatTraits)
+    uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0); // bit count for the same index
+
+    if (depthType == SWR_TYPE_FLOAT)
+    {
+        // only 32-bit float depth is expected on this path
+        SWR_ASSERT(depthBpc == 32);
+
+        // float depth already matches shader precision, so it is returned without quantizing
+        return depth;
+    }
+
+    // any format that is not float is expected to be unorm
+    SWR_ASSERT(depthType == SWR_TYPE_UNORM);
+
+    float quantize = (float)((1 << depthBpc) - 1); // largest depthBpc-bit unsigned value; NOTE(review): int shift, only valid while depthBpc < 31
+    simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize)); // scale depth up by that maximum
+    result = _simd_add_ps(result, _simd_set1_ps(0.5f)); // add one half so the truncation below rounds non-negative values to nearest
+    result = _simd_round_ps(result, _MM_FROUND_TO_ZERO); // truncate toward zero, dropping the fractional part
+    
+    if (depthBpc > 16) // wider formats use a true divide; presumably the reciprocal multiply is not accurate enough here (TODO confirm)
+    {
+        result = _simd_div_ps(result, _simd_set1_ps(quantize)); // scale back down by the same maximum
+    }
+    else
+    {
+        result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize)); // scale back down via a precomputed reciprocal
+    }
+
+    return result;
+}
+
 INLINE
-simdscalar DepthStencilTest(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
+simdscalar DepthStencilTest(const API_STATE* pState,
                  bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, uint8_t *pStencilBase,
                  simdscalar* pStencilMask)
 {
     static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
     static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
 
+    const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
+    const SWR_VIEWPORT* pViewport = &pState->vp[0];
+
     simdscalar depthResult = _simd_set1_ps(-1.0f);
     simdscalar zbuf;
 
index 5752094ca1032add357df732d6583577bfae027a..503610680259fc20ec302c04aae9e30c969d7ccd 100644 (file)
@@ -790,6 +790,7 @@ typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, uint8_t*, simdvector&, simdscalari*, simdscalari*);
+typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar); // pointer to a depth quantization routine: takes a simdscalar of depth values and returns a simdscalar
 
 //////////////////////////////////////////////////////////////////////////
 /// FRONTEND_STATE