From e3748eb19ba444f411ad66905dfddbe809225ca0 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <fxkuehl@gmx.de>
Date: Mon, 3 Jan 2005 22:24:44 +0000
Subject: [PATCH] Added support for floating point depth buffers on
 Savage4-based hardware. By also reversing the depth range this can compensate
 the loss of accuracy of far objects caused by the projective transformation.
 Software fallbacks work but are slightly slower since floats in a custom (non
 IEEE) format have to be encoded and decoded. I haven't done anything about
 polygon offsets yet. There doesn't seem to be an easy way to get it right
 except making the offset unit as big as the lowest resolution of depth
 values. For now float depth is disabled by default but can be enabled through
 driconf (though I have seen only positive effects so far).

---
 src/mesa/drivers/dri/savage/savage_xmesa.c  | 10 ++-
 src/mesa/drivers/dri/savage/savagecontext.h |  1 +
 src/mesa/drivers/dri/savage/savageioctl.c   | 16 +++-
 src/mesa/drivers/dri/savage/savagespan.c    | 91 +++++++++++++++----
 src/mesa/drivers/dri/savage/savagespan.h    | 98 +++++++++++++++++++++
 src/mesa/drivers/dri/savage/savagestate.c   | 45 ++++++----
 6 files changed, 221 insertions(+), 40 deletions(-)

diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c
index f1798de134d..e2697050731 100644
--- a/src/mesa/drivers/dri/savage/savage_xmesa.c
+++ b/src/mesa/drivers/dri/savage/savage_xmesa.c
@@ -64,6 +64,7 @@ DRI_CONF_BEGIN
     DRI_CONF_SECTION_QUALITY
         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
         DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
+        DRI_CONF_FLOAT_DEPTH(false)
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_PERFORMANCE
         DRI_CONF_MAX_TEXTURE_UNITS(2,1,2)
@@ -72,7 +73,7 @@ DRI_CONF_BEGIN
         DRI_CONF_NO_RAST(false)
     DRI_CONF_SECTION_END
 DRI_CONF_END;
-static const GLuint __driNConfigOptions = 4;
+static const GLuint __driNConfigOptions = 5;
 
 #ifdef USE_NEW_INTERFACE
 static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
@@ -308,6 +309,9 @@ savageCreateContext( const __GLcontextModes *mesaVis,
    driParseConfigFiles (&imesa->optionCache, &savageScreen->optionCache,
                         sPriv->myNum, "savage");
 
+   imesa->float_depth = driQueryOptionb(&imesa->optionCache, "float_depth") &&
+       savageScreen->chipset >= S3_SAVAGE4;
+   imesa->no_rast = driQueryOptionb(&imesa->optionCache, "no_rast");
    imesa->texture_depth = driQueryOptioni (&imesa->optionCache,
 					   "texture_depth");
    if (imesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
@@ -407,7 +411,7 @@ savageCreateContext( const __GLcontextModes *mesaVis,
 
    imesa->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
    imesa->depth_scale = (imesa->savageScreen->zpp == 2) ?
-       (1.0F/0x10000):(1.0F/0x1000000);
+       (1.0F/0xffff):(1.0F/0xffffff);
 
    imesa->bufferSize = savageScreen->bufferSize;
    imesa->dmaVtxBuf.total = 0;
@@ -479,8 +483,6 @@ savageCreateContext( const __GLcontextModes *mesaVis,
 
    savageDDInitState( imesa );
 
-   imesa->no_rast = driQueryOptionb(&imesa->optionCache, "no_rast");
-
    driContextPriv->driverPrivate = (void *) imesa;
 
    return GL_TRUE;
diff --git a/src/mesa/drivers/dri/savage/savagecontext.h b/src/mesa/drivers/dri/savage/savagecontext.h
index 8978f9247cf..8d77d1851dd 100644
--- a/src/mesa/drivers/dri/savage/savagecontext.h
+++ b/src/mesa/drivers/dri/savage/savagecontext.h
@@ -294,6 +294,7 @@ struct savage_context_t {
     driOptionCache optionCache;
     GLint texture_depth;
     GLboolean no_rast;
+    GLboolean float_depth;
 };
 
 #define SAVAGE_CONTEXT(ctx) ((savageContextPtr)(ctx->DriverCtx))
diff --git a/src/mesa/drivers/dri/savage/savageioctl.c b/src/mesa/drivers/dri/savage/savageioctl.c
index dd7a84e2e72..62e7142d246 100644
--- a/src/mesa/drivers/dri/savage/savageioctl.c
+++ b/src/mesa/drivers/dri/savage/savageioctl.c
@@ -38,6 +38,7 @@
 #include "savageioctl.h"
 #include "savage_bci.h"
 #include "savagestate.h"
+#include "savagespan.h"
 
 #include "drm.h"
 #include <sys/ioctl.h>
@@ -333,10 +334,17 @@ static void savageDDClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
        fprintf (stderr, "%s\n", __FUNCTION__);
 
    clearColor = imesa->ClearColor;
-   if(imesa->savageScreen->zpp == 2)
-       clearDepth = (GLuint) (ctx->Depth.Clear * DEPTH_SCALE_16);
-   else
-       clearDepth = (GLuint) (ctx->Depth.Clear * DEPTH_SCALE_24);
+   if (imesa->float_depth) {
+       if (imesa->savageScreen->zpp == 2)
+	   clearDepth = savageEncodeFloat16(1.0 - ctx->Depth.Clear);
+       else
+	   clearDepth = savageEncodeFloat24(1.0 - ctx->Depth.Clear);
+   } else {
+       if (imesa->savageScreen->zpp == 2)
+	   clearDepth = (GLuint) ((1.0 - ctx->Depth.Clear) * DEPTH_SCALE_16);
+       else
+	   clearDepth = (GLuint) ((1.0 - ctx->Depth.Clear) * DEPTH_SCALE_24);
+   }
 
    colorMask = *((GLuint *) &ctx->Color.ColorMask);
    depthMask = 0;
diff --git a/src/mesa/drivers/dri/savage/savagespan.c b/src/mesa/drivers/dri/savage/savagespan.c
index 0caee3a652d..6cda13cc295 100644
--- a/src/mesa/drivers/dri/savage/savagespan.c
+++ b/src/mesa/drivers/dri/savage/savagespan.c
@@ -158,13 +158,14 @@ do {								\
 
 
 
-/* 16 bit depthbuffer functions.
+/* 16 bit integer depthbuffer functions
+ * Depth range is reversed. See also savageCalcViewport.
  */
 #define WRITE_DEPTH( _x, _y, d ) \
-    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = d
+    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = 0xFFFF - d
 
 #define READ_DEPTH( d, _x, _y ) \
-    d = *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch)
+    d = 0xFFFF - *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch)
 
 #define TAG(x) savage##x##_16
 #include "depthtmp.h"
@@ -173,22 +174,62 @@ do {								\
 
 
 
-/* 8-bit stencil /24-bit depth depthbuffer functions.
+/* 16 bit float depthbuffer functions: d (0..65535) is stored as encode(1 - d/65535), i.e. reversed
+ */
+#define WRITE_DEPTH( _x, _y, d ) \
+    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = \
+        savageEncodeFloat16( 1.0 - (GLfloat)d/65535.0 )
+
+#define READ_DEPTH( d, _x, _y ) \
+    d = 65535 - \
+        savageDecodeFloat16( *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) ) * \
+	65535.0
+
+#define TAG(x) savage##x##_16f
+#include "depthtmp.h"
+
+
+
+
+
+/* 8-bit stencil /24-bit integer depth depthbuffer functions.
+ * Depth range is reversed. See also savageCalcViewport.
  */
 #define WRITE_DEPTH( _x, _y, d ) do {				\
    GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
    tmp &= 0xFF000000;						\
-   tmp |= d;							\
+   tmp |= 0x00FFFFFF - d;					\
    *(GLuint *)(buf + (_x<<2) + _y*pitch)  = tmp;		\
 } while(0)
 
 #define READ_DEPTH( d, _x, _y )	\
-   d = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch)
+   d = 0x00FFFFFF - (*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0x00FFFFFF)
 
 #define TAG(x) savage##x##_8_24
 #include "depthtmp.h"
 
 
+
+
+
+/* 24 bit float depthbuffer functions: d is stored reversed as encode(1 - d/16777215); top byte is kept
+ */
+#define WRITE_DEPTH( _x, _y, d ) do {				\
+    GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
+    tmp &= 0xFF000000;						\
+    tmp |= savageEncodeFloat24( 1.0 - (GLfloat)d/16777215.0 );	\
+   *(GLuint *)(buf + (_x<<2) + _y*pitch)  = tmp;		\
+} while(0)
+
+#define READ_DEPTH( d, _x, _y )					\
+    d = 16777215 - savageDecodeFloat24(				\
+	*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0x00FFFFFF)	\
+	* 16777215.0
+
+#define TAG(x) savage##x##_8_24f
+#include "depthtmp.h"
+
+
 #define WRITE_STENCIL( _x, _y, d ) do {				\
    GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
    tmp &= 0x00FFFFFF;						\
@@ -315,20 +356,36 @@ void savageDDInitSpanFuncs( GLcontext *ctx )
 
    switch (imesa->savageScreen->zpp)
    {
-   case 2: 
-       swdd->ReadDepthSpan = savageReadDepthSpan_16;
-       swdd->WriteDepthSpan = savageWriteDepthSpan_16;
-       swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16;
-       swdd->ReadDepthPixels = savageReadDepthPixels_16;
-       swdd->WriteDepthPixels = savageWriteDepthPixels_16;
+   case 2:
+       if (imesa->float_depth) {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_16f;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_16f;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16f;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_16f;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_16f;
+       } else {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_16;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_16;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_16;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_16;
+       }
        
        break;
    case 4: 
-       swdd->ReadDepthSpan = savageReadDepthSpan_8_24;
-       swdd->WriteDepthSpan = savageWriteDepthSpan_8_24;
-       swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24;
-       swdd->ReadDepthPixels = savageReadDepthPixels_8_24;
-       swdd->WriteDepthPixels = savageWriteDepthPixels_8_24;    
+       if (imesa->float_depth) {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_8_24f;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_8_24f;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24f;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_8_24f;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_8_24f;    
+       } else {
+	   swdd->ReadDepthSpan = savageReadDepthSpan_8_24;
+	   swdd->WriteDepthSpan = savageWriteDepthSpan_8_24;
+	   swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24;
+	   swdd->ReadDepthPixels = savageReadDepthPixels_8_24;
+	   swdd->WriteDepthPixels = savageWriteDepthPixels_8_24;    
+       }
        swdd->ReadStencilSpan = savageReadStencilSpan_8_24;
        swdd->WriteStencilSpan = savageWriteStencilSpan_8_24;
        swdd->ReadStencilPixels = savageReadStencilPixels_8_24;
diff --git a/src/mesa/drivers/dri/savage/savagespan.h b/src/mesa/drivers/dri/savage/savagespan.h
index 35247b47061..cb3a1b52fd7 100644
--- a/src/mesa/drivers/dri/savage/savagespan.h
+++ b/src/mesa/drivers/dri/savage/savagespan.h
@@ -27,4 +27,102 @@
 
 extern void savageDDInitSpanFuncs( GLcontext *ctx );
 
+/*
+ * Savage 16-bit float depth format with zExpOffset=16:
+ *   4 bit unsigned exponent, 12 bit mantissa
+ *
+ * The meaning of the mantissa is different from IEEE floating point
+ * formats. The same number can't be encoded with different exponents.
+ * So no bits are wasted.
+ *
+ * exponent | range encoded by mantissa | accuracy of mantissa
+ * ---------+---------------------------+---------------------
+ *       15 | 2^-1 .. 1                 | 2^-13
+ *       14 | 2^-2 .. 2^-1              | 2^-14
+ *       13 | 2^-3 .. 2^-2              | 2^-15
+ *      ... | ...                       |
+ *        2 | 2^-14 .. 2^-13            | 2^-26
+ *        1 | 2^-15 .. 2^-14            | 2^-27
+ *        0 | 2^-16 .. 2^-15            | 2^-28
+ *
+ * Note that there is no encoding for numbers < 2^-16.
+ */
+static __inline GLuint savageEncodeFloat16( GLdouble x ) /* x -> 4-bit exponent, 12-bit mantissa code */
+{
+    GLint r = (GLint)(x * 0x10000000); /* fixed point, scaled by 2^28 */
+    GLint exp = 0;
+    if (r < 0x1000) /* below 2^-16 there is no encoding: return code 0 */
+	return 0;
+    while (r - 0x1000 > 0x0fff) { /* shift until r - 0x1000 fits in 12 bits */
+	r >>= 1;
+	exp++;
+    }
+    return exp > 0xf ? 0xffff : (r - 0x1000) | (exp << 12); /* exp > 15: clamp to the largest code */
+}
+static __inline GLdouble savageDecodeFloat16( GLuint x ) /* 16-bit depth code -> double */
+{
+    static const GLdouble pow2[16] = { /* value of one mantissa step: 2^(exp-28) */
+	1.0/(1<<28), 1.0/(1<<27), 1.0/(1<<26), 1.0/(1<<25),
+	1.0/(1<<24), 1.0/(1<<23), 1.0/(1<<22), 1.0/(1<<21),
+	1.0/(1<<20), 1.0/(1<<19), 1.0/(1<<18), 1.0/(1<<17),
+	1.0/(1<<16), 1.0/(1<<15), 1.0/(1<<14), 1.0/(1<<13)
+    };
+    static const GLdouble bias[16] = { /* value decoded for mantissa 0: 2^(exp-16) */
+	1.0/(1<<16), 1.0/(1<<15), 1.0/(1<<14), 1.0/(1<<13),
+	1.0/(1<<12), 1.0/(1<<11), 1.0/(1<<10), 1.0/(1<< 9),
+	1.0/(1<< 8), 1.0/(1<< 7), 1.0/(1<< 6), 1.0/(1<< 5),
+	1.0/(1<< 4), 1.0/(1<< 3), 1.0/(1<< 2), 1.0/(1<< 1)
+    };
+    GLuint mant = x & 0x0fff; /* low 12 bits */
+    GLuint exp = (x >> 12) & 0xf; /* next 4 bits, used as table index */
+    return bias[exp] + pow2[exp]*mant;
+}
+
+/*
+ * Savage 24-bit float depth format with zExpOffset=32:
+ *   5 bit unsigned exponent, 19 bit mantissa
+ *
+ * Details analogous to the 16-bit format.
+ */
+static __inline GLuint savageEncodeFloat24( GLdouble x ) /* x -> 5-bit exponent, 19-bit mantissa code */
+{
+    int64_t r = (int64_t)(x * ((int64_t)1 << (19+32))); /* fixed point, scaled by 2^51 */
+    GLint exp = 0;
+    if (r < 0x80000) /* below 2^-32: return code 0 */
+	return 0;
+    while (r - 0x80000 > 0x7ffff) { /* shift until r - 0x80000 fits in 19 bits */
+	r >>= 1;
+	exp++;
+    }
+    return exp > 0x1f ? 0xffffff : (r - 0x80000) | (exp << 19); /* exp > 31: clamp to the largest code */
+}
+#define _1 (int64_t)1 /* 64-bit one: the table shifts below go up to 51 bits */
+static __inline GLdouble savageDecodeFloat24( GLuint x ) /* 24-bit depth code -> double */
+{
+    static const GLdouble pow2[32] = { /* value of one mantissa step: 2^(exp-51) */
+	1.0/(_1<<51), 1.0/(_1<<50), 1.0/(_1<<49), 1.0/(_1<<48),
+	1.0/(_1<<47), 1.0/(_1<<46), 1.0/(_1<<45), 1.0/(_1<<44),
+	1.0/(_1<<43), 1.0/(_1<<42), 1.0/(_1<<41), 1.0/(_1<<40),
+	1.0/(_1<<39), 1.0/(_1<<38), 1.0/(_1<<37), 1.0/(_1<<36),
+	1.0/(_1<<35), 1.0/(_1<<34), 1.0/(_1<<33), 1.0/(_1<<32),
+	1.0/(_1<<31), 1.0/(_1<<30), 1.0/(_1<<29), 1.0/(_1<<28),
+	1.0/(_1<<27), 1.0/(_1<<26), 1.0/(_1<<25), 1.0/(_1<<24),
+	1.0/(_1<<23), 1.0/(_1<<22), 1.0/(_1<<21), 1.0/(_1<<20)
+    };
+    static const GLdouble bias[32] = { /* value decoded for mantissa 0: 2^(exp-32) */
+	1.0/(_1<<32), 1.0/(_1<<31), 1.0/(_1<<30), 1.0/(_1<<29),
+	1.0/(_1<<28), 1.0/(_1<<27), 1.0/(_1<<26), 1.0/(_1<<25),
+	1.0/(_1<<24), 1.0/(_1<<23), 1.0/(_1<<22), 1.0/(_1<<21),
+	1.0/(_1<<20), 1.0/(_1<<19), 1.0/(_1<<18), 1.0/(_1<<17),
+	1.0/(_1<<16), 1.0/(_1<<15), 1.0/(_1<<14), 1.0/(_1<<13),
+	1.0/(_1<<12), 1.0/(_1<<11), 1.0/(_1<<10), 1.0/(_1<< 9),
+	1.0/(_1<< 8), 1.0/(_1<< 7), 1.0/(_1<< 6), 1.0/(_1<< 5),
+	1.0/(_1<< 4), 1.0/(_1<< 3), 1.0/(_1<< 2), 1.0/(_1<< 1)
+    };
+    GLuint mant = x & 0x7ffff; /* low 19 bits */
+    GLuint exp = (x >> 19) & 0x1f; /* next 5 bits, used as table index */
+    return bias[exp] + pow2[exp]*mant;
+}
+#undef _1
+
 #endif
diff --git a/src/mesa/drivers/dri/savage/savagestate.c b/src/mesa/drivers/dri/savage/savagestate.c
index d6048291bee..ec3b5c4adc0 100644
--- a/src/mesa/drivers/dri/savage/savagestate.c
+++ b/src/mesa/drivers/dri/savage/savagestate.c
@@ -470,14 +470,14 @@ static void savageDDDepthFunc_s4(GLcontext *ctx, GLenum func)
      * set up z read/write watermarks register (global)
      */
 
-    switch(func)  { 
+    switch(func)  { /* reversed (see savageCalcViewport) */
     case GL_NEVER: zmode = CF_Never; break;
     case GL_ALWAYS: zmode = CF_Always; break;
-    case GL_LESS: zmode = CF_Less; break; 
-    case GL_LEQUAL: zmode = CF_LessEqual; break;
+    case GL_LESS: zmode = CF_Greater; break; 
+    case GL_LEQUAL: zmode = CF_GreaterEqual; break;
     case GL_EQUAL: zmode = CF_Equal; break;
-    case GL_GREATER: zmode = CF_Greater; break;
-    case GL_GEQUAL: zmode = CF_GreaterEqual; break;
+    case GL_GREATER: zmode = CF_Less; break;
+    case GL_GEQUAL: zmode = CF_LessEqual; break;
     case GL_NOTEQUAL: zmode = CF_NotEqual; break;
     default:return;
     } 
@@ -539,14 +539,14 @@ static void savageDDDepthFunc_s3d(GLcontext *ctx, GLenum func)
      * set up z-buffer offset register (global)
      * set up z read/write watermarks register (global)
      */
-    switch(func)  { 
+    switch(func)  { /* reversed (see savageCalcViewport) */
     case GL_NEVER: zmode = CF_Never; break;
     case GL_ALWAYS: zmode = CF_Always; break;
-    case GL_LESS: zmode = CF_Less; break; 
-    case GL_LEQUAL: zmode = CF_LessEqual; break;
+    case GL_LESS: zmode = CF_Greater; break; 
+    case GL_LEQUAL: zmode = CF_GreaterEqual; break;
     case GL_EQUAL: zmode = CF_Equal; break;
-    case GL_GREATER: zmode = CF_Greater; break;
-    case GL_GEQUAL: zmode = CF_GreaterEqual; break;
+    case GL_GREATER: zmode = CF_Less; break;
+    case GL_GEQUAL: zmode = CF_LessEqual; break;
     case GL_NOTEQUAL: zmode = CF_NotEqual; break;
     default:return;
     } 
@@ -716,14 +716,22 @@ static void savageCalcViewport( GLcontext *ctx )
    const GLfloat *v = ctx->Viewport._WindowMap.m;
    GLfloat *m = imesa->hw_viewport;
 
-   /* See also mga_translate_vertex.
-    */
    m[MAT_SX] =   v[MAT_SX];
    m[MAT_TX] =   v[MAT_TX] + imesa->drawX + SUBPIXEL_X;
    m[MAT_SY] = - v[MAT_SY];
    m[MAT_TY] = - v[MAT_TY] + imesa->driDrawable->h + imesa->drawY + SUBPIXEL_Y;
-   m[MAT_SZ] =   v[MAT_SZ] * imesa->depth_scale;
-   m[MAT_TZ] =   v[MAT_TZ] * imesa->depth_scale;
+   /* Depth range is reversed (far: 0, near: 1) so that float depth
+    * compensates for loss of accuracy of far coordinates. */
+   if (imesa->float_depth && imesa->savageScreen->zpp == 2) {
+       /* The Savage 16-bit floating point depth format can't encode
+	* numbers < 2^-16. Make sure all depth values stay greater
+	* than that. */
+       m[MAT_SZ] = - v[MAT_SZ] * imesa->depth_scale * (65535.0/65536.0);
+       m[MAT_TZ] = 1.0 - v[MAT_TZ] * imesa->depth_scale * (65535.0/65536.0);
+   } else {
+       m[MAT_SZ] = - v[MAT_SZ] * imesa->depth_scale;
+       m[MAT_TZ] = 1.0 - v[MAT_TZ] * imesa->depth_scale;
+   }
 
    imesa->SetupNewInputs = ~0;
 }
@@ -1612,7 +1620,14 @@ static void savageDDInitState_s4( savageContextPtr imesa )
 
     imesa->regs.s4.zBufCtrl.ni.zCmpFunc = CF_Less;
     imesa->regs.s4.zBufCtrl.ni.wToZEn               = GL_TRUE;
-    /*imesa->regs.s4.ZBufCtrl.ni.floatZEn          = GL_TRUE;*/
+    if (imesa->float_depth) {
+	imesa->regs.s4.zBufCtrl.ni.zExpOffset =
+	    imesa->savageScreen->zpp == 2 ? 16 : 32;
+	imesa->regs.s4.zBufCtrl.ni.floatZEn = GL_TRUE;
+    } else {
+	imesa->regs.s4.zBufCtrl.ni.zExpOffset = 0;
+	imesa->regs.s4.zBufCtrl.ni.floatZEn = GL_FALSE;
+    }
     imesa->regs.s4.texBlendCtrl[0].ui            = TBC_NoTexMap;
     imesa->regs.s4.texBlendCtrl[1].ui            = TBC_NoTexMap1;
     imesa->regs.s4.drawCtrl0.ui         = 0;
-- 
2.30.2