doc updates; GLUT timer additions; fixed compilation warnings
[mesa.git] / src / mesa / drivers / glide / fxddspan.c
index 6d59e96f462fbe79e6cbb0982aa8ccbc676b8ee3..158e34e08e1650b6e08e6c794fe020be302c0727 100644 (file)
@@ -1,9 +1,8 @@
-
 /*
  * Mesa 3-D graphics library
- * Version:  3.3
+ * Version:  4.0
  *
- * Copyright (C) 1999-2000  Brian Paul   All Rights Reserved.
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- *
- * Original Mesa / 3Dfx device driver (C) 1999 David Bucciarelli, by the
- * terms stated above.
- *
- * Thank you for your contribution, David!
- *
- * Please make note of the above copyright/license statement.  If you
- * contributed code or bug fixes to this code under the previous (GNU
- * Library) license and object to the new license, your code will be
- * removed at your request.  Please see the Mesa docs/COPYRIGHT file
- * for more information.
- *
- * Additional Mesa/3Dfx driver developers:
- *   Daryll Strauss <daryll@precisioninsight.com>
- *   Keith Whitwell <keith@precisioninsight.com>
- *
- * See fxapi.h for more revision/author details.
+ */
+
+/* Authors:
+ *    David Bucciarelli
+ *    Brian Paul
+ *    Daryll Strauss
+ *    Keith Whitwell
+ *    Daniel Borca
+ *    Hiroshi Morii
  */
 
 
 #if defined(FX)
 
 #include "fxdrv.h"
+#include "fxglidew.h"
 #include "swrast/swrast.h"
 
-#ifdef _MSC_VER
-#ifdef _WIN32
-#pragma warning( disable : 4090 4022 )
-/* 4101 : "different 'const' qualifier"
- * 4022 : "pointer mistmatch for actual parameter 'n'
+
+/************************************************************************/
+/*****                    Span functions                            *****/
+/************************************************************************/
+
+#define DBG 0
+
+
+#define LOCAL_VARS                                                     \
+    GLuint pitch = info.strideInBytes;                                 \
+    GLuint height = fxMesa->height;                                    \
+    char *buf = (char *)((char *)info.lfbPtr + 0 /* x, y offset */);   \
+    GLuint p;                                                          \
+    (void) buf; (void) p;
+
+/* True when pixel (_x, _y) lies inside the clip rectangle
+ * [minx, maxx) x [miny, maxy) that is in scope at the point of use.
+ */
+#define CLIPPIXEL( _x, _y )    ( (_x) >= minx && (_x) < maxx &&        \
+                                 (_y) >= miny && (_y) < maxy )
+
+/* Clip the span of _n pixels starting at (_x, _y) against the clip
+ * rectangle in scope.  On exit _x1 is the first visible column, _n1 the
+ * number of visible pixels (0 when row _y is outside [miny, maxy)) and
+ * _i has been advanced by the number of pixels dropped on the left.
+ * Only the macro's own parameters are referenced, so callers are free to
+ * name their variables however they like.
+ */
+#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i )                           \
+    if ( (_y) < miny || (_y) >= maxy ) {                               \
+       _n1 = 0, _x1 = (_x);                                            \
+    } else {                                                           \
+       _n1 = (_n);                                                     \
+       _x1 = (_x);                                                     \
+       if ( _x1 < minx )                                               \
+          _i += (minx - _x1), _n1 -= (minx - _x1), _x1 = minx;         \
+       if ( _x1 + _n1 >= maxx ) _n1 -= (_x1 + _n1 - maxx);             \
+    }
+
+/* Flip a row number between bottom-up and top-down numbering; relies on
+ * a `height' variable being in scope at the point of use.
+ */
+#define Y_FLIP(_y)             (height - (_y) - 1)
+
+#define HW_WRITE_LOCK()                                                        \
+    fxMesaContext fxMesa = FX_CONTEXT(ctx);                            \
+    GrLfbInfo_t info;                                                  \
+    info.size = sizeof(GrLfbInfo_t);                                   \
+    if ( grLfbLock( GR_LFB_WRITE_ONLY,                                 \
+                   fxMesa->currentFB, LFB_MODE,                                \
+                  GR_ORIGIN_UPPER_LEFT, FXFALSE, &info ) ) {
+
+#define HW_WRITE_UNLOCK()                                              \
+       grLfbUnlock( GR_LFB_WRITE_ONLY, fxMesa->currentFB );            \
+    }
+
+#define HW_READ_LOCK()                                                 \
+    fxMesaContext fxMesa = FX_CONTEXT(ctx);                            \
+    GrLfbInfo_t info;                                                  \
+    info.size = sizeof(GrLfbInfo_t);                                   \
+    if ( grLfbLock( GR_LFB_READ_ONLY, fxMesa->currentFB,               \
+                    LFB_MODE, GR_ORIGIN_UPPER_LEFT, FXFALSE, &info ) ) {
+
+#define HW_READ_UNLOCK()                                               \
+       grLfbUnlock( GR_LFB_READ_ONLY, fxMesa->currentFB );             \
+    }
+
+#define HW_WRITE_CLIPLOOP()                                            \
+    do {                                                               \
+       int _nc = 1; /* numcliprects */                                 \
+       /* [dBorca] Hack alert: */                                      \
+       /* remember, we need to flip the scissor, too */                \
+       /* is it better to do it inside fxDDScissor? */                 \
+       while (_nc--) {                                                 \
+           const int minx = fxMesa->clipMinX;                          \
+           const int maxy = Y_FLIP(fxMesa->clipMinY);                  \
+           const int maxx = fxMesa->clipMaxX;                          \
+           const int miny = Y_FLIP(fxMesa->clipMaxY);
+
+#define HW_READ_CLIPLOOP()                                             \
+    do {                                                               \
+       int _nc = 1; /* numcliprects */                                 \
+       /* [dBorca] Hack alert: */                                      \
+       /* remember, we need to flip the scissor, too */                \
+       /* is it better to do it inside fxDDScissor? */                 \
+       while (_nc--) {                                                 \
+           const int minx = fxMesa->clipMinX;                          \
+           const int maxy = Y_FLIP(fxMesa->clipMinY);                  \
+           const int maxx = fxMesa->clipMaxX;                          \
+           const int miny = Y_FLIP(fxMesa->clipMaxY);
+
+#define HW_ENDCLIPLOOP()                                               \
+       }                                                               \
+    } while (0)
+
+
+/* 16 bit, ARGB1555 color spanline and pixel functions */
+
+#undef LFB_MODE
+#define LFB_MODE       GR_LFBWRITEMODE_1555
+
+#undef BYTESPERPIXEL
+#define BYTESPERPIXEL 2
+
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+    p = TDFXPACKCOLOR1555( color[RCOMP], color[GCOMP], color[BCOMP], color[ACOMP] )
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )                               \
+    *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch) =                 \
+                                       TDFXPACKCOLOR1555( r, g, b, a )
+
+#define WRITE_PIXEL( _x, _y, p )                                       \
+    *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch) = p
+
+#define READ_RGBA( rgba, _x, _y )                                      \
+    do {                                                               \
+       GLushort p = *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch);  \
+       rgba[0] = FX_rgb_scale_5[(p >> 10) & 0x1F];                     \
+       rgba[1] = FX_rgb_scale_5[(p >> 5)  & 0x1F];                     \
+       rgba[2] = FX_rgb_scale_5[ p        & 0x1F];                     \
+       rgba[3] = (p & 0x8000) ? 255 : 0;                               \
+    } while (0)
+
+#define TAG(x) tdfx##x##_ARGB1555
+#include "../dri/common/spantmp.h"
+
+
+/* 16 bit, RGB565 color spanline and pixel functions */
+/* [dBorca] Hack alert:
+ * This is wrong. The alpha value is lost, even when we provide
+ * HW alpha (565 w/o depth buffering). To really update alpha buffer,
+ * we would need to do the 565 writings via 8888 colorformat and rely
+ * on the Voodoo to perform color scaling. In which case our 565 span
+ * would look nicer! But this violates FSAA rules...
  */
-#endif
-#endif
 
+#undef LFB_MODE
+#define LFB_MODE       GR_LFBWRITEMODE_565
 
-#if !defined(FXMESA_USE_ARGB)
+#undef BYTESPERPIXEL
+#define BYTESPERPIXEL 2
 
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+    p = TDFXPACKCOLOR565( color[RCOMP], color[GCOMP], color[BCOMP] )
 
+#define WRITE_RGBA( _x, _y, r, g, b, a )                               \
+    *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch) =                 \
+                                       TDFXPACKCOLOR565( r, g, b )
 
-#define writeRegionClipped(fxm,dst_buffer,dst_x,dst_y,src_format,src_width,src_height,src_stride,src_data)             \
-  FX_grLfbWriteRegion(dst_buffer,dst_x,dst_y,src_format,src_width,src_height,src_stride,src_data)
+#define WRITE_PIXEL( _x, _y, p )                                       \
+    *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch) = p
 
+#define READ_RGBA( rgba, _x, _y )                                      \
+    do {                                                               \
+       GLushort p = *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch);  \
+       rgba[0] = FX_rgb_scale_5[(p >> 11) & 0x1F];                     \
+       rgba[1] = FX_rgb_scale_6[(p >> 5)  & 0x3F];                     \
+       rgba[2] = FX_rgb_scale_5[ p        & 0x1F];                     \
+       rgba[3] = 0xff;                                                 \
+    } while (0)
 
+#define TAG(x) tdfx##x##_RGB565
+#include "../dri/common/spantmp.h"
 
-/* KW: Rearranged the args in the call to grLfbWriteRegion().
- */
-#define LFB_WRITE_SPAN_MESA(dst_buffer,                \
-                           dst_x,              \
-                           dst_y,              \
-                           src_width,          \
-                           src_stride,         \
-                           src_data)           \
-  writeRegionClipped(fxMesa, dst_buffer,       \
-                  dst_x,                       \
-                  dst_y,                       \
-                  GR_LFB_SRC_FMT_8888,         \
-                  src_width,                   \
-                  1,                           \
-                  src_stride,                  \
-                  src_data)                    \
-
-
-#else /* !defined(FXMESA_USE_RGBA) */
-
-#define writeRegionClipped(fxm,dst_buffer,dst_x,dst_y,src_format,src_width,src_height,src_stride,src_data)             \
-  FX_grLfbWriteRegion(dst_buffer,dst_x,dst_y,src_format,src_width,src_height,src_stride,src_data)
-
-
-#define MESACOLOR_TO_ARGB(c) (                         \
-             ( ((unsigned int)(c[ACOMP]))<<24 ) |      \
-             ( ((unsigned int)(c[RCOMP]))<<16 ) |      \
-             ( ((unsigned int)(c[GCOMP]))<<8 )  |      \
-             (  (unsigned int)(c[BCOMP])) )
-
-inline void
-LFB_WRITE_SPAN_MESA(GrBuffer_t dst_buffer,
-                   FxU32 dst_x,
-                   FxU32 dst_y,
-                   FxU32 src_width, FxI32 src_stride, void *src_data)
-{
-   /* Covert to ARGB */
-   GLubyte(*rgba)[4] = src_data;
-   GLuint argb[MAX_WIDTH];
-   int i;
 
-   for (i = 0; i < src_width; i++) {
-      argb[i] = MESACOLOR_TO_ARGB(rgba[i]);
-   }
-   writeRegionClipped( /*fxMesa, */ NULL, dst_buffer,
-                     dst_x,
-                     dst_y,
-                     GR_LFB_SRC_FMT_8888,
-                     src_width, 1, src_stride, (void *) argb);
-}
+/* 32 bit, ARGB8888 color spanline and pixel functions */
+
+#undef LFB_MODE
+#define LFB_MODE       GR_LFBWRITEMODE_8888
+
+#undef BYTESPERPIXEL
+#define BYTESPERPIXEL 4
+
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+    p = TDFXPACKCOLOR8888( color[RCOMP], color[GCOMP], color[BCOMP], color[ACOMP] )
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )                               \
+    *(GLuint *)(buf + _x*BYTESPERPIXEL + _y*pitch) =                   \
+                                       TDFXPACKCOLOR8888( r, g, b, a )
+
+#define WRITE_PIXEL( _x, _y, p )                                       \
+    *(GLuint *)(buf + _x*BYTESPERPIXEL + _y*pitch) = p
 
-#endif /* !defined(FXMESA_USE_RGBA) */
+#define READ_RGBA( rgba, _x, _y )                                      \
+    do {                                                               \
+       GLuint p = *(GLuint *)(buf + _x*BYTESPERPIXEL + _y*pitch);      \
+        rgba[0] = (p >> 16) & 0xff;                                    \
+        rgba[1] = (p >>  8) & 0xff;                                    \
+        rgba[2] = (p >>  0) & 0xff;                                    \
+        rgba[3] = (p >> 24) & 0xff;                                    \
+    } while (0)
+
+#define TAG(x) tdfx##x##_ARGB8888
+#include "../dri/common/spantmp.h"
 
 
 /************************************************************************/
-/*****                    Span functions                            *****/
+/*****                    Depth functions                           *****/
 /************************************************************************/
 
+#define DBG 0
 
-static void
-fxDDWriteRGBASpan(const GLcontext * ctx,
-                 GLuint n, GLint x, GLint y,
-                 const GLubyte rgba[][4], const GLubyte mask[])
-{
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GLuint i;
-   GLint bottom = fxMesa->height - 1;
+#undef HW_WRITE_LOCK
+#undef HW_WRITE_UNLOCK
+#undef HW_READ_LOCK
+#undef HW_READ_UNLOCK
 
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDWriteRGBASpan(...)\n");
-   }
+#define HW_CLIPLOOP HW_WRITE_CLIPLOOP
 
-   if (mask) {
-      int span = 0;
-
-      for (i = 0; i < n; i++) {
-        if (mask[i]) {
-           ++span;
-        }
-        else {
-           if (span > 0) {
-              LFB_WRITE_SPAN_MESA(fxMesa->currentFB, x + i - span,
-                                  bottom - y,
-                                  /* GR_LFB_SRC_FMT_8888, */ span, /*1, */ 0,
-                                  (void *) rgba[i - span]);
-              span = 0;
-           }
-        }
-      }
-
-      if (span > 0)
-        LFB_WRITE_SPAN_MESA(fxMesa->currentFB, x + n - span, bottom - y,
-                            /* GR_LFB_SRC_FMT_8888, */ span, /*1, */ 0,
-                            (void *) rgba[n - span]);
-   }
-   else
-      LFB_WRITE_SPAN_MESA(fxMesa->currentFB, x, bottom - y,    /* GR_LFB_SRC_FMT_8888, */
-                         n, /* 1, */ 0, (void *) rgba);
-}
+#define LOCAL_DEPTH_VARS                                               \
+    GLuint pitch = info.strideInBytes;                                 \
+    GLuint height = fxMesa->height;                                    \
+    char *buf = (char *)((char *)info.lfbPtr + 0 /* x, y offset */);   \
+    (void) buf;
 
+#define HW_WRITE_LOCK()                                                        \
+    fxMesaContext fxMesa = FX_CONTEXT(ctx);                            \
+    GrLfbInfo_t info;                                                  \
+    info.size = sizeof(GrLfbInfo_t);                                   \
+    if ( grLfbLock( GR_LFB_WRITE_ONLY,                                 \
+                   GR_BUFFER_AUXBUFFER, LFB_MODE,                      \
+                  GR_ORIGIN_UPPER_LEFT, FXFALSE, &info ) ) {
 
-static void
-fxDDWriteRGBSpan(const GLcontext * ctx,
-                GLuint n, GLint x, GLint y,
-                const GLubyte rgb[][3], const GLubyte mask[])
-{
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GLuint i;
-   GLint bottom = fxMesa->height - 1;
-   GLubyte rgba[MAX_WIDTH][4];
+#define HW_WRITE_UNLOCK()                                              \
+       grLfbUnlock( GR_LFB_WRITE_ONLY, GR_BUFFER_AUXBUFFER);           \
+    }
 
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDWriteRGBSpan()\n");
-   }
+#define HW_READ_LOCK()                                                 \
+    fxMesaContext fxMesa = FX_CONTEXT(ctx);                            \
+    GrLfbInfo_t info;                                                  \
+    info.size = sizeof(GrLfbInfo_t);                                   \
+    if ( grLfbLock( GR_LFB_READ_ONLY, GR_BUFFER_AUXBUFFER,             \
+                    LFB_MODE, GR_ORIGIN_UPPER_LEFT, FXFALSE, &info ) ) {
 
-   if (mask) {
-      int span = 0;
-
-      for (i = 0; i < n; i++) {
-        if (mask[i]) {
-           rgba[span][RCOMP] = rgb[i][0];
-           rgba[span][GCOMP] = rgb[i][1];
-           rgba[span][BCOMP] = rgb[i][2];
-           rgba[span][ACOMP] = 255;
-           ++span;
-        }
-        else {
-           if (span > 0) {
-              LFB_WRITE_SPAN_MESA(fxMesa->currentFB, x + i - span,
-                                  bottom - y,
-                                  /*GR_LFB_SRC_FMT_8888, */ span, /* 1, */ 0,
-                                  (void *) rgba);
-              span = 0;
-           }
-        }
-      }
-
-      if (span > 0)
-        LFB_WRITE_SPAN_MESA(fxMesa->currentFB, x + n - span, bottom - y,
-                            /*GR_LFB_SRC_FMT_8888, */ span, /* 1, */ 0,
-                            (void *) rgba);
-   }
-   else {
-      for (i = 0; i < n; i++) {
-        rgba[i][RCOMP] = rgb[i][0];
-        rgba[i][GCOMP] = rgb[i][1];
-        rgba[i][BCOMP] = rgb[i][2];
-        rgba[i][ACOMP] = 255;
-      }
-
-      LFB_WRITE_SPAN_MESA(fxMesa->currentFB, x, bottom - y,    /* GR_LFB_SRC_FMT_8888, */
-                         n, /* 1, */ 0, (void *) rgba);
-   }
-}
+#define HW_READ_UNLOCK()                                               \
+       grLfbUnlock( GR_LFB_READ_ONLY, GR_BUFFER_AUXBUFFER);            \
+    }
 
 
-static void
-fxDDWriteMonoRGBASpan(const GLcontext * ctx,
-                     GLuint n, GLint x, GLint y,
-                     const GLchan color[4], const GLubyte mask[])
-{
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GLuint i;
-   GLint bottom = fxMesa->height - 1;
-   GLuint data[MAX_WIDTH];
-   GrColor_t gColor = FXCOLOR4(color);
+/* 16 bit, depth spanline and pixel functions */
 
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDWriteMonoRGBASpan(...)\n");
-   }
+#undef LFB_MODE
+#define LFB_MODE       GR_LFBWRITEMODE_ZA16
 
-   if (mask) {
-      int span = 0;
-
-      for (i = 0; i < n; i++) {
-        if (mask[i]) {
-           data[span] = (GLuint) gColor;
-           ++span;
-        }
-        else {
-           if (span > 0) {
-              writeRegionClipped(fxMesa, fxMesa->currentFB, x + i - span,
-                                 bottom - y, GR_LFB_SRC_FMT_8888, span, 1, 0,
-                                 (void *) data);
-              span = 0;
-           }
-        }
-      }
-
-      if (span > 0)
-        writeRegionClipped(fxMesa, fxMesa->currentFB, x + n - span,
-                           bottom - y, GR_LFB_SRC_FMT_8888, span, 1, 0,
-                           (void *) data);
-   }
-   else {
-      for (i = 0; i < n; i++) {
-        data[i] = (GLuint) gColor;
-      }
+#undef BYTESPERPIXEL
+#define BYTESPERPIXEL 2
 
-      writeRegionClipped(fxMesa, fxMesa->currentFB, x, bottom - y,
-                        GR_LFB_SRC_FMT_8888, n, 1, 0, (void *) data);
-   }
-}
+#define WRITE_DEPTH( _x, _y, d )                                       \
+    *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch) = d
 
+#define READ_DEPTH( d, _x, _y )                                                \
+    d = *(GLushort *)(buf + _x*BYTESPERPIXEL + _y*pitch)
 
-#if 0
-static void
-fxDDReadRGBASpan(const GLcontext * ctx,
-                GLuint n, GLint x, GLint y, GLubyte rgba[][4])
-{
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GLushort data[MAX_WIDTH];
-   GLuint i;
-   GLint bottom = fxMesa->height - 1;
+#define TAG(x) tdfx##x##_Z16
+#include "../dri/common/depthtmp.h"
 
-   printf("read span %d, %d, %d\n", x, y, n);
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDReadRGBASpan(...)\n");
-   }
 
-   assert(n < MAX_WIDTH);
+/* 24 bit, depth spanline and pixel functions (for use w/ stencil) */
+/* [dBorca] Hack alert:
+ * This is evil. The incoming Mesa's 24bit depth value
+ * is shifted left 8 bits, to obtain a full 32bit value,
+ * which will be thrown into the framebuffer. We rely on
+ * the fact that Voodoo hardware transforms a 32bit value
+ * into 24bit value automatically and, MOST IMPORTANT, won't
+ * alter the upper 8bits of the value already existing in the
+ * framebuffer (where stencil resides).
+ */
 
-   FX_grLfbReadRegion(fxMesa->currentFB, x, bottom - y, n, 1, 0, data);
+#undef LFB_MODE
+#define LFB_MODE       GR_LFBWRITEMODE_Z32
 
-   for (i = 0; i < n; i++) {
-      GLushort pixel = data[i];
-      rgba[i][RCOMP] = FX_PixelToR[pixel];
-      rgba[i][GCOMP] = FX_PixelToG[pixel];
-      rgba[i][BCOMP] = FX_PixelToB[pixel];
-      rgba[i][ACOMP] = 255;
-   }
-}
-#endif
+#undef BYTESPERPIXEL
+#define BYTESPERPIXEL 4
 
+#define WRITE_DEPTH( _x, _y, d )                                       \
+    *(GLuint *)(buf + _x*BYTESPERPIXEL + _y*pitch) = d << 8
 
-/*
- * Read a span of 16-bit RGB pixels.  Note, we don't worry about cliprects
- * since OpenGL says obscured pixels have undefined values.
- */
-static void
-read_R5G6B5_span(const GLcontext * ctx,
-                GLuint n, GLint x, GLint y, GLubyte rgba[][4])
-{
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GrLfbInfo_t info;
-   BEGIN_BOARD_LOCK();
-   if (grLfbLock(GR_LFB_READ_ONLY,
-                fxMesa->currentFB,
-                GR_LFBWRITEMODE_ANY, GR_ORIGIN_UPPER_LEFT, FXFALSE, &info)) {
-      const GLint winX = 0;
-      const GLint winY = fxMesa->height - 1;
-      const GLint srcStride = info.strideInBytes / 2;  /* stride in GLushorts */
-      const GLushort *data16 = (const GLushort *) info.lfbPtr
-        + (winY - y) * srcStride + (winX + x);
-      const GLuint *data32 = (const GLuint *) data16;
-      GLuint i, j;
-      GLuint extraPixel = (n & 1);
-      n -= extraPixel;
-      for (i = j = 0; i < n; i += 2, j++) {
-        GLuint pixel = data32[j];
-        GLuint pixel0 = pixel & 0xffff;
-        GLuint pixel1 = pixel >> 16;
-        rgba[i][RCOMP] = FX_PixelToR[pixel0];
-        rgba[i][GCOMP] = FX_PixelToG[pixel0];
-        rgba[i][BCOMP] = FX_PixelToB[pixel0];
-        rgba[i][ACOMP] = 255;
-        rgba[i + 1][RCOMP] = FX_PixelToR[pixel1];
-        rgba[i + 1][GCOMP] = FX_PixelToG[pixel1];
-        rgba[i + 1][BCOMP] = FX_PixelToB[pixel1];
-        rgba[i + 1][ACOMP] = 255;
-      }
-      if (extraPixel) {
-        GLushort pixel = data16[n];
-        rgba[n][RCOMP] = FX_PixelToR[pixel];
-        rgba[n][GCOMP] = FX_PixelToG[pixel];
-        rgba[n][BCOMP] = FX_PixelToB[pixel];
-        rgba[n][ACOMP] = 255;
-      }
-
-      grLfbUnlock(GR_LFB_READ_ONLY, fxMesa->currentFB);
-   }
-   END_BOARD_LOCK();
-}
+#define READ_DEPTH( d, _x, _y )                                                \
+    d = (*(GLuint *)(buf + _x*BYTESPERPIXEL + _y*pitch)) & 0xffffff
+
+#define TAG(x) tdfx##x##_Z24
+#include "../dri/common/depthtmp.h"
 
 
+/* 32 bit, depth spanline and pixel functions (for use w/o stencil) */
+/* [dBorca] Hack alert:
+ * This is more evil. We make Mesa run in 32bit depth, but
+ * the Voodoo HW can only handle 24bit depth. Well, exploiting
+ * the pixel pipeline, we can achieve 24:8 format for greater
+ * precision...
+ * If anyone tells me how to really store 32bit values into the
+ * depth buffer, I'll write the *_Z32 routines. However, bear in
+ * mind that means running without stencil!
+ */
+
 /************************************************************************/
-/*****                    Pixel functions                           *****/
+/*****                    Span functions (optimized)                *****/
 /************************************************************************/
 
-static void
-fxDDWriteRGBAPixels(const GLcontext * ctx,
-                   GLuint n, const GLint x[], const GLint y[],
-                   CONST GLubyte rgba[][4], const GLubyte mask[])
+/*
+ * Read a span of n pixels starting at (x, y) from fxMesa->currentFB,
+ * decoding each 16-bit value as ARGB1555 (5 bits each of red, green and
+ * blue plus one alpha bit) into rgba[].  The buffer is locked with an
+ * upper-left origin, so the row is addressed as (height - 1 - y).
+ * Pixels are fetched two at a time through 32-bit reads; an odd trailing
+ * pixel is fetched with a single 16-bit read.  If the lock fails nothing
+ * is written to rgba[].
+ * Note, we don't worry about cliprects
+ * since OpenGL says obscured pixels have undefined values.
+ */
+static void fxReadRGBASpan_ARGB1555 (const GLcontext * ctx,
+                                     GLuint n,
+                                     GLint x, GLint y,
+                                     GLubyte rgba[][4])
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GLuint i;
-   GLint bottom = fxMesa->height - 1;
-
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDWriteRGBAPixels(...)\n");
-   }
-
-   for (i = 0; i < n; i++)
-      if (mask[i])
-        LFB_WRITE_SPAN_MESA(fxMesa->currentFB, x[i], bottom - y[i],
-                            1, 1, (void *) rgba[i]);
+ fxMesaContext fxMesa = FX_CONTEXT(ctx);
+ GrLfbInfo_t info;
+ info.size = sizeof(GrLfbInfo_t);
+ if (grLfbLock(GR_LFB_READ_ONLY, fxMesa->currentFB,
+               GR_LFBWRITEMODE_ANY, GR_ORIGIN_UPPER_LEFT, FXFALSE, &info)) {
+    const GLint winX = 0;
+    const GLint winY = fxMesa->height - 1;
+    const GLushort *data16 = (const GLushort *)((const GLubyte *)info.lfbPtr +
+                                               (winY - y) * info.strideInBytes +
+                                                (winX + x) * 2);
+    const GLuint *data32 = (const GLuint *) data16;   /* same row, viewed as pixel pairs */
+    GLuint i, j;
+    GLuint extraPixel = (n & 1);   /* 1 when n is odd */
+    n -= extraPixel;   /* n is now even, so the pair loop covers it exactly */
+
+    for (i = j = 0; i < n; i += 2, j++) {
+       GLuint pixel = data32[j];   /* low 16 bits -> rgba[i], high 16 bits -> rgba[i+1]; NOTE(review): presumes little-endian pixel order, and data32 may be only 2-byte aligned when x is odd - confirm */
+       rgba[i][0] = FX_rgb_scale_5[(pixel >> 10) & 0x1F];
+       rgba[i][1] = FX_rgb_scale_5[(pixel >> 5)  & 0x1F];
+       rgba[i][2] = FX_rgb_scale_5[ pixel        & 0x1F];
+       rgba[i][3] = (pixel & 0x8000) ? 255 : 0;   /* 1-bit alpha expanded to 0 or 255 */
+       rgba[i+1][0] = FX_rgb_scale_5[(pixel >> 26) & 0x1F];
+       rgba[i+1][1] = FX_rgb_scale_5[(pixel >> 21) & 0x1F];
+       rgba[i+1][2] = FX_rgb_scale_5[(pixel >> 16) & 0x1F];
+       rgba[i+1][3] = (pixel & 0x80000000) ? 255 : 0;
+    }
+    if (extraPixel) {
+       GLushort pixel = data16[n];   /* the last, unpaired pixel */
+       rgba[n][0] = FX_rgb_scale_5[(pixel >> 10) & 0x1F];
+       rgba[n][1] = FX_rgb_scale_5[(pixel >> 5)  & 0x1F];
+       rgba[n][2] = FX_rgb_scale_5[ pixel        & 0x1F];
+       rgba[n][3] = (pixel & 0x8000) ? 255 : 0;
+    }
+
+    grLfbUnlock(GR_LFB_READ_ONLY, fxMesa->currentFB);
+ }
 }
 
-static void
-fxDDWriteMonoRGBAPixels(const GLcontext * ctx,
-                       GLuint n, const GLint x[], const GLint y[],
-                       const GLchan color[4], const GLubyte mask[])
+/*
+ * Read a span of n pixels starting at (x, y) from fxMesa->currentFB,
+ * decoding each 16-bit value as RGB565 (5 bits red, 6 bits green, 5 bits
+ * blue) into rgba[]; alpha is always reported as 255.  The buffer is
+ * locked with an upper-left origin, so the row is addressed as
+ * (height - 1 - y).  Pixels are fetched two at a time through 32-bit
+ * reads; an odd trailing pixel is fetched with a single 16-bit read.
+ * If the lock fails nothing is written to rgba[].
+ * Note, we don't worry about cliprects
+ * since OpenGL says obscured pixels have undefined values.
+ */
+static void fxReadRGBASpan_RGB565 (const GLcontext * ctx,
+                                   GLuint n,
+                                   GLint x, GLint y,
+                                   GLubyte rgba[][4])
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GLuint i;
-   GLint bottom = fxMesa->height - 1;
-   GrColor_t gColor = FXCOLOR4(color);
-
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDWriteMonoRGBAPixels(...)\n");
-   }
-
-   for (i = 0; i < n; i++)
-      if (mask[i])
-        writeRegionClipped(fxMesa, fxMesa->currentFB, x[i], bottom - y[i],
-                           GR_LFB_SRC_FMT_8888, 1, 1, 0, (void *) &gColor);
+ fxMesaContext fxMesa = FX_CONTEXT(ctx);
+ GrLfbInfo_t info;
+ info.size = sizeof(GrLfbInfo_t);
+ if (grLfbLock(GR_LFB_READ_ONLY, fxMesa->currentFB,
+               GR_LFBWRITEMODE_ANY, GR_ORIGIN_UPPER_LEFT, FXFALSE, &info)) {
+    const GLint winX = 0;
+    const GLint winY = fxMesa->height - 1;
+    const GLushort *data16 = (const GLushort *)((const GLubyte *)info.lfbPtr +
+                                               (winY - y) * info.strideInBytes +
+                                                (winX + x) * 2);
+    const GLuint *data32 = (const GLuint *) data16;   /* same row, viewed as pixel pairs */
+    GLuint i, j;
+    GLuint extraPixel = (n & 1);   /* 1 when n is odd */
+    n -= extraPixel;   /* n is now even, so the pair loop covers it exactly */
+
+    for (i = j = 0; i < n; i += 2, j++) {
+        GLuint pixel = data32[j];   /* low 16 bits -> rgba[i], high 16 bits -> rgba[i+1]; NOTE(review): presumes little-endian pixel order, and data32 may be only 2-byte aligned when x is odd - confirm */
+       rgba[i][0] = FX_rgb_scale_5[(pixel >> 11) & 0x1F];
+       rgba[i][1] = FX_rgb_scale_6[(pixel >> 5)  & 0x3F];
+       rgba[i][2] = FX_rgb_scale_5[ pixel        & 0x1F];
+       rgba[i][3] = 255;   /* this format carries no alpha bits */
+       rgba[i+1][0] = FX_rgb_scale_5[(pixel >> 27) & 0x1F];
+       rgba[i+1][1] = FX_rgb_scale_6[(pixel >> 21) & 0x3F];
+       rgba[i+1][2] = FX_rgb_scale_5[(pixel >> 16) & 0x1F];
+       rgba[i+1][3] = 255;
+    }
+    if (extraPixel) {
+       GLushort pixel = data16[n];   /* the last, unpaired pixel */
+       rgba[n][0] = FX_rgb_scale_5[(pixel >> 11) & 0x1F];
+       rgba[n][1] = FX_rgb_scale_6[(pixel >> 5)  & 0x3F];
+       rgba[n][2] = FX_rgb_scale_5[ pixel        & 0x1F];
+       rgba[n][3] = 255;
+    }
+
+    grLfbUnlock(GR_LFB_READ_ONLY, fxMesa->currentFB);
+ }
 }
 
-
-static void
-read_R5G6B5_pixels(const GLcontext * ctx,
-                  GLuint n, const GLint x[], const GLint y[],
-                  GLubyte rgba[][4], const GLubyte mask[])
+/*
+ * Read a span of n 32-bit pixels starting at (x, y) from
+ * fxMesa->currentFB straight into rgba[] with grLfbReadRegion (one row,
+ * n * 4 bytes of destination stride), addressing the row as
+ * (height - 1 - y).  Components 0 and 2 of every pixel are then swapped
+ * in place (presumably converting the hardware's B,G,R,A byte order to
+ * Mesa's R,G,B,A - confirm).  The return value of grLfbReadRegion is not
+ * checked.
+ * Note, we don't worry about cliprects
+ * since OpenGL says obscured pixels have undefined values.
+ */
+static void fxReadRGBASpan_ARGB8888 (const GLcontext * ctx,
+                                     GLuint n,
+                                     GLint x, GLint y,
+                                     GLubyte rgba[][4])
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GrLfbInfo_t info;
-   BEGIN_BOARD_LOCK();
-   if (grLfbLock(GR_LFB_READ_ONLY,
-                fxMesa->currentFB,
-                GR_LFBWRITEMODE_ANY, GR_ORIGIN_UPPER_LEFT, FXFALSE, &info)) {
-      const GLint srcStride = info.strideInBytes / 2;  /* stride in GLushorts */
-      const GLint winX = 0;
-      const GLint winY = fxMesa->height - 1;
-      GLuint i;
-      for (i = 0; i < n; i++) {
-        if (mask[i]) {
-           const GLushort *data16 = (const GLushort *) info.lfbPtr
-              + (winY - y[i]) * srcStride + (winX + x[i]);
-           const GLushort pixel = *data16;
-           rgba[i][RCOMP] = FX_PixelToR[pixel];
-           rgba[i][GCOMP] = FX_PixelToG[pixel];
-           rgba[i][BCOMP] = FX_PixelToB[pixel];
-           rgba[i][ACOMP] = 255;
-        }
-      }
-      grLfbUnlock(GR_LFB_READ_ONLY, fxMesa->currentFB);
-   }
-   END_BOARD_LOCK();
+ fxMesaContext fxMesa = FX_CONTEXT(ctx);
+ GLuint i;
+ grLfbReadRegion(fxMesa->currentFB, x, fxMesa->height - 1 - y, n, 1, n * 4, rgba);
+ for (i = 0; i < n; i++) {
+     GLubyte c = rgba[i][0];   /* swap components 0 and 2, leaving 1 and 3 alone */
+     rgba[i][0] = rgba[i][2];
+     rgba[i][2] = c;
+ }
 }
 
 
-
 /************************************************************************/
-/*****                    Depth functions                           *****/
+/*****                    Depth functions (optimized)               *****/
 /************************************************************************/
 
 void
-fxDDWriteDepthSpan(GLcontext * ctx,
-                  GLuint n, GLint x, GLint y, const GLdepth depth[],
-                  const GLubyte mask[])
+fxReadDepthSpan_Z16(GLcontext * ctx,   /* read n 16-bit depth values from row y of the aux buffer, starting at column x, into depth[] */
+                   GLuint n, GLint x, GLint y, GLdepth depth[])
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
+   fxMesaContext fxMesa = FX_CONTEXT(ctx);
    GLint bottom = fxMesa->height - 1;
+   GLushort depth16[MAX_WIDTH];   /* staging buffer for the 16-bit values; NOTE(review): n is not checked against MAX_WIDTH here - confirm callers guarantee it */
+   GLuint i;
 
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDWriteDepthSpan(...)\n");
+   if (TDFX_DEBUG & VERBOSE_DRIVER) {
+      fprintf(stderr, "%s(...)\n", __FUNCTION__);
    }
 
-
-   if (mask) {
-      GLint i;
-      for (i = 0; i < n; i++) {
-        if (mask[i]) {
-           GLshort d = depth[i];
-           writeRegionClipped(fxMesa, GR_BUFFER_AUXBUFFER, x + i, bottom - y,
-                              GR_LFB_SRC_FMT_ZA16, 1, 1, 0, (void *) &d);
-        }
-      }
-   }
-   else {
-      GLushort depth16[MAX_WIDTH];
-      GLint i;
-      for (i = 0; i < n; i++) {
-        depth16[i] = depth[i];
-      }
-      writeRegionClipped(fxMesa, GR_BUFFER_AUXBUFFER, x, bottom - y,
-                        GR_LFB_SRC_FMT_ZA16, n, 1, 0, (void *) depth16);
+   grLfbReadRegion(GR_BUFFER_AUXBUFFER, x, bottom - y, n, 1, 0, depth16);   /* one row, flipped to (height - 1 - y); return value not checked */
+   for (i = 0; i < n; i++) {
+      depth[i] = depth16[i];   /* widen each 16-bit value to GLdepth */
    }
 }
 
 
+/*
+ * Read a horizontal span of n depth values starting at window position
+ * (x, y) into depth[].
+ *
+ * The row is fetched from GR_BUFFER_AUXBUFFER with the Y coordinate
+ * flipped (bottom - y) directly into depth[]; each entry is then masked
+ * so that only its low 24 bits remain.
+ *
+ * NOTE(review): reading straight into depth[] presumably relies on
+ * GLdepth being 32 bits wide -- confirm.  The top 8 bits that are masked
+ * off are the ones fxReadStencilSpan() extracts from the same buffer.
+ * The result of grLfbReadRegion() is not checked.
+ */
 void
-fxDDReadDepthSpan(GLcontext * ctx,
-                 GLuint n, GLint x, GLint y, GLdepth depth[])
+fxReadDepthSpan_Z24(GLcontext * ctx,
+                   GLuint n, GLint x, GLint y, GLdepth depth[])
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
+   fxMesaContext fxMesa = FX_CONTEXT(ctx);
    GLint bottom = fxMesa->height - 1;
-   GLushort depth16[MAX_WIDTH];
    GLuint i;
 
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDReadDepthSpan(...)\n");
+   if (TDFX_DEBUG & VERBOSE_DRIVER) {
+      fprintf(stderr, "%s(...)\n", __FUNCTION__);
    }
 
-   FX_grLfbReadRegion(GR_BUFFER_AUXBUFFER, x, bottom - y, n, 1, 0, depth16);
+   grLfbReadRegion(GR_BUFFER_AUXBUFFER, x, bottom - y, n, 1, 0, depth);
    for (i = 0; i < n; i++) {
-      depth[i] = depth16[i];
+      depth[i] &= 0xffffff;
    }
 }
 
 
+/************************************************************************/
+/*****                    Stencil functions (optimized)             *****/
+/************************************************************************/
+
+/*
+ * Write a horizontal span of n stencil values at window position (x, y).
+ *
+ * Not implemented: the body is empty, so the values in stencil[] are
+ * discarded and nothing is written.  fxSetupDDSpanPointers() still
+ * installs this function when fxMesa->haveHwStencil is set.
+ */
+void fxWriteStencilSpan (GLcontext *ctx, GLuint n, GLint x, GLint y,
+                         const GLstencil stencil[], const GLubyte mask[])
+{
+ /*
+  * XXX todo
+  */
+}
 
+/*
+ * Read a horizontal span of n stencil values starting at window position
+ * (x, y) into stencil[].
+ *
+ * The row is fetched from GR_BUFFER_AUXBUFFER with the Y coordinate
+ * flipped (bottom - y) into a temporary array of 32-bit words; the
+ * stencil value stored for each pixel is that word shifted right by 24.
+ *
+ * NOTE(review): n is not checked against MAX_WIDTH, the size of the
+ * temporary array; presumably the caller guarantees n <= MAX_WIDTH.
+ * The result of grLfbReadRegion() is not checked.
+ */
 void
-fxDDWriteDepthPixels(GLcontext * ctx,
-                    GLuint n, const GLint x[], const GLint y[],
-                    const GLdepth depth[], const GLubyte mask[])
+fxReadStencilSpan(GLcontext * ctx,
+                 GLuint n, GLint x, GLint y, GLstencil stencil[])
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
+   fxMesaContext fxMesa = FX_CONTEXT(ctx);
    GLint bottom = fxMesa->height - 1;
+   GLuint zs32[MAX_WIDTH];
    GLuint i;
 
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDWriteDepthPixels(...)\n");
+   if (TDFX_DEBUG & VERBOSE_DRIVER) {
+      fprintf(stderr, "%s(...)\n", __FUNCTION__);
    }
 
+   grLfbReadRegion(GR_BUFFER_AUXBUFFER, x, bottom - y, n, 1, 0, zs32);
    for (i = 0; i < n; i++) {
-      if (mask[i]) {
-        int xpos = x[i];
-        int ypos = bottom - y[i];
-        GLushort d = depth[i];
-        writeRegionClipped(fxMesa, GR_BUFFER_AUXBUFFER, xpos, ypos,
-                           GR_LFB_SRC_FMT_ZA16, 1, 1, 0, (void *) &d);
-      }
+      stencil[i] = zs32[i] >> 24;
    }
 }
 
-
-void
-fxDDReadDepthPixels(GLcontext * ctx, GLuint n,
-                   const GLint x[], const GLint y[], GLdepth depth[])
+/*
+ * Write stencil values for n scattered pixels at (x[i], y[i]).
+ *
+ * Not implemented: the body is empty, so the values in stencil[] are
+ * discarded and nothing is written.  fxSetupDDSpanPointers() still
+ * installs this function when fxMesa->haveHwStencil is set.
+ */
+void fxWriteStencilPixels (GLcontext *ctx, GLuint n,
+                           const GLint x[], const GLint y[],
+                           const GLstencil stencil[],
+                           const GLubyte mask[])
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
-   GLint bottom = fxMesa->height - 1;
-   GLuint i;
-
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDReadDepthPixels(...)\n");
-   }
+ /*
+  * XXX todo
+  */
+}
 
-   for (i = 0; i < n; i++) {
-      int xpos = x[i];
-      int ypos = bottom - y[i];
-      GLushort d;
-      FX_grLfbReadRegion(GR_BUFFER_AUXBUFFER, xpos, ypos, 1, 1, 0, &d);
-      depth[i] = d;
-   }
+/*
+ * Read the stencil values of n scattered pixels into stencil[].
+ *
+ * Each pixel (x[i], y[i]) is fetched on its own from GR_BUFFER_AUXBUFFER
+ * with the Y coordinate flipped (bottom - y[i]).  The stencil value is the
+ * 32-bit word shifted right by 24, the same extraction fxReadStencilSpan()
+ * applies to a whole row.
+ *
+ * The result of grLfbReadRegion() is not checked, matching the span
+ * readers; the word is zero-initialised so a failed read yields stencil 0
+ * instead of an indeterminate value.
+ */
+void fxReadStencilPixels (GLcontext *ctx, GLuint n,
+                          const GLint x[], const GLint y[],
+                          GLstencil stencil[])
+{
+   fxMesaContext fxMesa = FX_CONTEXT(ctx);
+   GLint bottom = fxMesa->height - 1;
+   GLuint i;
+
+   if (TDFX_DEBUG & VERBOSE_DRIVER) {
+      fprintf(stderr, "%s(...)\n", __FUNCTION__);
+   }
+
+   for (i = 0; i < n; i++) {
+      GLuint zs32 = 0;
+      grLfbReadRegion(GR_BUFFER_AUXBUFFER, x[i], bottom - y[i], 1, 1, 0, &zs32);
+      stencil[i] = zs32 >> 24;
+   }
 }
 
 
 
-/* Set the buffer used for reading */
-/* XXX support for separate read/draw buffers hasn't been tested */
+/*
+ * This function is called to specify which buffer to read and write
+ * for software rasterization (swrast) fallbacks.  This doesn't necessarily
+ * correspond to glDrawBuffer() or glReadBuffer() calls.
+ *
+ * bufferBit selects the target: FRONT_LEFT_BIT maps to
+ * GR_BUFFER_FRONTBUFFER and BACK_LEFT_BIT to GR_BUFFER_BACKBUFFER.  The
+ * choice is stored in fxMesa->currentFB and passed to grRenderBuffer().
+ * Any other value leaves the current buffer unchanged.  The buffer
+ * argument is unused.
+ */
 static void
-fxDDSetReadBuffer(GLcontext * ctx, GLframebuffer * buffer, GLenum mode)
+fxDDSetBuffer(GLcontext * ctx, GLframebuffer * buffer, GLuint bufferBit)
 {
-   fxMesaContext fxMesa = (fxMesaContext) ctx->DriverCtx;
+   fxMesaContext fxMesa = FX_CONTEXT(ctx);
    (void) buffer;
 
-   if (MESA_VERBOSE & VERBOSE_DRIVER) {
-      fprintf(stderr, "fxmesa: fxDDSetBuffer(%x)\n", (int) mode);
+   if (TDFX_DEBUG & VERBOSE_DRIVER) {
+      fprintf(stderr, "%s(%x)\n", __FUNCTION__, (int)bufferBit);
    }
 
-   if (mode == GL_FRONT_LEFT) {
+   if (bufferBit == FRONT_LEFT_BIT) {
       fxMesa->currentFB = GR_BUFFER_FRONTBUFFER;
-      FX_grRenderBuffer(fxMesa->currentFB);
+      grRenderBuffer(fxMesa->currentFB);
    }
-   else if (mode == GL_BACK_LEFT) {
+   else if (bufferBit == BACK_LEFT_BIT) {
       fxMesa->currentFB = GR_BUFFER_BACKBUFFER;
-      FX_grRenderBuffer(fxMesa->currentFB);
+      grRenderBuffer(fxMesa->currentFB);
    }
 }
 
@@ -574,23 +576,73 @@ void
 fxSetupDDSpanPointers(GLcontext * ctx)
 {
    struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference( ctx );
+   fxMesaContext fxMesa = FX_CONTEXT(ctx);
+
+   swdd->SetBuffer = fxDDSetBuffer;
+
+   switch (fxMesa->colDepth) {   /* span hooks are chosen by colour depth; NOTE(review): no default case, other depths leave the hooks untouched */
+          case 15:   /* ARGB1555 colour hooks, Z16 depth hooks */
+               swdd->WriteRGBASpan = tdfxWriteRGBASpan_ARGB1555;
+               swdd->WriteRGBSpan = tdfxWriteRGBSpan_ARGB1555;
+               swdd->WriteRGBAPixels = tdfxWriteRGBAPixels_ARGB1555;
+               swdd->WriteMonoRGBASpan = tdfxWriteMonoRGBASpan_ARGB1555;
+               swdd->WriteMonoRGBAPixels = tdfxWriteMonoRGBAPixels_ARGB1555;
+               swdd->ReadRGBASpan = /*td*/fxReadRGBASpan_ARGB1555;
+               swdd->ReadRGBAPixels = tdfxReadRGBAPixels_ARGB1555;
+
+               swdd->WriteDepthSpan = tdfxWriteDepthSpan_Z16;
+               swdd->WriteDepthPixels = tdfxWriteDepthPixels_Z16;
+               swdd->ReadDepthSpan = /*td*/fxReadDepthSpan_Z16;
+               swdd->ReadDepthPixels = tdfxReadDepthPixels_Z16;
+               break;
+          case 16:   /* RGB565 colour hooks, Z16 depth hooks */
+               swdd->WriteRGBASpan = tdfxWriteRGBASpan_RGB565;
+               swdd->WriteRGBSpan = tdfxWriteRGBSpan_RGB565;
+               swdd->WriteRGBAPixels = tdfxWriteRGBAPixels_RGB565;
+               swdd->WriteMonoRGBASpan = tdfxWriteMonoRGBASpan_RGB565;
+               swdd->WriteMonoRGBAPixels = tdfxWriteMonoRGBAPixels_RGB565;
+               swdd->ReadRGBASpan = /*td*/fxReadRGBASpan_RGB565;
+               swdd->ReadRGBAPixels = tdfxReadRGBAPixels_RGB565;
+
+               swdd->WriteDepthSpan = tdfxWriteDepthSpan_Z16;
+               swdd->WriteDepthPixels = tdfxWriteDepthPixels_Z16;
+               swdd->ReadDepthSpan = /*td*/fxReadDepthSpan_Z16;
+               swdd->ReadDepthPixels = tdfxReadDepthPixels_Z16;
+               break;
+          case 32:   /* ARGB8888 colour hooks, Z24 depth hooks */
+               swdd->WriteRGBASpan = tdfxWriteRGBASpan_ARGB8888;
+               swdd->WriteRGBSpan = tdfxWriteRGBSpan_ARGB8888;
+               swdd->WriteRGBAPixels = tdfxWriteRGBAPixels_ARGB8888;
+               swdd->WriteMonoRGBASpan = tdfxWriteMonoRGBASpan_ARGB8888;
+               swdd->WriteMonoRGBAPixels = tdfxWriteMonoRGBAPixels_ARGB8888;
+               swdd->ReadRGBASpan = /*td*/fxReadRGBASpan_ARGB8888;
+               swdd->ReadRGBAPixels = tdfxReadRGBAPixels_ARGB8888;
+
+               swdd->WriteDepthSpan = tdfxWriteDepthSpan_Z24;
+               swdd->WriteDepthPixels = tdfxWriteDepthPixels_Z24;
+               swdd->ReadDepthSpan = /*td*/fxReadDepthSpan_Z24;
+               swdd->ReadDepthPixels = tdfxReadDepthPixels_Z24;
+               break;
+   }
 
-   swdd->SetReadBuffer = fxDDSetReadBuffer;
-
-   swdd->WriteRGBASpan = fxDDWriteRGBASpan;
-   swdd->WriteRGBSpan = fxDDWriteRGBSpan;
-   swdd->WriteMonoRGBASpan = fxDDWriteMonoRGBASpan;
-   swdd->WriteRGBAPixels = fxDDWriteRGBAPixels;
-   swdd->WriteMonoRGBAPixels = fxDDWriteMonoRGBAPixels;
-
-   swdd->WriteDepthSpan = fxDDWriteDepthSpan;
-   swdd->WriteDepthPixels = fxDDWriteDepthPixels;
-   swdd->ReadDepthSpan = fxDDReadDepthSpan;
-   swdd->ReadDepthPixels = fxDDReadDepthPixels;
-
-   /*  swdd->ReadRGBASpan        =fxDDReadRGBASpan; */
-   swdd->ReadRGBASpan = read_R5G6B5_span;
-   swdd->ReadRGBAPixels = read_R5G6B5_pixels;
+   if (fxMesa->haveHwStencil) {   /* NOTE(review): fxWriteStencilSpan and fxWriteStencilPixels have empty bodies */
+      swdd->WriteStencilSpan = fxWriteStencilSpan;
+      swdd->ReadStencilSpan = fxReadStencilSpan;
+      swdd->WriteStencilPixels = fxWriteStencilPixels;
+      swdd->ReadStencilPixels = fxReadStencilPixels;
+   }
+#if 0   /* compiled out: colour-index hooks and span render start/finish hooks */
+   swdd->WriteCI8Span          = NULL;
+   swdd->WriteCI32Span         = NULL;
+   swdd->WriteMonoCISpan       = NULL;
+   swdd->WriteCI32Pixels       = NULL;
+   swdd->WriteMonoCIPixels     = NULL;
+   swdd->ReadCI32Span          = NULL;
+   swdd->ReadCI32Pixels                = NULL;
+
+   swdd->SpanRenderStart        = tdfxSpanRenderStart; /* BEGIN_BOARD_LOCK */
+   swdd->SpanRenderFinish       = tdfxSpanRenderFinish; /* END_BOARD_LOCK */
+#endif
 }