Merge remote branch 'main/radeon-rewrite'
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_span.c
index 38ba5a50e505e1cb95fdcce97d94821e790d95c6..e28f28662b458da7b15a587e8862e2f965236c7f 100644 (file)
@@ -1,9 +1,13 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.6 2002/10/30 12:51:56 alanh Exp $ */
 /**************************************************************************
 
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                      VA Linux Systems Inc., Fremont, California.
 
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
 All Rights Reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining
@@ -36,149 +40,243 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 
-#include "glheader.h"
+#include "main/glheader.h"
 #include "swrast/swrast.h"
 
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
+#include "radeon_common.h"
+#include "radeon_lock.h"
 #include "radeon_span.h"
-#include "radeon_tex.h"
 
 #define DBG 0
 
-#define LOCAL_VARS                                                     \
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);                       \
-   radeonScreenPtr radeonScreen = rmesa->radeonScreen;                 \
-   __DRIscreenPrivate *sPriv = rmesa->dri.screen;                      \
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;                  \
-   GLuint pitch = radeonScreen->frontPitch * radeonScreen->cpp;                \
-   GLuint height = dPriv->h;                                           \
-   char *buf = (char *)(sPriv->pFB +                                   \
-                       rmesa->state.color.drawOffset +                 \
-                       (dPriv->x * radeonScreen->cpp) +                \
-                       (dPriv->y * pitch));                            \
-   char *read_buf = (char *)(sPriv->pFB +                              \
-                            rmesa->state.pixel.readOffset +            \
-                            (dPriv->x * radeonScreen->cpp) +           \
-                            (dPriv->y * pitch));                       \
-   GLuint p;                                                           \
-   (void) read_buf; (void) buf; (void) p
-
-#define LOCAL_DEPTH_VARS                                               \
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);                       \
-   radeonScreenPtr radeonScreen = rmesa->radeonScreen;                 \
-   __DRIscreenPrivate *sPriv = rmesa->dri.screen;                      \
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;                  \
-   GLuint height = dPriv->h;                                           \
-   GLuint xo = dPriv->x;                                               \
-   GLuint yo = dPriv->y;                                               \
-   char *buf = (char *)(sPriv->pFB + radeonScreen->depthOffset);       \
-   (void) buf
-
-#define LOCAL_STENCIL_VARS     LOCAL_DEPTH_VARS
-
-
-#define CLIPPIXEL( _x, _y )                                            \
-   ((_x >= minx) && (_x < maxx) && (_y >= miny) && (_y < maxy))
-
-
-#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i )                           \
-   if ( _y < miny || _y >= maxy ) {                                    \
-      _n1 = 0, _x1 = x;                                                        \
-   } else {                                                            \
-      _n1 = _n;                                                                \
-      _x1 = _x;                                                                \
-      if ( _x1 < minx ) _i += (minx-_x1), n1 -= (minx-_x1), _x1 = minx; \
-      if ( _x1 + _n1 >= maxx ) n1 -= (_x1 + n1 - maxx);                        \
-   }
-
-#define Y_FLIP( _y )           (height - _y - 1)
-
-
-#define HW_LOCK() 
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
 
-#define HW_CLIPLOOP()                                                  \
-   do {                                                                        \
-      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;               \
-      int _nc = dPriv->numClipRects;                                   \
-                                                                       \
-      while ( _nc-- ) {                                                        \
-        int minx = dPriv->pClipRects[_nc].x1 - dPriv->x;               \
-        int miny = dPriv->pClipRects[_nc].y1 - dPriv->y;               \
-        int maxx = dPriv->pClipRects[_nc].x2 - dPriv->x;               \
-        int maxy = dPriv->pClipRects[_nc].y2 - dPriv->y;
+/*
+ * Return the address of pixel (x, y) inside the mapping rrb->bo->ptr.
+ *
+ * If rrb->has_surface is set, or the buffer object has neither tiling flag,
+ * the offset is linear (x * cpp + y * pitch).  Otherwise the offset is
+ * assembled from bit fields of x and y, selected by which of
+ * RADEON_BO_FLAGS_MACRO_TILE and RADEON_BO_FLAGS_MICRO_TILE are set.
+ *
+ * NOTE(review): the tiled paths scale the low x bits by << 2, i.e. they
+ * assume 4 bytes per pixel regardless of rrb->cpp -- confirm that callers
+ * only use this helper on 32 bpp buffers.
+ */
+static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
+                            GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;
+    GLint nmacroblkpl;
+    GLint nmicroblkpl;
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        /* linear addressing */
+        offset = x * rrb->cpp + y * rrb->pitch;
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+                /* macro + micro tiled: 32 pixels per macro block row step */
+                nmacroblkpl = rrb->pitch >> 5;
+                offset += ((y >> 4) * nmacroblkpl) << 11;
+                offset += ((y & 15) >> 1) << 8;
+                offset += (y & 1) << 4;
+                offset += (x >> 5) << 11;
+                offset += ((x & 31) >> 2) << 5;
+                offset += (x & 3) << 2;
+            } else {
+                /* macro tiled only: 64 pixels per macro block row step */
+                nmacroblkpl = rrb->pitch >> 6;
+                offset += ((y >> 3) * nmacroblkpl) << 11;
+                offset += (y & 7) << 8;
+                offset += (x >> 6) << 11;
+                offset += ((x & 63) >> 3) << 5;
+                offset += (x & 7) << 2;
+            }
+        } else {
+            /* micro tiled only: pitch rounded up to a multiple of 32 */
+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
+            offset += (y * nmicroblkpl) << 5;
+            offset += (x >> 3) << 5;
+            offset += (x & 7) << 2;
+        }
+    }
+    return &ptr[offset];
+}
 
-#define HW_ENDCLIPLOOP()                                               \
-      }                                                                        \
-   } while (0)
+/*
+ * Return the address of the 16-bit pixel (x, y) inside the mapping
+ * rrb->bo->ptr.
+ *
+ * If rrb->has_surface is set, or the buffer object has neither tiling flag,
+ * the offset is linear (x * cpp + y * pitch).  Otherwise the offset is
+ * assembled from bit fields of x and y, selected by which of
+ * RADEON_BO_FLAGS_MACRO_TILE and RADEON_BO_FLAGS_MICRO_TILE are set.
+ *
+ * In every tiled path the position of a pixel inside its microblock is
+ * scaled by 2 bytes (<< 1).  This matches the generic radeon_ptr(), which
+ * scales the same term by rrb->cpp.  Scaling by << 2 here would step 4
+ * bytes per 2-byte pixel and run past the 32-byte microblock.
+ */
+static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
+                            GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;
+    GLint nmacroblkpl;
+    GLint nmicroblkpl;
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        /* linear addressing */
+        offset = x * rrb->cpp + y * rrb->pitch;
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+                /* macro + micro tiled: 8 pixels (16 bytes) per microblock row */
+                nmacroblkpl = rrb->pitch >> 6;
+                offset += ((y >> 4) * nmacroblkpl) << 11;
+                offset += ((y & 15) >> 1) << 8;
+                offset += (y & 1) << 4;
+                offset += (x >> 6) << 11;
+                offset += ((x & 63) >> 3) << 5;
+                offset += (x & 7) << 1;
+            } else {
+                /* macro tiled only: 16 pixels (32 bytes) per microblock row */
+                nmacroblkpl = rrb->pitch >> 7;
+                offset += ((y >> 3) * nmacroblkpl) << 11;
+                offset += (y & 7) << 8;
+                offset += (x >> 7) << 11;
+                offset += ((x & 127) >> 4) << 5;
+                offset += (x & 15) << 1;
+            }
+        } else {
+            /* micro tiled only: pitch rounded up to a multiple of 32 */
+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
+            offset += (y * nmicroblkpl) << 5;
+            offset += (x >> 4) << 5;
+            offset += (x & 15) << 1;
+        }
+    }
+    return &ptr[offset];
+}
 
-#define HW_UNLOCK()                                                    
+/*
+ * Bytes-per-pixel-generic variant of the pixel address computation.
+ *
+ * Returns the address of pixel (x, y) inside the mapping rrb->bo->ptr.
+ * The linear case (rrb->has_surface set, or no tiling flag) uses
+ * x * cpp + y * pitch.  The tiled cases derive the number of pixels per
+ * microblock / macroblock row from rrb->cpp instead of hard-coding shifts.
+ *
+ * NOTE(review): the "x & (blk - 1)" masks only equal "x % blk" when the
+ * block sizes are powers of two, i.e. presumably rrb->cpp is 1, 2 or 4 --
+ * confirm against callers.
+ */
+static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
+                          GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;
+    GLint microblkxs;
+    GLint macroblkxs;
+    GLint nmacroblkpl;
+    GLint nmicroblkpl;
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        /* linear addressing */
+        offset = x * rrb->cpp + y * rrb->pitch;
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+                /* macro + micro tiled: 16-byte microblock rows */
+                microblkxs = 16 / rrb->cpp;
+                macroblkxs = 128 / rrb->cpp;
+                nmacroblkpl = rrb->pitch / macroblkxs;
+                offset += ((y >> 4) * nmacroblkpl) << 11;
+                offset += ((y & 15) >> 1) << 8;
+                offset += (y & 1) << 4;
+                offset += (x / macroblkxs) << 11;
+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
+                offset += (x & (microblkxs - 1)) * rrb->cpp;
+            } else {
+                /* macro tiled only: 32-byte microblock rows */
+                microblkxs = 32 / rrb->cpp;
+                macroblkxs = 256 / rrb->cpp;
+                nmacroblkpl = rrb->pitch / macroblkxs;
+                offset += ((y >> 3) * nmacroblkpl) << 11;
+                offset += (y & 7) << 8;
+                offset += (x / macroblkxs) << 11;
+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
+                offset += (x & (microblkxs - 1)) * rrb->cpp;
+            }
+        } else {
+            /* micro tiled only: pitch rounded up to a multiple of 32 */
+            microblkxs = 32 / rrb->cpp;
+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
+            offset += (y * nmicroblkpl) << 5;
+            offset += (x / microblkxs) << 5;
+            offset += (x & (microblkxs - 1)) * rrb->cpp;
+        }
+    }
+    return &ptr[offset];
+}
 
+#ifndef COMPILE_R300
+/*
+ * Rotate a packed 32-bit value right by 8 bits: the low byte moves to the
+ * top and the upper 24 bits move down (Z24S8 layout -> S8Z24 layout).
+ */
+static uint32_t
+z24s8_to_s8z24(uint32_t val)
+{
+   const uint32_t low_byte_to_top = val << 24;
+   const uint32_t upper_bits_down = val >> 8;
+
+   return low_byte_to_top | upper_bits_down;
+}
+
+/*
+ * Inverse of z24s8_to_s8z24(): rotate left by 8 bits, moving the top byte
+ * to the bottom and the lower 24 bits up (S8Z24 layout -> Z24S8 layout).
+ */
+static uint32_t
+s8z24_to_z24s8(uint32_t val)
+{
+   const uint32_t top_byte_to_low = val >> 24;
+   const uint32_t lower_bits_up = val << 8;
+
+   return top_byte_to_low | lower_bits_up;
+}
+#endif
 
+/*
+ * Note that all information needed to access pixels in a renderbuffer
+ * should be obtained through the gl_renderbuffer parameter, not per-context
+ * information.
+ */
+/*
+ * Locals for the color span functions: the radeon context, the renderbuffer
+ * cast to radeon_renderbuffer, the Y-flip terms used by Y_FLIP(), a scratch
+ * pixel value p, and the cliprect list plus drawable origin (x_off, y_off)
+ * fetched via radeon_get_cliprects().
+ *
+ * yScale/yBias leave Y unchanged when ctx->DrawBuffer->Name is non-zero and
+ * mirror it about rrb->base.Height - 1 when it is zero.
+ */
+#define LOCAL_VARS                                             \
+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);                        \
+   struct radeon_renderbuffer *rrb = (void *) rb;              \
+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;                        \
+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+   unsigned int num_cliprects;                                         \
+   struct drm_clip_rect *cliprects;                                    \
+   int x_off, y_off;                                                   \
+   GLuint p;                                           \
+   (void)p;                                            \
+   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
+
+/* Same as LOCAL_VARS, minus the scratch pixel value p. */
+#define LOCAL_DEPTH_VARS                               \
+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);                        \
+   struct radeon_renderbuffer *rrb = (void *) rb;      \
+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;                        \
+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+   unsigned int num_cliprects;                                         \
+   struct drm_clip_rect *cliprects;                                    \
+   int x_off, y_off;                                                   \
+  radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
+
+/* Stencil span functions use the same locals as the depth ones. */
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+
+/* Apply the yScale/yBias pair declared by the LOCAL_*_VARS macros. */
+#define Y_FLIP(_y) ((_y) * yScale + yBias)
+
+/* Both lock hooks expand to nothing in this file. */
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* XXX FBO: this is identical to the macro in spantmp2.h except we get
+ * the cliprect info from the context, not the driDrawable.
+ * Move this into spantmp2.h someday.
+ */
+#define HW_CLIPLOOP()                                                  \
+   do {                                                                        \
+      int _nc = num_cliprects;                                         \
+      while ( _nc-- ) {                                                        \
+        int minx = cliprects[_nc].x1 - x_off;                          \
+        int miny = cliprects[_nc].y1 - y_off;                          \
+        int maxx = cliprects[_nc].x2 - x_off;                          \
+        int maxy = cliprects[_nc].y2 - y_off;
+       
 /* ================================================================
  * Color buffer
  */
 
 /* 16 bit, RGB565 color spanline and pixel functions
  */
-#define INIT_MONO_PIXEL(p, color) \
-  p = PACK_COLOR_565( color[0], color[1], color[2] )
-
-#define WRITE_RGBA( _x, _y, r, g, b, a )                               \
-   *(GLushort *)(buf + _x*2 + _y*pitch) = ((((int)r & 0xf8) << 8) |    \
-                                          (((int)g & 0xfc) << 3) |     \
-                                          (((int)b & 0xf8) >> 3))
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
 
+#define TAG(x)    radeon##x##_RGB565
+#define TAG2(x,y) radeon##x##_RGB565##y
+/* Pixel address: the drawable origin (x_off, y_off) is added to the
+ * coordinates before the 16-bit address computation. */
+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
 
-#define READ_RGBA( rgba, _x, _y )                                      \
-   do {                                                                        \
-      GLushort p = *(GLushort *)(read_buf + _x*2 + _y*pitch);          \
-      rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;                                \
-      rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;                                \
-      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;                                \
-      rgba[3] = 0xff;                                                  \
-   } while (0)
-
-#define TAG(x) radeon##x##_RGB565
-#include "spantmp.h"
-
-/* 32 bit, ARGB8888 color spanline and pixel functions
+/* 32 bit, xRGB8888 color spanline and pixel functions
  */
-#undef INIT_MONO_PIXEL
-#define INIT_MONO_PIXEL(p, color) \
-  p = PACK_COLOR_8888( color[3], color[0], color[1], color[2] )
-
-#define WRITE_RGBA( _x, _y, r, g, b, a )                       \
-do {                                                           \
-   *(GLuint *)(buf + _x*4 + _y*pitch) = ((b <<  0) |           \
-                                        (g <<  8) |            \
-                                        (r << 16) |            \
-                                        (a << 24) );           \
-} while (0)
-
-#define WRITE_PIXEL( _x, _y, p )                       \
-do {                                                   \
-   *(GLuint *)(buf + _x*4 + _y*pitch) = p;             \
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    radeon##x##_xRGB8888
+#define TAG2(x,y) radeon##x##_xRGB8888##y
+/* Reads force the top byte to 0xff, so the unused x channel is returned
+ * as opaque alpha. */
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr32(rrb, (_x) + x_off, (_y) + y_off)) | 0xff000000))
+/* Wrapped in do { } while (0) so the macro expands to exactly one
+ * statement, including when used as the body of an unbraced if/else. */
+#define PUT_VALUE(_x, _y, d) do { \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, (_x) + x_off, (_y) + y_off );            \
+   *_ptr = (d);                                                        \
+} while (0)
+#include "spantmp2.h"
 
-#define READ_RGBA( rgba, _x, _y )                              \
-do {                                                           \
-   volatile GLuint *ptr = (volatile GLuint *)(read_buf + _x*4 + _y*pitch); \
-   GLuint p = *ptr;                                    \
-   rgba[0] = (p >> 16) & 0xff;                                 \
-   rgba[1] = (p >>  8) & 0xff;                                 \
-   rgba[2] = (p >>  0) & 0xff;                                 \
-   rgba[3] = (p >> 24) & 0xff;                                 \
-} while (0)
-
-#define TAG(x) radeon##x##_ARGB8888
-#include "spantmp.h"
-
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
 
+#define TAG(x)    radeon##x##_ARGB8888
+#define TAG2(x,y) radeon##x##_ARGB8888##y
+/* Pixel address: the drawable origin (x_off, y_off) is added to the
+ * coordinates before the 32-bit address computation. */
+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
 
 /* ================================================================
  * Depth buffer
@@ -190,89 +288,105 @@ do {                                                             \
  * is calculated, and then wired with x and y to produce the final
  * memory address.
  * The chip will do address translation on its own if the surface registers
- * are set up correctly. It is not quite enough to get it working with hyperz too...
+ * are set up correctly. It is not quite enough to get it working with hyperz
+ * too...
  */
 
-static GLuint radeon_mba_z32( radeonContextPtr rmesa,
-                                      GLint x, GLint y )
-{
-   GLuint pitch = rmesa->radeonScreen->frontPitch;
-   if (rmesa->radeonScreen->depthHasSurface) {
-      return 4*(x + y*pitch);
-   }
-   else {
-      GLuint ba, address = 0;                  /* a[0..1] = 0           */
-
-      ba = (y / 16) * (pitch / 16) + (x / 16);
-
-      address |= (x & 0x7) << 2;                       /* a[2..4] = x[0..2]     */
-      address |= (y & 0x3) << 5;                       /* a[5..6] = y[0..1]     */
-      address |=
-         (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7]    = x[4] ^ y[2] */
-      address |= (ba & 0x3) << 8;                      /* a[8..9] = ba[0..1]    */
-
-      address |= (y & 0x8) << 7;                       /* a[10]   = y[3]        */
-      address |=
-         (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11]   = x[3] ^ y[4] */
-      address |= (ba & ~0x3) << 10;            /* a[12..] = ba[2..]     */
-
-      return address;
-   }
-}
-
-static __inline GLuint radeon_mba_z16( radeonContextPtr rmesa, GLint x, GLint y )
-{
-   GLuint pitch = rmesa->radeonScreen->frontPitch;
-   if (rmesa->radeonScreen->depthHasSurface) {
-      return 2*(x + y*pitch);
-   }
-   else {
-      GLuint ba, address = 0;                  /* a[0]    = 0           */
-
-      ba = (y / 16) * (pitch / 32) + (x / 32);
-
-      address |= (x & 0x7) << 1;                       /* a[1..3] = x[0..2]     */
-      address |= (y & 0x7) << 4;                       /* a[4..6] = y[0..2]     */
-      address |= (x & 0x8) << 4;                       /* a[7]    = x[3]        */
-      address |= (ba & 0x3) << 8;                      /* a[8..9] = ba[0..1]    */
-      address |= (y & 0x8) << 7;                       /* a[10]   = y[3]        */
-      address |= ((x & 0x10) ^ (y & 0x10)) << 7;       /* a[11]   = x[4] ^ y[4] */
-      address |= (ba & ~0x3) << 10;            /* a[12..] = ba[2..]     */
-
-      return address;
-   }
-}
-
-
 /* 16-bit depth buffer functions
  */
+#define VALUE_TYPE GLushort
+
+/* Both accessors go through the generic radeon_ptr(), which scales by
+ * rrb->cpp, rather than radeon_ptr16(). */
 #define WRITE_DEPTH( _x, _y, d )                                       \
-   *(GLushort *)(buf + radeon_mba_z16( rmesa, _x + xo, _y + yo )) = d;
+   *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d
 
 #define READ_DEPTH( d, _x, _y )                                                \
-   d = *(GLushort *)(buf + radeon_mba_z16( rmesa, _x + xo, _y + yo ));
+   d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)
 
-#define TAG(x) radeon##x##_16
+#define TAG(x) radeon##x##_z16
 #include "depthtmp.h"
 
-/* 24 bit depth, 8 bit stencil depthbuffer functions
+/* 24 bit depth
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
  */
+#define VALUE_TYPE GLuint
+
+/* Depth-only writes are read-modify-write: the stencil byte sharing the
+ * 32-bit word is preserved (low byte on R300, high byte otherwise). */
+#ifdef COMPILE_R300
+#define WRITE_DEPTH( _x, _y, d )                                       \
+do {                                                                   \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
+   GLuint tmp = *_ptr;                         \
+   tmp &= 0x000000ff;                                                  \
+   tmp |= (((d) << 8) & 0xffffff00);                                   \
+   *_ptr = tmp;                                        \
+} while (0)
+#else
 #define WRITE_DEPTH( _x, _y, d )                                       \
 do {                                                                   \
-   GLuint offset = radeon_mba_z32( rmesa, _x + xo, _y + yo );          \
-   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );        \
+   GLuint tmp = *_ptr;                                                 \
    tmp &= 0xff000000;                                                  \
    tmp |= ((d) & 0x00ffffff);                                          \
-   *(GLuint *)(buf + offset) = tmp;                                    \
+   *_ptr = tmp;                                        \
 } while (0)
+#endif
 
+/* Both variants are single do { } while (0) statements with no trailing
+ * semicolon, so they behave identically at every call site. */
+#ifdef COMPILE_R300
 #define READ_DEPTH( d, _x, _y )                                                \
-   d = *(GLuint *)(buf + radeon_mba_z32( rmesa, _x + xo,               \
-                                        _y + yo )) & 0x00ffffff;
-
-#define TAG(x) radeon##x##_24_8
+  do {                                                                 \
+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
+  }while(0)
+#else
+#define READ_DEPTH( d, _x, _y )        \
+  do {                                                                 \
+    d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
+  } while (0)
+#endif
+/*
+    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
+   d = *(GLuint*)(radeon_ptr(rrb, _x,  _y )) & 0x00ffffff;
+*/
+#define TAG(x) radeon##x##_z24
 #include "depthtmp.h"
 
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ * EXT_depth_stencil
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+ */
+#define VALUE_TYPE GLuint
+
+/* Packed depth/stencil writes: with COMPILE_R300 the value is stored
+ * unchanged; otherwise it is converted with z24s8_to_s8z24() first. */
+#ifdef COMPILE_R300
+#define WRITE_DEPTH( _x, _y, d )                                       \
+do {                                                                   \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
+   *_ptr = d;                                                          \
+} while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d )                                       \
+do {                                                                   \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );        \
+   GLuint tmp = z24s8_to_s8z24(d);                                     \
+   *_ptr = tmp;                                        \
+} while (0)
+#endif
+
+/* Packed depth/stencil reads: the mirror image of the writes above, using
+ * s8z24_to_z24s8() in the non-R300 build. */
+#ifdef COMPILE_R300
+#define READ_DEPTH( d, _x, _y )                                                \
+  do { \
+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)));       \
+  }while(0)
+#else
+#define READ_DEPTH( d, _x, _y )        do {                                    \
+    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr32(rrb, _x + x_off,        _y + y_off ))); \
+  } while (0)
+#endif
+/*
+    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
+   d = *(GLuint*)(radeon_ptr(rrb, _x,  _y )) & 0x00ffffff;
+*/
+#define TAG(x) radeon##x##_z24_s8
 #include "depthtmp.h"
 
 /* ================================================================
  * Stencil buffer
@@ -280,148 +394,172 @@ do {                                                                    \
 
 /* 24 bit depth, 8 bit stencil depthbuffer functions
  */
+/* Stencil writes are read-modify-write on the shared 32-bit word: with
+ * COMPILE_R300 the stencil is the low byte, otherwise the high byte. */
+#ifdef COMPILE_R300
 #define WRITE_STENCIL( _x, _y, d )                                     \
 do {                                                                   \
-   GLuint offset = radeon_mba_z32( rmesa, _x + xo, _y + yo );          \
-   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);          \
+   GLuint tmp = *_ptr;                         \
+   tmp &= 0xffffff00;                                                  \
+   tmp |= (d) & 0xff;                                                  \
+   *_ptr = tmp;                                        \
+} while (0)
+#else
+#define WRITE_STENCIL( _x, _y, d )                                     \
+do {                                                                   \
+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);          \
+   GLuint tmp = *_ptr;                         \
    tmp &= 0x00ffffff;                                                  \
    tmp |= (((d) & 0xff) << 24);                                                \
-   *(GLuint *)(buf + offset) = tmp;                                    \
+   *_ptr = tmp;                                        \
 } while (0)
+#endif
 
+/* Stencil reads extract the same byte the writes above update. */
+#ifdef COMPILE_R300
 #define READ_STENCIL( d, _x, _y )                                      \
 do {                                                                   \
-   GLuint offset = radeon_mba_z32( rmesa, _x + xo, _y + yo );          \
-   GLuint tmp = *(GLuint *)(buf + offset);                             \
-   tmp &= 0xff000000;                                                  \
-   d = tmp >> 24;                                                      \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
+   GLuint tmp = *_ptr;                         \
+   d = tmp & 0x000000ff;                                               \
+} while (0)
+#else
+#define READ_STENCIL( d, _x, _y )                                      \
+do {                                                                   \
+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );                \
+   GLuint tmp = *_ptr;                         \
+   d = (tmp & 0xff000000) >> 24;                                       \
 } while (0)
+#endif
 
-#define TAG(x) radeon##x##_24_8
+#define TAG(x) radeon##x##_z24_s8
 #include "stenciltmp.h"
 
 
-/*
- * This function is called to specify which buffer to read and write
- * for software rasterization (swrast) fallbacks.  This doesn't necessarily
- * correspond to glDrawBuffer() or glReadBuffer() calls.
- */
-static void radeonSetBuffer( GLcontext *ctx,
-                             GLframebuffer *colorBuffer,
-                             GLuint bufferBit )
+/*
+ * Map (flag != 0) or unmap (flag == 0) the buffer object behind a
+ * renderbuffer.
+ *
+ * On map, the span functions are installed via radeonSetSpanFunctions();
+ * on unmap, rb->GetRow and rb->PutRow are cleared.  Nothing happens if rb
+ * does not resolve to a radeon_renderbuffer or has no buffer object.
+ *
+ * NOTE(review): a radeon_bo_map() failure is only logged; the span
+ * functions are still installed afterwards.
+ */
+static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   switch ( bufferBit ) {
-   case DD_FRONT_LEFT_BIT:
-      if ( rmesa->sarea->pfCurrentPage == 1 ) {
-        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
-        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
-        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
-        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
-      } else {
-       rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
-       rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
-       rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
-       rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
-      }
-      break;
-   case DD_BACK_LEFT_BIT:
-      if ( rmesa->sarea->pfCurrentPage == 1 ) {
-       rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
-       rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
-       rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
-       rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
-      } else {
-        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
-        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
-        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
-        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
-      }
-      break;
-   default:
-      assert(0);
-      break;
-   }
+       struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+       int r;
+       
+       if (rrb == NULL || !rrb->bo)
+               return;
+
+       if (flag) {
+               /* second argument 1: presumably "map for writing" -- TODO confirm */
+               r = radeon_bo_map(rrb->bo, 1);
+               if (r) {
+                       fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
+                               __FUNCTION__, r);
+               }
+
+               radeonSetSpanFunctions(rrb);
+       } else {
+               radeon_bo_unmap(rrb->bo);
+               rb->GetRow = NULL;
+               rb->PutRow = NULL;
+       }
 }
 
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above).  WaitForIdle() is the main
- * culprit.
- */
+/*
+ * Map or unmap (per the map flag) everything attached to the current
+ * framebuffers: each color draw buffer, every texture attached to the
+ * draw framebuffer, the color read buffer, and the renderbuffers wrapped
+ * by the depth and stencil buffers.
+ */
+static void
+radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
+{
+       GLuint i, j;
+
+       /* color draw buffers */
+       for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
+               map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
+
+       /* check for render to textures */
+       for (i = 0; i < BUFFER_COUNT; i++) {
+               struct gl_renderbuffer_attachment *att =
+                       ctx->DrawBuffer->Attachment + i;
+               struct gl_texture_object *tex = att->Texture;
+               if (tex) {
+                       /* render to texture */
+                       ASSERT(att->Renderbuffer);
+                       if (map)
+                               ctx->Driver.MapTexture(ctx, tex);
+                       else
+                               ctx->Driver.UnmapTexture(ctx, tex);
+               }
+       }
+       
+       /* NOTE(review): the read buffer may also be one of the draw buffers
+        * handled above, in which case it is mapped/unmapped a second time. */
+       map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
+
+       /* depth buffer (Note wrapper!) */
+       if (ctx->DrawBuffer->_DepthBuffer)
+               map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
+       
+       /* stencil buffer, reached through its wrapper as well */
+       if (ctx->DrawBuffer->_StencilBuffer)
+               map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
 
-static void radeonSpanRenderStart( GLcontext *ctx )
+}
+/*
+ * swrast SpanRenderStart hook: flush pending vertices, take the hardware
+ * lock and wait for idle when DRI2 is not enabled, then map every enabled
+ * texture unit's current texture and all framebuffer buffers.
+ */
+static void radeonSpanRenderStart(GLcontext * ctx)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+       radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+       int i;
+
+       radeon_firevertices(rmesa);
+
+       /* The locking and wait for idle should really only be needed in classic mode.
+        * In a future memory manager based implementation, this should become
+        * unnecessary due to the fact that mapping our buffers, textures, etc.
+        * should implicitly wait for any previous rendering commands that must
+        * be waited on. */
+       if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+               LOCK_HARDWARE(rmesa);
+               radeonWaitForIdleLocked(rmesa);
+       }
+       /* map the textures that are enabled on any unit */
+       for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled)
+                       ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
+       }
+
+       radeon_map_unmap_buffers(ctx, 1);
+
+
 
-   RADEON_FIREVERTICES( rmesa );
-   LOCK_HARDWARE( rmesa );
-   radeonWaitForIdleLocked( rmesa );
 }
 
-static void radeonSpanRenderFinish( GLcontext *ctx )
+/*
+ * swrast SpanRenderFinish hook: flush swrast, release the hardware lock
+ * when DRI2 is not enabled, then unmap the textures and framebuffer
+ * buffers that radeonSpanRenderStart() mapped.
+ */
+static void radeonSpanRenderFinish(GLcontext * ctx)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-   _swrast_flush( ctx );
-   UNLOCK_HARDWARE( rmesa );
+       radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+       int i;
+       _swrast_flush(ctx);
+       /* NOTE(review): the lock is dropped before the unmaps below, whereas
+        * the start hook maps while holding it -- confirm this is intended. */
+       if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+               UNLOCK_HARDWARE(rmesa);
+       }
+       for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+               if (ctx->Texture.Unit[i]._ReallyEnabled)
+                       ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
+       }
+
+       radeon_map_unmap_buffers(ctx, 0);
 }
 
-void radeonInitSpanFuncs( GLcontext *ctx )
+/*
+ * Register this driver's span-render begin/end hooks in the swrast device
+ * driver table of the given context.
+ */
+void radeonInitSpanFuncs(GLcontext * ctx)
+{
+       struct swrast_device_driver *const hooks =
+           _swrast_GetDeviceDriverReference(ctx);
+
+       hooks->SpanRenderFinish = radeonSpanRenderFinish;
+       hooks->SpanRenderStart = radeonSpanRenderStart;
+}
+
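+/**
+ * Plug in the Get/Put routines for the given radeon_renderbuffer.
+ *
+ * The routine set is selected by rrb->base._ActualFormat (RGB5, RGB8,
+ * RGBA8, DEPTH_COMPONENT16, DEPTH_COMPONENT24, DEPTH24_STENCIL8 or
+ * STENCIL_INDEX8); any other format leaves the renderbuffer untouched.
+ */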
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
-
-   swdd->SetBuffer = radeonSetBuffer;
-
-   switch ( rmesa->radeonScreen->cpp ) {
-   case 2:
-      swdd->WriteRGBASpan      = radeonWriteRGBASpan_RGB565;
-      swdd->WriteRGBSpan       = radeonWriteRGBSpan_RGB565;
-      swdd->WriteMonoRGBASpan  = radeonWriteMonoRGBASpan_RGB565;
-      swdd->WriteRGBAPixels    = radeonWriteRGBAPixels_RGB565;
-      swdd->WriteMonoRGBAPixels        = radeonWriteMonoRGBAPixels_RGB565;
-      swdd->ReadRGBASpan       = radeonReadRGBASpan_RGB565;
-      swdd->ReadRGBAPixels      = radeonReadRGBAPixels_RGB565;
-      break;
-
-   case 4:
-      swdd->WriteRGBASpan      = radeonWriteRGBASpan_ARGB8888;
-      swdd->WriteRGBSpan       = radeonWriteRGBSpan_ARGB8888;
-      swdd->WriteMonoRGBASpan   = radeonWriteMonoRGBASpan_ARGB8888;
-      swdd->WriteRGBAPixels     = radeonWriteRGBAPixels_ARGB8888;
-      swdd->WriteMonoRGBAPixels = radeonWriteMonoRGBAPixels_ARGB8888;
-      swdd->ReadRGBASpan       = radeonReadRGBASpan_ARGB8888;
-      swdd->ReadRGBAPixels      = radeonReadRGBAPixels_ARGB8888;
-      break;
-
-   default:
-      break;
-   }
-
-   switch ( rmesa->glCtx->Visual.depthBits ) {
-   case 16:
-      swdd->ReadDepthSpan      = radeonReadDepthSpan_16;
-      swdd->WriteDepthSpan     = radeonWriteDepthSpan_16;
-      swdd->ReadDepthPixels    = radeonReadDepthPixels_16;
-      swdd->WriteDepthPixels   = radeonWriteDepthPixels_16;
-      break;
-
-   case 24:
-      swdd->ReadDepthSpan      = radeonReadDepthSpan_24_8;
-      swdd->WriteDepthSpan     = radeonWriteDepthSpan_24_8;
-      swdd->ReadDepthPixels    = radeonReadDepthPixels_24_8;
-      swdd->WriteDepthPixels   = radeonWriteDepthPixels_24_8;
-
-      swdd->ReadStencilSpan    = radeonReadStencilSpan_24_8;
-      swdd->WriteStencilSpan   = radeonWriteStencilSpan_24_8;
-      swdd->ReadStencilPixels  = radeonReadStencilPixels_24_8;
-      swdd->WriteStencilPixels = radeonWriteStencilPixels_24_8;
-      break;
-
-   default:
-      break;
-   }
-
-   swdd->SpanRenderStart          = radeonSpanRenderStart;
-   swdd->SpanRenderFinish         = radeonSpanRenderFinish; 
+       if (rrb->base._ActualFormat == GL_RGB5) {
+               radeonInitPointers_RGB565(&rrb->base);
+       } else if (rrb->base._ActualFormat == GL_RGB8) {
+               radeonInitPointers_xRGB8888(&rrb->base);
+       } else if (rrb->base._ActualFormat == GL_RGBA8) {
+               radeonInitPointers_ARGB8888(&rrb->base);
+       } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
+               radeonInitDepthPointers_z16(&rrb->base);
+       } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
+               radeonInitDepthPointers_z24(&rrb->base);
+       } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
+               radeonInitDepthPointers_z24_s8(&rrb->base);
+       } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
+               radeonInitStencilPointers_z24_s8(&rrb->base);
+       }
 }