swrast: avoid calling _mesa_get_srgb_format_linear() inside a loop
[mesa.git] / src / mesa / swrast / s_readpix.c
index 368311e14dd89f90fe26f4f0ffa91b4e9e86136d..3cef7304a4ab90da1db5ca4279b4db4c193c9629 100644 (file)
 
 
 #include "main/glheader.h"
-#include "main/bufferobj.h"
 #include "main/colormac.h"
-#include "main/convolve.h"
-#include "main/context.h"
 #include "main/feedback.h"
 #include "main/formats.h"
+#include "main/format_unpack.h"
 #include "main/image.h"
-#include "main/macros.h"
 #include "main/imports.h"
+#include "main/macros.h"
+#include "main/pack.h"
+#include "main/pbo.h"
 #include "main/state.h"
 
 #include "s_context.h"
 #include "s_span.h"
 #include "s_stencil.h"
 
+/**
+ * Fast path for glReadPixels() of GL_DEPTH_COMPONENT that copies rows
+ * directly out of the mapped depth renderbuffer.
+ *
+ * Two cases are handled: a GL_UNSIGNED_INT destination, converted with
+ * _mesa_unpack_uint_z_row(), and a GL_UNSIGNED_SHORT destination read from a
+ * MESA_FORMAT_Z16 buffer, which is a plain memcpy of each row.
+ *
+ * The request is declined when a depth scale/bias is in effect, when
+ * packing->SwapBytes is set, or when the renderbuffer format's datatype is
+ * not GL_UNSIGNED_INT.
+ *
+ * The depth renderbuffer is dereferenced without a NULL check; the visible
+ * caller, read_depth_pixels(), returns early when it is NULL.
+ *
+ * \return GL_TRUE if the pixels were written to \p pixels, GL_FALSE if the
+ *         request is not handled here and the caller must use another path.
+ */
+static GLboolean
+fast_read_depth_pixels( struct gl_context *ctx,
+                       GLint x, GLint y,
+                       GLsizei width, GLsizei height,
+                       GLenum type, GLvoid *pixels,
+                       const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+   GLubyte *map, *dst;
+   int stride, dstStride, j;
+
+   /* A non-identity depth scale/bias is not handled by this path. */
+   if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0)
+      return GL_FALSE;
+
+   if (packing->SwapBytes)
+      return GL_FALSE;
+
+   if (_mesa_get_format_datatype(rb->Format) != GL_UNSIGNED_INT)
+      return GL_FALSE;
+
+   /* Only uint destinations, or ushort destinations backed by Z16. */
+   if (!((type == GL_UNSIGNED_SHORT && rb->Format == MESA_FORMAT_Z16) ||
+        type == GL_UNSIGNED_INT))
+      return GL_FALSE;
+
+   /* NOTE(review): map is used below without a NULL check -- confirm the
+    * MapRenderbuffer hook cannot leave it NULL on failure.
+    */
+   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
+                              &map, &stride);
+
+   dstStride = _mesa_image_row_stride(packing, width, GL_DEPTH_COMPONENT, type);
+   dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
+                                          GL_DEPTH_COMPONENT, type, 0, 0);
+
+   /* The mapping already starts at (x, y), so rows are walked by stride. */
+   for (j = 0; j < height; j++) {
+      if (type == GL_UNSIGNED_INT) {
+        _mesa_unpack_uint_z_row(rb->Format, width, map, (GLuint *)dst);
+      } else {
+        ASSERT(type == GL_UNSIGNED_SHORT && rb->Format == MESA_FORMAT_Z16);
+        /* width * 2: two bytes per GL_UNSIGNED_SHORT depth value */
+        memcpy(dst, map, width * 2);
+      }
+
+      map += stride;
+      dst += dstStride;
+   }
+   ctx->Driver.UnmapRenderbuffer(ctx, rb);
+
+   return GL_TRUE;
+}
 
 /**
  * Read pixels for format=GL_DEPTH_COMPONENT.
  */
 static void
-read_depth_pixels( GLcontext *ctx,
+read_depth_pixels( struct gl_context *ctx,
                    GLint x, GLint y,
                    GLsizei width, GLsizei height,
                    GLenum type, GLvoid *pixels,
                    const struct gl_pixelstore_attrib *packing )
 {
    struct gl_framebuffer *fb = ctx->ReadBuffer;
-   struct gl_renderbuffer *rb = fb->_DepthBuffer;
-   const GLboolean biasOrScale
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   struct gl_renderbuffer *rb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+   GLint j;
+   GLubyte *dst, *map;
+   int dstStride, stride;
 
    if (!rb)
       return;
@@ -67,73 +119,27 @@ read_depth_pixels( GLcontext *ctx,
    /* width should never be > MAX_WIDTH since we did clipping earlier */
    ASSERT(width <= MAX_WIDTH);
 
-   if (type == GL_UNSIGNED_SHORT && fb->Visual.depthBits == 16
-       && !biasOrScale && !packing->SwapBytes) {
-      /* Special case: directly read 16-bit unsigned depth values. */
-      GLint j;
-      ASSERT(rb->Format == MESA_FORMAT_Z16);
-      ASSERT(rb->DataType == GL_UNSIGNED_SHORT);
-      for (j = 0; j < height; j++, y++) {
-         void *dest =_mesa_image_address2d(packing, pixels, width, height,
-                                           GL_DEPTH_COMPONENT, type, j, 0);
-         rb->GetRow(ctx, rb, width, x, y, dest);
-      }
-   }
-   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 24
-            && !biasOrScale && !packing->SwapBytes) {
-      /* Special case: directly read 24-bit unsigned depth values. */
-      GLint j;
-      ASSERT(rb->Format == MESA_FORMAT_X8_Z24 ||
-             rb->Format == MESA_FORMAT_S8_Z24 ||
-             rb->Format == MESA_FORMAT_Z24_X8 ||
-             rb->Format == MESA_FORMAT_Z24_S8);
-      ASSERT(rb->DataType == GL_UNSIGNED_INT ||
-             rb->DataType == GL_UNSIGNED_INT_24_8);
-      for (j = 0; j < height; j++, y++) {
-         GLuint *dest = (GLuint *)
-            _mesa_image_address2d(packing, pixels, width, height,
-                                  GL_DEPTH_COMPONENT, type, j, 0);
-         GLint k;
-         rb->GetRow(ctx, rb, width, x, y, dest);
-         /* convert range from 24-bit to 32-bit */
-         if (rb->Format == MESA_FORMAT_X8_Z24 ||
-             rb->Format == MESA_FORMAT_S8_Z24) {
-            for (k = 0; k < width; k++) {
-               /* Note: put MSByte of 24-bit value into LSByte */
-               dest[k] = (dest[k] << 8) | ((dest[k] >> 16) & 0xff);
-            }
-         }
-         else {
-            for (k = 0; k < width; k++) {
-               /* Note: fill in LSByte by replication */
-               dest[k] = dest[k] | ((dest[k] >> 8) & 0xff);
-            }
-         }
-      }
-   }
-   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 32
-            && !biasOrScale && !packing->SwapBytes) {
-      /* Special case: directly read 32-bit unsigned depth values. */
-      GLint j;
-      ASSERT(rb->Format == MESA_FORMAT_Z32);
-      ASSERT(rb->DataType == GL_UNSIGNED_INT);
-      for (j = 0; j < height; j++, y++) {
-         void *dest = _mesa_image_address2d(packing, pixels, width, height,
-                                            GL_DEPTH_COMPONENT, type, j, 0);
-         rb->GetRow(ctx, rb, width, x, y, dest);
-      }
-   }
-   else {
-      /* General case (slower) */
-      GLint j;
-      for (j = 0; j < height; j++, y++) {
-         GLfloat depthValues[MAX_WIDTH];
-         GLvoid *dest = _mesa_image_address2d(packing, pixels, width, height,
-                                              GL_DEPTH_COMPONENT, type, j, 0);
-         _swrast_read_depth_span_float(ctx, rb, width, x, y, depthValues);
-         _mesa_pack_depth_span(ctx, width, dest, type, depthValues, packing);
-      }
+   if (fast_read_depth_pixels(ctx, x, y, width, height, type, pixels, packing))
+      return;
+
+   dstStride = _mesa_image_row_stride(packing, width, GL_DEPTH_COMPONENT, type);
+   dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
+                                          GL_DEPTH_COMPONENT, type, 0, 0);
+
+   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
+                              &map, &stride);
+
+   /* General case (slower) */
+   for (j = 0; j < height; j++, y++) {
+      GLfloat depthValues[MAX_WIDTH];
+      _mesa_unpack_float_z_row(rb->Format, width, map, depthValues);
+      _mesa_pack_depth_span(ctx, width, dst, type, depthValues, packing);
+
+      dst += dstStride;
+      map += stride;
    }
+
+   ctx->Driver.UnmapRenderbuffer(ctx, rb);
 }
 
 
@@ -141,15 +147,17 @@ read_depth_pixels( GLcontext *ctx,
  * Read pixels for format=GL_STENCIL_INDEX.
  */
 static void
-read_stencil_pixels( GLcontext *ctx,
+read_stencil_pixels( struct gl_context *ctx,
                      GLint x, GLint y,
                      GLsizei width, GLsizei height,
                      GLenum type, GLvoid *pixels,
                      const struct gl_pixelstore_attrib *packing )
 {
    struct gl_framebuffer *fb = ctx->ReadBuffer;
-   struct gl_renderbuffer *rb = fb->_StencilBuffer;
+   struct gl_renderbuffer *rb = fb->Attachment[BUFFER_STENCIL].Renderbuffer;
    GLint j;
+   GLubyte *map;
+   GLint stride;
 
    if (!rb)
       return;
@@ -157,146 +165,120 @@ read_stencil_pixels( GLcontext *ctx,
    /* width should never be > MAX_WIDTH since we did clipping earlier */
    ASSERT(width <= MAX_WIDTH);
 
+   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
+                              &map, &stride);
+
    /* process image row by row */
-   for (j=0;j<height;j++,y++) {
+   for (j = 0; j < height; j++) {
       GLvoid *dest;
       GLstencil stencil[MAX_WIDTH];
 
-      _swrast_read_stencil_span(ctx, rb, width, x, y, stencil);
-
+      _mesa_unpack_ubyte_stencil_row(rb->Format, width, map, stencil);
       dest = _mesa_image_address2d(packing, pixels, width, height,
                                    GL_STENCIL_INDEX, type, j, 0);
 
       _mesa_pack_stencil_span(ctx, width, type, dest, stencil, packing);
-   }
-}
 
+      map += stride;
+   }
 
+   ctx->Driver.UnmapRenderbuffer(ctx, rb);
+}
 
-/**
- * Optimized glReadPixels for particular pixel formats when pixel
- * scaling, biasing, mapping, etc. are disabled.
- * \return GL_TRUE if success, GL_FALSE if unable to do the readpixels
- */
+/**
+ * Fast path for color glReadPixels(): when the color read buffer's format
+ * matches the requested \p format / \p type exactly, copy each mapped row
+ * into the destination with memcpy.
+ *
+ * Declines when _mesa_format_matches_format_and_type() reports a mismatch or
+ * when packing->SwapBytes or packing->LsbFirst is set.
+ *
+ * \p transferOps is accepted but not read in this function; the visible
+ * caller, read_rgba_pixels(), only calls it when no transfer ops are set.
+ * The renderbuffer is not NULL-checked here; that same caller returns early
+ * on a NULL color read buffer.
+ *
+ * \return GL_TRUE if the pixels were copied, GL_FALSE if the request is not
+ *         handled here (nothing is mapped or written in that case).
+ */
 static GLboolean
-fast_read_rgba_pixels( GLcontext *ctx,
-                       GLint x, GLint y,
-                       GLsizei width, GLsizei height,
-                       GLenum format, GLenum type,
-                       GLvoid *pixels,
-                       const struct gl_pixelstore_attrib *packing,
-                       GLbitfield transferOps)
+fast_read_rgba_pixels_memcpy( struct gl_context *ctx,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height,
+                             GLenum format, GLenum type,
+                             GLvoid *pixels,
+                             const struct gl_pixelstore_attrib *packing,
+                             GLbitfield transferOps )
 {
    struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
+   GLubyte *dst, *map;
+   int dstStride, stride, j, texelBytes;
 
-   if (!rb)
+   if (!_mesa_format_matches_format_and_type(rb->Format, format, type))
       return GL_FALSE;
 
-   ASSERT(rb->_BaseFormat == GL_RGBA || rb->_BaseFormat == GL_RGB);
-
-   /* clipping should have already been done */
-   ASSERT(x + width <= (GLint) rb->Width);
-   ASSERT(y + height <= (GLint) rb->Height);
-
    /* check for things we can't handle here */
-   if (transferOps ||
-       packing->SwapBytes ||
+   if (packing->SwapBytes ||
        packing->LsbFirst) {
       return GL_FALSE;
    }
 
-   if (format == GL_RGBA && rb->DataType == type) {
-      const GLint dstStride = _mesa_image_row_stride(packing, width,
-                                                     format, type);
-      GLubyte *dest
-         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-                                             format, type, 0, 0);
-      GLint row;
-      ASSERT(rb->GetRow);
-      for (row = 0; row < height; row++) {
-         rb->GetRow(ctx, rb, width, x, y + row, dest);
-         dest += dstStride;
-      }
-      return GL_TRUE;
-   }
+   dstStride = _mesa_image_row_stride(packing, width, format, type);
+   dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
+                                          format, type, 0, 0);
 
-   if (format == GL_RGB &&
-       rb->DataType == GL_UNSIGNED_BYTE &&
-       type == GL_UNSIGNED_BYTE) {
-      const GLint dstStride = _mesa_image_row_stride(packing, width,
-                                                     format, type);
-      GLubyte *dest
-         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-                                             format, type, 0, 0);
-      GLint row;
-      ASSERT(rb->GetRow);
-      for (row = 0; row < height; row++) {
-         GLubyte tempRow[MAX_WIDTH][4];
-         GLint col;
-         rb->GetRow(ctx, rb, width, x, y + row, tempRow);
-         /* convert RGBA to RGB */
-         for (col = 0; col < width; col++) {
-            dest[col * 3 + 0] = tempRow[col][0];
-            dest[col * 3 + 1] = tempRow[col][1];
-            dest[col * 3 + 2] = tempRow[col][2];
-         }
-         dest += dstStride;
-      }
-      return GL_TRUE;
+   /* NOTE(review): map is used below without a NULL check -- confirm the
+    * MapRenderbuffer hook cannot leave it NULL on failure.
+    */
+   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
+                              &map, &stride);
+
+   /* Copy exactly one row of texels; stride/dstStride may include padding. */
+   texelBytes = _mesa_get_format_bytes(rb->Format);
+   for (j = 0; j < height; j++) {
+      memcpy(dst, map, width * texelBytes);
+      dst += dstStride;
+      map += stride;
    }
 
-   /* not handled */
-   return GL_FALSE;
-}
+   ctx->Driver.UnmapRenderbuffer(ctx, rb);
 
+   return GL_TRUE;
+}
 
-/**
- * When we're using a low-precision color buffer (like 16-bit 5/6/5)
- * we have to adjust our color values a bit to pass conformance.
- * The problem is when a 5 or 6-bit color value is converted to an 8-bit
- * value and then a floating point value, the floating point values don't
- * increment uniformly as the 5 or 6-bit value is incremented.
- *
- * This function adjusts floating point values to compensate.
- */
-static void
-adjust_colors(const struct gl_framebuffer *fb, GLuint n, GLfloat rgba[][4])
+/**
+ * General path for color glReadPixels(): unpack each mapped row of the color
+ * read buffer into a temporary RGBA row, then pack that row into the
+ * destination in the requested \p format / \p type.
+ *
+ * Integer formats are unpacked and packed as unsigned ints; everything else
+ * goes through floats, where \p packing and \p transferOps are applied by
+ * _mesa_pack_rgba_span_float().
+ *
+ * The temporary row holds MAX_WIDTH pixels and no width check is made here,
+ * so this relies on width <= MAX_WIDTH.  The renderbuffer and its mapping
+ * are not NULL-checked either.
+ *
+ * \return always GL_TRUE.
+ */
+static GLboolean
+slow_read_rgba_pixels( struct gl_context *ctx,
+                      GLint x, GLint y,
+                      GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      GLvoid *pixels,
+                      const struct gl_pixelstore_attrib *packing,
+                      GLbitfield transferOps )
 {
-   const GLuint rShift = 8 - fb->Visual.redBits;
-   const GLuint gShift = 8 - fb->Visual.greenBits;
-   const GLuint bShift = 8 - fb->Visual.blueBits;
-   const GLfloat rScale = 1.0F / (GLfloat) ((1 << fb->Visual.redBits  ) - 1);
-   const GLfloat gScale = 1.0F / (GLfloat) ((1 << fb->Visual.greenBits) - 1);
-   const GLfloat bScale = 1.0F / (GLfloat) ((1 << fb->Visual.blueBits ) - 1);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLint r, g, b;
-      /* convert float back to ubyte */
-      CLAMPED_FLOAT_TO_UBYTE(r, rgba[i][RCOMP]);
-      CLAMPED_FLOAT_TO_UBYTE(g, rgba[i][GCOMP]);
-      CLAMPED_FLOAT_TO_UBYTE(b, rgba[i][BCOMP]);
-      /* using only the N most significant bits of the ubyte value, convert to
-       * float in [0,1].
-       */
-      rgba[i][RCOMP] = (GLfloat) (r >> rShift) * rScale;
-      rgba[i][GCOMP] = (GLfloat) (g >> gShift) * gScale;
-      rgba[i][BCOMP] = (GLfloat) (b >> bShift) * bScale;
+   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
+   /* Looked up once, outside the row loop.  Presumably this maps an sRGB
+    * format to its linear counterpart so rows are unpacked without sRGB
+    * decoding -- confirm against _mesa_get_srgb_format_linear().
+    */
+   const gl_format rbFormat = _mesa_get_srgb_format_linear(rb->Format);
+   /* One row of temporary storage, viewed as float or uint per the path. */
+   union {
+      float f[MAX_WIDTH][4];
+      unsigned int i[MAX_WIDTH][4];
+   } rgba;
+   GLubyte *dst, *map;
+   int dstStride, stride, j;
+
+   dstStride = _mesa_image_row_stride(packing, width, format, type);
+   dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
+                                          format, type, 0, 0);
+
+   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
+                              &map, &stride);
+
+   for (j = 0; j < height; j++) {
+      /* NOTE(review): this test does not depend on j and could be hoisted
+       * like rbFormat above.
+       */
+      if (_mesa_is_integer_format(format)) {
+        _mesa_unpack_int_rgba_row(rbFormat, width, map, rgba.i);
+        _mesa_pack_rgba_span_int(ctx, width, rgba.i, format, type, dst);
+      } else {
+        _mesa_unpack_rgba_row(rbFormat, width, map, rgba.f);
+        _mesa_pack_rgba_span_float(ctx, width, rgba.f, format, type, dst,
+                                   packing, transferOps);
+      }
+      dst += dstStride;
+      map += stride;
    }
-}
 
+   ctx->Driver.UnmapRenderbuffer(ctx, rb);
 
+   return GL_TRUE;
+}
 
 /*
  * Read R, G, B, A, RGB, L, or LA pixels.
  */
 static void
-read_rgba_pixels( GLcontext *ctx,
+read_rgba_pixels( struct gl_context *ctx,
                   GLint x, GLint y,
                   GLsizei width, GLsizei height,
                   GLenum format, GLenum type, GLvoid *pixels,
                   const struct gl_pixelstore_attrib *packing )
 {
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
    GLbitfield transferOps = ctx->_ImageTransferState;
    struct gl_framebuffer *fb = ctx->ReadBuffer;
    struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
@@ -304,102 +286,147 @@ read_rgba_pixels( GLcontext *ctx,
    if (!rb)
       return;
 
-   if (type == GL_FLOAT && ((ctx->Color.ClampReadColor == GL_TRUE) ||
-                            (ctx->Color.ClampReadColor == GL_FIXED_ONLY_ARB &&
-                             rb->DataType != GL_FLOAT)))
+   if ((ctx->Color._ClampReadColor == GL_TRUE || type != GL_FLOAT) &&
+       !_mesa_is_integer_format(format)) {
       transferOps |= IMAGE_CLAMP_BIT;
+   }
 
-   /* Try optimized path first */
-   if (fast_read_rgba_pixels(ctx, x, y, width, height,
-                             format, type, pixels, packing, transferOps)) {
-      return; /* done! */
+   if (!transferOps) {
+      /* Try the optimized paths first. */
+      if (fast_read_rgba_pixels_memcpy(ctx, x, y, width, height,
+                                      format, type, pixels, packing,
+                                      transferOps)) {
+        return;
+      }
    }
 
-   /* width should never be > MAX_WIDTH since we did clipping earlier */
-   ASSERT(width <= MAX_WIDTH);
+   slow_read_rgba_pixels(ctx, x, y, width, height,
+                        format, type, pixels, packing, transferOps);
+}
 
-   if (ctx->Pixel.Convolution2DEnabled || ctx->Pixel.Separable2DEnabled) {
-      GLfloat *dest, *src, *tmpImage, *convImage;
-      GLint row;
+/**
+ * Fast path for a packed depth/stencil buffer being read as depth/stencil.
+ *
+ * Applies only when the depth and stencil attachments are the same
+ * renderbuffer and its format is MESA_FORMAT_Z24_S8 or MESA_FORMAT_S8_Z24.
+ * Each mapped row is written straight into the destination by
+ * _mesa_unpack_uint_24_8_depth_stencil_row() (presumably reordering 8/24
+ * data into 24/8 layout where needed -- confirm against that helper).
+ *
+ * \param dst        first destination row; written as GLuint values
+ * \param dstStride  distance between destination rows, in bytes
+ * \return GL_TRUE if the rows were written, GL_FALSE if the attachments or
+ *         format do not qualify (nothing is mapped or written in that case).
+ */
+static GLboolean
+fast_read_depth_stencil_pixels(struct gl_context *ctx,
+                              GLint x, GLint y,
+                              GLsizei width, GLsizei height,
+                              GLubyte *dst, int dstStride)
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+   struct gl_renderbuffer *stencilRb = fb->Attachment[BUFFER_STENCIL].Renderbuffer;
+   GLubyte *map;
+   int stride, i;
 
-      tmpImage = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
-      if (!tmpImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
-         return;
-      }
-      convImage = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
-      if (!convImage) {
-         free(tmpImage);
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
-         return;
-      }
+   /* Separate depth and stencil buffers are not handled by this path.
+    * NOTE(review): if both attachments are NULL this test passes and
+    * rb->Format is dereferenced below -- confirm callers guarantee that the
+    * depth and stencil buffers exist.
+    */
+   if (rb != stencilRb)
+      return GL_FALSE;
 
-      /* read full RGBA, FLOAT image */
-      dest = tmpImage;
-      for (row = 0; row < height; row++, y++) {
-         _swrast_read_rgba_span(ctx, rb, width, x, y, GL_FLOAT, dest);
-         _mesa_apply_rgba_transfer_ops(ctx, 
-                                      transferOps & IMAGE_PRE_CONVOLUTION_BITS,
-                                      width, (GLfloat (*)[4]) dest);
-         dest += width * 4;
-      }
+   if (rb->Format != MESA_FORMAT_Z24_S8 &&
+       rb->Format != MESA_FORMAT_S8_Z24)
+      return GL_FALSE;
 
-      /* do convolution */
-      if (ctx->Pixel.Convolution2DEnabled) {
-         _mesa_convolve_2d_image(ctx, &width, &height, tmpImage, convImage);
-      }
-      else {
-         ASSERT(ctx->Pixel.Separable2DEnabled);
-         _mesa_convolve_sep_image(ctx, &width, &height, tmpImage, convImage);
-      }
-      free(tmpImage);
-
-      /* finish transfer ops and pack the resulting image */
-      src = convImage;
-      for (row = 0; row < height; row++) {
-         GLvoid *dest;
-         dest = _mesa_image_address2d(packing, pixels, width, height,
-                                      format, type, row, 0);
-         _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) src,
-                                    format, type, dest, packing,
-                                    transferOps & IMAGE_POST_CONVOLUTION_BITS);
-         src += width * 4;
-      }
-      free(convImage);
+   /* NOTE(review): map is used below without a NULL check -- confirm the
+    * MapRenderbuffer hook cannot leave it NULL on failure.
+    */
+   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
+                              &map, &stride);
+
+   for (i = 0; i < height; i++) {
+      _mesa_unpack_uint_24_8_depth_stencil_row(rb->Format, width,
+                                              map, (GLuint *)dst);
+      map += stride;
+      dst += dstStride;
    }
-   else {
-      /* no convolution */
-      const GLint dstStride
-         = _mesa_image_row_stride(packing, width, format, type);
-      GLfloat (*rgba)[4] = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
-      GLint row;
-      GLubyte *dst
-         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-                                             format, type, 0, 0);
-
-      /* make sure we don't apply 1D convolution */
-      transferOps &= ~(IMAGE_CONVOLUTION_BIT |
-                       IMAGE_POST_CONVOLUTION_SCALE_BIAS);
-
-      for (row = 0; row < height; row++, y++) {
-
-         /* Get float rgba pixels */
-         _swrast_read_rgba_span(ctx, rb, width, x, y, GL_FLOAT, rgba);
-
-         /* apply fudge factor for shallow color buffers */
-         if (fb->Visual.redBits < 8 ||
-             fb->Visual.greenBits < 8 ||
-             fb->Visual.blueBits < 8) {
-            adjust_colors(fb, width, rgba);
-         }
 
-         /* pack the row of RGBA pixels into user's buffer */
-         _mesa_pack_rgba_span_float(ctx, width, rgba, format, type, dst,
-                                    packing, transferOps);
+   ctx->Driver.UnmapRenderbuffer(ctx, rb);
+
+   return GL_TRUE;
+}
+
+
+/**
+ * For non-float-depth and stencil buffers being read as 24/8 depth/stencil,
+ * copy the integer data directly instead of converting depth to float and
+ * re-packing.
+ *
+ * Per row, depth is unpacked as uints directly into the destination, stencil
+ * is unpacked into a temporary row, and each destination word then has its
+ * low 8 bits replaced by the stencil value.
+ *
+ * The temporary stencil row holds MAX_WIDTH entries, so this relies on
+ * width <= MAX_WIDTH.  Neither renderbuffer nor either mapping is checked
+ * for NULL here.
+ *
+ * \param dst        first destination row, as 32-bit words
+ * \param dstStride  distance between destination rows, in bytes (the visible
+ *                   caller computes it with _mesa_image_row_stride())
+ * \return GL_FALSE, before anything is mapped, if the depth format's
+ *         datatype is not GL_UNSIGNED_INT; GL_TRUE otherwise.
+ */
+static GLboolean
+fast_read_depth_stencil_pixels_separate(struct gl_context *ctx,
+                                       GLint x, GLint y,
+                                       GLsizei width, GLsizei height,
+                                       uint32_t *dst, int dstStride)
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *depthRb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+   struct gl_renderbuffer *stencilRb = fb->Attachment[BUFFER_STENCIL].Renderbuffer;
+   GLubyte *depthMap, *stencilMap;
+   int depthStride, stencilStride, i, j;
+
+   if (_mesa_get_format_datatype(depthRb->Format) != GL_UNSIGNED_INT)
+      return GL_FALSE;
+
+   /* NOTE(review): nothing here requires depthRb != stencilRb, so a shared
+    * renderbuffer would be mapped twice -- confirm the driver hook tolerates
+    * that.
+    */
+   ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height,
+                              GL_MAP_READ_BIT, &depthMap, &depthStride);
+   ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
+                              GL_MAP_READ_BIT, &stencilMap, &stencilStride);
 
-         dst += dstStride;
+   for (j = 0; j < height; j++) {
+      GLstencil stencilVals[MAX_WIDTH];
+
+      _mesa_unpack_uint_z_row(depthRb->Format, width, depthMap, dst);
+      _mesa_unpack_ubyte_stencil_row(stencilRb->Format, width,
+                                    stencilMap, stencilVals);
+
+      /* Keep the upper 24 bits of depth, put stencil in the low 8 bits. */
+      for (i = 0; i < width; i++) {
+        dst[i] = (dst[i] & 0xffffff00) | stencilVals[i];
       }
+
+      depthMap += depthStride;
+      stencilMap += stencilStride;
+      /* dstStride is in bytes but dst points at 4-byte words. */
+      dst += dstStride / 4;
    }
+
+   ctx->Driver.UnmapRenderbuffer(ctx, depthRb);
+   ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
+
+   return GL_TRUE;
+}
+
+/**
+ * General (slow) path for reading depth/stencil pixels.
+ *
+ * For every row, depth is unpacked to floats and stencil to GLstencil values
+ * from their respective mapped renderbuffers, and the pair is handed to
+ * _mesa_pack_depth_stencil_span() to produce the destination row in the
+ * requested \p type.
+ *
+ * The temporary rows hold MAX_WIDTH entries, so this relies on
+ * width <= MAX_WIDTH.  Neither renderbuffer nor either mapping is checked
+ * for NULL here.
+ *
+ * \param dst        first destination row
+ * \param dstStride  distance between destination rows, in bytes
+ */
+static void
+slow_read_depth_stencil_pixels_separate(struct gl_context *ctx,
+                                       GLint x, GLint y,
+                                       GLsizei width, GLsizei height,
+                                       GLenum type,
+                                       const struct gl_pixelstore_attrib *packing,
+                                       GLubyte *dst, int dstStride)
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *depthRb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+   struct gl_renderbuffer *stencilRb = fb->Attachment[BUFFER_STENCIL].Renderbuffer;
+   GLubyte *depthMap, *stencilMap;
+   int depthStride, stencilStride, j;
+
+   /* NOTE(review): nothing here requires depthRb != stencilRb, so a shared
+    * (packed) renderbuffer is mapped and unmapped twice -- confirm the driver
+    * hook tolerates that.
+    */
+   ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height,
+                              GL_MAP_READ_BIT, &depthMap, &depthStride);
+   ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
+                              GL_MAP_READ_BIT, &stencilMap, &stencilStride);
+
+   for (j = 0; j < height; j++) {
+      GLstencil stencilVals[MAX_WIDTH];
+      GLfloat depthVals[MAX_WIDTH];
+
+      _mesa_unpack_float_z_row(depthRb->Format, width, depthMap, depthVals);
+      _mesa_unpack_ubyte_stencil_row(stencilRb->Format, width,
+                                    stencilMap, stencilVals);
+
+      _mesa_pack_depth_stencil_span(ctx, width, type, (GLuint *)dst,
+                                   depthVals, stencilVals, packing);
+
+      depthMap += depthStride;
+      stencilMap += stencilStride;
+      dst += dstStride;
+   }
+
+   ctx->Driver.UnmapRenderbuffer(ctx, depthRb);
+   ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
 }
 
 
@@ -409,7 +436,7 @@ read_rgba_pixels( GLcontext *ctx,
  * depth and stencil buffers really exist.
  */
 static void
-read_depth_stencil_pixels(GLcontext *ctx,
+read_depth_stencil_pixels(struct gl_context *ctx,
                           GLint x, GLint y,
                           GLsizei width, GLsizei height,
                           GLenum type, GLvoid *pixels,
@@ -419,78 +446,31 @@ read_depth_stencil_pixels(GLcontext *ctx,
       = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
    const GLboolean stencilTransfer = ctx->Pixel.IndexShift
       || ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag;
-   struct gl_renderbuffer *depthRb, *stencilRb;
-
-   depthRb = ctx->ReadBuffer->_DepthBuffer;
-   stencilRb = ctx->ReadBuffer->_StencilBuffer;
-
-   if (!depthRb || !stencilRb)
-      return;
-
-   depthRb = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
-   stencilRb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
-
-   if (depthRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
-       stencilRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
-       depthRb == stencilRb &&
-       !scaleOrBias &&
-       !stencilTransfer) {
-      /* This is the ideal case.
-       * Reading GL_DEPTH_STENCIL pixels from combined depth/stencil buffer.
-       * Plus, no pixel transfer ops to worry about!
-       */
-      GLint i;
-      GLint dstStride = _mesa_image_row_stride(packing, width,
-                                               GL_DEPTH_STENCIL_EXT, type);
-      GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, pixels,
-                                                       width, height,
-                                                       GL_DEPTH_STENCIL_EXT,
-                                                       type, 0, 0);
-      for (i = 0; i < height; i++) {
-         depthRb->GetRow(ctx, depthRb, width, x, y + i, dst);
-         dst += dstStride;
-      }
-   }
-   else {
-      /* Reading GL_DEPTH_STENCIL pixels from separate depth/stencil buffers,
-       * or we need pixel transfer.
-       */
-      GLint i;
-      depthRb = ctx->ReadBuffer->_DepthBuffer;
-      stencilRb = ctx->ReadBuffer->_StencilBuffer;
-
-      for (i = 0; i < height; i++) {
-         GLstencil stencilVals[MAX_WIDTH];
-
-         GLuint *depthStencilDst = (GLuint *)
-            _mesa_image_address2d(packing, pixels, width, height,
-                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
-
-         _swrast_read_stencil_span(ctx, stencilRb, width,
-                                   x, y + i, stencilVals);
-
-         if (!scaleOrBias && !stencilTransfer
-             && ctx->ReadBuffer->Visual.depthBits == 24) {
-            /* ideal case */
-            GLuint zVals[MAX_WIDTH]; /* 24-bit values! */
-            GLint j;
-            ASSERT(depthRb->DataType == GL_UNSIGNED_INT);
-            /* note, we've already been clipped */
-            depthRb->GetRow(ctx, depthRb, width, x, y + i, zVals);
-            for (j = 0; j < width; j++) {
-               depthStencilDst[j] = (zVals[j] << 8) | (stencilVals[j] & 0xff);
-            }
-         }
-         else {
-            /* general case */
-            GLfloat depthVals[MAX_WIDTH];
-            _swrast_read_depth_span_float(ctx, depthRb, width, x, y + i,
-                                          depthVals);
-            _mesa_pack_depth_stencil_span(ctx, width, depthStencilDst,
-                                          depthVals, stencilVals, packing);
-         }
-      }
+   GLubyte *dst;
+   int dstStride;
+
+   dst = (GLubyte *) _mesa_image_address2d(packing, pixels,
+                                          width, height,
+                                          GL_DEPTH_STENCIL_EXT,
+                                          type, 0, 0);
+   dstStride = _mesa_image_row_stride(packing, width,
+                                     GL_DEPTH_STENCIL_EXT, type);
+
+   /* Fast 24/8 reads. */
+   if (type == GL_UNSIGNED_INT_24_8 &&
+       !scaleOrBias && !stencilTransfer && !packing->SwapBytes) {
+      if (fast_read_depth_stencil_pixels(ctx, x, y, width, height,
+                                        dst, dstStride))
+        return;
+
+      if (fast_read_depth_stencil_pixels_separate(ctx, x, y, width, height,
+                                                 (uint32_t *)dst, dstStride))
+        return;
    }
+
+   slow_read_depth_stencil_pixels_separate(ctx, x, y, width, height,
+                                          type, packing,
+                                          dst, dstStride);
 }
 
 
@@ -500,71 +480,43 @@ read_depth_stencil_pixels(GLcontext *ctx,
  * By time we get here, all error checking will have been done.
  */
 void
-_swrast_ReadPixels( GLcontext *ctx,
+_swrast_ReadPixels( struct gl_context *ctx,
                    GLint x, GLint y, GLsizei width, GLsizei height,
                    GLenum format, GLenum type,
                    const struct gl_pixelstore_attrib *packing,
                    GLvoid *pixels )
 {
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
    struct gl_pixelstore_attrib clippedPacking = *packing;
 
    if (ctx->NewState)
       _mesa_update_state(ctx);
 
-   /* Need to do swrast_render_start() before clipping or anything else
-    * since this is where a driver may grab the hw lock and get an updated
-    * window size.
-    */
-   swrast_render_start(ctx);
-
-   if (swrast->NewState)
-      _swrast_validate_derived( ctx );
-
    /* Do all needed clipping here, so that we can forget about it later */
-   if (!_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) {
-      /* The ReadPixels region is totally outside the window bounds */
-      swrast_render_finish(ctx);
-      return;
-   }
-
-   pixels = _mesa_map_pbo_dest(ctx, &clippedPacking, pixels);
-   if (!pixels)
-      return;
-  
-   switch (format) {
-      case GL_STENCIL_INDEX:
-        read_stencil_pixels(ctx, x, y, width, height, type, pixels,
+   if (_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) {
+
+      pixels = _mesa_map_pbo_dest(ctx, &clippedPacking, pixels);
+
+      if (pixels) {
+         switch (format) {
+         case GL_STENCIL_INDEX:
+            read_stencil_pixels(ctx, x, y, width, height, type, pixels,
+                                &clippedPacking);
+            break;
+         case GL_DEPTH_COMPONENT:
+            read_depth_pixels(ctx, x, y, width, height, type, pixels,
+                              &clippedPacking);
+            break;
+         case GL_DEPTH_STENCIL_EXT:
+            read_depth_stencil_pixels(ctx, x, y, width, height, type, pixels,
+                                      &clippedPacking);
+            break;
+         default:
+            /* all other formats should be color formats */
+            read_rgba_pixels(ctx, x, y, width, height, format, type, pixels,
                              &clippedPacking);
-         break;
-      case GL_DEPTH_COMPONENT:
-        read_depth_pixels(ctx, x, y, width, height, type, pixels,
-                           &clippedPacking);
-        break;
-      case GL_RED:
-      case GL_GREEN:
-      case GL_BLUE:
-      case GL_ALPHA:
-      case GL_RGB:
-      case GL_LUMINANCE:
-      case GL_LUMINANCE_ALPHA:
-      case GL_RGBA:
-      case GL_BGR:
-      case GL_BGRA:
-      case GL_ABGR_EXT:
-         read_rgba_pixels(ctx, x, y, width, height,
-                          format, type, pixels, &clippedPacking);
-        break;
-      case GL_DEPTH_STENCIL_EXT:
-         read_depth_stencil_pixels(ctx, x, y, width, height,
-                                   type, pixels, &clippedPacking);
-         break;
-      default:
-        _mesa_problem(ctx, "unexpected format in _swrast_ReadPixels");
-         /* don't return yet, clean-up */
-   }
-
-   swrast_render_finish(ctx);
+         }
 
-   _mesa_unmap_pbo_dest(ctx, &clippedPacking);
+         _mesa_unmap_pbo_dest(ctx, &clippedPacking);
+      }
+   }
 }