st/mesa: always expose ARB_ES2_compatibility
[mesa.git] / src / mesa / state_tracker / st_cb_drawpixels.c
index ea6d021c010b6e055c8bb92f77481d9623e5c1ef..9a3f22465c6dd5418043185cb8bdd867bc356f07 100644 (file)
 #include "main/imports.h"
 #include "main/image.h"
 #include "main/bufferobj.h"
+#include "main/format_pack.h"
 #include "main/macros.h"
+#include "main/mfeatures.h"
 #include "main/mtypes.h"
 #include "main/pack.h"
+#include "main/pbo.h"
+#include "main/readpix.h"
 #include "main/texformat.h"
+#include "main/teximage.h"
 #include "main/texstore.h"
 #include "program/program.h"
 #include "program/prog_print.h"
@@ -61,6 +66,7 @@
 #include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_tile.h"
+#include "util/u_upload_mgr.h"
 #include "cso_cache/cso_context.h"
 
 
@@ -91,6 +97,46 @@ is_passthrough_program(const struct gl_fragment_program *prog)
 }
 
 
+/**
+ * Create a fragment program for the pixel transfer operations that are
+ * currently enabled, starting from a GLSL-based program.
+ *
+ * Scale/bias is requested when any RGBA scale differs from 1.0 or any
+ * RGBA bias differs from 0.0; color mapping follows Pixel.MapColorFlag.
+ * The pixel-map texture and its sampler view are created on first use.
+ *
+ * \param st    state tracker context
+ * \param orig  program whose glsl_to_tgsi data is handed to
+ *              get_pixel_transfer_visitor()
+ * \return the new program, or NULL if Driver.NewProgram() failed
+ */
+static struct gl_fragment_program *
+get_glsl_pixel_transfer_program(struct st_context *st,
+                                struct st_fragment_program *orig)
+{
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp;
+   int scaleAndBias;
+   int pixelMaps;
+
+   fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+   if (!fp)
+      return NULL;
+
+   /* non-identity scale or non-zero bias on any channel */
+   scaleAndBias =
+      (ctx->Pixel.RedScale != 1.0 || ctx->Pixel.RedBias != 0.0 ||
+       ctx->Pixel.GreenScale != 1.0 || ctx->Pixel.GreenBias != 0.0 ||
+       ctx->Pixel.BlueScale != 1.0 || ctx->Pixel.BlueBias != 0.0 ||
+       ctx->Pixel.AlphaScale != 1.0 || ctx->Pixel.AlphaBias != 0.0);
+
+   pixelMaps = ctx->Pixel.MapColorFlag;
+
+   /* lazily create the colormap texture and its sampler view */
+   if (pixelMaps && !st->pixel_xfer.pixelmap_texture) {
+      st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
+      st->pixel_xfer.pixelmap_sampler_view =
+         st_create_texture_sampler_view(st->pipe,
+                                        st->pixel_xfer.pixelmap_texture);
+   }
+
+   get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi,
+                              scaleAndBias, pixelMaps);
+
+   return &fp->Base;
+}
+
 
 /**
  * Make fragment shader for glDraw/CopyPixels.  This shader is made
@@ -104,11 +150,15 @@ st_make_drawpix_fragment_program(struct st_context *st,
                                  struct gl_fragment_program **fpOut)
 {
    struct gl_program *newProg;
+   struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
 
    if (is_passthrough_program(fpIn)) {
       newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
                                              &st->pixel_xfer.program->Base);
    }
+   else if (stfp->glsl_to_tgsi != NULL) {
+      newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
+   }
    else {
 #if 0
       /* debug */
@@ -211,9 +261,9 @@ st_make_drawpix_z_stencil_program(struct st_context *st,
    p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0;
    p->OutputsWritten = 0;
    if (write_depth)
-      p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH);
+      p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
    if (write_stencil)
-      p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL);
+      p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_STENCIL);
 
    p->SamplersUsed =  0x1;  /* sampler 0 (bit 0) is used */
    if (write_stencil)
@@ -269,40 +319,45 @@ make_passthrough_vertex_shader(struct st_context *st,
 }
 
 
-/**
- * Return a texture base format for drawing/copying an image
- * of the given format.
- */
-static GLenum
-base_format(GLenum format)
-{
-   switch (format) {
-   case GL_DEPTH_COMPONENT:
-      return GL_DEPTH_COMPONENT;
-   case GL_DEPTH_STENCIL:
-      return GL_DEPTH_STENCIL;
-   case GL_STENCIL_INDEX:
-      return GL_STENCIL_INDEX;
-   default:
-      return GL_RGBA;
-   }
-}
-
-
 /**
  * Return a texture internalFormat for drawing/copying an image
  * of the given format and type.
  */
 static GLenum
-internal_format(GLenum format, GLenum type)
+internal_format(struct gl_context *ctx, GLenum format, GLenum type)
 {
    switch (format) {
    case GL_DEPTH_COMPONENT:
-      return GL_DEPTH_COMPONENT;
+      switch (type) {
+      case GL_UNSIGNED_SHORT:
+         return GL_DEPTH_COMPONENT16;
+
+      case GL_UNSIGNED_INT:
+         return GL_DEPTH_COMPONENT32;
+
+      case GL_FLOAT:
+         if (ctx->Extensions.ARB_depth_buffer_float)
+            return GL_DEPTH_COMPONENT32F;
+         else
+            return GL_DEPTH_COMPONENT;
+
+      default:
+         return GL_DEPTH_COMPONENT;
+      }
+
    case GL_DEPTH_STENCIL:
-      return GL_DEPTH_STENCIL;
+      switch (type) {
+      case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+         return GL_DEPTH32F_STENCIL8;
+
+      case GL_UNSIGNED_INT_24_8:
+      default:
+         return GL_DEPTH24_STENCIL8;
+      }
+
    case GL_STENCIL_INDEX:
       return GL_STENCIL_INDEX;
+
    default:
       if (_mesa_is_integer_format(format)) {
          switch (type) {
@@ -324,7 +379,61 @@ internal_format(GLenum format, GLenum type)
          }
       }
       else {
-         return GL_RGBA;
+         switch (type) {
+         case GL_UNSIGNED_BYTE:
+         case GL_UNSIGNED_INT_8_8_8_8:
+         case GL_UNSIGNED_INT_8_8_8_8_REV:
+         default:
+            return GL_RGBA8;
+
+         case GL_UNSIGNED_BYTE_3_3_2:
+         case GL_UNSIGNED_BYTE_2_3_3_REV:
+         case GL_UNSIGNED_SHORT_4_4_4_4:
+         case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+            return GL_RGBA4;
+
+         case GL_UNSIGNED_SHORT_5_6_5:
+         case GL_UNSIGNED_SHORT_5_6_5_REV:
+         case GL_UNSIGNED_SHORT_5_5_5_1:
+         case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+            return GL_RGB5_A1;
+
+         case GL_UNSIGNED_INT_10_10_10_2:
+         case GL_UNSIGNED_INT_2_10_10_10_REV:
+            return GL_RGB10_A2;
+
+         case GL_UNSIGNED_SHORT:
+         case GL_UNSIGNED_INT:
+            return GL_RGBA16;
+
+         case GL_BYTE:
+            return
+               ctx->Extensions.EXT_texture_snorm ? GL_RGBA8_SNORM : GL_RGBA8;
+
+         case GL_SHORT:
+         case GL_INT:
+            return
+               ctx->Extensions.EXT_texture_snorm ? GL_RGBA16_SNORM : GL_RGBA16;
+
+         case GL_HALF_FLOAT_ARB:
+            return
+               ctx->Extensions.ARB_texture_float ? GL_RGBA16F :
+               ctx->Extensions.EXT_texture_snorm ? GL_RGBA16_SNORM : GL_RGBA16;
+
+         case GL_FLOAT:
+         case GL_DOUBLE:
+            return
+               ctx->Extensions.ARB_texture_float ? GL_RGBA32F :
+               ctx->Extensions.EXT_texture_snorm ? GL_RGBA16_SNORM : GL_RGBA16;
+
+         case GL_UNSIGNED_INT_5_9_9_9_REV:
+            assert(ctx->Extensions.EXT_texture_shared_exponent);
+            return GL_RGB9_E5;
+
+         case GL_UNSIGNED_INT_10F_11F_11F_REV:
+            assert(ctx->Extensions.EXT_packed_float);
+            return GL_R11F_G11F_B10F;
+         }
       }
    }
 }
@@ -342,7 +451,7 @@ alloc_texture(struct st_context *st, GLsizei width, GLsizei height,
    struct pipe_resource *pt;
 
    pt = st_texture_create(st, st->internal_target, texFormat, 0,
-                          width, height, 1, PIPE_BIND_SAMPLER_VIEW);
+                          width, height, 1, 1, PIPE_BIND_SAMPLER_VIEW);
 
    return pt;
 }
@@ -363,11 +472,10 @@ make_texture(struct st_context *st,
    gl_format mformat;
    struct pipe_resource *pt;
    enum pipe_format pipeFormat;
-   GLuint cpp;
-   GLenum baseFormat, intFormat;
+   GLenum baseInternalFormat, intFormat;
 
-   baseFormat = base_format(format);
-   intFormat = internal_format(format, type);
+   intFormat = internal_format(ctx, format, type);
+   baseInternalFormat = _mesa_base_tex_format(ctx, intFormat);
 
    mformat = st_ChooseTextureFormat_renderable(ctx, intFormat,
                                                format, type, GL_FALSE);
@@ -375,7 +483,6 @@ make_texture(struct st_context *st,
 
    pipeFormat = st_mesa_format_to_pipe_format(mformat);
    assert(pipeFormat);
-   cpp = util_format_get_blocksize(pipeFormat);
 
    pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
    if (!pixels)
@@ -390,7 +497,6 @@ make_texture(struct st_context *st,
 
    {
       struct pipe_transfer *transfer;
-      static const GLuint dstImageOffsets = 0;
       GLboolean success;
       GLubyte *dest;
       const GLbitfield imageTransferStateSave = ctx->_ImageTransferState;
@@ -411,12 +517,10 @@ make_texture(struct st_context *st,
        * the texture.  We deal with that with texcoords.
        */
       success = _mesa_texstore(ctx, 2,           /* dims */
-                               baseFormat,       /* baseInternalFormat */
+                               baseInternalFormat, /* baseInternalFormat */
                                mformat,          /* gl_format */
-                               dest,             /* dest */
-                               0, 0, 0,          /* dstX/Y/Zoffset */
                                transfer->stride, /* dstRowStride, bytes */
-                               &dstImageOffsets, /* dstImageOffsets */
+                               &dest,            /* destSlices */
                                width, height, 1, /* size */
                                format, type,     /* src format/type */
                                pixels,           /* data source */
@@ -451,7 +555,15 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
 {
    struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
-   GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */
+   GLfloat (*verts)[3][4]; /* four verts, three attribs, XYZW */
+   struct pipe_resource *buf = NULL;
+   unsigned offset;
+
+   u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset, &buf,
+                 (void**)&verts);
+   if (!buf) {
+      return;
+   }
 
    /* setup vertex data */
    {
@@ -515,21 +627,12 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
       }
    }
 
-   {
-      struct pipe_resource *buf;
-
-      /* allocate/load buffer object with vertex data */
-      buf = pipe_buffer_create(pipe->screen,
-                              PIPE_BIND_VERTEX_BUFFER,
-                               sizeof(verts));
-      pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts);
-
-      util_draw_vertex_buffer(pipe, buf, 0,
-                              PIPE_PRIM_QUADS,
-                              4,  /* verts */
-                              3); /* attribs/vert */
-      pipe_resource_reference(&buf, NULL);
-   }
+   u_upload_unmap(st->uploader);
+   util_draw_vertex_buffer(pipe, st->cso_context, buf, offset,
+                          PIPE_PRIM_QUADS,
+                          4,  /* verts */
+                          3); /* attribs/vert */
+   pipe_resource_reference(&buf, NULL);
 }
 
 
@@ -567,8 +670,11 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    cso_save_samplers(cso);
    cso_save_fragment_sampler_views(cso);
    cso_save_fragment_shader(cso);
+   cso_save_stream_outputs(cso);
    cso_save_vertex_shader(cso);
+   cso_save_geometry_shader(cso);
    cso_save_vertex_elements(cso);
+   cso_save_vertex_buffers(cso);
    if (write_stencil) {
       cso_save_depth_stencil_alpha(cso);
       cso_save_blend(cso);
@@ -578,7 +684,10 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    {
       struct pipe_rasterizer_state rasterizer;
       memset(&rasterizer, 0, sizeof(rasterizer));
+      rasterizer.clamp_fragment_color = !st->clamp_frag_color_in_shader &&
+                                        ctx->Color._ClampFragmentColor;
       rasterizer.gl_rasterization_rules = 1;
+      rasterizer.depth_clip = !ctx->Transform.DepthClamp;
       rasterizer.scissor = ctx->Scissor.Enabled;
       cso_set_rasterizer(cso, &rasterizer);
    }
@@ -615,6 +724,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    /* vertex shader state: position + texcoord pass-through */
    cso_set_vertex_shader_handle(cso, driver_vp);
 
+   /* geometry shader state: disabled */
+   cso_set_geometry_shader_handle(cso, NULL);
 
    /* texture sampling state: */
    {
@@ -652,6 +763,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    }
 
    cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+   cso_set_stream_outputs(st->cso_context, 0, NULL, 0);
 
    /* texture state: */
    cso_set_fragment_sampler_views(cso, num_sampler_view, sv);
@@ -684,7 +796,10 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    cso_restore_fragment_sampler_views(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
+   cso_restore_geometry_shader(cso);
    cso_restore_vertex_elements(cso);
+   cso_restore_vertex_buffers(cso);
+   cso_restore_stream_outputs(cso);
    if (write_stencil) {
       cso_restore_depth_stencil_alpha(cso);
       cso_restore_blend(cso);
@@ -708,9 +823,10 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
    enum pipe_transfer_usage usage;
    struct pipe_transfer *pt;
    const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
-   GLint skipPixels;
    ubyte *stmap;
    struct gl_pixelstore_attrib clippedUnpack = *unpack;
+   GLubyte *sValues;
+   GLuint *zValues;
 
    if (!zoom) {
       if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
@@ -727,42 +843,47 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
       y = ctx->DrawBuffer->Height - y - height;
    }
 
-   if(format != GL_DEPTH_STENCIL && 
-      util_format_get_component_bits(strb->format,
-                                     UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
+   if (format == GL_STENCIL_INDEX && 
+       _mesa_is_format_packed_depth_stencil(strb->Base.Format)) {
+      /* writing stencil to a combined depth+stencil buffer */
       usage = PIPE_TRANSFER_READ_WRITE;
-   else
+   }
+   else {
       usage = PIPE_TRANSFER_WRITE;
+   }
 
-   pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0,
-                                     usage, x, y,
-                                     width, height);
+   pt = pipe_get_transfer(pipe, strb->texture,
+                          strb->rtt_level, strb->rtt_face + strb->rtt_slice,
+                          usage, x, y,
+                          width, height);
 
    stmap = pipe_transfer_map(pipe, pt);
 
    pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels);
    assert(pixels);
 
-   /* if width > MAX_WIDTH, have to process image in chunks */
-   skipPixels = 0;
-   while (skipPixels < width) {
-      const GLint spanX = skipPixels;
-      const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+   sValues = (GLubyte *) malloc(width * sizeof(GLubyte));
+   zValues = (GLuint *) malloc(width * sizeof(GLuint));
+
+   if (sValues && zValues) {
       GLint row;
       for (row = 0; row < height; row++) {
-         GLubyte sValues[MAX_WIDTH];
-         GLuint zValues[MAX_WIDTH];
+         GLfloat *zValuesFloat = (GLfloat*)zValues;
          GLenum destType = GL_UNSIGNED_BYTE;
          const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels,
                                                       width, height,
                                                       format, type,
-                                                      row, skipPixels);
-         _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues,
+                                                      row, 0);
+         _mesa_unpack_stencil_span(ctx, width, destType, sValues,
                                    type, source, &clippedUnpack,
                                    ctx->_ImageTransferState);
 
          if (format == GL_DEPTH_STENCIL) {
-            _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues,
+            GLenum ztype =
+               pt->resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ?
+               GL_FLOAT : GL_UNSIGNED_INT;
+
+            _mesa_unpack_depth_span(ctx, width, ztype, zValues,
                                     (1 << 24) - 1, type, source,
                                     &clippedUnpack);
          }
@@ -784,56 +905,81 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
 
             /* now pack the stencil (and Z) values in the dest format */
             switch (pt->resource->format) {
-            case PIPE_FORMAT_S8_USCALED:
+            case PIPE_FORMAT_S8_UINT:
                {
-                  ubyte *dest = stmap + spanY * pt->stride + spanX;
+                  ubyte *dest = stmap + spanY * pt->stride;
                   assert(usage == PIPE_TRANSFER_WRITE);
-                  memcpy(dest, sValues, spanWidth);
+                  memcpy(dest, sValues, width);
                }
                break;
-            case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+            case PIPE_FORMAT_Z24_UNORM_S8_UINT:
                if (format == GL_DEPTH_STENCIL) {
-                  uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+                  uint *dest = (uint *) (stmap + spanY * pt->stride);
                   GLint k;
                   assert(usage == PIPE_TRANSFER_WRITE);
-                  for (k = 0; k < spanWidth; k++) {
+                  for (k = 0; k < width; k++) {
                      dest[k] = zValues[k] | (sValues[k] << 24);
                   }
                }
                else {
-                  uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+                  uint *dest = (uint *) (stmap + spanY * pt->stride);
                   GLint k;
                   assert(usage == PIPE_TRANSFER_READ_WRITE);
-                  for (k = 0; k < spanWidth; k++) {
+                  for (k = 0; k < width; k++) {
                      dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24);
                   }
                }
                break;
-            case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+            case PIPE_FORMAT_S8_UINT_Z24_UNORM:
                if (format == GL_DEPTH_STENCIL) {
-                  uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+                  uint *dest = (uint *) (stmap + spanY * pt->stride);
                   GLint k;
                   assert(usage == PIPE_TRANSFER_WRITE);
-                  for (k = 0; k < spanWidth; k++) {
+                  for (k = 0; k < width; k++) {
                      dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff);
                   }
                }
                else {
-                  uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+                  uint *dest = (uint *) (stmap + spanY * pt->stride);
                   GLint k;
                   assert(usage == PIPE_TRANSFER_READ_WRITE);
-                  for (k = 0; k < spanWidth; k++) {
+                  for (k = 0; k < width; k++) {
                      dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff);
                   }
                }
                break;
+            case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+               if (format == GL_DEPTH_STENCIL) {
+                  uint *dest = (uint *) (stmap + spanY * pt->stride);
+                  GLfloat *destf = (GLfloat*)dest;
+                  GLint k;
+                  assert(usage == PIPE_TRANSFER_WRITE);
+                  for (k = 0; k < width; k++) {
+                     destf[k*2] = zValuesFloat[k];
+                     dest[k*2+1] = sValues[k] & 0xff;
+                  }
+               }
+               else {
+                  uint *dest = (uint *) (stmap + spanY * pt->stride);
+                  GLint k;
+                  assert(usage == PIPE_TRANSFER_READ_WRITE);
+                  for (k = 0; k < width; k++) {
+                     dest[k*2+1] = sValues[k] & 0xff;
+                  }
+               }
+               break;
             default:
                assert(0);
             }
          }
       }
-      skipPixels += spanWidth;
    }
+   else {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels()");
+   }
+
+   free(sValues);
+   free(zValues);
 
    _mesa_unmap_pbo_source(ctx, &clippedUnpack);
 
@@ -867,6 +1013,8 @@ get_color_fp_variant(struct st_context *st)
                        ctx->Pixel.AlphaBias != 0.0 ||
                        ctx->Pixel.AlphaScale != 1.0);
    key.pixelMaps = ctx->Pixel.MapColorFlag;
+   key.clamp_color = st->clamp_frag_color_in_shader &&
+                     st->ctx->Color._ClampFragmentColor;
 
    fpv = st_get_fp_variant(st, st->fp, &key);
 
@@ -914,7 +1062,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
    GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
    struct pipe_sampler_view *sv[2];
    int num_sampler_view = 1;
-   enum pipe_format stencil_format = PIPE_FORMAT_NONE;
    struct st_fp_variant *fpv;
 
    if (format == GL_DEPTH_STENCIL)
@@ -924,23 +1071,12 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
    else if (format == GL_DEPTH_COMPONENT)
       write_depth = GL_TRUE;
 
-   if (write_stencil) {
-      enum pipe_format tex_format;
-      /* can we write to stencil if not fallback */
-      if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT))
-        goto stencil_fallback;
-      
-      tex_format = st_choose_format(st->pipe->screen, base_format(format),
-                                    PIPE_TEXTURE_2D,
-                                   0, PIPE_BIND_SAMPLER_VIEW);
-      if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
-        stencil_format = PIPE_FORMAT_X24S8_USCALED;
-      else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM)
-        stencil_format = PIPE_FORMAT_S8X24_USCALED;
-      else
-        stencil_format = PIPE_FORMAT_S8_USCALED;
-      if (stencil_format == PIPE_FORMAT_NONE)
-        goto stencil_fallback;
+   if (write_stencil &&
+       !pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) {
+      /* software fallback */
+      draw_stencil_pixels(ctx, x, y, width, height, format, type,
+                          unpack, pixels);
+      return;
    }
 
    /* Mesa state should be up to date by now */
@@ -985,11 +1121,36 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
          sv[0] = st_create_texture_sampler_view(st->pipe, pt);
 
          if (sv[0]) {
-           if (write_stencil) {
-              sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
+            /* Create a second sampler view to read stencil.
+             * The stencil is written using the shader stencil export
+             * functionality. */
+            if (write_stencil) {
+               enum pipe_format stencil_format = PIPE_FORMAT_NONE;
+
+               switch (pt->format) {
+               case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+               case PIPE_FORMAT_X24S8_UINT:
+                  stencil_format = PIPE_FORMAT_X24S8_UINT;
+                  break;
+               case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+               case PIPE_FORMAT_S8X24_UINT:
+                  stencil_format = PIPE_FORMAT_S8X24_UINT;
+                  break;
+               case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+               case PIPE_FORMAT_X32_S8X24_UINT:
+                  stencil_format = PIPE_FORMAT_X32_S8X24_UINT;
+                  break;
+               case PIPE_FORMAT_S8_UINT:
+                  stencil_format = PIPE_FORMAT_S8_UINT;
+                  break;
+               default:
+                  assert(0);
+               }
+
+               sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
                                                              stencil_format);
-              num_sampler_view++;
-           }
+               num_sampler_view++;
+            }
 
             draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
                                width, height,
@@ -1006,11 +1167,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
          pipe_resource_reference(&pt, NULL);
       }
    }
-   return;
-
-stencil_fallback:
-   draw_stencil_pixels(ctx, x, y, width, height, format, type,
-                      unpack, pixels);
 }
 
 
@@ -1037,17 +1193,14 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
       return;
    }
 
-   /* Get the dest renderbuffer.  If there's a wrapper, use the
-    * underlying renderbuffer.
-    */
-   rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer);
-   if (rbDraw->Base.Wrapped)
-      rbDraw = st_renderbuffer(rbDraw->Base.Wrapped);
+   /* Get the dest renderbuffer */
+   rbDraw = st_renderbuffer(ctx->DrawBuffer->
+                            Attachment[BUFFER_STENCIL].Renderbuffer);
 
    /* this will do stencil pixel transfer ops */
-   st_read_stencil_pixels(ctx, srcx, srcy, width, height,
-                          GL_STENCIL_INDEX, GL_UNSIGNED_BYTE,
-                          &ctx->DefaultPacking, buffer);
+   _mesa_readpixels(ctx, srcx, srcy, width, height,
+                    GL_STENCIL_INDEX, GL_UNSIGNED_BYTE,
+                    &ctx->DefaultPacking, buffer);
 
    if (0) {
       /* debug code: dump stencil values */
@@ -1061,8 +1214,7 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
       }
    }
 
-   if (util_format_get_component_bits(rbDraw->format,
-                                     UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
+   if (_mesa_is_format_packed_depth_stencil(rbDraw->Base.Format))
       usage = PIPE_TRANSFER_READ_WRITE;
    else
       usage = PIPE_TRANSFER_WRITE;
@@ -1071,8 +1223,10 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
       dsty = rbDraw->Base.Height - dsty - height;
    }
 
-   ptDraw = pipe_get_transfer(st_context(ctx)->pipe,
-                              rbDraw->texture, 0, 0,
+   ptDraw = pipe_get_transfer(pipe,
+                              rbDraw->texture,
+                              rbDraw->rtt_level,
+                              rbDraw->rtt_face + rbDraw->rtt_slice,
                               usage, dstx, dsty,
                               width, height);
 
@@ -1098,36 +1252,7 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
       dst = drawMap + y * ptDraw->stride;
       src = buffer + i * width;
 
-      switch (ptDraw->resource->format) {
-      case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
-         {
-            uint *dst4 = (uint *) dst;
-            int j;
-            assert(usage == PIPE_TRANSFER_READ_WRITE);
-            for (j = 0; j < width; j++) {
-               *dst4 = (*dst4 & 0xffffff) | (src[j] << 24);
-               dst4++;
-            }
-         }
-         break;
-      case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
-         {
-            uint *dst4 = (uint *) dst;
-            int j;
-            assert(usage == PIPE_TRANSFER_READ_WRITE);
-            for (j = 0; j < width; j++) {
-               *dst4 = (*dst4 & 0xffffff00) | (src[j] & 0xff);
-               dst4++;
-            }
-         }
-         break;
-      case PIPE_FORMAT_S8_USCALED:
-         assert(usage == PIPE_TRANSFER_WRITE);
-         memcpy(dst, src, width);
-         break;
-      default:
-         assert(0);
-      }
+      _mesa_pack_ubyte_stencil_row(rbDraw->Base.Format, width, src, dst);
    }
 
    free(buffer);
@@ -1138,6 +1263,127 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
 }
 
 
+/**
+ * Look up the renderbuffer that glCopyPixels reads color pixels from:
+ * the read framebuffer's _ColorReadBuffer, as an st_renderbuffer.
+ */
+static struct st_renderbuffer *
+st_get_color_read_renderbuffer(struct gl_context *ctx)
+{
+   return st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+}
+
+
+/**
+ * Test whether two equally sized rectangles intersect.
+ *
+ * The source rectangle starts at (srcX, srcY), the destination rectangle
+ * at (dstX, dstY); both are width x height.  Rectangles that merely touch
+ * along an edge are reported as not overlapping.
+ *
+ * \return GL_TRUE if the rectangles share at least one pixel, else GL_FALSE
+ */
+static GLboolean
+regions_overlap(GLint srcX, GLint srcY, GLint dstX, GLint dstY,
+                GLsizei width, GLsizei height)
+{
+   /* separated horizontally: one rect ends at or before the other starts */
+   const GLboolean apartInX =
+      srcX + width <= dstX || dstX + width <= srcX;
+   /* separated vertically, same reasoning */
+   const GLboolean apartInY =
+      srcY + height <= dstY || dstY + height <= srcY;
+
+   return (apartInX || apartInY) ? GL_FALSE : GL_TRUE;
+}
+
+
+/**
+ * Try to do a glCopyPixels for simple cases with a blit by calling
+ * pipe->resource_copy_region().
+ *
+ * The fast path is taken only when all of the following hold:
+ *  - color pixels are being copied (type == GL_COLOR),
+ *  - there is no pixel zoom and no pixel transfer op,
+ *  - none of the per-fragment / programmable state tested below is
+ *    enabled (blend, alpha test, depth test, fog, stencil, fragment or
+ *    vertex program, GLSL fragment program),
+ *  - read and draw framebuffers have the same Y orientation,
+ *  - there is exactly one color draw buffer and no conditional-render
+ *    query is active,
+ *  - the src/dest renderbuffer formats are the same, and
+ *  - the src/dest regions don't overlap if both are the same renderbuffer.
+ *
+ * NOTE(review): color mask and logic-op state are not tested here; confirm
+ * whether a raw region copy is acceptable when those are in effect.
+ *
+ * \param srcx, srcy     lower-left corner of the source region
+ * \param width, height  size of the region to copy
+ * \param dstx, dsty     lower-left corner of the destination region
+ * \param type           GL_COLOR, GL_DEPTH, ... as passed to glCopyPixels
+ * \return GL_TRUE if the copy was completely handled here (including the
+ *         case where clipping left nothing to copy), GL_FALSE if the
+ *         caller has to use another path.
+ */
+static GLboolean
+blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
+                 GLsizei width, GLsizei height,
+                 GLint dstx, GLint dsty, GLenum type)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct gl_pixelstore_attrib pack, unpack;
+   GLint readX, readY, readW, readH;
+
+   if (type == GL_COLOR &&
+       ctx->Pixel.ZoomX == 1.0 &&
+       ctx->Pixel.ZoomY == 1.0 &&
+       ctx->_ImageTransferState == 0x0 &&
+       !ctx->Color.BlendEnabled &&
+       !ctx->Color.AlphaEnabled &&
+       !ctx->Depth.Test &&
+       !ctx->Fog.Enabled &&
+       !ctx->Stencil.Enabled &&
+       !ctx->FragmentProgram.Enabled &&
+       !ctx->VertexProgram.Enabled &&
+       !ctx->Shader.CurrentFragmentProgram &&
+       st_fb_orientation(ctx->ReadBuffer) == st_fb_orientation(ctx->DrawBuffer) &&
+       ctx->DrawBuffer->_NumColorDrawBuffers == 1 &&
+       !ctx->Query.CondRenderQuery) {
+      struct st_renderbuffer *rbRead, *rbDraw;
+      GLint drawX, drawY;
+
+      /*
+       * Clip the read region against the src buffer bounds.
+       * Only the part of the source that survives clipping is copied;
+       * destination pixels whose source was clipped away are not written.
+       * pack.SkipPixels/SkipRows are used below to shift the destination
+       * origin by the amount the source origin moved.
+       */
+      readX = srcx;
+      readY = srcy;
+      readW = width;
+      readH = height;
+      pack = ctx->DefaultPacking;
+      if (!_mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack))
+         return GL_TRUE; /* all done */
+
+      /* clip against dest buffer bounds and scissor box */
+      drawX = dstx + pack.SkipPixels;
+      drawY = dsty + pack.SkipRows;
+      unpack = pack;
+      if (!_mesa_clip_drawpixels(ctx, &drawX, &drawY, &readW, &readH, &unpack))
+         return GL_TRUE; /* all done */
+
+      /* apply to the source whatever extra skip the dest clip introduced */
+      readX = readX - pack.SkipPixels + unpack.SkipPixels;
+      readY = readY - pack.SkipRows + unpack.SkipRows;
+
+      rbRead = st_get_color_read_renderbuffer(ctx);
+      rbDraw = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+
+      if ((rbRead != rbDraw ||
+           !regions_overlap(readX, readY, drawX, drawY, readW, readH)) &&
+          rbRead->Base.Format == rbDraw->Base.Format) {
+         struct pipe_box srcBox;
+
+         /* flip src/dst position if needed */
+         if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+            /* both buffers will have the same orientation */
+            readY = ctx->ReadBuffer->Height - readY - readH;
+            drawY = ctx->DrawBuffer->Height - drawY - readH;
+         }
+
+         u_box_2d(readX, readY, readW, readH, &srcBox);
+         /* Address the attached cube face / 3D slice on the read side,
+          * selecting the layer the same way the transfer paths in this
+          * file do (rtt_face + rtt_slice).
+          */
+         srcBox.z = rbRead->rtt_face + rbRead->rtt_slice;
+
+         /* Likewise pick the attached layer on the draw side instead of
+          * always writing layer 0.
+          */
+         pipe->resource_copy_region(pipe,
+                                    rbDraw->texture,
+                                    rbDraw->rtt_level, drawX, drawY,
+                                    rbDraw->rtt_face + rbDraw->rtt_slice,
+                                    rbRead->texture,
+                                    rbRead->rtt_level, &srcBox);
+         return GL_TRUE;
+      }
+   }
+
+   return GL_FALSE;
+}
+
+
 static void
 st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
               GLsizei width, GLsizei height,
@@ -1161,12 +1407,30 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
 
    st_validate_state(st);
 
+   if (type == GL_DEPTH_STENCIL) {
+      /* XXX make this more efficient */
+      st_CopyPixels(ctx, srcx, srcy, width, height, dstx, dsty, GL_STENCIL);
+      st_CopyPixels(ctx, srcx, srcy, width, height, dstx, dsty, GL_DEPTH);
+      return;
+   }
+
    if (type == GL_STENCIL) {
       /* can't use texturing to do stencil */
       copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty);
       return;
    }
 
+   if (blit_copy_pixels(ctx, srcx, srcy, width, height, dstx, dsty, type))
+      return;
+
+   /*
+    * The subsequent code implements glCopyPixels by copying the source
+    * pixels into a temporary texture that's then applied to a textured quad.
+    * When we draw the textured quad, all the usual per-fragment operations
+    * are handled.
+    */
+
+
    /*
     * Get vertex/fragment shaders
     */
@@ -1186,7 +1450,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
    }
    else {
       assert(type == GL_DEPTH);
-      rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
+      rbRead = st_renderbuffer(ctx->ReadBuffer->
+                               Attachment[BUFFER_DEPTH].Renderbuffer);
       color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
 
       fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE);
@@ -1198,10 +1463,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
    /* update fragment program constants */
    st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
 
-
-   if (rbRead->Base.Wrapped)
-      rbRead = st_renderbuffer(rbRead->Base.Wrapped);
-
    sample_count = rbRead->texture->nr_samples;
    /* I believe this would be legal, presumably would need to do a resolve
       for color, and for depth/stencil spec says to just use one of the
@@ -1212,20 +1473,21 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
 
    if (screen->is_format_supported(screen, srcFormat, st->internal_target,
                                    sample_count,
-                                   PIPE_BIND_SAMPLER_VIEW, 0)) {
+                                   PIPE_BIND_SAMPLER_VIEW)) {
       texFormat = srcFormat;
    }
    else {
       /* srcFormat can't be used as a texture format */
       if (type == GL_DEPTH) {
          texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT,
-                                      st->internal_target, sample_count,
-                                      PIPE_BIND_DEPTH_STENCIL);
+                                      GL_NONE, GL_NONE, st->internal_target,
+                                     sample_count, PIPE_BIND_DEPTH_STENCIL);
          assert(texFormat != PIPE_FORMAT_NONE);
       }
       else {
          /* default color format */
-         texFormat = st_choose_format(screen, GL_RGBA, st->internal_target,
+         texFormat = st_choose_format(screen, GL_RGBA,
+                                      GL_NONE, GL_NONE, st->internal_target,
                                       sample_count, PIPE_BIND_SAMPLER_VIEW);
          assert(texFormat != PIPE_FORMAT_NONE);
       }
@@ -1247,7 +1509,15 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
    readY = srcy;
    readW = width;
    readH = height;
-   _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack);
+   if (!_mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack)) {
+      /* The source region is completely out of bounds.  Do nothing.
+       * The GL spec says "Results of copies from outside the window,
+       * or from regions of the window that are not exposed, are
+       * hardware dependent and undefined."
+       */
+      return;
+   }
+
    readW = MAX2(0, readW);
    readH = MAX2(0, readH);
 
@@ -1267,20 +1537,22 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
    if (srcFormat == texFormat) {
       struct pipe_box src_box;
       u_box_2d(readX, readY, readW, readH, &src_box);
-    /* copy source framebuffer surface into mipmap/texture */
+      /* copy source framebuffer surface into mipmap/texture */
       pipe->resource_copy_region(pipe,
                                  pt,                                /* dest tex */
-                                 0,
+                                 0,                                 /* dest lvl */
                                  pack.SkipPixels, pack.SkipRows, 0, /* dest pos */
                                  rbRead->texture,                   /* src tex */
-                                 0,
+                                 rbRead->rtt_level,                 /* src lvl */
                                  &src_box);
 
    }
    else {
       /* CPU-based fallback/conversion */
       struct pipe_transfer *ptRead =
-         pipe_get_transfer(st->pipe, rbRead->texture, 0, 0,
+         pipe_get_transfer(st->pipe, rbRead->texture,
+                           rbRead->rtt_level,
+                           rbRead->rtt_face + rbRead->rtt_slice,
                            PIPE_TRANSFER_READ,
                            readX, readY, readW, readH);
       struct pipe_transfer *ptTex;
@@ -1301,15 +1573,19 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
       if (type == GL_COLOR) {
          /* alternate path using get/put_tile() */
          GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
-         pipe_get_tile_rgba(pipe, ptRead, readX, readY, readW, readH, buf);
-         pipe_put_tile_rgba(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
-                            readW, readH, buf);
+         enum pipe_format readFormat, drawFormat;
+         readFormat = util_format_linear(rbRead->texture->format);
+         drawFormat = util_format_linear(pt->format);
+         pipe_get_tile_rgba_format(pipe, ptRead, 0, 0, readW, readH,
+                                   readFormat, buf);
+         pipe_put_tile_rgba_format(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
+                                   readW, readH, drawFormat, buf);
          free(buf);
       }
       else {
          /* GL_DEPTH */
          GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint));
-         pipe_get_tile_z(pipe, ptRead, readX, readY, readW, readH, buf);
+         pipe_get_tile_z(pipe, ptRead, 0, 0, readW, readH, buf);
          pipe_put_tile_z(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
                          readW, readH, buf);
          free(buf);
@@ -1355,9 +1631,9 @@ st_destroy_drawpix(struct st_context *st)
 
    st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
    if (st->drawpix.vert_shaders[0])
-      ureg_free_tokens(st->drawpix.vert_shaders[0]);
+      cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[0]);
    if (st->drawpix.vert_shaders[1])
-      ureg_free_tokens(st->drawpix.vert_shaders[1]);
+      cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[1]);
 }
 
 #endif /* FEATURE_drawpix */