st/mesa: fix glCopyPixels bugs/crashes when src region needs clipping
[mesa.git] / src / mesa / state_tracker / st_cb_drawpixels.c
index 081c09c1fbf1d2d269c248150eb21b7d97c22bab..7c611cb4ec965802b984c71434f9c2e858e2d252 100644 (file)
 #include "main/macros.h"
 #include "main/texformat.h"
 #include "main/texstore.h"
-#include "main/state.h"
 #include "shader/program.h"
-#include "shader/prog_parameter.h"
 #include "shader/prog_print.h"
 
+#include "st_debug.h"
 #include "st_context.h"
 #include "st_atom.h"
 #include "st_atom_constbuf.h"
-#include "st_draw.h"
 #include "st_program.h"
 #include "st_cb_drawpixels.h"
 #include "st_cb_readpixels.h"
 #include "st_cb_fbo.h"
-#include "st_cb_texture.h"
-#include "st_draw.h"
 #include "st_format.h"
-#include "st_mesa_to_tgsi.h"
 #include "st_texture.h"
 #include "st_inlines.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
+#include "util/u_inlines.h"
+#include "tgsi/tgsi_ureg.h"
 #include "util/u_tile.h"
 #include "util/u_draw_quad.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
+#include "util/u_rect.h"
 #include "shader/prog_instruction.h"
 #include "cso_cache/cso_context.h"
 
@@ -95,8 +93,9 @@ is_passthrough_program(const struct gl_fragment_program *prog)
 /**
  * Make fragment shader for glDraw/CopyPixels.  This shader is made
  * by combining the pixel transfer shader with the user-defined shader.
+ * \return pointer to Gallium driver fragment shader
  */
-static struct st_fragment_program *
+static void *
 combined_drawpix_fragment_program(GLcontext *ctx)
 {
    struct st_context *st = st_context(ctx);
@@ -115,7 +114,7 @@ combined_drawpix_fragment_program(GLcontext *ctx)
        */
       if (is_passthrough_program(&st->fp->Base)) {
          stfp = (struct st_fragment_program *)
-            _mesa_clone_program(ctx, &st->pixel_xfer.program->Base.Base);
+            _mesa_clone_fragment_program(ctx, &st->pixel_xfer.program->Base);
       }
       else {
 #if 0
@@ -142,7 +141,7 @@ combined_drawpix_fragment_program(GLcontext *ctx)
 #endif
 
       /* translate to TGSI tokens */
-      st_translate_fragment_program(st, stfp, NULL);
+      st_translate_fragment_program(st, stfp);
 
       /* save new program, update serial numbers */
       st->pixel_xfer.xfer_prog_sn = st->pixel_xfer.program->serialNo;
@@ -158,7 +157,7 @@ combined_drawpix_fragment_program(GLcontext *ctx)
     */
    st_upload_constants(st, stfp->Base.Base.Parameters, PIPE_SHADER_FRAGMENT);
 
-   return stfp;
+   return stfp->driver_shader;
 }
 
 
@@ -166,8 +165,9 @@ combined_drawpix_fragment_program(GLcontext *ctx)
  * Create fragment shader that does a TEX() instruction to get a Z
  * value, then writes to FRAG_RESULT_DEPTH.
  * Pass fragment color through as-is.
+ * \return pointer to the Gallium driver fragment shader
  */
-static struct st_fragment_program *
+static void *
 make_fragment_shader_z(struct st_context *st)
 {
    GLcontext *ctx = st->ctx;
@@ -175,7 +175,7 @@ make_fragment_shader_z(struct st_context *st)
    GLuint ic = 0;
 
    if (st->drawpix.z_shader) {
-      return st->drawpix.z_shader;
+      return st->drawpix.z_shader->driver_shader;
    }
 
    /*
@@ -223,9 +223,9 @@ make_fragment_shader_z(struct st_context *st)
    p->SamplersUsed = 0x1;  /* sampler 0 (bit 0) is used */
 
    st->drawpix.z_shader = (struct st_fragment_program *) p;
-   st_translate_fragment_program(st, st->drawpix.z_shader, NULL);
+   st_translate_fragment_program(st, st->drawpix.z_shader);
 
-   return st->drawpix.z_shader;
+   return st->drawpix.z_shader->driver_shader;
 }
 
 
@@ -234,83 +234,50 @@ make_fragment_shader_z(struct st_context *st)
  * Create a simple vertex shader that just passes through the
  * vertex position and texcoord (and optionally, color).
  */
-static struct st_vertex_program *
-st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor)
+static void *
+make_passthrough_vertex_shader(struct st_context *st, 
+                               GLboolean passColor)
 {
-   GLcontext *ctx = st->ctx;
-   struct st_vertex_program *stvp;
-   struct gl_program *p;
-   GLuint ic = 0;
-
-   if (st->drawpix.vert_shaders[passColor])
-      return st->drawpix.vert_shaders[passColor];
-
-   /*
-    * Create shader now
-    */
-   p = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
-   if (!p)
-      return NULL;
-
-   if (passColor)
-      p->NumInstructions = 4;
-   else
-      p->NumInstructions = 3;
-
-   p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
-   if (!p->Instructions) {
-      ctx->Driver.DeleteProgram(ctx, p);
-      return NULL;
-   }
-   _mesa_init_instructions(p->Instructions, p->NumInstructions);
-   /* MOV result.pos, vertex.pos; */
-   p->Instructions[0].Opcode = OPCODE_MOV;
-   p->Instructions[0].DstReg.File = PROGRAM_OUTPUT;
-   p->Instructions[0].DstReg.Index = VERT_RESULT_HPOS;
-   p->Instructions[0].SrcReg[0].File = PROGRAM_INPUT;
-   p->Instructions[0].SrcReg[0].Index = VERT_ATTRIB_POS;
-   /* MOV result.texcoord0, vertex.texcoord0; */
-   p->Instructions[1].Opcode = OPCODE_MOV;
-   p->Instructions[1].DstReg.File = PROGRAM_OUTPUT;
-   p->Instructions[1].DstReg.Index = VERT_RESULT_TEX0;
-   p->Instructions[1].SrcReg[0].File = PROGRAM_INPUT;
-   p->Instructions[1].SrcReg[0].Index = VERT_ATTRIB_TEX0;
-   ic = 2;
-   if (passColor) {
-      /* MOV result.color0, vertex.color0; */
-      p->Instructions[ic].Opcode = OPCODE_MOV;
-      p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
-      p->Instructions[ic].DstReg.Index = VERT_RESULT_COL0;
-      p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
-      p->Instructions[ic].SrcReg[0].Index = VERT_ATTRIB_COLOR0;
-      ic++;
-   }
-
-   /* END; */
-   p->Instructions[ic].Opcode = OPCODE_END;
-   ic++;
-
-   assert(ic == p->NumInstructions);
+   if (!st->drawpix.vert_shaders[passColor]) {
+      struct ureg_program *ureg = 
+         ureg_create( TGSI_PROCESSOR_VERTEX );
+
+      if (ureg == NULL)
+         return NULL;
+
+      /* MOV result.pos, vertex.pos; */
+      ureg_MOV(ureg, 
+               ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ),
+               ureg_DECL_vs_input( ureg, 0 ));
+      
+      /* MOV result.texcoord0, vertex.texcoord0; */
+      ureg_MOV(ureg, 
+               ureg_DECL_output( ureg, TGSI_SEMANTIC_GENERIC, 0 ),
+               ureg_DECL_vs_input( ureg, 1 ));
+      
+      if (passColor) {
+         /* MOV result.color0, vertex.color0; */
+         ureg_MOV(ureg, 
+                  ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ),
+                  ureg_DECL_vs_input( ureg, 2 ));
+      }
 
-   p->InputsRead = VERT_BIT_POS | VERT_BIT_TEX0;
-   p->OutputsWritten = ((1 << VERT_RESULT_TEX0) |
-                        (1 << VERT_RESULT_HPOS));
-   if (passColor) {
-      p->InputsRead |= VERT_BIT_COLOR0;
-      p->OutputsWritten |= (1 << VERT_RESULT_COL0);
+      ureg_END( ureg );
+      
+      st->drawpix.vert_shaders[passColor] = 
+         ureg_create_shader_and_destroy( ureg, st->pipe );
    }
 
-   stvp = (struct st_vertex_program *) p;
-   st_translate_vertex_program(st, stvp, NULL, NULL, NULL);
-
-   st->drawpix.vert_shaders[passColor] = stvp;
-
-   return stvp;
+   return st->drawpix.vert_shaders[passColor];
 }
 
 
+/**
+ * Return a texture internalFormat for drawing/copying an image
+ * of the given type.
+ */
 static GLenum
-_mesa_base_format(GLenum format)
+base_format(GLenum format)
 {
    switch (format) {
    case GL_DEPTH_COMPONENT:
@@ -345,14 +312,14 @@ make_texture(struct st_context *st,
    GLenum baseFormat;
    int ptw, pth;
 
-   baseFormat = _mesa_base_format(format);
+   baseFormat = base_format(format);
 
    mformat = st_ChooseTextureFormat(ctx, baseFormat, format, type);
    assert(mformat);
 
    pipeFormat = st_mesa_format_to_pipe_format(mformat);
    assert(pipeFormat);
-   cpp = st_sizeof_format(pipeFormat);
+   cpp = util_format_get_blocksize(pipeFormat);
 
    pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
    if (!pixels)
@@ -410,7 +377,7 @@ make_texture(struct st_context *st,
        */
       success = _mesa_texstore(ctx, 2,           /* dims */
                                baseFormat,       /* baseInternalFormat */
-                               mformat,          /* gl_texture_format */
+                               mformat,          /* gl_format */
                                dest,             /* dest */
                                0, 0, 0,          /* dstX/Y/Zoffset */
                                transfer->stride, /* dstRowStride, bytes */
@@ -438,7 +405,7 @@ make_texture(struct st_context *st,
 
 /**
  * Draw quad with texcoords and optional color.
- * Coords are window coords with y=0=bottom.
+ * Coords are gallium window coords with y=0=top.
  * \param color  may be null
  * \param invertTex  if true, flip texcoords vertically
  */
@@ -537,8 +504,8 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
                    GLsizei width, GLsizei height,
                    GLfloat zoomX, GLfloat zoomY,
                    struct pipe_texture *pt,
-                   struct st_vertex_program *stvp,
-                   struct st_fragment_program *stfp,
+                   void *driver_vp,
+                   void *driver_fp,
                    const GLfloat *color,
                    GLboolean invertTex)
 {
@@ -573,10 +540,10 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    }
 
    /* fragment shader state: TEX lookup program */
-   cso_set_fragment_shader_handle(cso, stfp->driver_shader);
+   cso_set_fragment_shader_handle(cso, driver_fp);
 
    /* vertex shader state: position + texcoord pass-through */
-   cso_set_vertex_shader_handle(cso, stvp->driver_shader);
+   cso_set_vertex_shader_handle(cso, driver_vp);
 
 
    /* texture sampling state: */
@@ -600,16 +567,16 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
 
    /* viewport state: viewport matching window dims */
    {
-      const float width = (float) ctx->DrawBuffer->Width;
-      const float height = (float) ctx->DrawBuffer->Height;
+      const float w = (float) ctx->DrawBuffer->Width;
+      const float h = (float) ctx->DrawBuffer->Height;
       struct pipe_viewport_state vp;
-      vp.scale[0] =  0.5f * width;
-      vp.scale[1] = -0.5f * height;
-      vp.scale[2] = 1.0f;
+      vp.scale[0] =  0.5f * w;
+      vp.scale[1] = -0.5f * h;
+      vp.scale[2] = 0.5f;
       vp.scale[3] = 1.0f;
-      vp.translate[0] = 0.5f * width;
-      vp.translate[1] = 0.5f * height;
-      vp.translate[2] = 0.0f;
+      vp.translate[0] = 0.5f * w;
+      vp.translate[1] = 0.5f * h;
+      vp.translate[2] = 0.5f;
       vp.translate[3] = 0.0f;
       cso_set_viewport(cso, &vp);
    }
@@ -619,24 +586,32 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
       struct pipe_texture *textures[2];
       textures[0] = pt;
       textures[1] = st->pixel_xfer.pixelmap_texture;
-      pipe->set_sampler_textures(pipe, 2, textures);
+      pipe->set_fragment_sampler_textures(pipe, 2, textures);
    }
    else {
-      pipe->set_sampler_textures(pipe, 1, &pt);
+      pipe->set_fragment_sampler_textures(pipe, 1, &pt);
    }
 
-   /* Compute window coords (y=0=bottom) with pixel zoom.
+   /* Compute Gallium window coords (y=0=top) with pixel zoom.
     * Recall that these coords are transformed by the current
     * vertex shader and viewport transformation.
     */
+   if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
+      y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY);
+      invertTex = !invertTex;
+   }
+
    x0 = (GLfloat) x;
    x1 = x + width * ctx->Pixel.ZoomX;
    y0 = (GLfloat) y;
    y1 = y + height * ctx->Pixel.ZoomY;
 
+   /* convert Z from [0,1] to [-1,1] to match viewport Z scale/bias */
+   z = z * 2.0 - 1.0;
+
    draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
-            (GLfloat) width / pt->width[0],
-            (GLfloat) height / pt->height[0]);
+            (GLfloat) width / pt->width0,
+            (GLfloat) height / pt->height0);
 
    /* restore state */
    cso_restore_rasterizer(cso);
@@ -663,6 +638,15 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
    const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
    GLint skipPixels;
    ubyte *stmap;
+   struct gl_pixelstore_attrib clippedUnpack = *unpack;
+
+   if (!zoom) {
+      if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
+                                 &clippedUnpack)) {
+         /* totally clipped */
+         return;
+      }
+   }
 
    strb = st_renderbuffer(ctx->DrawBuffer->
                           Attachment[BUFFER_STENCIL].Renderbuffer);
@@ -672,7 +656,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
    }
 
    if(format != GL_DEPTH_STENCIL && 
-      pf_get_component_bits( strb->format, PIPE_FORMAT_COMP_Z ) != 0)
+      util_format_get_component_bits(strb->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
       usage = PIPE_TRANSFER_READ_WRITE;
    else
       usage = PIPE_TRANSFER_WRITE;
@@ -683,7 +667,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
 
    stmap = screen->transfer_map(screen, pt);
 
-   pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
+   pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels);
    assert(pixels);
 
    /* if width > MAX_WIDTH, have to process image in chunks */
@@ -696,17 +680,18 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
          GLubyte sValues[MAX_WIDTH];
          GLuint zValues[MAX_WIDTH];
          GLenum destType = GL_UNSIGNED_BYTE;
-         const GLvoid *source = _mesa_image_address2d(unpack, pixels,
+         const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels,
                                                       width, height,
                                                       format, type,
                                                       row, skipPixels);
          _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues,
-                                   type, source, unpack,
+                                   type, source, &clippedUnpack,
                                    ctx->_ImageTransferState);
 
          if (format == GL_DEPTH_STENCIL) {
             _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues,
-                                    (1 << 24) - 1, type, source, unpack);
+                                    (1 << 24) - 1, type, source,
+                                    &clippedUnpack);
          }
 
          if (zoom) {
@@ -725,7 +710,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
             }
 
             /* now pack the stencil (and Z) values in the dest format */
-            switch (pt->format) {
+            switch (pt->texture->format) {
             case PIPE_FORMAT_S8_UNORM:
                {
                   ubyte *dest = stmap + spanY * pt->stride + spanX;
@@ -733,7 +718,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
                   memcpy(dest, sValues, spanWidth);
                }
                break;
-            case PIPE_FORMAT_S8Z24_UNORM:
+            case PIPE_FORMAT_Z24S8_UNORM:
                if (format == GL_DEPTH_STENCIL) {
                   uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
                   GLint k;
@@ -751,7 +736,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
                   }
                }
                break;
-            case PIPE_FORMAT_Z24S8_UNORM:
+            case PIPE_FORMAT_S8Z24_UNORM:
                if (format == GL_DEPTH_STENCIL) {
                   uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
                   GLint k;
@@ -777,7 +762,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
       skipPixels += spanWidth;
    }
 
-   _mesa_unmap_pbo_source(ctx, unpack);
+   _mesa_unmap_pbo_source(ctx, &clippedUnpack);
 
    /* unmap the stencil buffer */
    screen->transfer_unmap(screen, pt);
@@ -793,10 +778,8 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
               GLenum format, GLenum type,
               const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels)
 {
-   struct st_fragment_program *stfp;
-   struct st_vertex_program *stvp;
+   void *driver_vp, *driver_fp;
    struct st_context *st = st_context(ctx);
-   struct pipe_surface *ps;
    const GLfloat *color;
 
    if (format == GL_STENCIL_INDEX ||
@@ -812,15 +795,13 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
    st_validate_state(st);
 
    if (format == GL_DEPTH_COMPONENT) {
-      ps = st->state.framebuffer.zsbuf;
-      stfp = make_fragment_shader_z(st);
-      stvp = st_make_passthrough_vertex_shader(st, GL_TRUE);
+      driver_fp = make_fragment_shader_z(st);
+      driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
       color = ctx->Current.RasterColor;
    }
    else {
-      ps = st->state.framebuffer.cbufs[0];
-      stfp = combined_drawpix_fragment_program(ctx);
-      stvp = st_make_passthrough_vertex_shader(st, GL_FALSE);
+      driver_fp = combined_drawpix_fragment_program(ctx);
+      driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
       color = NULL;
    }
 
@@ -831,7 +812,10 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
       if (pt) {
          draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
                             width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
-                            pt, stvp, stfp, color, GL_FALSE);
+                            pt, 
+                            driver_vp, 
+                            driver_fp,
+                            color, GL_FALSE);
          pipe_texture_reference(&pt, NULL);
       }
    }
@@ -852,7 +836,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
    ubyte *buffer;
    int i;
 
-   buffer = _mesa_malloc(width * height * sizeof(ubyte));
+   buffer = malloc(width * height * sizeof(ubyte));
    if (!buffer) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)");
       return;
@@ -863,7 +847,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
                           GL_STENCIL_INDEX, GL_UNSIGNED_BYTE,
                           &ctx->DefaultPacking, buffer);
 
-   if(pf_get_component_bits( rbDraw->format, PIPE_FORMAT_COMP_Z ) != 0)
+   if(util_format_get_component_bits(rbDraw->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
       usage = PIPE_TRANSFER_READ_WRITE;
    else
       usage = PIPE_TRANSFER_WRITE;
@@ -877,8 +861,8 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
                                           usage, dstx, dsty,
                                           width, height);
 
-   assert(ptDraw->block.width == 1);
-   assert(ptDraw->block.height == 1);
+   assert(util_format_get_blockwidth(ptDraw->texture->format) == 1);
+   assert(util_format_get_blockheight(ptDraw->texture->format) == 1);
 
    /* map the stencil buffer */
    drawMap = screen->transfer_map(screen, ptDraw);
@@ -899,8 +883,8 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
       dst = drawMap + y * ptDraw->stride;
       src = buffer + i * width;
 
-      switch (ptDraw->format) {
-      case PIPE_FORMAT_S8Z24_UNORM:
+      switch (ptDraw->texture->format) {
+      case PIPE_FORMAT_Z24S8_UNORM:
          {
             uint *dst4 = (uint *) dst;
             int j;
@@ -911,7 +895,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
             }
          }
          break;
-      case PIPE_FORMAT_Z24S8_UNORM:
+      case PIPE_FORMAT_S8Z24_UNORM:
          {
             uint *dst4 = (uint *) dst;
             int j;
@@ -931,7 +915,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
       }
    }
 
-   _mesa_free(buffer);
+   free(buffer);
 
    /* unmap the stencil buffer */
    screen->transfer_unmap(screen, ptDraw);
@@ -948,45 +932,19 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
    struct pipe_context *pipe = st->pipe;
    struct pipe_screen *screen = pipe->screen;
    struct st_renderbuffer *rbRead;
-   struct st_vertex_program *stvp;
-   struct st_fragment_program *stfp;
+   void *driver_vp, *driver_fp;
    struct pipe_texture *pt;
    GLfloat *color;
    enum pipe_format srcFormat, texFormat;
    int ptw, pth;
+   GLboolean invertTex = GL_FALSE;
+   GLint readX, readY, readW, readH;
+   struct gl_pixelstore_attrib unpack = ctx->DefaultPacking;
 
    pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
 
    st_validate_state(st);
 
-   if (srcx < 0) {
-      width -= -srcx;
-      dstx += -srcx;
-      srcx = 0;
-   }
-
-   if (srcy < 0) {
-      height -= -srcy;
-      dsty += -srcy;
-      srcy = 0;
-   }
-
-   if (dstx < 0) {
-      width -= -dstx;
-      srcx += -dstx;
-      dstx = 0;
-   }
-
-   if (dsty < 0) {
-      height -= -dsty;
-      srcy += -dsty;
-      dsty = 0;
-   }
-
-   if (width < 0 || height < 0)
-      return;
-
-
    if (type == GL_STENCIL) {
       /* can't use texturing to do stencil */
       copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty);
@@ -996,15 +954,15 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
    if (type == GL_COLOR) {
       rbRead = st_get_color_read_renderbuffer(ctx);
       color = NULL;
-      stfp = combined_drawpix_fragment_program(ctx);
-      stvp = st_make_passthrough_vertex_shader(st, GL_FALSE);
+      driver_fp = combined_drawpix_fragment_program(ctx);
+      driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
    }
    else {
       assert(type == GL_DEPTH);
       rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
       color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
-      stfp = make_fragment_shader_z(st);
-      stvp = st_make_passthrough_vertex_shader(st, GL_TRUE);
+      driver_fp = make_fragment_shader_z(st);
+      driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
    }
 
    srcFormat = rbRead->texture->format;
@@ -1016,30 +974,39 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
    else {
       /* srcFormat can't be used as a texture format */
       if (type == GL_DEPTH) {
-         texFormat = st_choose_format(pipe, GL_DEPTH_COMPONENT, PIPE_TEXTURE_2D, 
+         texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT,
+                                      PIPE_TEXTURE_2D, 
                                       PIPE_TEXTURE_USAGE_DEPTH_STENCIL);
-         assert(texFormat != PIPE_FORMAT_NONE); /* XXX no depth texture formats??? */
+         assert(texFormat != PIPE_FORMAT_NONE);
       }
       else {
          /* default color format */
-         texFormat = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, 
+         texFormat = st_choose_format(screen, GL_RGBA, PIPE_TEXTURE_2D, 
                                       PIPE_TEXTURE_USAGE_SAMPLER);
          assert(texFormat != PIPE_FORMAT_NONE);
       }
    }
 
-   if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
-      srcy = ctx->DrawBuffer->Height - srcy - height;
-
-      if (srcy < 0) {
-         height -= -srcy;
-         srcy = 0;
-      }
-
-      if (height < 0)
-         return;
+   /* Invert src region if needed */
+   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+      srcy = ctx->ReadBuffer->Height - srcy - height;
+      invertTex = !invertTex;
    }
 
+   /* Clip the read region against the src buffer bounds.
+    * We'll still allocate a temporary buffer/texture for the original
+    * src region size but we'll only read the region which is on-screen.
+    * This may mean that we draw garbage pixels into the dest region, but
+    * that's expected.
+    */
+   readX = srcx;
+   readY = srcy;
+   readW = width;
+   readH = height;
+   _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &unpack);
+   readW = MAX2(0, readW);
+   readH = MAX2(0, readH);
+
    /* Need to use POT texture? */
    ptw = width;
    pth = height;
@@ -1067,7 +1034,8 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
    if (!pt)
       return;
 
-
+   /* Make temporary texture which is a copy of the src region.
+    */
    if (srcFormat == texFormat) {
       /* copy source framebuffer surface into mipmap/texture */
       struct pipe_surface *psRead = screen->get_tex_surface(screen,
@@ -1075,11 +1043,26 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
                                        PIPE_BUFFER_USAGE_GPU_READ);
       struct pipe_surface *psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, 
                                       PIPE_BUFFER_USAGE_GPU_WRITE );
-      pipe->surface_copy(pipe,
-                        psTex, /* dest */
-                        0, 0, /* destx/y */
-                        psRead,
-                        srcx, srcy, width, height);
+      if (pipe->surface_copy) {
+         pipe->surface_copy(pipe,
+                            psTex,                               /* dest surf */
+                            unpack.SkipPixels, unpack.SkipRows,  /* dest pos */
+                            psRead,                              /* src surf */
+                            readX, readY, readW, readH);         /* src region */
+      } else {
+         util_surface_copy(pipe, FALSE,
+                           psTex,
+                           unpack.SkipPixels, unpack.SkipRows,
+                           psRead,
+                           readX, readY, readW, readH);
+      }
+
+      if (0) {
+         /* debug */
+         debug_dump_surface("copypixsrcsurf", psRead);
+         debug_dump_surface("copypixtemptex", psTex);
+      }
+
       pipe_surface_reference(&psRead, NULL); 
       pipe_surface_reference(&psTex, NULL);
    }
@@ -1087,12 +1070,15 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
       /* CPU-based fallback/conversion */
       struct pipe_transfer *ptRead =
          st_cond_flush_get_tex_transfer(st, rbRead->texture, 0, 0, 0,
-                                       PIPE_TRANSFER_READ, srcx, srcy, width,
-                                       height);
+                                        PIPE_TRANSFER_READ,
+                                        readX, readY, readW, readH);
       struct pipe_transfer *ptTex;
       enum pipe_transfer_usage transfer_usage;
 
-      if (type == GL_DEPTH && pf_is_depth_and_stencil(pt->format))
+      if (ST_DEBUG & DEBUG_FALLBACK)
+         debug_printf("%s: fallback processing\n", __FUNCTION__);
+
+      if (type == GL_DEPTH && util_format_is_depth_and_stencil(pt->format))
          transfer_usage = PIPE_TRANSFER_READ_WRITE;
       else
          transfer_usage = PIPE_TRANSFER_WRITE;
@@ -1100,31 +1086,37 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
       ptTex = st_cond_flush_get_tex_transfer(st, pt, 0, 0, 0, transfer_usage,
                                              0, 0, width, height);
 
+      /* copy image from ptRead surface to ptTex surface */
       if (type == GL_COLOR) {
          /* alternate path using get/put_tile() */
-         GLfloat *buf = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
-
-         pipe_get_tile_rgba(ptRead, 0, 0, width, height, buf);
-         pipe_put_tile_rgba(ptTex, 0, 0, width, height, buf);
-
-         _mesa_free(buf);
+         GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+         pipe_get_tile_rgba(ptRead, readX, readY, readW, readH, buf);
+         pipe_put_tile_rgba(ptTex, unpack.SkipPixels, unpack.SkipRows,
+                            readW, readH, buf);
+         free(buf);
       }
       else {
          /* GL_DEPTH */
-         GLuint *buf = (GLuint *) _mesa_malloc(width * height * sizeof(GLuint));
-         pipe_get_tile_z(ptRead, 0, 0, width, height, buf);
-         pipe_put_tile_z(ptTex, 0, 0, width, height, buf);
-         _mesa_free(buf);
+         GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint));
+         pipe_get_tile_z(ptRead, readX, readY, readW, readH, buf);
+         pipe_put_tile_z(ptTex, unpack.SkipPixels, unpack.SkipRows,
+                            readW, readH, buf);
+         free(buf);
       }
 
       screen->tex_transfer_destroy(ptRead);
       screen->tex_transfer_destroy(ptTex);
    }
 
-   /* draw textured quad */
+   /* OK, the texture 'pt' contains the src image/pixels.  Now draw a
+    * textured quad with that texture.
+    */
    draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2],
                       width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
-                      pt, stvp, stfp, color, GL_TRUE);
+                      pt, 
+                      driver_vp, 
+                      driver_fp,
+                      color, invertTex);
 
    pipe_texture_reference(&pt, NULL);
 }
@@ -1143,6 +1135,8 @@ st_destroy_drawpix(struct st_context *st)
 {
    st_reference_fragprog(st, &st->drawpix.z_shader, NULL);
    st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
-   st_reference_vertprog(st, &st->drawpix.vert_shaders[0], NULL);
-   st_reference_vertprog(st, &st->drawpix.vert_shaders[1], NULL);
+   if (st->drawpix.vert_shaders[0])
+      free(st->drawpix.vert_shaders[0]);
+   if (st->drawpix.vert_shaders[1])
+      free(st->drawpix.vert_shaders[1]);
 }