Merge branch 'mesa_7_6_branch'
[mesa.git] / src / mesa / state_tracker / st_cb_texture.c
index a018cdee64338db072517664316544fbe9752dd7..3520c986b4761d6a28aa57674e912d2939a6bda7 100644 (file)
  * 
  **************************************************************************/
 
-#include "main/imports.h"
+#include "main/mfeatures.h"
+#include "main/bufferobj.h"
 #if FEATURE_convolve
 #include "main/convolve.h"
 #endif
 #include "main/enums.h"
 #include "main/image.h"
+#include "main/imports.h"
 #include "main/macros.h"
 #include "main/mipmap.h"
 #include "main/pixel.h"
 #include "main/texcompress.h"
 #include "main/texformat.h"
+#include "main/texgetimage.h"
 #include "main/teximage.h"
 #include "main/texobj.h"
 #include "main/texstore.h"
 #include "state_tracker/st_public.h"
 #include "state_tracker/st_texture.h"
 #include "state_tracker/st_gen_mipmap.h"
+#include "state_tracker/st_inlines.h"
+#include "state_tracker/st_atom.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "pipe/p_shader_tokens.h"
 #include "util/u_tile.h"
 #include "util/u_blit.h"
+#include "util/u_surface.h"
+#include "util/u_math.h"
 
 
 #define DBG if (0) printf
@@ -86,7 +94,7 @@ gl_target_to_pipe(GLenum target)
  * Return nominal bytes per texel for a compressed format, 0 for non-compressed
  * format.
  */
-static int
+static GLuint
 compressed_num_bytes(GLuint mesaFormat)
 {
    switch(mesaFormat) {
@@ -108,13 +116,32 @@ compressed_num_bytes(GLuint mesaFormat)
 }
 
 
+static GLboolean
+is_compressed_mesa_format(const struct gl_texture_format *format)
+{
+   switch (format->MesaFormat) {
+   case MESA_FORMAT_RGB_DXT1:
+   case MESA_FORMAT_RGBA_DXT1:
+   case MESA_FORMAT_RGBA_DXT3:
+   case MESA_FORMAT_RGBA_DXT5:
+   case MESA_FORMAT_SRGB_DXT1:
+   case MESA_FORMAT_SRGBA_DXT1:
+   case MESA_FORMAT_SRGBA_DXT3:
+   case MESA_FORMAT_SRGBA_DXT5:
+      return GL_TRUE;
+   default:
+      return GL_FALSE;
+   }
+}
+
+
 /** called via ctx->Driver.NewTextureImage() */
 static struct gl_texture_image *
 st_NewTextureImage(GLcontext * ctx)
 {
    DBG("%s\n", __FUNCTION__);
    (void) ctx;
-   return (struct gl_texture_image *) CALLOC_STRUCT(st_texture_image);
+   return (struct gl_texture_image *) ST_CALLOC_STRUCT(st_texture_image);
 }
 
 
@@ -122,7 +149,7 @@ st_NewTextureImage(GLcontext * ctx)
 static struct gl_texture_object *
 st_NewTextureObject(GLcontext * ctx, GLuint name, GLenum target)
 {
-   struct st_texture_object *obj = CALLOC_STRUCT(st_texture_object);
+   struct st_texture_object *obj = ST_CALLOC_STRUCT(st_texture_object);
 
    DBG("%s\n", __FUNCTION__);
    _mesa_initialize_texture_object(&obj->base, name, target);
@@ -156,7 +183,7 @@ st_FreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage)
    }
 
    if (texImage->Data) {
-      free(texImage->Data);
+      _mesa_align_free(texImage->Data);
       texImage->Data = NULL;
    }
 }
@@ -167,7 +194,7 @@ st_FreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage)
  * than COPY_DWORDS would:
  * XXX Put this in src/mesa/main/imports.h ???
  */
-#if defined(i386) || defined(__i386__)
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
 static INLINE void *
 __memcpy(void *to, const void *from, size_t n)
 {
@@ -204,7 +231,7 @@ __memcpy(void *to, const void *from, size_t n)
 static void *
 do_memcpy(void *dest, const void *src, size_t n)
 {
-   if ((((unsigned) src) & 63) || (((unsigned) dest) & 63)) {
+   if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63)) {
       return __memcpy(dest, src, n);
    }
    else
@@ -212,15 +239,18 @@ do_memcpy(void *dest, const void *src, size_t n)
 }
 
 
-static int
-logbase2(int n)
+/**
+ * Return default texture usage bitmask for the given texture format.
+ */
+static GLuint
+default_usage(enum pipe_format fmt)
 {
-   GLint i = 1, log2 = 0;
-   while (n > i) {
-      i *= 2;
-      log2++;
-   }
-   return log2;
+   GLuint usage = PIPE_TEXTURE_USAGE_SAMPLER;
+   if (pf_is_depth_stencil(fmt))
+      usage |= PIPE_TEXTURE_USAGE_DEPTH_STENCIL;
+   else
+      usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET;
+   return usage;
 }
 
 
@@ -248,7 +278,7 @@ guess_and_alloc_texture(struct st_context *st,
    GLuint width = stImage->base.Width2;  /* size w/out border */
    GLuint height = stImage->base.Height2;
    GLuint depth = stImage->base.Depth2;
-   GLuint i, comp_byte = 0;
+   GLuint i, usage;
    enum pipe_format fmt;
 
    DBG("%s\n", __FUNCTION__);
@@ -295,21 +325,23 @@ guess_and_alloc_texture(struct st_context *st,
     * pagetable arrangements.
     */
    if ((stObj->base.MinFilter == GL_NEAREST ||
-        stObj->base.MinFilter == GL_LINEAR) &&
+        stObj->base.MinFilter == GL_LINEAR ||
+        stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
+        stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
        stImage->level == firstLevel) {
       lastLevel = firstLevel;
    }
    else {
-      GLuint l2width = logbase2(width);
-      GLuint l2height = logbase2(height);
-      GLuint l2depth = logbase2(depth);
+      GLuint l2width = util_logbase2(width);
+      GLuint l2height = util_logbase2(height);
+      GLuint l2depth = util_logbase2(depth);
       lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth);
    }
 
-   if (stImage->base.IsCompressed)
-      comp_byte = compressed_num_bytes(stImage->base.TexFormat->MesaFormat);
-
    fmt = st_mesa_format_to_pipe_format(stImage->base.TexFormat->MesaFormat);
+
+   usage = default_usage(fmt);
+
    stObj->pt = st_texture_create(st,
                                  gl_target_to_pipe(stObj->base.Target),
                                  fmt,
@@ -317,11 +349,7 @@ guess_and_alloc_texture(struct st_context *st,
                                  width,
                                  height,
                                  depth,
-                                 comp_byte,
-                                 ( (pf_is_depth_stencil(fmt) ?
-                                   PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
-                                   PIPE_TEXTURE_USAGE_RENDER_TARGET) |
-                                   PIPE_TEXTURE_USAGE_SAMPLER ));
+                                 usage);
 
    DBG("%s - success\n", __FUNCTION__);
 }
@@ -365,6 +393,109 @@ strip_texture_border(GLint border,
 }
 
 
+/**
+ * Try to do texture compression via rendering.  If the Gallium driver
+ * can render into a compressed surface this will allow us to do texture
+ * compression.
+ * \return GL_TRUE for success, GL_FALSE for failure
+ */
+static GLboolean
+compress_with_blit(GLcontext * ctx,
+                   GLenum target, GLint level,
+                   GLint xoffset, GLint yoffset, GLint zoffset,
+                   GLint width, GLint height, GLint depth,
+                   GLenum format, GLenum type, const void *pixels,
+                   const struct gl_pixelstore_attrib *unpack,
+                   struct gl_texture_image *texImage)
+{
+   const GLuint dstImageOffsets[1] = {0};
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   struct pipe_screen *screen = ctx->st->pipe->screen;
+   const struct gl_texture_format *mesa_format;
+   struct pipe_texture templ;
+   struct pipe_texture *src_tex;
+   struct pipe_surface *dst_surface;
+   struct pipe_transfer *tex_xfer;
+   void *map;
+
+
+   if (!stImage->pt) {
+      /* XXX: Can this happen? Should we assert? */
+      return GL_FALSE;
+   }
+
+   /* get destination surface (in the compressed texture) */
+   dst_surface = screen->get_tex_surface(screen, stImage->pt,
+                                         stImage->face, stImage->level, 0,
+                                         PIPE_BUFFER_USAGE_GPU_WRITE);
+   if (!dst_surface) {
+      /* can't render into this format (or other problem) */
+      return GL_FALSE;
+   }
+
+   /* Choose format for the temporary RGBA texture image.
+    */
+   mesa_format = st_ChooseTextureFormat(ctx, GL_RGBA, format, type);
+   assert(mesa_format);
+   if (!mesa_format)
+      return GL_FALSE;
+
+   /* Create the temporary source texture
+    */
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D;
+   templ.format = st_mesa_format_to_pipe_format(mesa_format->MesaFormat);
+   pf_get_block(templ.format, &templ.block);
+   templ.width[0] = width;
+   templ.height[0] = height;
+   templ.depth[0] = 1;
+   templ.last_level = 0;
+   templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
+   src_tex = screen->texture_create(screen, &templ);
+
+   if (!src_tex)
+      return GL_FALSE;
+
+   /* Put user's tex data into the temporary texture
+    */
+   tex_xfer = st_cond_flush_get_tex_transfer(st_context(ctx), src_tex,
+                                            0, 0, 0, /* face, level are zero */
+                                            PIPE_TRANSFER_WRITE,
+                                            0, 0, width, height); /* x, y, w, h */
+   map = screen->transfer_map(screen, tex_xfer);
+
+   mesa_format->StoreImage(ctx, 2, GL_RGBA, mesa_format,
+                           map,              /* dest ptr */
+                           0, 0, 0,          /* dest x/y/z offset */
+                           tex_xfer->stride, /* dest row stride (bytes) */
+                           dstImageOffsets,  /* image offsets (for 3D only) */
+                           width, height, 1, /* size */
+                           format, type,     /* source format/type */
+                           pixels,           /* source data */
+                           unpack);          /* source data packing */
+
+   screen->transfer_unmap(screen, tex_xfer);
+   screen->tex_transfer_destroy(tex_xfer);
+
+   /* copy / compress image */
+   util_blit_pixels_tex(ctx->st->blit,
+                        src_tex,          /* pipe_texture (src) */
+                        0, 0,             /* src x0, y0 */
+                        width, height,    /* src x1, y1 */
+                        dst_surface,      /* pipe_surface (dst) */
+                        xoffset, yoffset, /* dst x0, y0 */
+                        xoffset + width,  /* dst x1 */
+                        yoffset + height, /* dst y1 */
+                        0.0,              /* z */
+                        PIPE_TEX_MIPFILTER_NEAREST);
+
+   pipe_surface_reference(&dst_surface, NULL);
+   pipe_texture_reference(&src_tex, NULL);
+
+   return GL_TRUE;
+}
+
+
 /**
  * Do glTexImage1/2/3D().
  */
@@ -379,22 +510,29 @@ st_TexImage(GLcontext * ctx,
             const struct gl_pixelstore_attrib *unpack,
             struct gl_texture_object *texObj,
             struct gl_texture_image *texImage,
-            GLsizei imageSize, int compressed)
+            GLsizei imageSize, GLboolean compressed_src)
 {
+   struct pipe_screen *screen = ctx->st->pipe->screen;
    struct st_texture_object *stObj = st_texture_object(texObj);
    struct st_texture_image *stImage = st_texture_image(texImage);
    GLint postConvWidth, postConvHeight;
    GLint texelBytes, sizeInBytes;
-   GLuint dstRowStride;
+   GLuint dstRowStride = 0;
    struct gl_pixelstore_attrib unpackNB;
+   enum pipe_transfer_usage transfer_usage = 0;
 
    DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
 
+   /* switch to "normal" */
+   if (stObj->surface_based) {
+      _mesa_clear_texture_object(ctx, texObj);
+      stObj->surface_based = GL_FALSE;
+   }
+
    /* gallium does not support texture borders, strip it off */
    if (border) {
-      strip_texture_border(border, &width, &height, &depth,
-                           unpack, &unpackNB);
+      strip_texture_border(border, &width, &height, &depth, unpack, &unpackNB);
       unpack = &unpackNB;
       texImage->Width = width;
       texImage->Height = height;
@@ -464,18 +602,18 @@ st_TexImage(GLcontext * ctx,
     * bmBufferData with NULL data to free the old block and avoid
     * waiting on any outstanding fences.
     */
-   if (stObj->pt &&
-       (stObj->teximage_realloc ||
-        (/*stObj->pt->first_level == level &&*/
-         stObj->pt->last_level == level &&
-         stObj->pt->target != PIPE_TEXTURE_CUBE &&
-         !st_texture_match_image(stObj->pt, &stImage->base,
-                                 stImage->face, stImage->level)))) {
-
-      DBG("release it\n");
-      pipe_texture_reference(&stObj->pt, NULL);
-      assert(!stObj->pt);
-      stObj->teximage_realloc = FALSE;
+   if (stObj->pt) {
+      if (stObj->teximage_realloc ||
+          level > (GLint) stObj->pt->last_level ||
+          (stObj->pt->last_level == level &&
+           stObj->pt->target != PIPE_TEXTURE_CUBE &&
+           !st_texture_match_image(stObj->pt, &stImage->base,
+                                   stImage->face, stImage->level))) {
+         DBG("release it\n");
+         pipe_texture_reference(&stObj->pt, NULL);
+         assert(!stObj->pt);
+         stObj->teximage_realloc = FALSE;
+      }
    }
 
    if (!stObj->pt) {
@@ -510,23 +648,52 @@ st_TexImage(GLcontext * ctx,
     * the expectation that the texture will be set up but nothing
     * more will be done.  This is where those calls return:
     */
-   if (compressed) {
+   if (compressed_src) {
       pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, pixels,
                                                      unpack,
                                                      "glCompressedTexImage");
-   } else {
+   }
+   else {
       pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1,
                                           format, type,
                                           pixels, unpack, "glTexImage");
    }
-   if (!pixels)
-      return;
+
+   /* Note: we can't check for pixels==NULL until after we've allocated
+    * memory for the texture.
+    */
+
+   /* See if we can do texture compression with a blit/render.
+    */
+   if (!compressed_src &&
+       !ctx->Mesa_DXTn &&
+       is_compressed_mesa_format(texImage->TexFormat) &&
+       screen->is_format_supported(screen,
+                                   stImage->pt->format,
+                                   stImage->pt->target,
+                                   PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
+      if (!pixels)
+         goto done;
+
+      if (compress_with_blit(ctx, target, level, 0, 0, 0, width, height, depth,
+                             format, type, pixels, unpack, texImage)) {
+         goto done;
+      }
+   }
 
    if (stImage->pt) {
+      if (format == GL_DEPTH_COMPONENT &&
+          pf_is_depth_and_stencil(stImage->pt->format))
+         transfer_usage = PIPE_TRANSFER_READ_WRITE;
+      else
+         transfer_usage = PIPE_TRANSFER_WRITE;
+
       texImage->Data = st_texture_image_map(ctx->st, stImage, 0,
-                                            PIPE_BUFFER_USAGE_CPU_WRITE);
-      if (stImage->surface)
-         dstRowStride = stImage->surface->stride;
+                                            transfer_usage, 0, 0,
+                                            stImage->base.Width,
+                                            stImage->base.Height);
+      if(stImage->transfer)
+         dstRowStride = stImage->transfer->stride;
    }
    else {
       /* Allocate regular memory and store the image there temporarily.   */
@@ -541,7 +708,7 @@ st_TexImage(GLcontext * ctx,
          sizeInBytes = depth * dstRowStride * postConvHeight;
       }
 
-      texImage->Data = malloc(sizeInBytes);
+      texImage->Data = _mesa_align_malloc(sizeInBytes, 16);
    }
 
    if (!texImage->Data) {
@@ -549,6 +716,9 @@ st_TexImage(GLcontext * ctx,
       return;
    }
 
+   if (!pixels)
+      goto done;
+
    DBG("Upload image %dx%dx%d row_len %x pitch %x\n",
        width, height, depth, width * texelBytes, dstRowStride);
 
@@ -556,16 +726,16 @@ st_TexImage(GLcontext * ctx,
     * the blitter to copy.  Or, use the hardware to do the format
     * conversion and copy:
     */
-   if (compressed) {
+   if (compressed_src) {
       memcpy(texImage->Data, pixels, imageSize);
    }
    else {
-      GLuint srcImageStride = _mesa_image_image_stride(unpack, width, height,
-                                                      format, type);
-      int i;
+      const GLuint srcImageStride =
+         _mesa_image_image_stride(unpack, width, height, format, type);
+      GLint i;
       const GLubyte *src = (const GLubyte *) pixels;
 
-      for (i = 0; i++ < depth;) {
+      for (i = 0; i < depth; i++) {
         if (!texImage->TexFormat->StoreImage(ctx, dims, 
                                              texImage->_BaseFormat, 
                                              texImage->TexFormat, 
@@ -578,25 +748,26 @@ st_TexImage(GLcontext * ctx,
            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
         }
 
-        if (stImage->pt && i < depth) {
+        if (stImage->pt && i + 1 < depth) {
+            /* unmap this slice */
            st_texture_image_unmap(ctx->st, stImage);
-           texImage->Data = st_texture_image_map(ctx->st, stImage, i,
-                                                  PIPE_BUFFER_USAGE_CPU_WRITE);
+            /* map next slice of 3D texture */
+           texImage->Data = st_texture_image_map(ctx->st, stImage, i + 1,
+                                                  transfer_usage, 0, 0,
+                                                  stImage->base.Width,
+                                                  stImage->base.Height);
            src += srcImageStride;
         }
       }
    }
 
+done:
    _mesa_unmap_teximage_pbo(ctx, unpack);
 
-   if (stImage->pt) {
+   if (stImage->pt && texImage->Data) {
       st_texture_image_unmap(ctx->st, stImage);
       texImage->Data = NULL;
    }
-
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      ctx->Driver.GenerateMipmap(ctx, target, texObj);
-   }
 }
 
 
@@ -611,9 +782,9 @@ st_TexImage3D(GLcontext * ctx,
               struct gl_texture_object *texObj,
               struct gl_texture_image *texImage)
 {
-   st_TexImage(ctx, 3, target, level,
-                 internalFormat, width, height, depth, border,
-                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+   st_TexImage(ctx, 3, target, level, internalFormat, width, height, depth,
+               border, format, type, pixels, unpack, texObj, texImage,
+               0, GL_FALSE);
 }
 
 
@@ -627,9 +798,8 @@ st_TexImage2D(GLcontext * ctx,
               struct gl_texture_object *texObj,
               struct gl_texture_image *texImage)
 {
-   st_TexImage(ctx, 2, target, level,
-                 internalFormat, width, height, 1, border,
-                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+   st_TexImage(ctx, 2, target, level, internalFormat, width, height, 1, border,
+               format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
 }
 
 
@@ -643,9 +813,8 @@ st_TexImage1D(GLcontext * ctx,
               struct gl_texture_object *texObj,
               struct gl_texture_image *texImage)
 {
-   st_TexImage(ctx, 1, target, level,
-                 internalFormat, width, 1, 1, border,
-                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+   st_TexImage(ctx, 1, target, level, internalFormat, width, 1, 1, border,
+               format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
 }
 
 
@@ -657,12 +826,96 @@ st_CompressedTexImage2D(GLcontext *ctx, GLenum target, GLint level,
                         struct gl_texture_object *texObj,
                         struct gl_texture_image *texImage)
 {
-   st_TexImage(ctx, 2, target, level,
-                internalFormat, width, height, 1, border,
-                0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1);
+   st_TexImage(ctx, 2, target, level, internalFormat, width, height, 1, border,
+               0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE);
 }
 
 
+
+/**
+ * glGetTexImage() helper: decompress a compressed texture by rendering
+ * a textured quad.  Store the results in the user's buffer.
+ */
+static void
+decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
+                     GLenum format, GLenum type, GLvoid *pixels,
+                     struct gl_texture_object *texObj,
+                     struct gl_texture_image *texImage)
+{
+   struct pipe_screen *screen = ctx->st->pipe->screen;
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   const GLuint width = texImage->Width;
+   const GLuint height = texImage->Height;
+   struct pipe_surface *dst_surface;
+   struct pipe_texture *dst_texture;
+   struct pipe_transfer *tex_xfer;
+
+   /* create temp / dest surface */
+   if (!util_create_rgba_surface(screen, width, height,
+                                 &dst_texture, &dst_surface)) {
+      _mesa_problem(ctx, "util_create_rgba_surface() failed "
+                    "in decompress_with_blit()");
+      return;
+   }
+
+   /* blit/render/decompress */
+   util_blit_pixels_tex(ctx->st->blit,
+                        stImage->pt,      /* pipe_texture (src) */
+                        0, 0,             /* src x0, y0 */
+                        width, height,    /* src x1, y1 */
+                        dst_surface,      /* pipe_surface (dst) */
+                        0, 0,             /* dst x0, y0 */
+                        width, height,    /* dst x1, y1 */
+                        0.0,              /* z */
+                        PIPE_TEX_MIPFILTER_NEAREST);
+
+   /* map the dst_surface so we can read from it */
+   tex_xfer = st_cond_flush_get_tex_transfer(st_context(ctx),
+                                            dst_texture, 0, 0, 0,
+                                            PIPE_TRANSFER_READ,
+                                            0, 0, width, height);
+
+   pixels = _mesa_map_pbo_dest(ctx, &ctx->Pack, pixels);
+
+   /* copy/pack data into user buffer */
+   if (st_equal_formats(stImage->pt->format, format, type)) {
+      /* memcpy */
+      const uint bytesPerRow = width * pf_get_size(stImage->pt->format);
+      ubyte *map = screen->transfer_map(screen, tex_xfer);
+      GLuint row;
+      for (row = 0; row < height; row++) {
+         GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width,
+                                              height, format, type, row, 0);
+         memcpy(dest, map, bytesPerRow);
+         map += tex_xfer->stride;
+      }
+      screen->transfer_unmap(screen, tex_xfer);
+   }
+   else {
+      /* format translation via floats */
+      GLuint row;
+      for (row = 0; row < height; row++) {
+         const GLbitfield transferOps = 0x0; /* bypassed for glGetTexImage() */
+         GLfloat rgba[4 * MAX_WIDTH];
+         GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width,
+                                              height, format, type, row, 0);
+
+         /* get float[4] rgba row from surface */
+         pipe_get_tile_rgba(tex_xfer, 0, row, width, 1, rgba);
+
+         _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
+                                    type, dest, &ctx->Pack, transferOps);
+      }
+   }
+
+   _mesa_unmap_pbo_dest(ctx, &ctx->Pack);
+
+   /* destroy the temp / dest surface */
+   util_destroy_rgba_surface(dst_texture, dst_surface);
+}
+
+
+
 /**
  * Need to map texture image into memory before copying image data,
  * then unmap it.
@@ -671,25 +924,42 @@ static void
 st_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
                  GLenum format, GLenum type, GLvoid * pixels,
                  struct gl_texture_object *texObj,
-                 struct gl_texture_image *texImage, int compressed)
+                 struct gl_texture_image *texImage, GLboolean compressed_dst)
 {
    struct st_texture_image *stImage = st_texture_image(texImage);
-   GLuint dstImageStride = _mesa_image_image_stride(&ctx->Pack,
-                                                    texImage->Width,
-                                                   texImage->Height,
-                                                    format, type);
-   GLuint depth;
-   GLuint i;
+   const GLuint dstImageStride =
+      _mesa_image_image_stride(&ctx->Pack, texImage->Width, texImage->Height,
+                               format, type);
+   GLuint depth, i;
    GLubyte *dest;
 
+   if (stImage->pt &&
+       pf_is_compressed(stImage->pt->format) &&
+       !compressed_dst) {
+      /* Need to decompress the texture.
+       * We'll do this by rendering a textured quad.
+       * Note that we only expect RGBA formats (no Z/depth formats).
+       */
+      decompress_with_blit(ctx, target, level, format, type, pixels,
+                           texObj, texImage);
+      return;
+   }
+
    /* Map */
    if (stImage->pt) {
       /* Image is stored in hardware format in a buffer managed by the
        * kernel.  Need to explicitly map and unmap it.
        */
+      unsigned face = _mesa_tex_target_to_face(target);
+
+      st_teximage_flush_before_map(ctx->st, stImage->pt, face, level,
+                                  PIPE_TRANSFER_READ);
+
       texImage->Data = st_texture_image_map(ctx->st, stImage, 0,
-                                            PIPE_BUFFER_USAGE_CPU_READ);
-      texImage->RowStride = stImage->surface->stride / stImage->pt->block.size;
+                                            PIPE_TRANSFER_READ, 0, 0,
+                                            stImage->base.Width,
+                                            stImage->base.Height);
+      texImage->RowStride = stImage->transfer->stride / stImage->pt->block.size;
    }
    else {
       /* Otherwise, the image should actually be stored in
@@ -708,19 +978,24 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
 
    dest = (GLubyte *) pixels;
 
-   for (i = 0; i++ < depth;) {
-      if (compressed) {
+   for (i = 0; i < depth; i++) {
+      if (compressed_dst) {
         _mesa_get_compressed_teximage(ctx, target, level, dest,
                                       texObj, texImage);
-      } else {
+      }
+      else {
         _mesa_get_teximage(ctx, target, level, format, type, dest,
                            texObj, texImage);
       }
 
-      if (stImage->pt && i < depth) {
+      if (stImage->pt && i + 1 < depth) {
+         /* unmap this slice */
         st_texture_image_unmap(ctx->st, stImage);
-        texImage->Data = st_texture_image_map(ctx->st, stImage, i,
-                                               PIPE_BUFFER_USAGE_CPU_READ);
+         /* map next slice of 3D texture */
+        texImage->Data = st_texture_image_map(ctx->st, stImage, i + 1,
+                                               PIPE_TRANSFER_READ, 0, 0,
+                                               stImage->base.Width,
+                                               stImage->base.Height);
         dest += dstImageStride;
       }
    }
@@ -741,28 +1016,25 @@ st_GetTexImage(GLcontext * ctx, GLenum target, GLint level,
                struct gl_texture_object *texObj,
                struct gl_texture_image *texImage)
 {
-   st_get_tex_image(ctx, target, level, format, type, pixels,
-                    texObj, texImage, 0);
+   st_get_tex_image(ctx, target, level, format, type, pixels, texObj, texImage,
+                    GL_FALSE);
 }
 
 
 static void
 st_GetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
                          GLvoid *pixels,
-                         const struct gl_texture_object *texObj,
-                         const struct gl_texture_image *texImage)
+                         struct gl_texture_object *texObj,
+                         struct gl_texture_image *texImage)
 {
-   st_get_tex_image(ctx, target, level, 0, 0, pixels,
-                    (struct gl_texture_object *) texObj,
-                    (struct gl_texture_image *) texImage, 1);
+   st_get_tex_image(ctx, target, level, 0, 0, pixels, texObj, texImage,
+                    GL_TRUE);
 }
 
 
 
 static void
-st_TexSubimage(GLcontext * ctx,
-               GLint dims,
-               GLenum target, GLint level,
+st_TexSubimage(GLcontext *ctx, GLint dims, GLenum target, GLint level,
                GLint xoffset, GLint yoffset, GLint zoffset,
                GLint width, GLint height, GLint depth,
                GLenum format, GLenum type, const void *pixels,
@@ -770,12 +1042,15 @@ st_TexSubimage(GLcontext * ctx,
                struct gl_texture_object *texObj,
                struct gl_texture_image *texImage)
 {
+   struct pipe_screen *screen = ctx->st->pipe->screen;
    struct st_texture_image *stImage = st_texture_image(texImage);
    GLuint dstRowStride;
-   GLuint srcImageStride = _mesa_image_image_stride(packing, width, height,
-                                                   format, type);
-   int i;
+   const GLuint srcImageStride =
+      _mesa_image_image_stride(packing, width, height, format, type);
+   GLint i;
    const GLubyte *src;
+   /* init to silence warning only: */
+   enum pipe_transfer_usage transfer_usage = PIPE_TRANSFER_WRITE;
 
    DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target),
@@ -787,28 +1062,55 @@ st_TexSubimage(GLcontext * ctx,
    if (!pixels)
       return;
 
+   /* See if we can do texture compression with a blit/render.
+    */
+   if (!ctx->Mesa_DXTn &&
+       is_compressed_mesa_format(texImage->TexFormat) &&
+       screen->is_format_supported(screen,
+                                   stImage->pt->format,
+                                   stImage->pt->target,
+                                   PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
+      if (compress_with_blit(ctx, target, level,
+                             xoffset, yoffset, zoffset,
+                             width, height, depth,
+                             format, type, pixels, packing, texImage)) {
+         goto done;
+      }
+   }
+
    /* Map buffer if necessary.  Need to lock to prevent other contexts
     * from uploading the buffer under us.
     */
    if (stImage->pt) {
+      unsigned face = _mesa_tex_target_to_face(target);
+
+      if (format == GL_DEPTH_COMPONENT &&
+          pf_is_depth_and_stencil(stImage->pt->format))
+         transfer_usage = PIPE_TRANSFER_READ_WRITE;
+      else
+         transfer_usage = PIPE_TRANSFER_WRITE;
+
+      st_teximage_flush_before_map(ctx->st, stImage->pt, face, level,
+                                  transfer_usage);
       texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset, 
-                                            PIPE_BUFFER_USAGE_CPU_WRITE);
-      if (stImage->surface)
-         dstRowStride = stImage->surface->stride;
+                                            transfer_usage,
+                                            xoffset, yoffset,
+                                            width, height);
    }
 
    if (!texImage->Data) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
-      return;
+      goto done;
    }
 
    src = (const GLubyte *) pixels;
+   dstRowStride = stImage->transfer->stride;
 
-   for (i = 0; i++ < depth;) {
+   for (i = 0; i < depth; i++) {
       if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
                                           texImage->TexFormat,
                                           texImage->Data,
-                                          xoffset, yoffset, 0,
+                                          0, 0, 0,
                                           dstRowStride,
                                           texImage->ImageOffsets,
                                           width, height, 1,
@@ -816,19 +1118,20 @@ st_TexSubimage(GLcontext * ctx,
         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
       }
 
-      if (stImage->pt && i < depth) {
-         /* map next slice of 3D texture */
+      if (stImage->pt && i + 1 < depth) {
+         /* unmap this slice */
         st_texture_image_unmap(ctx->st, stImage);
-        texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset + i,
-                                               PIPE_BUFFER_USAGE_CPU_WRITE);
+         /* map next slice of 3D texture */
+        texImage->Data = st_texture_image_map(ctx->st, stImage,
+                                               zoffset + i + 1,
+                                               transfer_usage,
+                                               xoffset, yoffset,
+                                               width, height);
         src += srcImageStride;
       }
    }
 
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      ctx->Driver.GenerateMipmap(ctx, target, texObj);
-   }
-
+done:
    _mesa_unmap_teximage_pbo(ctx, packing);
 
    if (stImage->pt) {
@@ -840,91 +1143,142 @@ st_TexSubimage(GLcontext * ctx,
 
 
 static void
-st_TexSubImage3D(GLcontext * ctx,
-                   GLenum target,
-                   GLint level,
-                   GLint xoffset, GLint yoffset, GLint zoffset,
-                   GLsizei width, GLsizei height, GLsizei depth,
-                   GLenum format, GLenum type,
-                   const GLvoid * pixels,
-                   const struct gl_pixelstore_attrib *packing,
-                   struct gl_texture_object *texObj,
-                   struct gl_texture_image *texImage)
+st_TexSubImage3D(GLcontext *ctx, GLenum target, GLint level,
+                 GLint xoffset, GLint yoffset, GLint zoffset,
+                 GLsizei width, GLsizei height, GLsizei depth,
+                 GLenum format, GLenum type, const GLvoid *pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
 {
-   st_TexSubimage(ctx, 3, target, level,
-                  xoffset, yoffset, zoffset,
-                  width, height, depth,
-                  format, type, pixels, packing, texObj, texImage);
+   st_TexSubimage(ctx, 3, target, level, xoffset, yoffset, zoffset,
+                  width, height, depth, format, type,
+                  pixels, packing, texObj, texImage);
 }
 
 
 static void
-st_TexSubImage2D(GLcontext * ctx,
-                   GLenum target,
-                   GLint level,
-                   GLint xoffset, GLint yoffset,
-                   GLsizei width, GLsizei height,
-                   GLenum format, GLenum type,
-                   const GLvoid * pixels,
-                   const struct gl_pixelstore_attrib *packing,
-                   struct gl_texture_object *texObj,
-                   struct gl_texture_image *texImage)
+st_TexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                 GLint xoffset, GLint yoffset,
+                 GLsizei width, GLsizei height,
+                 GLenum format, GLenum type, const GLvoid * pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
 {
-   st_TexSubimage(ctx, 2, target, level,
-                  xoffset, yoffset, 0,
-                  width, height, 1,
-                  format, type, pixels, packing, texObj, texImage);
+   st_TexSubimage(ctx, 2, target, level, xoffset, yoffset, 0,
+                  width, height, 1, format, type,
+                  pixels, packing, texObj, texImage);
 }
 
 
 static void
-st_TexSubImage1D(GLcontext * ctx,
-                   GLenum target,
-                   GLint level,
-                   GLint xoffset,
-                   GLsizei width,
-                   GLenum format, GLenum type,
-                   const GLvoid * pixels,
-                   const struct gl_pixelstore_attrib *packing,
-                   struct gl_texture_object *texObj,
-                   struct gl_texture_image *texImage)
+st_TexSubImage1D(GLcontext *ctx, GLenum target, GLint level,
+                 GLint xoffset, GLsizei width, GLenum format, GLenum type,
+                 const GLvoid * pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
 {
-   st_TexSubimage(ctx, 1, target, level,
-                  xoffset, 0, 0,
-                  width, 1, 1,
+   st_TexSubimage(ctx, 1, target, level, xoffset, 0, 0, width, 1, 1,
                   format, type, pixels, packing, texObj, texImage);
 }
 
 
+static void
+st_CompressedTexSubImage1D(GLcontext *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLsizei width,
+                           GLenum format,
+                           GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
+{
+   assert(0);
+}
 
-/**
- * Return 0 for GL_TEXTURE_CUBE_MAP_POSITIVE_X,
- *        1 for GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
- *        etc.
- * XXX duplicated from main/teximage.c
- */
-static uint
-texture_face(GLenum target)
+
+static void
+st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLint yoffset,
+                           GLsizei width, GLint height,
+                           GLenum format,
+                           GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
 {
-   if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB &&
-       target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB)
-      return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-   else
-      return 0;
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   struct pipe_format_block block;
+   int srcBlockStride;
+   int dstBlockStride;
+   int y;
+
+   if (stImage->pt) {
+      unsigned face = _mesa_tex_target_to_face(target);
+
+      st_teximage_flush_before_map(ctx->st, stImage->pt, face, level,
+                                  PIPE_TRANSFER_WRITE);
+      texImage->Data = st_texture_image_map(ctx->st, stImage, 0, 
+                                            PIPE_TRANSFER_WRITE,
+                                            xoffset, yoffset,
+                                            width, height);
+      
+      block = stImage->pt->block;
+      srcBlockStride = pf_get_stride(&block, width);
+      dstBlockStride = stImage->transfer->stride;
+   } else {
+      assert(stImage->pt);
+      /* TODO find good values for block and strides */
+      /* TODO also adjust texImage->data for yoffset/xoffset */
+      return;
+   }
+
+   if (!texImage->Data) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage");
+      return;
+   }
+
+   assert(xoffset % block.width == 0);
+   assert(yoffset % block.height == 0);
+   assert(width % block.width == 0);
+   assert(height % block.height == 0);
+
+   for (y = 0; y < height; y += block.height) {
+      /* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */
+      const char *src = (const char*)data + srcBlockStride * pf_get_nblocksy(&block, y);
+      char *dst = (char*)texImage->Data + dstBlockStride * pf_get_nblocksy(&block, y);
+      memcpy(dst, src, pf_get_stride(&block, width));
+   }
+
+   if (stImage->pt) {
+      st_texture_image_unmap(ctx->st, stImage);
+      texImage->Data = NULL;
+   }
+}
+
+
+static void
+st_CompressedTexSubImage3D(GLcontext *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLint yoffset, GLint zoffset,
+                           GLsizei width, GLint height, GLint depth,
+                           GLenum format,
+                           GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
+{
+   assert(0);
 }
 
 
 
 /**
- * Do a CopyTexSubImage operation by mapping the source surface and
- * dest surface and using get_tile()/put_tile() to access the pixels/texels.
+ * Do a CopyTexSubImage operation using a read transfer from the source,
+ * a write transfer to the destination and get_tile()/put_tile() to access
+ * the pixels/texels.
  *
  * Note: srcY=0=TOP of renderbuffer
  */
 static void
-fallback_copy_texsubimage(GLcontext *ctx,
-                          GLenum target,
-                          GLint level,
+fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level,
                           struct st_renderbuffer *strb,
                           struct st_texture_image *stImage,
                           GLenum baseFormat,
@@ -934,61 +1288,73 @@ fallback_copy_texsubimage(GLcontext *ctx,
 {
    struct pipe_context *pipe = ctx->st->pipe;
    struct pipe_screen *screen = pipe->screen;
-   const uint face = texture_face(target);
-   struct pipe_texture *pt = stImage->pt;
-   struct pipe_surface *src_surf, *dest_surf;
+   struct pipe_transfer *src_trans;
+   GLvoid *texDest;
+   enum pipe_transfer_usage transfer_usage;
 
-   /* We'd use strb->surface, here but it's created for GPU read/write only */
-   src_surf = pipe->screen->get_tex_surface( pipe->screen,
-                                             strb->texture,
-                                             0, 0, 0,
-                                             PIPE_BUFFER_USAGE_CPU_READ);
+   assert(width <= MAX_WIDTH);
 
-   dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ,
-                                       PIPE_BUFFER_USAGE_CPU_WRITE);
+   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+      srcY = strb->Base.Height - srcY - height;
+   }
 
-   assert(width <= MAX_WIDTH);
+   src_trans = st_cond_flush_get_tex_transfer( st_context(ctx),
+                                              strb->texture,
+                                              0, 0, 0,
+                                              PIPE_TRANSFER_READ,
+                                              srcX, srcY,
+                                              width, height);
+
+   if (baseFormat == GL_DEPTH_COMPONENT &&
+       pf_is_depth_and_stencil(stImage->pt->format))
+      transfer_usage = PIPE_TRANSFER_READ_WRITE;
+   else
+      transfer_usage = PIPE_TRANSFER_WRITE;
+
+   st_teximage_flush_before_map(ctx->st, stImage->pt, 0, 0,
+                               transfer_usage);
+
+   texDest = st_texture_image_map(ctx->st, stImage, 0, transfer_usage,
+                                  destX, destY, width, height);
 
-   if (baseFormat == GL_DEPTH_COMPONENT) {
+   if (baseFormat == GL_DEPTH_COMPONENT ||
+       baseFormat == GL_DEPTH24_STENCIL8) {
       const GLboolean scaleOrBias = (ctx->Pixel.DepthScale != 1.0F ||
                                      ctx->Pixel.DepthBias != 0.0F);
       GLint row, yStep;
 
       /* determine bottom-to-top vs. top-to-bottom order for src buffer */
       if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
-         srcY = strb->Base.Height - 1 - srcY;
+         srcY = height - 1;
          yStep = -1;
       }
       else {
+         srcY = 0;
          yStep = 1;
       }
 
       /* To avoid a large temp memory allocation, do copy row by row */
-      for (row = 0; row < height; row++, srcY += yStep, destY++) {
+      for (row = 0; row < height; row++, srcY += yStep) {
          uint data[MAX_WIDTH];
-         pipe_get_tile_z(src_surf, srcX, srcY, width, 1, data);
+         pipe_get_tile_z(src_trans, 0, srcY, width, 1, data);
          if (scaleOrBias) {
             _mesa_scale_and_bias_depth_uint(ctx, width, data);
          }
-         pipe_put_tile_z(dest_surf, destX, destY, width, 1, data);
+         pipe_put_tile_z(stImage->transfer, 0, row, width, 1, data);
       }
    }
    else {
       /* RGBA format */
       GLfloat *tempSrc =
          (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
-      GLvoid *texDest =
-         st_texture_image_map(ctx->st, stImage, 0,PIPE_BUFFER_USAGE_CPU_WRITE);
 
       if (tempSrc && texDest) {
          const GLint dims = 2;
+         const GLint dstRowStride = stImage->transfer->stride;
          struct gl_texture_image *texImage = &stImage->base;
-         GLint dstRowStride = stImage->surface->stride;
          struct gl_pixelstore_attrib unpack = ctx->DefaultPacking;
 
          if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
-            /* need to invert src */
-            srcY = strb->Base.Height - srcY - height;
             unpack.Invert = GL_TRUE;
          }
 
@@ -996,7 +1362,7 @@ fallback_copy_texsubimage(GLcontext *ctx,
          /* XXX this usually involves a lot of int/float conversion.
           * try to avoid that someday.
           */
-         pipe_get_tile_rgba(src_surf, srcX, srcY, width, height, tempSrc);
+         pipe_get_tile_rgba(src_trans, 0, 0, width, height, tempSrc);
 
          /* Store into texture memory.
           * Note that this does some special things such as pixel transfer
@@ -1008,7 +1374,7 @@ fallback_copy_texsubimage(GLcontext *ctx,
                                          texImage->_BaseFormat, 
                                          texImage->TexFormat, 
                                          texDest,
-                                         destX, destY, destZ,
+                                         0, 0, 0,
                                          dstRowStride,
                                          texImage->ImageOffsets,
                                          width, height, 1,
@@ -1021,15 +1387,43 @@ fallback_copy_texsubimage(GLcontext *ctx,
 
       if (tempSrc)
          _mesa_free(tempSrc);
-      if (texDest)
-         st_texture_image_unmap(ctx->st, stImage);
    }
 
-   screen->tex_surface_release(screen, &dest_surf);
-   screen->tex_surface_release(screen, &src_surf);
+   st_texture_image_unmap(ctx->st, stImage);
+   screen->tex_transfer_destroy(src_trans);
+}
+
+
+static unsigned
+compatible_src_dst_formats(const struct gl_renderbuffer *src,
+                           const struct gl_texture_image *dst)
+{
+   const GLenum srcFormat = src->_BaseFormat;
+   const GLenum dstLogicalFormat = dst->_BaseFormat;
+
+   if (srcFormat == dstLogicalFormat) {
+      /* This is the same as matching_base_formats, which should
+       * always pass, as it did previously.
+       */
+      return TGSI_WRITEMASK_XYZW;
+   }
+   else if (srcFormat == GL_RGBA &&
+            dstLogicalFormat == GL_RGB) {
+      /* Add a single special case to cope with RGBA->RGB transfers,
+       * setting A to 1.0 to cope with situations where the RGB
+       * destination is actually stored as RGBA.
+       */
+      return TGSI_WRITEMASK_XYZ; /* A ==> 1.0 */
+   }
+   else {
+      /* Otherwise fail.
+       */
+      return 0;
+   }
 }
 
 
+
 /**
  * Do a CopyTex[Sub]Image1/2/3D() using a hardware (blit) path if possible.
  * Note that the region to copy has already been clipped so we know we
@@ -1059,12 +1453,20 @@ st_copy_texsubimage(GLcontext *ctx,
    enum pipe_format dest_format, src_format;
    GLboolean use_fallback = GL_TRUE;
    GLboolean matching_base_formats;
+   GLuint format_writemask;
+   struct pipe_surface *dest_surface = NULL;
+   GLboolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP);
+
+   /* any rendering in progress must flushed before we grab the fb image */
+   st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);
 
-   /* any rendering in progress must complete before we grab the fb image */
-   st_finish(ctx->st);
+   /* make sure finalize_textures has been called? 
+    */
+   if (0) st_validate_state(ctx->st);
 
    /* determine if copying depth or color data */
-   if (texBaseFormat == GL_DEPTH_COMPONENT) {
+   if (texBaseFormat == GL_DEPTH_COMPONENT ||
+       texBaseFormat == GL_DEPTH24_STENCIL8) {
       strb = st_renderbuffer(fb->_DepthBuffer);
    }
    else if (texBaseFormat == GL_DEPTH_STENCIL_EXT) {
@@ -1075,6 +1477,39 @@ st_copy_texsubimage(GLcontext *ctx,
       strb = st_renderbuffer(fb->_ColorReadBuffer);
    }
 
+   if (!strb || !strb->surface || !stImage->pt) {
+      debug_printf("%s: null strb or stImage\n", __FUNCTION__);
+      return;
+   }
+
+   if (srcX < 0) {
+      width -= -srcX;
+      destX += -srcX;
+      srcX = 0;
+   }
+
+   if (srcY < 0) {
+      height -= -srcY;
+      destY += -srcY;
+      srcY = 0;
+   }
+
+   if (destX < 0) {
+      width -= -destX;
+      srcX += -destX;
+      destX = 0;
+   }
+
+   if (destY < 0) {
+      height -= -destY;
+      srcY += -destY;
+      destY = 0;
+   }
+
+   if (width < 0 || height < 0)
+      return;
+
+
    assert(strb);
    assert(strb->surface);
    assert(stImage->pt);
@@ -1091,25 +1526,23 @@ st_copy_texsubimage(GLcontext *ctx,
     * textured-quad paths.
     */
    matching_base_formats = (strb->Base._BaseFormat == texImage->_BaseFormat);
+   format_writemask = compatible_src_dst_formats(&strb->Base, texImage);
 
-   if (matching_base_formats && ctx->_ImageTransferState == 0x0) {
-      /* try potential hardware path */
-      struct pipe_surface *dest_surface = NULL;
+   if (ctx->_ImageTransferState == 0x0) {
 
-      if (src_format == dest_format) {
+      if (matching_base_formats && 
+          src_format == dest_format &&
+          !do_flip) 
+      {
          /* use surface_copy() / blit */
-         boolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP);
 
          dest_surface = screen->get_tex_surface(screen, stImage->pt,
                                                 stImage->face, stImage->level,
                                                 destZ,
                                                 PIPE_BUFFER_USAGE_GPU_WRITE);
-         if (do_flip)
-            srcY = strb->surface->height - srcY - height;
 
          /* for surface_copy(), y=0=top, always */
          pipe->surface_copy(pipe,
-                            do_flip,
                             /* dest */
                             dest_surface,
                             destX, destY,
@@ -1120,7 +1553,8 @@ st_copy_texsubimage(GLcontext *ctx,
                             width, height);
          use_fallback = GL_FALSE;
       }
-      else if (screen->is_format_supported(screen, src_format,
+      else if (format_writemask &&
+               screen->is_format_supported(screen, src_format,
                                            PIPE_TEXTURE_2D, 
                                            PIPE_TEXTURE_USAGE_SAMPLER,
                                            0) &&
@@ -1129,8 +1563,7 @@ st_copy_texsubimage(GLcontext *ctx,
                                            PIPE_TEXTURE_USAGE_RENDER_TARGET,
                                            0)) {
          /* draw textured quad to do the copy */
-         boolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP);
-         int srcY0, srcY1;
+         GLint srcY0, srcY1;
 
          dest_surface = screen->get_tex_surface(screen, stImage->pt,
                                                 stImage->face, stImage->level,
@@ -1145,14 +1578,15 @@ st_copy_texsubimage(GLcontext *ctx,
             srcY0 = srcY;
             srcY1 = srcY0 + height;
          }
-         util_blit_pixels(ctx->st->blit,
-                          strb->surface,
-                          srcX, srcY0,
-                          srcX + width, srcY1,
-                          dest_surface,
-                          destX, destY,
-                          destX + width, destY + height,
-                          0.0, PIPE_TEX_MIPFILTER_NEAREST);
+         util_blit_pixels_writemask(ctx->st->blit,
+                                    strb->surface,
+                                    srcX, srcY0,
+                                    srcX + width, srcY1,
+                                    dest_surface,
+                                    destX, destY,
+                                    destX + width, destY + height,
+                                    0.0, PIPE_TEX_MIPFILTER_NEAREST,
+                                    format_writemask);
          use_fallback = GL_FALSE;
       }
 
@@ -1167,10 +1601,6 @@ st_copy_texsubimage(GLcontext *ctx,
                                 destX, destY, destZ,
                                 srcX, srcY, width, height);
    }
-
-   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-      ctx->Driver.GenerateMipmap(ctx, target, texObj);
-   }
 }
 
 
@@ -1187,11 +1617,6 @@ st_CopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
    struct gl_texture_image *texImage =
       _mesa_select_tex_image(ctx, texObj, target, level);
 
-#if 0
-   if (border)
-      goto fail;
-#endif
-
    /* Setup or redefine the texture object, texture and texture
     * image.  Don't populate yet.  
     */
@@ -1282,8 +1707,8 @@ calculate_first_last_level(struct st_texture_object *stObj)
     * and having firstLevel and lastLevel as signed prevents the need for
     * extra sign checks.
     */
-   int firstLevel;
-   int lastLevel;
+   GLint firstLevel;
+   GLint lastLevel;
 
    /* Yes, this looks overly complicated, but it's all needed.
     */
@@ -1337,16 +1762,21 @@ copy_image_data_to_texture(struct st_context *st,
 
       /* More straightforward upload.  
        */
-      st_texture_image_data(st->pipe,
-                               stObj->pt,
-                               stImage->face,
-                               dstLevel,
-                               stImage->base.Data,
-                               stImage->base.RowStride * 
-                               stObj->pt->block.size,
-                               stImage->base.RowStride *
-                               stImage->base.Height *
-                               stObj->pt->block.size);
+
+      st_teximage_flush_before_map(st, stObj->pt, stImage->face, dstLevel,
+                                  PIPE_TRANSFER_WRITE);
+
+
+      st_texture_image_data(st,
+                            stObj->pt,
+                            stImage->face,
+                            dstLevel,
+                            stImage->base.Data,
+                            stImage->base.RowStride * 
+                            stObj->pt->block.size,
+                            stImage->base.RowStride *
+                            stImage->base.Height *
+                            stObj->pt->block.size);
       _mesa_align_free(stImage->base.Data);
       stImage->base.Data = NULL;
    }
@@ -1368,9 +1798,7 @@ st_finalize_texture(GLcontext *ctx,
 {
    struct st_texture_object *stObj = st_texture_object(tObj);
    const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-   int comp_byte = 0;
-   int cpp;
-   GLuint face;
+   GLuint cpp, face;
    struct st_texture_image *firstImage;
 
    *needFlush = GL_FALSE;
@@ -1392,14 +1820,12 @@ st_finalize_texture(GLcontext *ctx,
    if (firstImage->pt &&
        firstImage->pt != stObj->pt &&
        firstImage->pt->last_level >= stObj->lastLevel) {
-
       pipe_texture_reference(&stObj->pt, firstImage->pt);
    }
 
    /* FIXME: determine format block instead of cpp */
    if (firstImage->base.IsCompressed) {
-      comp_byte = compressed_num_bytes(firstImage->base.TexFormat->MesaFormat);
-      cpp = comp_byte;
+      cpp = compressed_num_bytes(firstImage->base.TexFormat->MesaFormat);
    }
    else {
       cpp = firstImage->base.TexFormat->TexelBytes;
@@ -1417,11 +1843,10 @@ st_finalize_texture(GLcontext *ctx,
           stObj->pt->width[0] != firstImage->base.Width2 ||
           stObj->pt->height[0] != firstImage->base.Height2 ||
           stObj->pt->depth[0] != firstImage->base.Depth2 ||
-          stObj->pt->block.size != cpp ||
-          stObj->pt->block.width != 1 ||
-          stObj->pt->block.height != 1 ||
-          stObj->pt->compressed != firstImage->base.IsCompressed) {
-         pipe_texture_release(&stObj->pt);
+          /* Nominal bytes per pixel: */
+          stObj->pt->block.size / stObj->pt->block.width != cpp)
+      {
+         pipe_texture_reference(&stObj->pt, NULL);
          ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER;
       }
    }
@@ -1431,6 +1856,8 @@ st_finalize_texture(GLcontext *ctx,
    if (!stObj->pt) {
       const enum pipe_format fmt =
          st_mesa_format_to_pipe_format(firstImage->base.TexFormat->MesaFormat);
+      GLuint usage = default_usage(fmt);
+
       stObj->pt = st_texture_create(ctx->st,
                                     gl_target_to_pipe(stObj->base.Target),
                                     fmt,
@@ -1438,11 +1865,7 @@ st_finalize_texture(GLcontext *ctx,
                                     firstImage->base.Width2,
                                     firstImage->base.Height2,
                                     firstImage->base.Depth2,
-                                    comp_byte,
-                                    ( (pf_is_depth_stencil(fmt) ?
-                                      PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
-                                      PIPE_TEXTURE_USAGE_RENDER_TARGET) |
-                                      PIPE_TEXTURE_USAGE_SAMPLER ));
+                                    usage);
 
       if (!stObj->pt) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
@@ -1484,9 +1907,19 @@ st_get_default_texture(struct st_context *st)
       GLubyte pixels[16][16][4];
       struct gl_texture_object *texObj;
       struct gl_texture_image *texImg;
+      GLuint i, j;
 
-      /* init image to gray */
-      memset(pixels, 127, sizeof(pixels));
+      /* The ARB_fragment_program spec says (0,0,0,1) should be returned
+       * when attempting to sample incomplete textures.
+       */
+      for (i = 0; i < 16; i++) {
+         for (j = 0; j < 16; j++) {
+            pixels[i][j][0] = 0;
+            pixels[i][j][1] = 0;
+            pixels[i][j][2] = 0;
+            pixels[i][j][3] = 255;
+         }
+      }
 
       texObj = st->ctx->Driver.NewTextureObject(st->ctx, 0, target);
 
@@ -1524,6 +1957,9 @@ st_init_texture_functions(struct dd_function_table *functions)
    functions->TexSubImage1D = st_TexSubImage1D;
    functions->TexSubImage2D = st_TexSubImage2D;
    functions->TexSubImage3D = st_TexSubImage3D;
+   functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D;
+   functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D;
+   functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D;
    functions->CopyTexImage1D = st_CopyTexImage1D;
    functions->CopyTexImage2D = st_CopyTexImage2D;
    functions->CopyTexSubImage1D = st_CopyTexSubImage1D;