gallium: u_blitter add overlapping blit support.
[mesa.git] / src / gallium / auxiliary / util / u_blitter.c
index e51a5dfc905d8c67107bb4b0860bcae3144ba961..46c297993fe7bf26dbc07c07205b39edd0f9f12a 100644 (file)
@@ -38,6 +38,7 @@
 #include "pipe/p_shader_tokens.h"
 #include "pipe/p_state.h"
 
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_blitter.h"
@@ -47,6 +48,8 @@
 #include "util/u_simple_shaders.h"
 #include "util/u_texture.h"
 
+#define INVALID_PTR ((void*)~0)
+
 struct blitter_context_priv
 {
    struct blitter_context blitter;
@@ -56,16 +59,26 @@ struct blitter_context_priv
 
    float vertices[4][2][4];   /**< {pos, color} or {pos, texcoord} */
 
+   /* Templates for various state objects. */
+   struct pipe_depth_stencil_alpha_state template_dsa;
+   struct pipe_sampler_state template_sampler_state;
+
    /* Constant state objects. */
    /* Vertex shaders. */
    void *vs_col; /**< Vertex shader which passes {pos, color} to the output */
    void *vs_tex; /**<Vertex shader which passes {pos, texcoord} to the output.*/
 
    /* Fragment shaders. */
-   void *fs_col[8];     /**< FS which outputs colors to 1-8 color buffers */
-   void *fs_texfetch_col[4];   /**< FS which outputs a color from a texture */
-   void *fs_texfetch_depth[4]; /**< FS which outputs a depth from a texture,
-                              where the index is PIPE_TEXTURE_* to be sampled */
+   /* FS which outputs a color to multiple color buffers. */
+   void *fs_col[PIPE_MAX_COLOR_BUFS];
+
+   /* FS which outputs a color from a texture,
+      where the index is PIPE_TEXTURE_* to be sampled. */
+   void *fs_texfetch_col[PIPE_MAX_TEXTURE_TYPES];
+
+   /* FS which outputs a depth from a texture,
+      where the index is PIPE_TEXTURE_* to be sampled. */
+   void *fs_texfetch_depth[PIPE_MAX_TEXTURE_TYPES];
 
    /* Blend state. */
    void *blend_write_color;   /**< blend state with writemask of RGBA */
@@ -76,19 +89,21 @@ struct blitter_context_priv
    void *dsa_write_depth_keep_stencil;
    void *dsa_keep_depth_stencil;
 
-   /* Other state. */
-   void *sampler_state[16];   /**< sampler state for clamping to a miplevel */
-   void *rs_state;            /**< rasterizer state */
+   /* Sampler state for clamping to a miplevel. */
+   void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
+
+   /* Rasterizer state. */
+   void *rs_state;
 };
 
 struct blitter_context *util_blitter_create(struct pipe_context *pipe)
 {
    struct blitter_context_priv *ctx;
    struct pipe_blend_state blend;
-   struct pipe_depth_stencil_alpha_state dsa;
+   struct pipe_depth_stencil_alpha_state *dsa;
    struct pipe_rasterizer_state rs_state;
-   struct pipe_sampler_state sampler_state;
-   unsigned i, max_render_targets;
+   struct pipe_sampler_state *sampler_state;
+   unsigned i;
 
    ctx = CALLOC_STRUCT(blitter_context_priv);
    if (!ctx)
@@ -97,6 +112,11 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->pipe = pipe;
 
    /* init state objects for them to be considered invalid */
+   ctx->blitter.saved_blend_state = INVALID_PTR;
+   ctx->blitter.saved_dsa_state = INVALID_PTR;
+   ctx->blitter.saved_rs_state = INVALID_PTR;
+   ctx->blitter.saved_fs = INVALID_PTR;
+   ctx->blitter.saved_vs = INVALID_PTR;
    ctx->blitter.saved_fb_state.nr_cbufs = ~0;
    ctx->blitter.saved_num_textures = ~0;
    ctx->blitter.saved_num_sampler_states = ~0;
@@ -109,46 +129,33 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->blend_write_color = pipe->create_blend_state(pipe, &blend);
 
    /* depth stencil alpha state objects */
-   memset(&dsa, 0, sizeof(dsa));
+   dsa = &ctx->template_dsa;
    ctx->dsa_keep_depth_stencil =
-      pipe->create_depth_stencil_alpha_state(pipe, &dsa);
+      pipe->create_depth_stencil_alpha_state(pipe, dsa);
 
-   dsa.depth.enabled = 1;
-   dsa.depth.writemask = 1;
-   dsa.depth.func = PIPE_FUNC_ALWAYS;
+   dsa->depth.enabled = 1;
+   dsa->depth.writemask = 1;
+   dsa->depth.func = PIPE_FUNC_ALWAYS;
    ctx->dsa_write_depth_keep_stencil =
-      pipe->create_depth_stencil_alpha_state(pipe, &dsa);
-
-   dsa.stencil[0].enabled = 1;
-   dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
-   dsa.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
-   dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
-   dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
-   dsa.stencil[0].valuemask = 0xff;
-   dsa.stencil[0].writemask = 0xff;
-
-   /* create a depth stencil alpha state for each possible stencil clear
-    * value */
-   for (i = 0; i < 0xff; i++) {
-      dsa.stencil[0].ref_value = i;
-
-      ctx->dsa_write_depth_stencil[i] =
-         pipe->create_depth_stencil_alpha_state(pipe, &dsa);
-   }
+      pipe->create_depth_stencil_alpha_state(pipe, dsa);
+
+   dsa->stencil[0].enabled = 1;
+   dsa->stencil[0].func = PIPE_FUNC_ALWAYS;
+   dsa->stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
+   dsa->stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+   dsa->stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
+   dsa->stencil[0].valuemask = 0xff;
+   dsa->stencil[0].writemask = 0xff;
+   /* The DSA state objects which write depth and stencil are created
+    * on-demand. */
 
    /* sampler state */
-   memset(&sampler_state, 0, sizeof(sampler_state));
-   sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler_state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-
-   for (i = 0; i < 16; i++) {
-      sampler_state.lod_bias = i;
-      sampler_state.min_lod = i;
-      sampler_state.max_lod = i;
-
-      ctx->sampler_state[i] = pipe->create_sampler_state(pipe, &sampler_state);
-   }
+   sampler_state = &ctx->template_sampler_state;
+   sampler_state->wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler_state->wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler_state->wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   /* The sampler state objects which sample from a specified mipmap level
+    * are created on-demand. */
 
    /* rasterizer state */
    memset(&rs_state, 0, sizeof(rs_state));
@@ -158,6 +165,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    rs_state.gl_rasterization_rules = 1;
    ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
 
+   /* fragment shaders are created on-demand */
+
    /* vertex shaders */
    {
       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
@@ -176,31 +185,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
                                              semantic_indices);
    }
 
-   /* fragment shaders */
-   ctx->fs_texfetch_col[PIPE_TEXTURE_1D] =
-      util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D);
-   ctx->fs_texfetch_col[PIPE_TEXTURE_2D] =
-      util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
-   ctx->fs_texfetch_col[PIPE_TEXTURE_3D] =
-      util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D);
-   ctx->fs_texfetch_col[PIPE_TEXTURE_CUBE] =
-      util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE);
-
-   ctx->fs_texfetch_depth[PIPE_TEXTURE_1D] =
-      util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_1D);
-   ctx->fs_texfetch_depth[PIPE_TEXTURE_2D] =
-      util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D);
-   ctx->fs_texfetch_depth[PIPE_TEXTURE_3D] =
-      util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_3D);
-   ctx->fs_texfetch_depth[PIPE_TEXTURE_CUBE] =
-      util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_CUBE);
-
-   max_render_targets = pipe->screen->get_param(pipe->screen,
-                                                PIPE_CAP_MAX_RENDER_TARGETS);
-   assert(max_render_targets <= 8);
-   for (i = 0; i < max_render_targets; i++)
-      ctx->fs_col[i] = util_make_fragment_clonecolor_shader(pipe, 1+i);
-
    /* set invariant vertex coordinates */
    for (i = 0; i < 4; i++)
       ctx->vertices[i][0][3] = 1; /*v.w*/
@@ -227,19 +211,28 @@ void util_blitter_destroy(struct blitter_context *blitter)
                                           ctx->dsa_write_depth_keep_stencil);
 
    for (i = 0; i < 0xff; i++)
-      pipe->delete_depth_stencil_alpha_state(pipe,
-                                             ctx->dsa_write_depth_stencil[i]);
+      if (ctx->dsa_write_depth_stencil[i])
+         pipe->delete_depth_stencil_alpha_state(pipe,
+            ctx->dsa_write_depth_stencil[i]);
 
    pipe->delete_rasterizer_state(pipe, ctx->rs_state);
    pipe->delete_vs_state(pipe, ctx->vs_col);
    pipe->delete_vs_state(pipe, ctx->vs_tex);
 
-   for (i = 0; i < 4; i++) {
-      pipe->delete_fs_state(pipe, ctx->fs_texfetch_col[i]);
-      pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
+   for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
+      if (ctx->fs_texfetch_col[i])
+         pipe->delete_fs_state(pipe, ctx->fs_texfetch_col[i]);
+      if (ctx->fs_texfetch_depth[i])
+         pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
    }
-   for (i = 0; i < 8 && ctx->fs_col[i]; i++)
-      pipe->delete_fs_state(pipe, ctx->fs_col[i]);
+
+   /* The color-output shaders are created on demand, so the array may have
+    * holes (e.g. only the 2-buffer shader exists).  Visit every slot instead
+    * of stopping at the first NULL one, otherwise later shaders leak. */
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+      if (ctx->fs_col[i])
+         pipe->delete_fs_state(pipe, ctx->fs_col[i]);
+
+   for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+      if (ctx->sampler_state[i])
+         pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
 
    pipe_buffer_reference(&ctx->vbuf, NULL);
    FREE(ctx);
@@ -248,11 +241,11 @@ void util_blitter_destroy(struct blitter_context *blitter)
 static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
 {
    /* Make sure these CSOs have been saved.  util_blitter_create and
     * blitter_restore_CSOs reset every slot to INVALID_PTR, so a slot that
     * still holds INVALID_PTR was not saved since then, while NULL is
     * accepted as a saved value. */
-   assert(ctx->blitter.saved_blend_state &&
-          ctx->blitter.saved_dsa_state &&
-          ctx->blitter.saved_rs_state &&
-          ctx->blitter.saved_fs &&
-          ctx->blitter.saved_vs);
+   assert(ctx->blitter.saved_blend_state != INVALID_PTR &&
+          ctx->blitter.saved_dsa_state != INVALID_PTR &&
+          ctx->blitter.saved_rs_state != INVALID_PTR &&
+          ctx->blitter.saved_fs != INVALID_PTR &&
+          ctx->blitter.saved_vs != INVALID_PTR);
 }
 
 static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
@@ -266,11 +259,11 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
    pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
    pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
 
-   ctx->blitter.saved_blend_state = 0;
-   ctx->blitter.saved_dsa_state = 0;
-   ctx->blitter.saved_rs_state = 0;
-   ctx->blitter.saved_fs = 0;
-   ctx->blitter.saved_vs = 0;
+   ctx->blitter.saved_blend_state = INVALID_PTR;
+   ctx->blitter.saved_dsa_state = INVALID_PTR;
+   ctx->blitter.saved_rs_state = INVALID_PTR;
+   ctx->blitter.saved_fs = INVALID_PTR;
+   ctx->blitter.saved_vs = INVALID_PTR;
 
    /* restore the state objects which are required to be saved before copy/fill
     */
@@ -400,20 +393,142 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
 
 static void blitter_draw_quad(struct blitter_context_priv *ctx)
 {
-   struct blitter_context *blitter = &ctx->blitter;
    struct pipe_context *pipe = ctx->pipe;
 
-   if (blitter->draw_quad) {
-      blitter->draw_quad(pipe, &ctx->vertices[0][0][0]);
-   } else {
-      /* write vertices and draw them */
-      pipe_buffer_write(pipe->screen, ctx->vbuf,
-                        0, sizeof(ctx->vertices), ctx->vertices);
+   /* Write all of ctx->vertices into ctx->vbuf, then draw that buffer as a
+    * triangle fan of 4 vertices with 2 attributes each.  The vertex data
+    * must already have been filled in by the caller. */
+   pipe_buffer_write(pipe->screen, ctx->vbuf,
+                     0, sizeof(ctx->vertices), ctx->vertices);
+
+   util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN,
+                           4,  /* verts */
+                           2); /* attribs/vert */
+}
+
+/**
+ * Return the depth-stencil-alpha state object built from ctx->template_dsa
+ * with the given stencil reference value, creating and caching it on first
+ * use.  Only the low 8 bits of the stencil value are used.
+ *
+ * NOTE(review): the index can be 0xff, while the cleanup loop in
+ * util_blitter_destroy iterates with i < 0xff -- confirm that the
+ * dsa_write_depth_stencil array has 256 entries and that the last one is
+ * released.
+ */
+static INLINE
+void *blitter_get_state_write_depth_stencil(
+               struct blitter_context_priv *ctx,
+               unsigned stencil)
+{
+   const unsigned ref = stencil & 0xff;
+
+   if (ctx->dsa_write_depth_stencil[ref] == NULL) {
+      struct pipe_context *pipe = ctx->pipe;
+
+      /* The shared template is patched in place with the requested
+       * reference value before the state object is built from it. */
+      ctx->template_dsa.stencil[0].ref_value = ref;
+      ctx->dsa_write_depth_stencil[ref] =
+         pipe->create_depth_stencil_alpha_state(pipe, &ctx->template_dsa);
+   }
+
+   return ctx->dsa_write_depth_stencil[ref];
+}
+
+/**
+ * Return the address of the cached sampler state whose lod_bias, min_lod
+ * and max_lod are all set to the given mipmap level, creating the state
+ * object on first use.
+ *
+ * The address of the cache slot (void**) is returned so that it can be
+ * passed to bind_fragment_sampler_states directly.
+ *
+ * miplevel must be in the range [0, PIPE_MAX_TEXTURE_LEVELS).
+ */
+static INLINE
+void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
+                                 int miplevel)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
+
+   /* miplevel is signed and used as an array index: check both bounds. */
+   assert(miplevel >= 0 && miplevel < PIPE_MAX_TEXTURE_LEVELS);
+
+   /* Create the sampler state on-demand. */
+   if (!ctx->sampler_state[miplevel]) {
+      /* The shared template is patched in place for this level. */
+      sampler_state->lod_bias = miplevel;
+      sampler_state->min_lod = miplevel;
+      sampler_state->max_lod = miplevel;
+
+      ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
+                                                                sampler_state);
+   }
+
+   return &ctx->sampler_state[miplevel];
+}
+
+/**
+ * Return the fragment shader which outputs a color to num_cbufs color
+ * buffers, creating and caching it on first use.
+ *
+ * Slot i of ctx->fs_col holds the shader for i + 1 color buffers.  A request
+ * for zero color buffers shares slot 0 with the single-buffer shader.
+ */
+static INLINE
+void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   unsigned index = num_cbufs ? num_cbufs - 1 : 0;
+
+   assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
+
+   /* Build the shader from the slot index, not from num_cbufs: otherwise a
+    * first call with num_cbufs == 0 would cache a zero-output shader in
+    * slot 0, and every later single-buffer request would get it back. */
+   if (!ctx->fs_col[index])
+      ctx->fs_col[index] =
+         util_make_fragment_clonecolor_shader(pipe, index + 1);
+
+   return ctx->fs_col[index];
+}
+
+static INLINE
+void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
+                                  unsigned tex_target)
+{
+   struct pipe_context *pipe = ctx->pipe;
 
-      util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN,
-                              4,  /* verts */
-                              2); /* attribs/vert */
+   assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
+
+   /* Create the fragment shader on-demand and cache it in the slot indexed
+    * by the PIPE_TEXTURE_* target.  Only the 1D, 2D, 3D and CUBE targets
+    * have a shader; for any other target the slot is left untouched, so
+    * NULL is returned if nothing was stored there before. */
+   if (!ctx->fs_texfetch_col[tex_target]) {
+      switch (tex_target) {
+         case PIPE_TEXTURE_1D:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_1D] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D);
+            break;
+         case PIPE_TEXTURE_2D:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_2D] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D);
+            break;
+         case PIPE_TEXTURE_3D:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_3D] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D);
+            break;
+         case PIPE_TEXTURE_CUBE:
+            ctx->fs_texfetch_col[PIPE_TEXTURE_CUBE] =
+               util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE);
+            break;
+         default:;
+      }
    }
+
+   return ctx->fs_texfetch_col[tex_target];
+}
+
+static INLINE
+void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
+                                    unsigned tex_target)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
+
+   /* Create the depth-writing fragment shader on-demand and cache it in the
+    * slot indexed by the PIPE_TEXTURE_* target.  Only the 1D, 2D, 3D and
+    * CUBE targets have a shader; for any other target the slot is left
+    * untouched, so NULL is returned if nothing was stored there before. */
+   if (!ctx->fs_texfetch_depth[tex_target]) {
+      switch (tex_target) {
+         case PIPE_TEXTURE_1D:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_1D] =
+               util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_1D);
+            break;
+         case PIPE_TEXTURE_2D:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_2D] =
+               util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D);
+            break;
+         case PIPE_TEXTURE_3D:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_3D] =
+               util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_3D);
+            break;
+         case PIPE_TEXTURE_CUBE:
+            ctx->fs_texfetch_depth[PIPE_TEXTURE_CUBE] =
+               util_make_fragment_tex_shader_writedepth(pipe,TGSI_TEXTURE_CUBE);
+            break;
+         default:;
+      }
+   }
+
+   return ctx->fs_texfetch_depth[tex_target];
 }
 
 void util_blitter_clear(struct blitter_context *blitter,
@@ -426,7 +541,7 @@ void util_blitter_clear(struct blitter_context *blitter,
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->pipe;
 
-   assert(num_cbufs <= 8);
+   assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
 
    blitter_check_saved_CSOs(ctx);
 
@@ -438,12 +553,12 @@ void util_blitter_clear(struct blitter_context *blitter,
 
    if (clear_buffers & PIPE_CLEAR_DEPTHSTENCIL)
       pipe->bind_depth_stencil_alpha_state(pipe,
-         ctx->dsa_write_depth_stencil[stencil&0xff]);
+         blitter_get_state_write_depth_stencil(ctx, stencil));
    else
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
 
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
-   pipe->bind_fs_state(pipe, ctx->fs_col[num_cbufs ? num_cbufs-1 : 0]);
+   pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
    pipe->bind_vs_state(pipe, ctx->vs_col);
 
    blitter_set_clear_color(ctx, rgba);
@@ -452,49 +567,34 @@ void util_blitter_clear(struct blitter_context *blitter,
    blitter_restore_CSOs(ctx);
 }
 
-void util_blitter_copy(struct blitter_context *blitter,
-                       struct pipe_surface *dst,
-                       unsigned dstx, unsigned dsty,
-                       struct pipe_surface *src,
-                       unsigned srcx, unsigned srcy,
-                       unsigned width, unsigned height,
-                       boolean ignore_stencil)
+/**
+ * Tell whether two axis-aligned rectangles share at least one pixel.
+ *
+ * The source rectangle spans sx1..sx2 by sy1..sy2 and the destination spans
+ * dx1..dx2 by dy1..dy2; all bounds are inclusive.
+ *
+ * The rectangles overlap exactly when their extents intersect on both axes.
+ * Unlike testing whether a source corner lies inside the destination, this
+ * is also correct for rectangles of different sizes (one enclosing the
+ * other, or two crossing each other), and it gives the same answer for the
+ * equal-sized rectangles of a copy.
+ */
+static boolean
+is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2)
+{
+   const boolean x_overlap = (sx1 <= dx2 && dx1 <= sx2) ? TRUE : FALSE;
+   const boolean y_overlap = (sy1 <= dy2 && dy1 <= sy2) ? TRUE : FALSE;
+
+   return (x_overlap && y_overlap) ? TRUE : FALSE;
+}
+
+static void util_blitter_do_copy(struct blitter_context *blitter,
+                                struct pipe_surface *dst,
+                                unsigned dstx, unsigned dsty,
+                                struct pipe_surface *src,
+                                unsigned srcx, unsigned srcy,
+                                unsigned width, unsigned height,
+                                boolean is_depth)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->pipe;
-   struct pipe_screen *screen = pipe->screen;
    struct pipe_framebuffer_state fb_state;
-   boolean is_stencil, is_depth;
-   unsigned dst_tex_usage;
-
-   /* give up if textures are not set */
-   assert(dst->texture && src->texture);
-   if (!dst->texture || !src->texture)
-      return;
-
-   is_depth = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_Z) != 0;
-   is_stencil = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_S) != 0;
-   dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
-                                            PIPE_TEXTURE_USAGE_RENDER_TARGET;
-
-   /* check if we can sample from and render to the surfaces */
-   /* (assuming copying a stencil buffer is not possible) */
-   if ((!ignore_stencil && is_stencil) ||
-       !screen->is_format_supported(screen, dst->format, dst->texture->target,
-                                    dst_tex_usage, 0) ||
-       !screen->is_format_supported(screen, src->format, src->texture->target,
-                                    PIPE_TEXTURE_USAGE_SAMPLER, 0)) {
-      util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy,
-                        width, height);
-      return;
-   }
 
-   /* check whether the states are properly saved */
-   blitter_check_saved_CSOs(ctx);
    assert(blitter->saved_fb_state.nr_cbufs != ~0);
    assert(blitter->saved_num_textures != ~0);
    assert(blitter->saved_num_sampler_states != ~0);
-   assert(src->texture->target < 4);
+   assert(src->texture->target < PIPE_MAX_TEXTURE_TYPES);
 
    /* bind CSOs */
    fb_state.width = dst->width;
@@ -504,22 +604,26 @@ void util_blitter_copy(struct blitter_context *blitter,
       pipe->bind_blend_state(pipe, ctx->blend_keep_color);
       pipe->bind_depth_stencil_alpha_state(pipe,
                                            ctx->dsa_write_depth_keep_stencil);
-      pipe->bind_fs_state(pipe, ctx->fs_texfetch_depth[src->texture->target]);
+      pipe->bind_fs_state(pipe,
+         blitter_get_fs_texfetch_depth(ctx, src->texture->target));
 
       fb_state.nr_cbufs = 0;
       fb_state.zsbuf = dst;
    } else {
       pipe->bind_blend_state(pipe, ctx->blend_write_color);
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
-      pipe->bind_fs_state(pipe, ctx->fs_texfetch_col[src->texture->target]);
+      pipe->bind_fs_state(pipe,
+         blitter_get_fs_texfetch_col(ctx, src->texture->target));
 
       fb_state.nr_cbufs = 1;
       fb_state.cbufs[0] = dst;
       fb_state.zsbuf = 0;
    }
+
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
    pipe->bind_vs_state(pipe, ctx->vs_tex);
-   pipe->bind_fragment_sampler_states(pipe, 1, &ctx->sampler_state[src->level]);
+   pipe->bind_fragment_sampler_states(pipe, 1,
+      blitter_get_sampler_state(ctx, src->level));
    pipe->set_fragment_sampler_textures(pipe, 1, &src->texture);
    pipe->set_framebuffer_state(pipe, &fb_state);
 
@@ -538,10 +642,117 @@ void util_blitter_copy(struct blitter_context *blitter,
          blitter_set_texcoords_cube(ctx, src, srcx, srcy,
                                     srcx+width, srcy+height);
          break;
+      default:
+         assert(0);
    }
 
    blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
    blitter_draw_quad(ctx);
+
+}
+
+/**
+ * Copy a width x height rectangle between two overlapping regions by going
+ * through a temporary 2D texture: the source rectangle is first blitted
+ * into the temporary, then the temporary is blitted to the destination.
+ *
+ * The temporary uses the format of the destination texture.  If the
+ * temporary texture or its surface cannot be created, nothing is copied.
+ *
+ * Both blits are done with is_depth == FALSE, i.e. through the color path
+ * of util_blitter_do_copy.
+ * NOTE(review): depth/stencil sources are therefore not handled specially
+ * here -- confirm whether callers can reach this path with such formats.
+ *
+ * The saved CSOs are restored once both blits have been issued.
+ */
+static void util_blitter_overlap_copy(struct blitter_context *blitter,
+                                     struct pipe_surface *dst,
+                                     unsigned dstx, unsigned dsty,
+                                     struct pipe_surface *src,
+                                     unsigned srcx, unsigned srcy,
+                                     unsigned width, unsigned height)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+
+   struct pipe_texture texTemp;
+   struct pipe_texture *texture;
+   struct pipe_surface *tex_surf;
+
+   /* check whether the states are properly saved */
+   blitter_check_saved_CSOs(ctx);
+
+   /* describe a single-level 2D texture just large enough for the copy */
+   memset(&texTemp, 0, sizeof(texTemp));
+   texTemp.target = PIPE_TEXTURE_2D;
+   texTemp.format = dst->texture->format; /* XXX verify supported by driver! */
+   texTemp.last_level = 0;
+   texTemp.width0 = width;
+   texTemp.height0 = height;
+   texTemp.depth0 = 1;
+
+   texture = screen->texture_create(screen, &texTemp);
+   if (!texture)
+      return;
+
+   tex_surf = screen->get_tex_surface(screen, texture, 0, 0, 0,
+                                     PIPE_BUFFER_USAGE_GPU_READ |
+                                     PIPE_BUFFER_USAGE_GPU_WRITE);
+   if (!tex_surf) {
+      /* No state has been bound yet; just drop the temporary texture. */
+      pipe_texture_reference(&texture, NULL);
+      return;
+   }
+
+   /* blit from the src to the temp */
+   util_blitter_do_copy(blitter, tex_surf, 0, 0,
+                       src, srcx, srcy,
+                       width, height,
+                       FALSE);
+   /* blit from the temp to the dst */
+   util_blitter_do_copy(blitter, dst, dstx, dsty,
+                       tex_surf, 0, 0,
+                       width, height,
+                       FALSE);
+   pipe_surface_reference(&tex_surf, NULL);
+   pipe_texture_reference(&texture, NULL);
+   blitter_restore_CSOs(ctx);
+}
+
+/**
+ * Copy a width x height rectangle from src at (srcx, srcy) to dst at
+ * (dstx, dsty).
+ *
+ * - If both surfaces belong to the same texture and the two rectangles
+ *   overlap, the copy goes through util_blitter_overlap_copy.  This is
+ *   decided before any of the format checks below.
+ * - Otherwise, if the source format has a stencil component and
+ *   ignore_stencil is FALSE, or the destination/source format is not
+ *   supported for rendering/sampling, the copy falls back to
+ *   util_surface_copy (the saved CSOs are not restored on that path).
+ * - Otherwise the rectangle is blitted with util_blitter_do_copy and the
+ *   saved CSOs are restored afterwards.
+ *
+ * Nothing is done if either surface has no texture.
+ */
+void util_blitter_copy(struct blitter_context *blitter,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface *src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height,
+                       boolean ignore_stencil)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   boolean is_stencil, is_depth;
+   unsigned dst_tex_usage;
+
+   /* give up if textures are not set */
+   assert(dst->texture && src->texture);
+   if (!dst->texture || !src->texture)
+      return;
+
+   /* Overlapping copy within one texture: go through a temporary.  The
+    * rectangles are passed with inclusive bounds.
+    * NOTE(review): only the texture pointers are compared, so surfaces that
+    * refer to different levels of the same texture also take this path. */
+   if (dst->texture == src->texture &&
+       is_overlap(srcx, srcx + (width - 1), srcy, srcy + (height - 1),
+                  dstx, dstx + (width - 1), dsty, dsty + (height - 1))) {
+      util_blitter_overlap_copy(blitter, dst, dstx, dsty, src, srcx, srcy,
+                                width, height);
+      return;
+   }
+
+   is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
+   is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0;
+   dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
+                                            PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+   /* check if we can sample from and render to the surfaces */
+   /* (assuming copying a stencil buffer is not possible) */
+   if ((!ignore_stencil && is_stencil) ||
+       !screen->is_format_supported(screen, dst->format, dst->texture->target,
+                                    dst_tex_usage, 0) ||
+       !screen->is_format_supported(screen, src->format, src->texture->target,
+                                    PIPE_TEXTURE_USAGE_SAMPLER, 0)) {
+      util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy,
+                        width, height);
+      return;
+   }
+
+   /* check whether the states are properly saved */
+   blitter_check_saved_CSOs(ctx);
+   util_blitter_do_copy(blitter,
+                       dst, dstx, dsty,
+                       src, srcx, srcy,
+                       width, height, is_depth);
    blitter_restore_CSOs(ctx);
 }
 
@@ -565,7 +776,7 @@ void util_blitter_fill(struct blitter_context *blitter,
       return;
 
    /* check if we can render to the surface */
-   if (pf_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */
+   if (util_format_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */
        !screen->is_format_supported(screen, dst->format, dst->texture->target,
                                     PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
       util_surface_fill(pipe, dst, dstx, dsty, width, height, value);
@@ -587,7 +798,7 @@ void util_blitter_fill(struct blitter_context *blitter,
    pipe->bind_blend_state(pipe, ctx->blend_write_color);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
    pipe->bind_rasterizer_state(pipe, ctx->rs_state);
-   pipe->bind_fs_state(pipe, ctx->fs_col[0]);
+   pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1));
    pipe->bind_vs_state(pipe, ctx->vs_col);
 
    /* set a framebuffer state */