util: add debug_print_bind_flags() debug helper
[mesa.git] / src / gallium / auxiliary / util / u_blitter.c
index 85e1e979120a609b38bcae934fbc498b0f588162..9d087fe8a66fc9068b5c93a77e50d097e4635a56 100644 (file)
@@ -17,7 +17,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define INVALID_PTR ((void*)~0)
 
+#define GET_CLEAR_BLEND_STATE_IDX(clear_buffers) \
+   ((clear_buffers) / PIPE_CLEAR_COLOR0)
+
+#define NUM_RESOLVE_FRAG_SHADERS 5 /* MSAA 2x, 4x, 8x, 16x, 32x */
+#define GET_MSAA_RESOLVE_FS_IDX(nr_samples) (util_logbase2(nr_samples)-1)
+
 struct blitter_context_priv
 {
    struct blitter_context base;
@@ -65,11 +71,12 @@ struct blitter_context_priv
    /* Vertex shaders. */
    void *vs; /**< Vertex shader which passes {pos, generic} to the output.*/
    void *vs_pos_only; /**< Vertex shader which passes pos to the output.*/
+   void *vs_layered; /**< Vertex shader which sets LAYER = INSTANCEID. */
 
    /* Fragment shaders. */
-   /* The shader at index i outputs color to color buffers 0,1,...,i-1. */
-   void *fs_col[PIPE_MAX_COLOR_BUFS+1];
-   void *fs_col_int[PIPE_MAX_COLOR_BUFS+1];
+   void *fs_empty;
+   void *fs_write_one_cbuf;
+   void *fs_write_all_cbufs;
 
    /* FS which outputs a color from a texture,
       where the index is PIPE_TEXTURE_* to be sampled. */
@@ -87,8 +94,14 @@ struct blitter_context_priv
    void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES];
    void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES];
 
+   /* FS which outputs an average of all samples. */
+   void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
+   void *fs_resolve_sint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
+   void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
+
    /* Blend state. */
    void *blend[PIPE_MASK_RGBA+1]; /**< blend state with writemask */
+   void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1];
 
    /* Depth stencil alpha state. */
    void *dsa_write_depth_stencil;
@@ -98,8 +111,6 @@ struct blitter_context_priv
 
    /* Vertex elements states. */
    void *velem_state;
-   void *velem_uint_state;
-   void *velem_sint_state;
    void *velem_state_readbuf[4]; /**< X, XY, XYZ, XYZW */
 
    /* Sampler state. */
@@ -119,10 +130,11 @@ struct blitter_context_priv
    unsigned dst_height;
 
    boolean has_geometry_shader;
-   boolean vertex_has_integers;
+   boolean has_layered;
    boolean has_stream_out;
    boolean has_stencil_export;
    boolean has_texture_multisample;
+   boolean cached_all_shaders;
 
    /* The Draw module overrides these functions.
     * Always create the blitter before Draw. */
@@ -171,9 +183,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    ctx->has_geometry_shader =
       pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_GEOMETRY,
                                      PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0;
-   ctx->vertex_has_integers =
-      pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_VERTEX,
-                                     PIPE_SHADER_CAP_INTEGERS);
    ctx->has_stream_out =
       pipe->screen->get_param(pipe->screen,
                               PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0;
@@ -239,7 +248,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    /* rasterizer state */
    memset(&rs_state, 0, sizeof(rs_state));
    rs_state.cull_face = PIPE_FACE_NONE;
-   rs_state.gl_rasterization_rules = 1;
+   rs_state.half_pixel_center = 1;
+   rs_state.bottom_edge_rule = 1;
    rs_state.flatshade = 1;
    rs_state.depth_clip = 1;
    ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
@@ -264,26 +274,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    }
    ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
 
-   if (ctx->vertex_has_integers) {
-      memset(&velem[0], 0, sizeof(velem[0]) * 2);
-      velem[0].src_offset = 0;
-      velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      velem[0].vertex_buffer_index = ctx->base.vb_slot;
-      velem[1].src_offset = 4 * sizeof(float);
-      velem[1].src_format = PIPE_FORMAT_R32G32B32A32_SINT;
-      velem[1].vertex_buffer_index = ctx->base.vb_slot;
-      ctx->velem_sint_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
-
-      memset(&velem[0], 0, sizeof(velem[0]) * 2);
-      velem[0].src_offset = 0;
-      velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      velem[0].vertex_buffer_index = ctx->base.vb_slot;
-      velem[1].src_offset = 4 * sizeof(float);
-      velem[1].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
-      velem[1].vertex_buffer_index = ctx->base.vb_slot;
-      ctx->velem_uint_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
-   }
-
    if (ctx->has_stream_out) {
       static enum pipe_format formats[4] = {
          PIPE_FORMAT_R32_UINT,
@@ -300,18 +290,24 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
       }
    }
 
-   /* fragment shaders are created on-demand */
+   ctx->has_layered =
+      pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) &&
+      pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT);
 
-   /* vertex shaders */
-   {
-      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
-                                      TGSI_SEMANTIC_GENERIC };
-      const uint semantic_indices[] = { 0, 0 };
-      ctx->vs =
-         util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
-                                             semantic_indices);
-   }
-   if (ctx->has_stream_out) {
+   /* set invariant vertex coordinates */
+   for (i = 0; i < 4; i++)
+      ctx->vertices[i][0][3] = 1; /*v.w*/
+
+   ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+
+   return &ctx->base;
+}
+
+static void bind_vs_pos_only(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->vs_pos_only) {
       struct pipe_stream_output_info so;
       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION };
       const uint semantic_indices[] = { 0 };
@@ -323,27 +319,93 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
 
       ctx->vs_pos_only =
          util_make_vertex_passthrough_shader_with_so(pipe, 1, semantic_names,
-                                                     semantic_indices, &so);
+                                                     semantic_indices, FALSE,
+                                                     &so);
    }
 
-   /* set invariant vertex coordinates */
-   for (i = 0; i < 4; i++)
-      ctx->vertices[i][0][3] = 1; /*v.w*/
+   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+}
 
-   ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+static void bind_vs_passthrough(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
 
-   return &ctx->base;
+   if (!ctx->vs) {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_GENERIC };
+      const uint semantic_indices[] = { 0, 0 };
+      ctx->vs =
+         util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+                                             semantic_indices, FALSE);
+   }
+
+   pipe->bind_vs_state(pipe, ctx->vs);
+}
+
+static void bind_vs_layered(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->vs_layered) {
+      ctx->vs_layered = util_make_layered_clear_vertex_shader(pipe);
+   }
+
+   pipe->bind_vs_state(pipe, ctx->vs_layered);
+}
+
+static void bind_fs_empty(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->fs_empty) {
+      assert(!ctx->cached_all_shaders);
+      ctx->fs_empty = util_make_empty_fragment_shader(pipe);
+   }
+
+   ctx->bind_fs_state(pipe, ctx->fs_empty);
+}
+
+static void bind_fs_write_one_cbuf(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->fs_write_one_cbuf) {
+      assert(!ctx->cached_all_shaders);
+      ctx->fs_write_one_cbuf =
+         util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+                                               TGSI_INTERPOLATE_CONSTANT, FALSE);
+   }
+
+   ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+}
+
+static void bind_fs_write_all_cbufs(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->fs_write_all_cbufs) {
+      assert(!ctx->cached_all_shaders);
+      ctx->fs_write_all_cbufs =
+         util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+                                               TGSI_INTERPOLATE_CONSTANT, TRUE);
+   }
+
+   ctx->bind_fs_state(pipe, ctx->fs_write_all_cbufs);
 }
 
 void util_blitter_destroy(struct blitter_context *blitter)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = blitter->pipe;
-   int i;
+   int i, j, f;
 
    for (i = 0; i <= PIPE_MASK_RGBA; i++) {
       pipe->delete_blend_state(pipe, ctx->blend[i]);
    }
+   for (i = 0; i < Elements(ctx->blend_clear); i++) {
+      if (ctx->blend_clear[i])
+         pipe->delete_blend_state(pipe, ctx->blend_clear[i]);
+   }
    pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
    pipe->delete_depth_stencil_alpha_state(pipe,
                                           ctx->dsa_write_depth_keep_stencil);
@@ -354,14 +416,13 @@ void util_blitter_destroy(struct blitter_context *blitter)
    pipe->delete_rasterizer_state(pipe, ctx->rs_state_scissor);
    if (ctx->rs_discard_state)
       pipe->delete_rasterizer_state(pipe, ctx->rs_discard_state);
-   pipe->delete_vs_state(pipe, ctx->vs);
+   if (ctx->vs)
+      pipe->delete_vs_state(pipe, ctx->vs);
    if (ctx->vs_pos_only)
       pipe->delete_vs_state(pipe, ctx->vs_pos_only);
+   if (ctx->vs_layered)
+      pipe->delete_vs_state(pipe, ctx->vs_layered);
    pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
-   if (ctx->vertex_has_integers) {
-      pipe->delete_vertex_elements_state(pipe, ctx->velem_sint_state);
-      pipe->delete_vertex_elements_state(pipe, ctx->velem_uint_state);
-   }
    for (i = 0; i < 4; i++) {
       if (ctx->velem_state_readbuf[i]) {
          pipe->delete_vertex_elements_state(pipe, ctx->velem_state_readbuf[i]);
@@ -377,15 +438,39 @@ void util_blitter_destroy(struct blitter_context *blitter)
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil[i]);
       if (ctx->fs_texfetch_stencil[i])
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil[i]);
-   }
 
-   for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++) {
-      if (ctx->fs_col[i])
-         ctx->delete_fs_state(pipe, ctx->fs_col[i]);
-      if (ctx->fs_col_int[i])
-         ctx->delete_fs_state(pipe, ctx->fs_col_int[i]);
+      if (ctx->fs_texfetch_col_msaa[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_msaa[i]);
+      if (ctx->fs_texfetch_depth_msaa[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_depth_msaa[i]);
+      if (ctx->fs_texfetch_depthstencil_msaa[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil_msaa[i]);
+      if (ctx->fs_texfetch_stencil_msaa[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil_msaa[i]);
+
+      for (j = 0; j< Elements(ctx->fs_resolve[i]); j++)
+         for (f = 0; f < 2; f++)
+            if (ctx->fs_resolve[i][j][f])
+               ctx->delete_fs_state(pipe, ctx->fs_resolve[i][j][f]);
+
+      for (j = 0; j< Elements(ctx->fs_resolve_sint[i]); j++)
+         for (f = 0; f < 2; f++)
+            if (ctx->fs_resolve_sint[i][j][f])
+               ctx->delete_fs_state(pipe, ctx->fs_resolve_sint[i][j][f]);
+
+      for (j = 0; j< Elements(ctx->fs_resolve_uint[i]); j++)
+         for (f = 0; f < 2; f++)
+            if (ctx->fs_resolve_uint[i][j][f])
+               ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j][f]);
    }
 
+   if (ctx->fs_empty)
+      ctx->delete_fs_state(pipe, ctx->fs_empty);
+   if (ctx->fs_write_one_cbuf)
+      ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf);
+   if (ctx->fs_write_all_cbufs)
+      ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs);
+
    pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear);
    pipe->delete_sampler_state(pipe, ctx->sampler_state_rect);
    pipe->delete_sampler_state(pipe, ctx->sampler_state_linear);
@@ -455,9 +540,12 @@ static void blitter_restore_vertex_states(struct blitter_context_priv *ctx)
 
    /* Stream outputs. */
    if (ctx->has_stream_out) {
+      unsigned offsets[PIPE_MAX_SO_BUFFERS];
+      for (i = 0; i < ctx->base.saved_num_so_targets; i++)
+         offsets[i] = (unsigned)-1;
       pipe->set_stream_output_targets(pipe,
                                       ctx->base.saved_num_so_targets,
-                                      ctx->base.saved_so_targets, ~0);
+                                      ctx->base.saved_so_targets, offsets);
 
       for (i = 0; i < ctx->base.saved_num_so_targets; i++)
          pipe_so_target_reference(&ctx->base.saved_so_targets[i], NULL);
@@ -503,7 +591,7 @@ static void blitter_restore_fragment_states(struct blitter_context_priv *ctx)
    /* XXX check whether these are saved and whether they need to be restored
     * (depending on the operation) */
    pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
-   pipe->set_viewport_state(pipe, &ctx->base.saved_viewport);
+   pipe->set_viewport_states(pipe, 0, 1, &ctx->base.saved_viewport);
 }
 
 static void blitter_check_saved_fb_state(struct blitter_context_priv *ctx)
@@ -516,7 +604,7 @@ static void blitter_disable_render_cond(struct blitter_context_priv *ctx)
    struct pipe_context *pipe = ctx->base.pipe;
 
    if (ctx->base.saved_render_cond_query) {
-      pipe->render_condition(pipe, NULL, 0);
+      pipe->render_condition(pipe, NULL, FALSE, 0);
    }
 }
 
@@ -526,6 +614,7 @@ static void blitter_restore_render_cond(struct blitter_context_priv *ctx)
 
    if (ctx->base.saved_render_cond_query) {
       pipe->render_condition(pipe, ctx->base.saved_render_cond_query,
+                             ctx->base.saved_render_cond_cond,
                              ctx->base.saved_render_cond_mode);
       ctx->base.saved_render_cond_query = NULL;
    }
@@ -551,15 +640,16 @@ static void blitter_restore_textures(struct blitter_context_priv *ctx)
    unsigned i;
 
    /* Fragment sampler states. */
-   pipe->bind_fragment_sampler_states(pipe,
-                                      ctx->base.saved_num_sampler_states,
-                                      ctx->base.saved_sampler_states);
+   pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0,
+                             ctx->base.saved_num_sampler_states,
+                             ctx->base.saved_sampler_states);
+
    ctx->base.saved_num_sampler_states = ~0;
 
    /* Fragment sampler views. */
-   pipe->set_fragment_sampler_views(pipe,
-                                    ctx->base.saved_num_sampler_views,
-                                    ctx->base.saved_sampler_views);
+   pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
+                           ctx->base.saved_num_sampler_views,
+                           ctx->base.saved_sampler_views);
 
    for (i = 0; i < ctx->base.saved_num_sampler_views; i++)
       pipe_sampler_view_reference(&ctx->base.saved_sampler_views[i], NULL);
@@ -593,12 +683,10 @@ static void blitter_set_rectangle(struct blitter_context_priv *ctx,
    ctx->viewport.scale[0] = 0.5f * ctx->dst_width;
    ctx->viewport.scale[1] = 0.5f * ctx->dst_height;
    ctx->viewport.scale[2] = 1.0f;
-   ctx->viewport.scale[3] = 1.0f;
    ctx->viewport.translate[0] = 0.5f * ctx->dst_width;
    ctx->viewport.translate[1] = 0.5f * ctx->dst_height;
    ctx->viewport.translate[2] = 0.0f;
-   ctx->viewport.translate[3] = 0.0f;
-   ctx->base.pipe->set_viewport_state(ctx->base.pipe, &ctx->viewport);
+   ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, &ctx->viewport);
 }
 
 static void blitter_set_clear_color(struct blitter_context_priv *ctx,
@@ -666,7 +754,7 @@ static void set_texcoords_in_vertices(const float coord[4],
 static void blitter_set_texcoords(struct blitter_context_priv *ctx,
                                   struct pipe_sampler_view *src,
                                   unsigned src_width0, unsigned src_height0,
-                                  unsigned layer, unsigned sample,
+                                  float layer, unsigned sample,
                                   int x1, int y1, int x2, int y2)
 {
    unsigned i;
@@ -678,10 +766,11 @@ static void blitter_set_texcoords(struct blitter_context_priv *ctx,
    if (src->texture->target == PIPE_TEXTURE_CUBE ||
        src->texture->target == PIPE_TEXTURE_CUBE_ARRAY) {
       set_texcoords_in_vertices(coord, &face_coord[0][0], 2);
-      util_map_texcoords2d_onto_cubemap(layer % 6,
+      util_map_texcoords2d_onto_cubemap((unsigned)layer % 6,
                                         /* pointer, stride in floats */
                                         &face_coord[0][0], 2,
-                                        &ctx->vertices[0][1][0], 8);
+                                        &ctx->vertices[0][1][0], 8,
+                                        FALSE);
    } else {
       set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
    }
@@ -711,7 +800,7 @@ static void blitter_set_texcoords(struct blitter_context_priv *ctx,
 
    case PIPE_TEXTURE_CUBE_ARRAY:
       for (i = 0; i < 4; i++)
-         ctx->vertices[i][1][3] = (float) (layer / 6); /*w*/
+         ctx->vertices[i][1][3] = (float) ((unsigned)layer / 6); /*w*/
       break;
 
    case PIPE_TEXTURE_2D:
@@ -731,47 +820,63 @@ static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx,
    ctx->dst_height = height;
 }
 
-static void *blitter_get_fs_col(struct blitter_context_priv *ctx,
-                                unsigned num_cbufs, boolean int_format)
-{
-   struct pipe_context *pipe = ctx->base.pipe;
-
-   assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
-
-   if (int_format) {
-      if (!ctx->fs_col_int[num_cbufs])
-         ctx->fs_col_int[num_cbufs] =
-            util_make_fragment_cloneinput_shader(pipe, num_cbufs,
-                                                 TGSI_SEMANTIC_GENERIC,
-                                                 TGSI_INTERPOLATE_CONSTANT);
-      return ctx->fs_col_int[num_cbufs];
-   } else {
-      if (!ctx->fs_col[num_cbufs])
-         ctx->fs_col[num_cbufs] =
-            util_make_fragment_cloneinput_shader(pipe, num_cbufs,
-                                                 TGSI_SEMANTIC_GENERIC,
-                                                 TGSI_INTERPOLATE_LINEAR);
-      return ctx->fs_col[num_cbufs];
-   }
-}
-
 static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
+                                         enum pipe_format format,
                                          enum pipe_texture_target target,
-                                         unsigned nr_samples)
+                                         unsigned src_nr_samples,
+                                         unsigned dst_nr_samples,
+                                         unsigned filter)
 {
    struct pipe_context *pipe = ctx->base.pipe;
+   unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_nr_samples);
 
    assert(target < PIPE_MAX_TEXTURE_TYPES);
 
-   if (nr_samples > 1) {
-      void **shader = &ctx->fs_texfetch_col_msaa[target];
+   if (src_nr_samples > 1) {
+      void **shader;
 
-      /* Create the fragment shader on-demand. */
-      if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target,
-                                                       nr_samples);
+      if (dst_nr_samples <= 1) {
+         /* The destination has one sample, so we'll do color resolve. */
+         boolean is_uint, is_sint;
+         unsigned index = GET_MSAA_RESOLVE_FS_IDX(src_nr_samples);
+
+         is_uint = util_format_is_pure_uint(format);
+         is_sint = util_format_is_pure_sint(format);
 
-         *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex);
+         assert(filter < 2);
+
+         if (is_uint)
+            shader = &ctx->fs_resolve_uint[target][index][filter];
+         else if (is_sint)
+            shader = &ctx->fs_resolve_sint[target][index][filter];
+         else
+            shader = &ctx->fs_resolve[target][index][filter];
+
+         if (!*shader) {
+            assert(!ctx->cached_all_shaders);
+            if (filter == PIPE_TEX_FILTER_LINEAR) {
+               *shader = util_make_fs_msaa_resolve_bilinear(pipe, tgsi_tex,
+                                                   src_nr_samples,
+                                                   is_uint, is_sint);
+            }
+            else {
+               *shader = util_make_fs_msaa_resolve(pipe, tgsi_tex,
+                                                   src_nr_samples,
+                                                   is_uint, is_sint);
+            }
+         }
+      }
+      else {
+         /* The destination has multiple samples, so we'll do
+          * an MSAA->MSAA copy.
+          */
+         shader = &ctx->fs_texfetch_col_msaa[target];
+
+         /* Create the fragment shader on-demand. */
+         if (!*shader) {
+            assert(!ctx->cached_all_shaders);
+            *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex);
+         }
       }
 
       return *shader;
@@ -780,11 +885,9 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
-
-         *shader =
-            util_make_fragment_tex_shader(pipe, tgsi_tex,
-                                          TGSI_INTERPOLATE_LINEAR);
+         assert(!ctx->cached_all_shaders);
+         *shader = util_make_fragment_tex_shader(pipe, tgsi_tex,
+                                                 TGSI_INTERPOLATE_LINEAR);
       }
 
       return *shader;
@@ -805,11 +908,10 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target,
-                                                       nr_samples);
-
-         *shader =
-            util_make_fs_blit_msaa_depth(pipe, tgsi_tex);
+         unsigned tgsi_tex;
+         assert(!ctx->cached_all_shaders);
+         tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples);
+         *shader = util_make_fs_blit_msaa_depth(pipe, tgsi_tex);
       }
 
       return *shader;
@@ -818,8 +920,9 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
-
+         unsigned tgsi_tex;
+         assert(!ctx->cached_all_shaders);
+         tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
          *shader =
             util_make_fragment_tex_shader_writedepth(pipe, tgsi_tex,
                                                      TGSI_INTERPOLATE_LINEAR);
@@ -843,11 +946,10 @@ void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target,
-                                                       nr_samples);
-
-         *shader =
-            util_make_fs_blit_msaa_depthstencil(pipe, tgsi_tex);
+         unsigned tgsi_tex;
+         assert(!ctx->cached_all_shaders);
+         tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples);
+         *shader = util_make_fs_blit_msaa_depthstencil(pipe, tgsi_tex);
       }
 
       return *shader;
@@ -856,8 +958,9 @@ void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
-
+         unsigned tgsi_tex;
+         assert(!ctx->cached_all_shaders);
+         tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
          *shader =
             util_make_fragment_tex_shader_writedepthstencil(pipe, tgsi_tex,
                                                      TGSI_INTERPOLATE_LINEAR);
@@ -881,11 +984,10 @@ void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx,
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target,
-                                                       nr_samples);
-
-         *shader =
-            util_make_fs_blit_msaa_stencil(pipe, tgsi_tex);
+         unsigned tgsi_tex;
+         assert(!ctx->cached_all_shaders);
+         tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples);
+         *shader = util_make_fs_blit_msaa_stencil(pipe, tgsi_tex);
       }
 
       return *shader;
@@ -894,8 +996,9 @@ void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx,
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
-         unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
-
+         unsigned tgsi_tex;
+         assert(!ctx->cached_all_shaders);
+         tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
          *shader =
             util_make_fragment_tex_shader_writestencil(pipe, tgsi_tex,
                                                        TGSI_INTERPOLATE_LINEAR);
@@ -905,28 +1008,29 @@ void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx,
    }
 }
 
+
+/**
+ * Generate and save all fragment shaders that we will ever need for
+ * blitting.  Drivers which use the 'draw' fallbacks will typically use
+ * this to make sure we generate/use shaders that don't go through the
+ * draw module's wrapper functions.
+ */
 void util_blitter_cache_all_shaders(struct blitter_context *blitter)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
-   struct pipe_screen *screen = blitter->pipe->screen;
-   unsigned num_cbufs, i, target, max_samples;
+   struct pipe_context *pipe = blitter->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   unsigned samples, j, f, target, max_samples;
    boolean has_arraytex, has_cubearraytex;
 
-   num_cbufs = MAX2(screen->get_param(screen,
-                                      PIPE_CAP_MAX_RENDER_TARGETS), 1);
    max_samples = ctx->has_texture_multisample ? 2 : 1;
    has_arraytex = screen->get_param(screen,
                                     PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS) != 0;
    has_cubearraytex = screen->get_param(screen,
                                     PIPE_CAP_CUBE_MAP_ARRAY) != 0;
 
-   for (i = 0; i < num_cbufs; i++) {
-      blitter_get_fs_col(ctx, i, FALSE);
-      blitter_get_fs_col(ctx, i, TRUE);
-   }
-
    /* It only matters if i <= 1 or > 1. */
-   for (i = 1; i <= max_samples; i++) {
+   for (samples = 1; samples <= max_samples; samples++) {
       for (target = PIPE_TEXTURE_1D; target < PIPE_MAX_TEXTURE_TYPES; target++) {
          if (!has_arraytex &&
              (target == PIPE_TEXTURE_1D_ARRAY ||
@@ -937,49 +1041,96 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter)
              (target == PIPE_TEXTURE_CUBE_ARRAY))
             continue;
 
-        if (i > 1 &&
+        if (samples > 1 &&
             (target != PIPE_TEXTURE_2D &&
              target != PIPE_TEXTURE_2D_ARRAY))
            continue;
 
-         blitter_get_fs_texfetch_col(ctx, target, i);
-         blitter_get_fs_texfetch_depth(ctx, target, i);
+         /* If samples == 1, the shaders read one texel. If samples > 1,
+          * they read one sample.
+          */
+         blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target,
+                                     samples, samples, 0);
+         blitter_get_fs_texfetch_depth(ctx, target, samples);
          if (ctx->has_stencil_export) {
-            blitter_get_fs_texfetch_depthstencil(ctx, target, i);
-            blitter_get_fs_texfetch_stencil(ctx, target, i);
+            blitter_get_fs_texfetch_depthstencil(ctx, target, samples);
+            blitter_get_fs_texfetch_stencil(ctx, target, samples);
+         }
+
+         if (samples == 1)
+            continue;
+
+         /* MSAA resolve shaders. */
+         for (j = 2; j < 32; j++) {
+            if (!screen->is_format_supported(screen, PIPE_FORMAT_R32_FLOAT,
+                                             target, j,
+                                             PIPE_BIND_SAMPLER_VIEW)) {
+               continue;
+            }
+
+            for (f = 0; f < 2; f++) {
+               blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target,
+                                           j, 1, f);
+               blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_UINT, target,
+                                           j, 1, f);
+               blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, target,
+                                           j, 1, f);
+            }
          }
       }
    }
+
+   ctx->fs_empty = util_make_empty_fragment_shader(pipe);
+
+   ctx->fs_write_one_cbuf =
+      util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+                                            TGSI_INTERPOLATE_CONSTANT, FALSE);
+
+   ctx->fs_write_all_cbufs =
+      util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+                                            TGSI_INTERPOLATE_CONSTANT, TRUE);
+
+   ctx->cached_all_shaders = TRUE;
 }
 
 static void blitter_set_common_draw_rect_state(struct blitter_context_priv *ctx,
-                                               boolean scissor)
+                                               boolean scissor,
+                                               boolean vs_layered)
 {
    struct pipe_context *pipe = ctx->base.pipe;
 
    pipe->bind_rasterizer_state(pipe, scissor ? ctx->rs_state_scissor
                                              : ctx->rs_state);
-   pipe->bind_vs_state(pipe, ctx->vs);
+   if (vs_layered)
+      bind_vs_layered(ctx);
+   else
+      bind_vs_passthrough(ctx);
+
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    if (ctx->has_stream_out)
-      pipe->set_stream_output_targets(pipe, 0, NULL, 0);
+      pipe->set_stream_output_targets(pipe, 0, NULL, NULL);
 }
 
 static void blitter_draw(struct blitter_context_priv *ctx,
-                         int x1, int y1, int x2, int y2, float depth)
+                         int x1, int y1, int x2, int y2, float depth,
+                         unsigned num_instances)
 {
-   struct pipe_resource *buf = NULL;
-   unsigned offset = 0;
+   struct pipe_context *pipe = ctx->base.pipe;
+   struct pipe_vertex_buffer vb = {0};
 
    blitter_set_rectangle(ctx, x1, y1, x2, y2, depth);
 
+   vb.stride = 8 * sizeof(float);
+
    u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
-                 &offset, &buf);
+                 &vb.buffer_offset, &vb.buffer);
    u_upload_unmap(ctx->upload);
-   util_draw_vertex_buffer(ctx->base.pipe, NULL, buf, ctx->base.vb_slot,
-                           offset, PIPE_PRIM_TRIANGLE_FAN, 4, 2);
-   pipe_resource_reference(&buf, NULL);
+
+   pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
+   util_draw_arrays_instanced(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, 4,
+                              0, num_instances);
+   pipe_resource_reference(&vb.buffer, NULL);
 }
 
 void util_blitter_draw_rectangle(struct blitter_context *blitter,
@@ -1001,14 +1152,48 @@ void util_blitter_draw_rectangle(struct blitter_context *blitter,
       default:;
    }
 
-   blitter_draw(ctx, x1, y1, x2, y2, depth);
+   blitter_draw(ctx, x1, y1, x2, y2, depth, 1);
+}
+
+static void *get_clear_blend_state(struct blitter_context_priv *ctx,
+                                   unsigned clear_buffers)
+{ /* Return the blend state for clearing the color buffers named in clear_buffers, creating it on first use. */
+   struct pipe_context *pipe = ctx->base.pipe;
+   int index; /* slot in ctx->blend_clear */
+
+   clear_buffers &= PIPE_CLEAR_COLOR; /* only the color-buffer bits take part in the lookup */
+
+   /* No color buffer selected: return the existing ctx->blend[0]. */
+   if (!clear_buffers)
+      return ctx->blend[0];
+
+   index = GET_CLEAR_BLEND_STATE_IDX(clear_buffers); /* masked bits divided by PIPE_CLEAR_COLOR0 */
+
+   if (ctx->blend_clear[index]) /* already created by an earlier call */
+      return ctx->blend_clear[index];
+
+   /* Create a new one and cache it in ctx->blend_clear[index]. */
+   {
+      struct pipe_blend_state blend = {0};
+      unsigned i;
+
+      blend.independent_blend_enable = 1; /* each rt[i] below carries its own colormask */
+
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+         if (clear_buffers & (PIPE_CLEAR_COLOR0 << i)) { /* bit i of the mask stands for color buffer i */
+            blend.rt[i].colormask = PIPE_MASK_RGBA; /* unselected buffers keep the zero-initialised colormask */
+         }
+      }
+
+      ctx->blend_clear[index] = pipe->create_blend_state(pipe, &blend);
+   }
+   return ctx->blend_clear[index];
 }
 
 static void util_blitter_clear_custom(struct blitter_context *blitter,
                                       unsigned width, unsigned height,
-                                      unsigned num_cbufs,
+                                      unsigned num_layers,
                                       unsigned clear_buffers,
-                                      enum pipe_format cbuf_format,
                                       const union pipe_color_union *color,
                                       double depth, unsigned stencil,
                                       void *custom_blend, void *custom_dsa)
@@ -1016,8 +1201,8 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_stencil_ref sr = { { 0 } };
-   boolean int_format = util_format_is_pure_integer(cbuf_format);
-   assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
+
+   assert(ctx->has_layered || num_layers <= 1);
 
    blitter_set_running_flag(ctx);
    blitter_check_saved_vertex_states(ctx);
@@ -1027,10 +1212,8 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
    /* bind states */
    if (custom_blend) {
       pipe->bind_blend_state(pipe, custom_blend);
-   } else if (clear_buffers & PIPE_CLEAR_COLOR) {
-      pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
    } else {
-      pipe->bind_blend_state(pipe, ctx->blend[0]);
+      pipe->bind_blend_state(pipe, get_clear_blend_state(ctx, clear_buffers));
    }
 
    if (custom_dsa) {
@@ -1048,20 +1231,22 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
    sr.ref_value[0] = stencil & 0xff;
    pipe->set_stencil_ref(pipe, &sr);
 
-   if (util_format_is_pure_sint(cbuf_format)) {
-      pipe->bind_vertex_elements_state(pipe, ctx->velem_sint_state);
-   } else if (util_format_is_pure_uint(cbuf_format)) {
-      pipe->bind_vertex_elements_state(pipe, ctx->velem_uint_state);
-   } else {
-      pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
-   }
-   ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs, int_format));
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
+   bind_fs_write_all_cbufs(ctx);
    pipe->set_sample_mask(pipe, ~0);
 
-   blitter_set_common_draw_rect_state(ctx, FALSE);
    blitter_set_dst_dimensions(ctx, width, height);
-   blitter->draw_rectangle(blitter, 0, 0, width, height, (float) depth,
-                           UTIL_BLITTER_ATTRIB_COLOR, color);
+
+   if (num_layers > 1 && ctx->has_layered) {
+      blitter_set_common_draw_rect_state(ctx, FALSE, TRUE);
+      blitter_set_clear_color(ctx, color);
+      blitter_draw(ctx, 0, 0, width, height, depth, num_layers);
+   }
+   else {
+      blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
+      blitter->draw_rectangle(blitter, 0, 0, width, height, (float) depth,
+                              UTIL_BLITTER_ATTRIB_COLOR, color);
+   }
 
    blitter_restore_vertex_states(ctx);
    blitter_restore_fragment_states(ctx);
@@ -1070,15 +1255,13 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
 }
 
 void util_blitter_clear(struct blitter_context *blitter,
-                        unsigned width, unsigned height,
-                        unsigned num_cbufs,
+                        unsigned width, unsigned height, unsigned num_layers, /* all three are forwarded unchanged */
                         unsigned clear_buffers,
-                        enum pipe_format cbuf_format,
                         const union pipe_color_union *color,
                         double depth, unsigned stencil)
 { /* Thin wrapper: forwards every argument to util_blitter_clear_custom without custom state objects. */
-   util_blitter_clear_custom(blitter, width, height, num_cbufs,
-                             clear_buffers, cbuf_format, color, depth, stencil,
+   util_blitter_clear_custom(blitter, width, height, num_layers,
+                             clear_buffers, color, depth, stencil, /* the two NULLs below are custom_blend and custom_dsa */
                              NULL, NULL);
 }
 
@@ -1087,8 +1270,8 @@ void util_blitter_custom_clear_depth(struct blitter_context *blitter,
                                      double depth, void *custom_dsa)
 {
     static const union pipe_color_union color;
-    util_blitter_clear_custom(blitter, width, height, 0,
-                              0, PIPE_FORMAT_NONE, &color, depth, 0, NULL, custom_dsa);
+    util_blitter_clear_custom(blitter, width, height, 0, 0, &color, depth, 0,
+                              NULL, custom_dsa);
 }
 
 void util_blitter_default_dst_texture(struct pipe_surface *dst_templ,
@@ -1201,11 +1384,10 @@ static boolean is_blit_generic_supported(struct blitter_context *blitter,
 
 boolean util_blitter_is_copy_supported(struct blitter_context *blitter,
                                        const struct pipe_resource *dst,
-                                       const struct pipe_resource *src,
-                                       unsigned mask)
+                                       const struct pipe_resource *src) /* the caller-supplied mask parameter is gone */
 { /* Return what is_blit_generic_supported reports for dst and src, each checked with its own format. */
    return is_blit_generic_supported(blitter, dst, dst->format,
-                                    src, src->format, mask);
+                                    src, src->format, PIPE_MASK_RGBAZS); /* mask is now always PIPE_MASK_RGBAZS */
 }
 
 boolean util_blitter_is_blit_supported(struct blitter_context *blitter,
@@ -1223,8 +1405,7 @@ void util_blitter_copy_texture(struct blitter_context *blitter,
                                unsigned dstx, unsigned dsty, unsigned dstz,
                                struct pipe_resource *src,
                                unsigned src_level,
-                               const struct pipe_box *srcbox, unsigned mask,
-                               boolean copy_all_samples)
+                               const struct pipe_box *srcbox)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
@@ -1249,8 +1430,7 @@ void util_blitter_copy_texture(struct blitter_context *blitter,
    /* Copy. */
    util_blitter_blit_generic(blitter, dst_view, &dstbox,
                              src_view, srcbox, src->width0, src->height0,
-                             mask, PIPE_TEX_FILTER_NEAREST, NULL,
-                             copy_all_samples);
+                             PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
 
    pipe_surface_reference(&dst_view, NULL);
    pipe_sampler_view_reference(&src_view, NULL);
@@ -1263,14 +1443,14 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
                                const struct pipe_box *srcbox,
                                unsigned src_width0, unsigned src_height0,
                                unsigned mask, unsigned filter,
-                               const struct pipe_scissor_state *scissor,
-                               boolean copy_all_samples)
+                               const struct pipe_scissor_state *scissor)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_framebuffer_state fb_state;
    enum pipe_texture_target src_target = src->texture->target;
    unsigned src_samples = src->texture->nr_samples;
+   unsigned dst_samples = dst->texture->nr_samples;
    boolean has_depth, has_stencil, has_color;
    boolean blit_stencil, blit_depth, blit_color;
    void *sampler_state;
@@ -1295,6 +1475,12 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
       return;
    }
 
+   if (blit_stencil ||
+       (dstbox->width == abs(srcbox->width) &&
+        dstbox->height == abs(srcbox->height))) {
+      filter = PIPE_TEX_FILTER_NEAREST;
+   }
+
    /* Check whether the states are properly saved. */
    blitter_set_running_flag(ctx);
    blitter_check_saved_vertex_states(ctx);
@@ -1337,16 +1523,12 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
       pipe->bind_blend_state(pipe, ctx->blend[mask & PIPE_MASK_RGBA]);
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
       ctx->bind_fs_state(pipe,
-            blitter_get_fs_texfetch_col(ctx, src_target,
-                                        src_samples));
+            blitter_get_fs_texfetch_col(ctx, src->format, src_target,
+                                        src_samples, dst_samples, filter));
    }
 
    /* Set the linear filter only for scaled color non-MSAA blits. */
-   if (filter == PIPE_TEX_FILTER_LINEAR &&
-       !blit_depth && !blit_stencil &&
-       src_samples <= 1 &&
-       (dstbox->width != abs(srcbox->width) ||
-        dstbox->height != abs(srcbox->height))) {
+   if (filter == PIPE_TEX_FILTER_LINEAR) {
       if (src_target == PIPE_TEXTURE_RECT) {
          sampler_state = ctx->sampler_state_rect_linear;
       } else {
@@ -1374,8 +1556,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
       views[0] = src;
       views[1] = pipe->create_sampler_view(pipe, src->texture, &templ);
 
-      pipe->set_fragment_sampler_views(pipe, 2, views);
-      pipe->bind_fragment_sampler_states(pipe, 2, samplers);
+      pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 2, views);
+      pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 2, samplers);
 
       pipe_sampler_view_reference(&views[1], NULL);
    } else if (blit_stencil) {
@@ -1389,21 +1571,23 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
 
       view = pipe->create_sampler_view(pipe, src->texture, &templ);
 
-      pipe->set_fragment_sampler_views(pipe, 1, &view);
-      pipe->bind_fragment_sampler_states(pipe, 1, &sampler_state);
+      pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &view);
+      pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT,
+                                0, 1, &sampler_state);
 
       pipe_sampler_view_reference(&view, NULL);
    } else {
-      pipe->set_fragment_sampler_views(pipe, 1, &src);
-      pipe->bind_fragment_sampler_states(pipe, 1, &sampler_state);
+      pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &src);
+      pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT,
+                                0, 1, &sampler_state);
    }
 
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    if (scissor) {
-      pipe->set_scissor_state(pipe, scissor);
+      pipe->set_scissor_states(pipe, 0, 1, scissor);
    }
 
-   blitter_set_common_draw_rect_state(ctx, scissor != NULL);
+   blitter_set_common_draw_rect_state(ctx, scissor != NULL, FALSE);
    blitter_set_dst_dimensions(ctx, dst->width, dst->height);
 
    if ((src_target == PIPE_TEXTURE_1D ||
@@ -1437,9 +1621,31 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
                               UTIL_BLITTER_ATTRIB_TEXCOORD, &coord);
    } else {
       /* Draw the quad with the generic codepath. */
-      int z;
-      for (z = 0; z < dstbox->depth; z++) {
+      int dst_z;
+      for (dst_z = 0; dst_z < dstbox->depth; dst_z++) {
          struct pipe_surface *old;
+         float dst2src_scale = srcbox->depth / (float)dstbox->depth;
+
+         /* Scale Z properly if the blit is scaled.
+          *
+          * When downscaling, we want the coordinates centered, so that
+          * mipmapping works for 3D textures. For example, when generating
+          * a 4x4x4 level, this wouldn't average the pixels:
+          *
+          *   src Z:  0 1 2 3 4 5 6 7
+          *   dst Z:  0   1   2   3
+          *
+          * Because the pixels are not centered below the pixels of the higher
+          * level. Therefore, we want this:
+          *   src Z:  0 1 2 3 4 5 6 7
+          *   dst Z:   0   1   2   3
+          *
+          * dst_offset defines the offset needed for centering the pixels and
+          * it works with any scaling (not just 2x).
+          */
+         float dst_offset = ((srcbox->depth - 1) -
+                             (dstbox->depth - 1) * dst2src_scale) * 0.5;
+         float src_z = (dst_z + dst_offset) * dst2src_scale;
 
          /* Set framebuffer state. */
          if (blit_depth || blit_stencil) {
@@ -1450,41 +1656,41 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
          pipe->set_framebuffer_state(pipe, &fb_state);
 
          /* See if we need to blit a multisample or singlesample buffer. */
-         if (copy_all_samples &&
-             src_samples == dst->texture->nr_samples &&
-             dst->texture->nr_samples > 1) {
-            unsigned i, max_sample = MAX2(dst->texture->nr_samples, 1) - 1;
+         if (src_samples == dst_samples && dst_samples > 1) {
+            /* MSAA copy. */
+            unsigned i, max_sample = dst_samples - 1;
 
             for (i = 0; i <= max_sample; i++) {
                pipe->set_sample_mask(pipe, 1 << i);
                blitter_set_texcoords(ctx, src, src_width0, src_height0,
-                                     srcbox->z + z,
+                                     srcbox->z + src_z,
                                      i, srcbox->x, srcbox->y,
                                      srcbox->x + srcbox->width,
                                      srcbox->y + srcbox->height);
                blitter_draw(ctx, dstbox->x, dstbox->y,
                             dstbox->x + dstbox->width,
-                            dstbox->y + dstbox->height, 0);
+                            dstbox->y + dstbox->height, 0, 1);
             }
          } else {
+            /* Normal copy, MSAA upsampling, or MSAA resolve. */
             pipe->set_sample_mask(pipe, ~0);
             blitter_set_texcoords(ctx, src, src_width0, src_height0,
-                                  srcbox->z + z, 0,
+                                  srcbox->z + src_z, 0,
                                   srcbox->x, srcbox->y,
                                   srcbox->x + srcbox->width,
                                   srcbox->y + srcbox->height);
             blitter_draw(ctx, dstbox->x, dstbox->y,
                          dstbox->x + dstbox->width,
-                         dstbox->y + dstbox->height, 0);
+                         dstbox->y + dstbox->height, 0, 1);
          }
 
          /* Get the next surface or (if this is the last iteration)
           * just unreference the last one. */
          old = dst;
-         if (z < dstbox->depth-1) {
+         if (dst_z < dstbox->depth-1) {
             dst = ctx->base.get_next_surface_layer(ctx->base.pipe, dst);
          }
-         if (z) {
+         if (dst_z) {
             pipe_surface_reference(&old, NULL);
          }
       }
@@ -1495,7 +1701,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
    blitter_restore_textures(ctx);
    blitter_restore_fb_state(ctx);
    if (scissor) {
-      pipe->set_scissor_state(pipe, &ctx->base.saved_scissor);
+      pipe->set_scissor_states(pipe, 0, 1, &ctx->base.saved_scissor);
    }
    blitter_restore_render_cond(ctx);
    blitter_unset_running_flag(ctx);
@@ -1527,7 +1733,7 @@ util_blitter_blit(struct blitter_context *blitter,
    util_blitter_blit_generic(blitter, dst_view, &info->dst.box,
                              src_view, &info->src.box, src->width0, src->height0,
                              info->mask, info->filter,
-                             info->scissor_enable ? &info->scissor : NULL, TRUE);
+                             info->scissor_enable ? &info->scissor : NULL);
 
    pipe_surface_reference(&dst_view, NULL);
    pipe_sampler_view_reference(&src_view, NULL);
@@ -1558,7 +1764,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
    /* bind states */
    pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
-   ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1, FALSE));
+   bind_fs_write_one_cbuf(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
@@ -1570,7 +1776,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
    pipe->set_framebuffer_state(pipe, &fb_state);
    pipe->set_sample_mask(pipe, ~0);
 
-   blitter_set_common_draw_rect_state(ctx, FALSE);
+   blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
    blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
    blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
                            UTIL_BLITTER_ATTRIB_COLOR, color);
@@ -1626,7 +1832,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
       /* hmm that should be illegal probably, or make it a no-op somewhere */
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
 
-   ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0, FALSE));
+   bind_fs_empty(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
@@ -1638,7 +1844,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
    pipe->set_framebuffer_state(pipe, &fb_state);
    pipe->set_sample_mask(pipe, ~0);
 
-   blitter_set_common_draw_rect_state(ctx, FALSE);
+   blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
    blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
    blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height,
                            (float) depth,
@@ -1677,7 +1883,10 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
    pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
                                          ctx->blend[0]);
    pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
-   ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0, FALSE));
+   if (cbsurf)
+      bind_fs_write_one_cbuf(ctx);
+   else
+      bind_fs_empty(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
@@ -1695,7 +1904,7 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
    pipe->set_framebuffer_state(pipe, &fb_state);
    pipe->set_sample_mask(pipe, sample_mask);
 
-   blitter_set_common_draw_rect_state(ctx, FALSE);
+   blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
    blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height);
    blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth,
                            UTIL_BLITTER_ATTRIB_NONE, NULL);
@@ -1718,6 +1927,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_vertex_buffer vb;
    struct pipe_stream_output_target *so_target;
+   unsigned offsets[PIPE_MAX_SO_BUFFERS] = {0};
 
    if (srcx >= src->width0 ||
        dstx >= dst->width0) {
@@ -1753,13 +1963,13 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
 
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]);
-   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+   bind_vs_pos_only(ctx);
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state);
 
    so_target = pipe->create_stream_output_target(pipe, dst, dstx, size);
-   pipe->set_stream_output_targets(pipe, 1, &so_target, 0);
+   pipe->set_stream_output_targets(pipe, 1, &so_target, offsets);
 
    util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);
 
@@ -1779,6 +1989,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_vertex_buffer vb = {0};
    struct pipe_stream_output_target *so_target;
+   unsigned offsets[PIPE_MAX_SO_BUFFERS] = {0};
 
    assert(num_channels >= 1);
    assert(num_channels <= 4);
@@ -1812,13 +2023,13 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
    pipe->bind_vertex_elements_state(pipe,
                                     ctx->velem_state_readbuf[num_channels-1]);
-   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+   bind_vs_pos_only(ctx);
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state);
 
    so_target = pipe->create_stream_output_target(pipe, dst, offset, size);
-   pipe->set_stream_output_targets(pipe, 1, &so_target, 0);
+   pipe->set_stream_output_targets(pipe, 1, &so_target, offsets);
 
    util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);
 
@@ -1854,7 +2065,7 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
    pipe->bind_blend_state(pipe, custom_blend);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
-   ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1, FALSE));
+   bind_fs_write_one_cbuf(ctx);
    pipe->set_sample_mask(pipe, sample_mask);
 
    memset(&surf_tmpl, 0, sizeof(surf_tmpl));
@@ -1880,7 +2091,7 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
    fb_state.zsbuf = NULL;
    pipe->set_framebuffer_state(pipe, &fb_state);
 
-   blitter_set_common_draw_rect_state(ctx, FALSE);
+   blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
    blitter_set_dst_dimensions(ctx, src->width0, src->height0);
    blitter->draw_rectangle(blitter, 0, 0, src->width0, src->height0,
                            0, 0, NULL);
@@ -1917,7 +2128,7 @@ void util_blitter_custom_color(struct blitter_context *blitter,
    pipe->bind_blend_state(pipe, custom_blend ? custom_blend
                                              : ctx->blend[PIPE_MASK_RGBA]);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
-   ctx->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1, FALSE));
+   bind_fs_write_one_cbuf(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->set_sample_mask(pipe, (1ull << MAX2(1, dstsurf->texture->nr_samples)) - 1);
 
@@ -1930,7 +2141,7 @@ void util_blitter_custom_color(struct blitter_context *blitter,
    pipe->set_framebuffer_state(pipe, &fb_state);
    pipe->set_sample_mask(pipe, ~0);
 
-   blitter_set_common_draw_rect_state(ctx, FALSE);
+   blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
    blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
    blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height,
                            0, 0, NULL);