radv: Fix 3d blits.
[mesa.git] / src / amd / vulkan / radv_meta_blit.c
index bfbf880dce4ca8b338fd8142f69f2392df9cb1db..71cea3b0a316827e529712c74dd64b395e5b6439 100644 (file)
@@ -31,6 +31,13 @@ struct blit_region {
        VkExtent3D dest_extent;
 };
 
+static VkResult
+build_pipeline(struct radv_device *device,
+               VkImageAspectFlagBits aspect,
+               enum glsl_sampler_dim tex_dim,
+               unsigned fs_key,
+               VkPipeline *pipeline);
+
 static nir_shader *
 build_nir_vertex_shader(void)
 {
@@ -38,25 +45,64 @@ build_nir_vertex_shader(void)
        nir_builder b;
 
        nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-       b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");
+       b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
 
-       nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
-                                                  vec4, "a_pos");
-       pos_in->data.location = VERT_ATTRIB_GENERIC0;
        nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                    vec4, "gl_Position");
        pos_out->data.location = VARYING_SLOT_POS;
-       nir_copy_var(&b, pos_out, pos_in);
 
-       nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
-                                                      vec4, "a_tex_pos");
-       tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
        nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                        vec4, "v_tex_pos");
        tex_pos_out->data.location = VARYING_SLOT_VAR0;
        tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
-       nir_copy_var(&b, tex_pos_out, tex_pos_in);
 
+       nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+
+       nir_store_var(&b, pos_out, outvec, 0xf);
+
+       nir_intrinsic_instr *src_box = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+       src_box->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+       nir_intrinsic_set_base(src_box, 0);
+       nir_intrinsic_set_range(src_box, 16);
+       src_box->num_components = 4;
+       nir_ssa_dest_init(&src_box->instr, &src_box->dest, 4, 32, "src_box");
+       nir_builder_instr_insert(&b, &src_box->instr);
+
+       nir_intrinsic_instr *src0_z = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+       src0_z->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+       nir_intrinsic_set_base(src0_z, 16);
+       nir_intrinsic_set_range(src0_z, 4);
+       src0_z->num_components = 1;
+       nir_ssa_dest_init(&src0_z->instr, &src0_z->dest, 1, 32, "src0_z");
+       nir_builder_instr_insert(&b, &src0_z->instr);
+
+       nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_vertex_id_zero_base);
+       nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
+       nir_builder_instr_insert(&b, &vertex_id->instr);
+
+       /* vertex 0 - src0_x, src0_y, src0_z */
+       /* vertex 1 - src0_x, src1_y, src0_z */
+       /* vertex 2 - src1_x, src0_y, src0_z */
+       /* so channel 0 is vertex_id != 2 ? src0_x : src1_x
+          channel 1 is vertex_id != 1 ? src0_y : src1_y */
+
+       nir_ssa_def *c0cmp = nir_ine(&b, &vertex_id->dest.ssa,
+                                    nir_imm_int(&b, 2));
+       nir_ssa_def *c1cmp = nir_ine(&b, &vertex_id->dest.ssa,
+                                    nir_imm_int(&b, 1));
+
+       nir_ssa_def *comp[4];
+       comp[0] = nir_bcsel(&b, c0cmp,
+                           nir_channel(&b, &src_box->dest.ssa, 0),
+                           nir_channel(&b, &src_box->dest.ssa, 2));
+
+       comp[1] = nir_bcsel(&b, c1cmp,
+                           nir_channel(&b, &src_box->dest.ssa, 1),
+                           nir_channel(&b, &src_box->dest.ssa, 3));
+       comp[2] = &src0_z->dest.ssa;
+       comp[3] = nir_imm_float(&b, 1.0);
+       nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
+       nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
        return b.shader;
 }
 
@@ -70,7 +116,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
        nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 
        sprintf(shader_name, "meta_blit_fs.%d", tex_dim);
-       b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+       b.shader->info.name = ralloc_strdup(b.shader, shader_name);
 
        nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                                       vec4, "v_tex_pos");
@@ -82,7 +128,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
        unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
        nir_ssa_def *const tex_pos =
                nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
-                           (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+                           (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
 
        const struct glsl_type *sampler_type =
                glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
@@ -92,16 +138,20 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
        sampler->data.descriptor_set = 0;
        sampler->data.binding = 0;
 
-       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
+       nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
        tex->sampler_dim = tex_dim;
        tex->op = nir_texop_tex;
        tex->src[0].src_type = nir_tex_src_coord;
        tex->src[0].src = nir_src_for_ssa(tex_pos);
+       tex->src[1].src_type = nir_tex_src_texture_deref;
+       tex->src[1].src = nir_src_for_ssa(tex_deref);
+       tex->src[2].src_type = nir_tex_src_sampler_deref;
+       tex->src[2].src = nir_src_for_ssa(tex_deref);
        tex->dest_type = nir_type_float; /* TODO */
        tex->is_array = glsl_sampler_type_is_array(sampler_type);
        tex->coord_components = tex_pos->num_components;
-       tex->texture = nir_deref_var_create(tex, sampler);
-       tex->sampler = nir_deref_var_create(tex, sampler);
 
        nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
        nir_builder_instr_insert(&b, &tex->instr);
@@ -124,7 +174,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
        nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 
        sprintf(shader_name, "meta_blit_depth_fs.%d", tex_dim);
-       b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+       b.shader->info.name = ralloc_strdup(b.shader, shader_name);
 
        nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                                       vec4, "v_tex_pos");
@@ -136,7 +186,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
        unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
        nir_ssa_def *const tex_pos =
                nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
-                           (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+                           (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
 
        const struct glsl_type *sampler_type =
                glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
@@ -146,16 +196,20 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
        sampler->data.descriptor_set = 0;
        sampler->data.binding = 0;
 
-       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
+       nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
        tex->sampler_dim = tex_dim;
        tex->op = nir_texop_tex;
        tex->src[0].src_type = nir_tex_src_coord;
        tex->src[0].src = nir_src_for_ssa(tex_pos);
+       tex->src[1].src_type = nir_tex_src_texture_deref;
+       tex->src[1].src = nir_src_for_ssa(tex_deref);
+       tex->src[2].src_type = nir_tex_src_sampler_deref;
+       tex->src[2].src = nir_src_for_ssa(tex_deref);
        tex->dest_type = nir_type_float; /* TODO */
        tex->is_array = glsl_sampler_type_is_array(sampler_type);
        tex->coord_components = tex_pos->num_components;
-       tex->texture = nir_deref_var_create(tex, sampler);
-       tex->sampler = nir_deref_var_create(tex, sampler);
 
        nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
        nir_builder_instr_insert(&b, &tex->instr);
@@ -178,7 +232,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
        nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 
        sprintf(shader_name, "meta_blit_stencil_fs.%d", tex_dim);
-       b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+       b.shader->info.name = ralloc_strdup(b.shader, shader_name);
 
        nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                                       vec4, "v_tex_pos");
@@ -190,7 +244,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
        unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
        nir_ssa_def *const tex_pos =
                nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
-                           (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+                           (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
 
        const struct glsl_type *sampler_type =
                glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
@@ -200,16 +254,20 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
        sampler->data.descriptor_set = 0;
        sampler->data.binding = 0;
 
-       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
+       nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
        tex->sampler_dim = tex_dim;
        tex->op = nir_texop_tex;
        tex->src[0].src_type = nir_tex_src_coord;
        tex->src[0].src = nir_src_for_ssa(tex_pos);
+       tex->src[1].src_type = nir_tex_src_texture_deref;
+       tex->src[1].src = nir_src_for_ssa(tex_deref);
+       tex->src[2].src_type = nir_tex_src_sampler_deref;
+       tex->src[2].src = nir_src_for_ssa(tex_deref);
        tex->dest_type = nir_type_float; /* TODO */
        tex->is_array = glsl_sampler_type_is_array(sampler_type);
        tex->coord_components = tex_pos->num_components;
-       tex->texture = nir_deref_var_create(tex, sampler);
-       tex->sampler = nir_deref_var_create(tex, sampler);
 
        nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
        nir_builder_instr_insert(&b, &tex->instr);
@@ -222,115 +280,56 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
        return b.shader;
 }
 
+static enum glsl_sampler_dim
+translate_sampler_dim(VkImageType type) {
+       switch(type) {
+       case VK_IMAGE_TYPE_1D:
+               return GLSL_SAMPLER_DIM_1D;
+       case VK_IMAGE_TYPE_2D:
+               return GLSL_SAMPLER_DIM_2D;
+       case VK_IMAGE_TYPE_3D:
+               return GLSL_SAMPLER_DIM_3D;
+       default:
+               unreachable("Unhandled image type");
+       }
+}
+
 static void
 meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                struct radv_image *src_image,
                struct radv_image_view *src_iview,
-               VkOffset3D src_offset,
-               VkExtent3D src_extent,
+              VkImageLayout src_image_layout,
+               float src_offset_0[3],
+               float src_offset_1[3],
                struct radv_image *dest_image,
                struct radv_image_view *dest_iview,
-               VkOffset3D dest_offset,
-               VkExtent3D dest_extent,
-               VkFilter blit_filter)
+              VkImageLayout dest_image_layout,
+               VkOffset2D dest_offset_0,
+               VkOffset2D dest_offset_1,
+               VkRect2D dest_box,
+               VkSampler sampler)
 {
        struct radv_device *device = cmd_buffer->device;
-       unsigned offset = 0;
-       struct blit_vb_data {
-               float pos[2];
-               float tex_coord[3];
-       } vb_data[3];
-
-       assert(src_image->samples == dest_image->samples);
-       unsigned vb_size = 3 * sizeof(*vb_data);
-       vb_data[0] = (struct blit_vb_data) {
-               .pos = {
-                       dest_offset.x,
-                       dest_offset.y,
-               },
-               .tex_coord = {
-                       (float)(src_offset.x) / (float)src_iview->extent.width,
-                       (float)(src_offset.y) / (float)src_iview->extent.height,
-                       (float)src_offset.z / (float)src_iview->extent.depth,
-               },
-       };
-
-       vb_data[1] = (struct blit_vb_data) {
-               .pos = {
-                       dest_offset.x,
-                       dest_offset.y + dest_extent.height,
-               },
-               .tex_coord = {
-                       (float)src_offset.x / (float)src_iview->extent.width,
-                       (float)(src_offset.y + src_extent.height) /
-                       (float)src_iview->extent.height,
-                       (float)src_offset.z / (float)src_iview->extent.depth,
-               },
+       uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
+       uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
+       uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
+       uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
+       uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);
+
+       assert(src_image->info.samples == dest_image->info.samples);
+
+       float vertex_push_constants[5] = {
+               src_offset_0[0] / (float)src_width,
+               src_offset_0[1] / (float)src_height,
+               src_offset_1[0] / (float)src_width,
+               src_offset_1[1] / (float)src_height,
+               src_offset_0[2] / (float)src_depth,
        };
 
-       vb_data[2] = (struct blit_vb_data) {
-               .pos = {
-                       dest_offset.x + dest_extent.width,
-                       dest_offset.y,
-               },
-               .tex_coord = {
-                       (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width,
-                       (float)src_offset.y / (float)src_iview->extent.height,
-                       (float)src_offset.z / (float)src_iview->extent.depth,
-               },
-       };
-       radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
-
-       struct radv_buffer vertex_buffer = {
-               .device = device,
-               .size = vb_size,
-               .bo = cmd_buffer->upload.upload_bo,
-               .offset = offset,
-       };
-
-       radv_CmdBindVertexBuffers(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
-                                 (VkBuffer[]) {
-                                                 radv_buffer_to_handle(&vertex_buffer)
-                                                 },
-                                 (VkDeviceSize[]) {
-                                         0,
-                                                 });
-
-       VkSampler sampler;
-       radv_CreateSampler(radv_device_to_handle(device),
-                                &(VkSamplerCreateInfo) {
-                                        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
-                                                .magFilter = blit_filter,
-                                                .minFilter = blit_filter,
-                                                .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-                                                .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-                                                .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-                                                }, &cmd_buffer->pool->alloc, &sampler);
-
-       VkDescriptorSet set;
-       radv_temp_descriptor_set_create(cmd_buffer->device, cmd_buffer,
-                                               device->meta_state.blit.ds_layout,
-                                               &set);
-
-       radv_UpdateDescriptorSets(radv_device_to_handle(device),
-                                 1, /* writeCount */
-                                 (VkWriteDescriptorSet[]) {
-                                         {
-                                                 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-                                                         .dstSet = set,
-                                                         .dstBinding = 0,
-                                                         .dstArrayElement = 0,
-                                                         .descriptorCount = 1,
-                                                         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                                                         .pImageInfo = (VkDescriptorImageInfo[]) {
-                                                         {
-                                                                 .sampler = sampler,
-                                                                 .imageView = radv_image_view_to_handle(src_iview),
-                                                                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
-                                                         },
-                                                 }
-                                         }
-                                 }, 0, NULL);
+       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                             device->meta_state.blit.pipeline_layout,
+                             VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
+                             vertex_push_constants);
 
        VkFramebuffer fb;
        radv_CreateFramebuffer(radv_device_to_handle(device),
@@ -340,113 +339,162 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                                               .pAttachments = (VkImageView[]) {
                                               radv_image_view_to_handle(dest_iview),
                                       },
-                                      .width = dest_iview->extent.width,
-                                      .height = dest_iview->extent.height,
+                                      .width = dst_width,
+                                      .height = dst_height,
                                       .layers = 1,
                                }, &cmd_buffer->pool->alloc, &fb);
-       VkPipeline pipeline;
+       VkPipeline* pipeline = NULL;
+       unsigned fs_key = 0;
        switch (src_iview->aspect_mask) {
        case VK_IMAGE_ASPECT_COLOR_BIT: {
-               unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
-
-               radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-                                             &(VkRenderPassBeginInfo) {
-                                                     .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-                                                             .renderPass = device->meta_state.blit.render_pass[fs_key],
-                                                             .framebuffer = fb,
-                                                             .renderArea = {
-                                                             .offset = { dest_offset.x, dest_offset.y },
-                                                             .extent = { dest_extent.width, dest_extent.height },
-                                                     },
-                                                             .clearValueCount = 0,
-                                                                      .pClearValues = NULL,
-                                                      }, VK_SUBPASS_CONTENTS_INLINE);
+               unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
+               fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+
+               radv_cmd_buffer_begin_render_pass(cmd_buffer,
+                                                 &(VkRenderPassBeginInfo) {
+                                                       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+                                                               .renderPass = device->meta_state.blit.render_pass[fs_key][dst_layout],
+                                                               .framebuffer = fb,
+                                                               .renderArea = {
+                                                                       .offset = { dest_box.offset.x, dest_box.offset.y },
+                                                                       .extent = { dest_box.extent.width, dest_box.extent.height },
+                                                               },
+                                                       .clearValueCount = 0,
+                                                       .pClearValues = NULL,
+                                               });
                switch (src_image->type) {
                case VK_IMAGE_TYPE_1D:
-                       pipeline = device->meta_state.blit.pipeline_1d_src[fs_key];
+                       pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
                        break;
                case VK_IMAGE_TYPE_2D:
-                       pipeline = device->meta_state.blit.pipeline_2d_src[fs_key];
+                       pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
                        break;
                case VK_IMAGE_TYPE_3D:
-                       pipeline = device->meta_state.blit.pipeline_3d_src[fs_key];
+                       pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
                        break;
                default:
-                       unreachable(!"bad VkImageType");
+                       unreachable("bad VkImageType");
                }
                break;
        }
-       case VK_IMAGE_ASPECT_DEPTH_BIT:
-               radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-                                             &(VkRenderPassBeginInfo) {
-                                                     .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-                                                             .renderPass = device->meta_state.blit.depth_only_rp,
-                                                             .framebuffer = fb,
-                                                             .renderArea = {
-                                                             .offset = { dest_offset.x, dest_offset.y },
-                                                             .extent = { dest_extent.width, dest_extent.height },
-                                                     },
-                                                             .clearValueCount = 0,
-                                                                      .pClearValues = NULL,
-                                                      }, VK_SUBPASS_CONTENTS_INLINE);
+       case VK_IMAGE_ASPECT_DEPTH_BIT: {
+               enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
+               radv_cmd_buffer_begin_render_pass(cmd_buffer,
+                                                 &(VkRenderPassBeginInfo) {
+                                                       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+                                                       .renderPass = device->meta_state.blit.depth_only_rp[ds_layout],
+                                                       .framebuffer = fb,
+                                                       .renderArea = {
+                                                               .offset = { dest_box.offset.x, dest_box.offset.y },
+                                                               .extent = { dest_box.extent.width, dest_box.extent.height },
+                                                       },
+                                                       .clearValueCount = 0,
+                                                       .pClearValues = NULL,
+                                                 });
                switch (src_image->type) {
                case VK_IMAGE_TYPE_1D:
-                       pipeline = device->meta_state.blit.depth_only_1d_pipeline;
+                       pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
                        break;
                case VK_IMAGE_TYPE_2D:
-                       pipeline = device->meta_state.blit.depth_only_2d_pipeline;
+                       pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
                        break;
                case VK_IMAGE_TYPE_3D:
-                       pipeline = device->meta_state.blit.depth_only_3d_pipeline;
+                       pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
                        break;
                default:
-                       unreachable(!"bad VkImageType");
+                       unreachable("bad VkImageType");
                }
                break;
-       case VK_IMAGE_ASPECT_STENCIL_BIT:
-               radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-                                             &(VkRenderPassBeginInfo) {
-                                                     .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-                                                             .renderPass = device->meta_state.blit.stencil_only_rp,
-                                                             .framebuffer = fb,
-                                                             .renderArea = {
-                                                             .offset = { dest_offset.x, dest_offset.y },
-                                                             .extent = { dest_extent.width, dest_extent.height },
-                                                     },
-                                                             .clearValueCount = 0,
-                                                                      .pClearValues = NULL,
-                                                      }, VK_SUBPASS_CONTENTS_INLINE);
+       }
+       case VK_IMAGE_ASPECT_STENCIL_BIT: {
+               enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
+               radv_cmd_buffer_begin_render_pass(cmd_buffer,
+                                                 &(VkRenderPassBeginInfo) {
+                                                       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+                                                       .renderPass = device->meta_state.blit.stencil_only_rp[ds_layout],
+                                                       .framebuffer = fb,
+                                                       .renderArea = {
+                                                               .offset = { dest_box.offset.x, dest_box.offset.y },
+                                                               .extent = { dest_box.extent.width, dest_box.extent.height },
+                                                       },
+                                                       .clearValueCount = 0,
+                                                       .pClearValues = NULL,
+                                                 });
                switch (src_image->type) {
                case VK_IMAGE_TYPE_1D:
-                       pipeline = device->meta_state.blit.stencil_only_1d_pipeline;
+                       pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
                        break;
                case VK_IMAGE_TYPE_2D:
-                       pipeline = device->meta_state.blit.stencil_only_2d_pipeline;
+                       pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
                        break;
                case VK_IMAGE_TYPE_3D:
-                       pipeline = device->meta_state.blit.stencil_only_3d_pipeline;
+                       pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
                        break;
                default:
-                       unreachable(!"bad VkImageType");
+                       unreachable("bad VkImageType");
                }
                break;
+       }
        default:
-               unreachable(!"bad VkImageType");
+               unreachable("bad VkImageType");
        }
 
-       if (cmd_buffer->state.pipeline != radv_pipeline_from_handle(pipeline)) {
-               radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
-                                    VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+       radv_cmd_buffer_set_subpass(cmd_buffer,
+                                   &cmd_buffer->state.pass->subpasses[0]);
+
+       if (!*pipeline) {
+               VkResult ret = build_pipeline(device, src_iview->aspect_mask, translate_sampler_dim(src_image->type), fs_key, pipeline);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       goto fail_pipeline;
+               }
        }
 
-       radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
-                                  VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                  device->meta_state.blit.pipeline_layout, 0, 1,
-                                  &set, 0, NULL);
+       radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+                            VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+       radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                     device->meta_state.blit.pipeline_layout,
+                                     0, /* set */
+                                     1, /* descriptorWriteCount */
+                                     (VkWriteDescriptorSet[]) {
+                                             {
+                                                     .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                                                     .dstBinding = 0,
+                                                     .dstArrayElement = 0,
+                                                     .descriptorCount = 1,
+                                                     .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                                                     .pImageInfo = (VkDescriptorImageInfo[]) {
+                                                             {
+                                                                     .sampler = sampler,
+                                                                     .imageView = radv_image_view_to_handle(src_iview),
+                                                                     .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+                                                             },
+                                                     }
+                                             }
+                                     });
+
+       radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
+               .x = dest_offset_0.x,
+               .y = dest_offset_0.y,
+               .width = dest_offset_1.x - dest_offset_0.x,
+               .height = dest_offset_1.y - dest_offset_0.y,
+               .minDepth = 0.0f,
+               .maxDepth = 1.0f
+       });
+
+       radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
+               .offset = (VkOffset2D) { MIN2(dest_offset_0.x, dest_offset_1.x), MIN2(dest_offset_0.y, dest_offset_1.y) },
+               .extent = (VkExtent2D) {
+                       abs(dest_offset_1.x - dest_offset_0.x),
+                       abs(dest_offset_1.y - dest_offset_0.y)
+               },
+       });
 
        radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
 
-       radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+fail_pipeline:
+       radv_cmd_buffer_end_render_pass(cmd_buffer);
 
        /* At the point where we emit the draw call, all data from the
         * descriptor sets, etc. has been used.  We are free to delete it.
@@ -454,13 +502,30 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
        /* TODO: above comment is not valid for at least descriptor sets/pools,
         * as we may not free them till after execution finishes. Check others. */
 
-       radv_temp_descriptor_set_destroy(cmd_buffer->device, set);
-       radv_DestroySampler(radv_device_to_handle(device), sampler,
-                           &cmd_buffer->pool->alloc);
        radv_DestroyFramebuffer(radv_device_to_handle(device), fb,
                                &cmd_buffer->pool->alloc);
 }
 
+static bool /* Sorts each (start, end) pair into ascending order, in place. */
+flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1) /* Returns true when exactly one of the two pairs had to be swapped. */
+{
+       bool flip = false;
+       if (*src0 > *src1) { /* source range is reversed */
+               unsigned tmp = *src0;
+               *src0 = *src1;
+               *src1 = tmp;
+               flip = !flip;
+       }
+       /* A second swap toggles the flag back: two reversed ranges cancel out. */
+       if (*dst0 > *dst1) { /* destination range is reversed */
+               unsigned tmp = *dst0;
+               *dst0 = *dst1;
+               *dst1 = tmp;
+               flip = !flip;
+       }
+       return flip;
+}
+
 void radv_CmdBlitImage(
        VkCommandBuffer                             commandBuffer,
        VkImage                                     srcImage,
@@ -475,80 +540,123 @@ void radv_CmdBlitImage(
        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
        RADV_FROM_HANDLE(radv_image, src_image, srcImage);
        RADV_FROM_HANDLE(radv_image, dest_image, destImage);
+       struct radv_device *device = cmd_buffer->device;
        struct radv_meta_saved_state saved_state;
+       bool old_predicating;
+       VkSampler sampler;
 
        /* From the Vulkan 1.0 spec:
         *
         *    vkCmdBlitImage must not be used for multisampled source or
         *    destination images. Use vkCmdResolveImage for this purpose.
         */
-       assert(src_image->samples == 1);
-       assert(dest_image->samples == 1);
+       assert(src_image->info.samples == 1);
+       assert(dest_image->info.samples == 1);
 
-       radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+       radv_CreateSampler(radv_device_to_handle(device),
+                          &(VkSamplerCreateInfo) {
+                               .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+                               .magFilter = filter,
+                               .minFilter = filter,
+                               .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+                               .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+                               .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+                          }, &cmd_buffer->pool->alloc, &sampler);
+
+       radv_meta_save(&saved_state, cmd_buffer,
+                      RADV_META_SAVE_GRAPHICS_PIPELINE |
+                      RADV_META_SAVE_CONSTANTS |
+                      RADV_META_SAVE_DESCRIPTORS);
+
+       /* VK_EXT_conditional_rendering says that blit commands should not be
+        * affected by conditional rendering.
+        */
+       old_predicating = cmd_buffer->state.predicating;
+       cmd_buffer->state.predicating = false;
 
        for (unsigned r = 0; r < regionCount; r++) {
-               struct radv_image_view src_iview;
-               radv_image_view_init(&src_iview, cmd_buffer->device,
-                                    &(VkImageViewCreateInfo) {
-                                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-                                                    .image = srcImage,
-                                                    .viewType = radv_meta_get_view_type(src_image),
-                                                    .format = src_image->vk_format,
-                                                    .subresourceRange = {
-                                                    .aspectMask = pRegions[r].srcSubresource.aspectMask,
-                                                    .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
-                                                    .levelCount = 1,
-                                                    .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
-                                                    .layerCount = 1
-                                            },
-                                                    },
-                                    cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
-
-               if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
-                   pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
-                   pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
-                   pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
-                       radv_finishme("FINISHME: Allow flipping in blits");
-
-               const VkExtent3D dest_extent = {
-                       .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
-                       .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
-                       .depth = 1,
-               };
+               const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
+               const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
+
+               unsigned dst_start, dst_end;
+               if (dest_image->type == VK_IMAGE_TYPE_3D) {
+                       assert(dst_res->baseArrayLayer == 0);
+                       dst_start = pRegions[r].dstOffsets[0].z;
+                       dst_end = pRegions[r].dstOffsets[1].z;
+               } else {
+                       dst_start = dst_res->baseArrayLayer;
+                       dst_end = dst_start + dst_res->layerCount;
+               }
 
-               const VkExtent3D src_extent = {
-                       .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
-                       .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
-                       .depth = pRegions[r].srcOffsets[1].z - pRegions[r].srcOffsets[0].z,
-               };
+               unsigned src_start, src_end;
+               if (src_image->type == VK_IMAGE_TYPE_3D) {
+                       assert(src_res->baseArrayLayer == 0);
+                       src_start = pRegions[r].srcOffsets[0].z;
+                       src_end = pRegions[r].srcOffsets[1].z;
+               } else {
+                       src_start = src_res->baseArrayLayer;
+                       src_end = src_start + src_res->layerCount;
+               }
 
+               bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
+               float src_z_step = (float)(src_end - src_start) /
+                       (float)(dst_end - dst_start);
 
-               if (pRegions[r].srcSubresource.layerCount > 1)
-                       radv_finishme("FINISHME: copy multiple array layers");
+               /* There is no interpolation to the pixel center during
+                * rendering, so add the 0.5 offset ourselves here. */
+               float depth_center_offset = 0;
+               if (src_image->type == VK_IMAGE_TYPE_3D)
+                       depth_center_offset = 0.5 / (dst_end - dst_start) * (src_end - src_start);
 
-               struct radv_image_view dest_iview;
-               unsigned usage;
-               if (pRegions[r].dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
-                       usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
-               else
-                       usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+               if (flip_z) {
+                       src_start = src_end;
+                       src_z_step *= -1;
+                       depth_center_offset *= -1;
+               }
 
-               for (unsigned i = pRegions[r].dstOffsets[0].z; i < pRegions[r].dstOffsets[1].z; i++) {
+               unsigned src_x0 = pRegions[r].srcOffsets[0].x;
+               unsigned src_x1 = pRegions[r].srcOffsets[1].x;
+               unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
+               unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
+
+               unsigned src_y0 = pRegions[r].srcOffsets[0].y;
+               unsigned src_y1 = pRegions[r].srcOffsets[1].y;
+               unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
+               unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
+
+               VkRect2D dest_box;
+               dest_box.offset.x = MIN2(dst_x0, dst_x1);
+               dest_box.offset.y = MIN2(dst_y0, dst_y1);
+               dest_box.extent.width = dst_x1 - dst_x0;
+               dest_box.extent.height = dst_y1 - dst_y0;
+
+               const unsigned num_layers = dst_end - dst_start;
+               for (unsigned i = 0; i < num_layers; i++) {
+                       struct radv_image_view dest_iview, src_iview;
+
+                       const VkOffset2D dest_offset_0 = {
+                               .x = dst_x0,
+                               .y = dst_y0,
+                       };
+                       const VkOffset2D dest_offset_1 = {
+                               .x = dst_x1,
+                               .y = dst_y1,
+                       };
 
-                       const VkOffset3D dest_offset = {
-                               .x = pRegions[r].dstOffsets[0].x,
-                               .y = pRegions[r].dstOffsets[0].y,
-                               .z = i,
+                       float src_offset_0[3] = {
+                               src_x0,
+                               src_y0,
+                               src_start + i * src_z_step + depth_center_offset,
                        };
-                       VkOffset3D src_offset = {
-                               .x = pRegions[r].srcOffsets[0].x,
-                               .y = pRegions[r].srcOffsets[0].y,
-                               .z = i,
+                       float src_offset_1[3] = {
+                               src_x1,
+                               src_y1,
+                               src_start + i * src_z_step + depth_center_offset,
                        };
-                       const uint32_t dest_array_slice =
-                               radv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
-                                                         &dest_offset);
+                       const uint32_t dest_array_slice = dst_start + i;
+
+                       /* 3D images have just 1 layer */
+                       const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;
 
                        radv_image_view_init(&dest_iview, cmd_buffer->device,
                                             &(VkImageViewCreateInfo) {
@@ -557,372 +665,158 @@ void radv_CmdBlitImage(
                                                             .viewType = radv_meta_get_view_type(dest_image),
                                                             .format = dest_image->vk_format,
                                                             .subresourceRange = {
-                                                            .aspectMask = pRegions[r].dstSubresource.aspectMask,
-                                                            .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+                                                            .aspectMask = dst_res->aspectMask,
+                                                            .baseMipLevel = dst_res->mipLevel,
                                                             .levelCount = 1,
                                                             .baseArrayLayer = dest_array_slice,
                                                             .layerCount = 1
                                                     },
-                                            },
-                                            cmd_buffer, usage);
+                                            }, NULL);
+                       radv_image_view_init(&src_iview, cmd_buffer->device,
+                                            &(VkImageViewCreateInfo) {
+                                               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+                                                       .image = srcImage,
+                                                       .viewType = radv_meta_get_view_type(src_image),
+                                                       .format = src_image->vk_format,
+                                                       .subresourceRange = {
+                                                       .aspectMask = src_res->aspectMask,
+                                                       .baseMipLevel = src_res->mipLevel,
+                                                       .levelCount = 1,
+                                                       .baseArrayLayer = src_array_slice,
+                                                       .layerCount = 1
+                                               },
+                                       }, NULL);
                        meta_emit_blit(cmd_buffer,
-                                      src_image, &src_iview,
-                                      src_offset, src_extent,
-                                      dest_image, &dest_iview,
-                                      dest_offset, dest_extent,
-                                      filter);
+                                      src_image, &src_iview, srcImageLayout,
+                                      src_offset_0, src_offset_1,
+                                      dest_image, &dest_iview, destImageLayout,
+                                      dest_offset_0, dest_offset_1,
+                                      dest_box,
+                                      sampler);
                }
        }
 
+       /* Restore conditional rendering. */
+       cmd_buffer->state.predicating = old_predicating;
+
        radv_meta_restore(&saved_state, cmd_buffer);
+
+       radv_DestroySampler(radv_device_to_handle(device), sampler,
+                           &cmd_buffer->pool->alloc);
 }
 
 void
 radv_device_finish_meta_blit_state(struct radv_device *device)
 {
+       struct radv_meta_state *state = &device->meta_state;
+       /* Destroys the blit meta objects; handles are passed unconditionally (NOTE(review): presumably the destroy entry points accept VK_NULL_HANDLE - confirm). */
        for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
-               if (device->meta_state.blit.render_pass[i])
+               for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
                        radv_DestroyRenderPass(radv_device_to_handle(device),
-                                              device->meta_state.blit.render_pass[i],
-                                              &device->meta_state.alloc);
-               if (device->meta_state.blit.pipeline_1d_src[i])
-                       radv_DestroyPipeline(radv_device_to_handle(device),
-                                            device->meta_state.blit.pipeline_1d_src[i],
-                                            &device->meta_state.alloc);
-               if (device->meta_state.blit.pipeline_2d_src[i])
-                       radv_DestroyPipeline(radv_device_to_handle(device),
-                                            device->meta_state.blit.pipeline_2d_src[i],
-                                            &device->meta_state.alloc);
-               if (device->meta_state.blit.pipeline_3d_src[i])
-                       radv_DestroyPipeline(radv_device_to_handle(device),
-                                            device->meta_state.blit.pipeline_3d_src[i],
-                                            &device->meta_state.alloc);
-       }
-
-       if (device->meta_state.blit.depth_only_rp)
-               radv_DestroyRenderPass(radv_device_to_handle(device),
-                                      device->meta_state.blit.depth_only_rp,
-                                      &device->meta_state.alloc);
-       if (device->meta_state.blit.depth_only_1d_pipeline)
-               radv_DestroyPipeline(radv_device_to_handle(device),
-                                    device->meta_state.blit.depth_only_1d_pipeline,
-                                    &device->meta_state.alloc);
-       if (device->meta_state.blit.depth_only_2d_pipeline)
-               radv_DestroyPipeline(radv_device_to_handle(device),
-                                    device->meta_state.blit.depth_only_2d_pipeline,
-                                    &device->meta_state.alloc);
-       if (device->meta_state.blit.depth_only_3d_pipeline)
-               radv_DestroyPipeline(radv_device_to_handle(device),
-                                    device->meta_state.blit.depth_only_3d_pipeline,
-                                    &device->meta_state.alloc);
-       if (device->meta_state.blit.stencil_only_rp)
-               radv_DestroyRenderPass(radv_device_to_handle(device),
-                                      device->meta_state.blit.stencil_only_rp,
-                                      &device->meta_state.alloc);
-       if (device->meta_state.blit.stencil_only_1d_pipeline)
+                                              state->blit.render_pass[i][j],
+                                              &state->alloc);
+               }
                radv_DestroyPipeline(radv_device_to_handle(device),
-                                    device->meta_state.blit.stencil_only_1d_pipeline,
-                                    &device->meta_state.alloc);
-       if (device->meta_state.blit.stencil_only_2d_pipeline)
+                                    state->blit.pipeline_1d_src[i],
+                                    &state->alloc);
                radv_DestroyPipeline(radv_device_to_handle(device),
-                                    device->meta_state.blit.stencil_only_2d_pipeline,
-                                    &device->meta_state.alloc);
-       if (device->meta_state.blit.stencil_only_3d_pipeline)
+                                    state->blit.pipeline_2d_src[i],
+                                    &state->alloc);
                radv_DestroyPipeline(radv_device_to_handle(device),
-                                    device->meta_state.blit.stencil_only_3d_pipeline,
-                                    &device->meta_state.alloc);
-       if (device->meta_state.blit.pipeline_layout)
-               radv_DestroyPipelineLayout(radv_device_to_handle(device),
-                                          device->meta_state.blit.pipeline_layout,
-                                          &device->meta_state.alloc);
-       if (device->meta_state.blit.ds_layout)
-               radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
-                                               device->meta_state.blit.ds_layout,
-                                               &device->meta_state.alloc);
-}
-
-static VkFormat pipeline_formats[] = {
-   VK_FORMAT_R8G8B8A8_UNORM,
-   VK_FORMAT_R8G8B8A8_UINT,
-   VK_FORMAT_R8G8B8A8_SINT,
-   VK_FORMAT_R16G16B16A16_UNORM,
-   VK_FORMAT_R16G16B16A16_SNORM,
-   VK_FORMAT_R16G16B16A16_UINT,
-   VK_FORMAT_R16G16B16A16_SINT,
-   VK_FORMAT_R32_SFLOAT,
-   VK_FORMAT_R32G32_SFLOAT,
-   VK_FORMAT_R32G32B32A32_SFLOAT
-};
-
-static VkResult
-radv_device_init_meta_blit_color(struct radv_device *device,
-                                struct radv_shader_module *vs)
-{
-       struct radv_shader_module fs_1d = {0}, fs_2d = {0}, fs_3d = {0};
-       VkResult result;
-
-       fs_1d.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D);
-       fs_2d.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D);
-       fs_3d.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D);
-
-       for (unsigned i = 0; i < ARRAY_SIZE(pipeline_formats); ++i) {
-               unsigned key = radv_format_meta_fs_key(pipeline_formats[i]);
-               result = radv_CreateRenderPass(radv_device_to_handle(device),
-                                       &(VkRenderPassCreateInfo) {
-                                               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-                                                       .attachmentCount = 1,
-                                                       .pAttachments = &(VkAttachmentDescription) {
-                                                       .format = pipeline_formats[i],
-                                                       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-                                                       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-                                                       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-                                                       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-                                               },
-                                                       .subpassCount = 1,
-                                                                       .pSubpasses = &(VkSubpassDescription) {
-                                                       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                                       .inputAttachmentCount = 0,
-                                                       .colorAttachmentCount = 1,
-                                                       .pColorAttachments = &(VkAttachmentReference) {
-                                                               .attachment = 0,
-                                                               .layout = VK_IMAGE_LAYOUT_GENERAL,
-                                                       },
-                                                       .pResolveAttachments = NULL,
-                                                       .pDepthStencilAttachment = &(VkAttachmentReference) {
-                                                               .attachment = VK_ATTACHMENT_UNUSED,
-                                                               .layout = VK_IMAGE_LAYOUT_GENERAL,
-                                                       },
-                                                       .preserveAttachmentCount = 1,
-                                                       .pPreserveAttachments = (uint32_t[]) { 0 },
-                                               },
-                                               .dependencyCount = 0,
-                                       }, &device->meta_state.alloc, &device->meta_state.blit.render_pass[key]);
-               if (result != VK_SUCCESS)
-                       goto fail;
-
-               VkPipelineVertexInputStateCreateInfo vi_create_info = {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-                       .vertexBindingDescriptionCount = 1,
-                       .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-                               {
-                                       .binding = 0,
-                                       .stride = 5 * sizeof(float),
-                                       .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-                               },
-                       },
-                       .vertexAttributeDescriptionCount = 2,
-                       .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-                               {
-                                       /* Position */
-                                       .location = 0,
-                                       .binding = 0,
-                                       .format = VK_FORMAT_R32G32_SFLOAT,
-                                       .offset = 0
-                               },
-                               {
-                                       /* Texture Coordinate */
-                                       .location = 1,
-                                       .binding = 0,
-                                       .format = VK_FORMAT_R32G32B32_SFLOAT,
-                                       .offset = 8
-                               }
-                       }
-               };
-
-               VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
-                       {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-                               .stage = VK_SHADER_STAGE_VERTEX_BIT,
-                               .module = radv_shader_module_to_handle(vs),
-                               .pName = "main",
-                               .pSpecializationInfo = NULL
-                       }, {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-                               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-                               .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
-                               .pName = "main",
-                               .pSpecializationInfo = NULL
-                       },
-               };
-
-               const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
-                       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-                       .stageCount = ARRAY_SIZE(pipeline_shader_stages),
-                       .pStages = pipeline_shader_stages,
-                       .pVertexInputState = &vi_create_info,
-                       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
-                               .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
-                               .primitiveRestartEnable = false,
-                       },
-                       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-                               .viewportCount = 0,
-                               .scissorCount = 0,
-                       },
-                       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-                               .rasterizerDiscardEnable = false,
-                               .polygonMode = VK_POLYGON_MODE_FILL,
-                               .cullMode = VK_CULL_MODE_NONE,
-                               .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
-                       },
-                       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-                               .rasterizationSamples = 1,
-                               .sampleShadingEnable = false,
-                               .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
-                       },
-                       .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-                               .attachmentCount = 1,
-                               .pAttachments = (VkPipelineColorBlendAttachmentState []) {
-                                       { .colorWriteMask =
-                                       VK_COLOR_COMPONENT_A_BIT |
-                                       VK_COLOR_COMPONENT_R_BIT |
-                                       VK_COLOR_COMPONENT_G_BIT |
-                                       VK_COLOR_COMPONENT_B_BIT },
-                               }
-                       },
-                       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
-                               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-                               .dynamicStateCount = 2,
-                               .pDynamicStates = (VkDynamicState[]) {
-                                       VK_DYNAMIC_STATE_LINE_WIDTH,
-                                       VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-                               },
-                       },
-                       .flags = 0,
-                       .layout = device->meta_state.blit.pipeline_layout,
-                       .renderPass = device->meta_state.blit.render_pass[key],
-                       .subpass = 0,
-               };
-
-               const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
-                       .use_rectlist = true
-               };
-
-               pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_1d);
-               result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                               radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                               &vk_pipeline_info, &radv_pipeline_info,
-                                               &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src[key]);
-               if (result != VK_SUCCESS)
-                       goto fail;
-
-               pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_2d);
-               result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                               radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                               &vk_pipeline_info, &radv_pipeline_info,
-                                               &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src[key]);
-               if (result != VK_SUCCESS)
-                       goto fail;
-
-               pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_3d);
-               result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                               radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                               &vk_pipeline_info, &radv_pipeline_info,
-                                               &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src[key]);
-               if (result != VK_SUCCESS)
-                       goto fail;
+                                    state->blit.pipeline_3d_src[i],
+                                    &state->alloc);
+       }
 
+       for (enum radv_blit_ds_layout i = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; i < RADV_BLIT_DS_LAYOUT_COUNT; i++) { /* one depth-only and one stencil-only render pass per layout value */
+               radv_DestroyRenderPass(radv_device_to_handle(device),
+                                      state->blit.depth_only_rp[i], &state->alloc);
+               radv_DestroyRenderPass(radv_device_to_handle(device),
+                                      state->blit.stencil_only_rp[i], &state->alloc);
        }
 
-       result = VK_SUCCESS;
-fail:
-       ralloc_free(fs_1d.nir);
-       ralloc_free(fs_2d.nir);
-       ralloc_free(fs_3d.nir);
-       return result;
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->blit.depth_only_1d_pipeline, &state->alloc);
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->blit.depth_only_2d_pipeline, &state->alloc);
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->blit.depth_only_3d_pipeline, &state->alloc);
+
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->blit.stencil_only_1d_pipeline,
+                            &state->alloc);
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->blit.stencil_only_2d_pipeline,
+                            &state->alloc);
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->blit.stencil_only_3d_pipeline,
+                            &state->alloc);
+
+       /* The pipeline layout and descriptor set layout go last, after every pipeline above. */
+       radv_DestroyPipelineLayout(radv_device_to_handle(device),
+                                  state->blit.pipeline_layout, &state->alloc);
+       radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+                                       state->blit.ds_layout, &state->alloc);
 }
 
 static VkResult
-radv_device_init_meta_blit_depth(struct radv_device *device,
-                                struct radv_shader_module *vs)
+build_pipeline(struct radv_device *device,
+               VkImageAspectFlagBits aspect,
+               enum glsl_sampler_dim tex_dim,
+               unsigned fs_key,
+               VkPipeline *pipeline)
 {
-       struct radv_shader_module fs_1d = {0}, fs_2d = {0}, fs_3d = {0};
-       VkResult result;
+       VkResult result = VK_SUCCESS;
 
-       fs_1d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_1D);
-       fs_2d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_2D);
-       fs_3d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_3D);
+       mtx_lock(&device->meta_state.mtx); /* held while *pipeline is checked and created; released on both return paths */
 
-       result = radv_CreateRenderPass(radv_device_to_handle(device),
-                                      &(VkRenderPassCreateInfo) {
-                                              .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-                                                      .attachmentCount = 1,
-                                                      .pAttachments = &(VkAttachmentDescription) {
-                                                      .format = 0,
-                                                      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-                                                      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-                                                      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-                                                      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-                                              },
-                                                      .subpassCount = 1,
-                                               .pSubpasses = &(VkSubpassDescription) {
-                                                      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                                      .inputAttachmentCount = 0,
-                                                      .colorAttachmentCount = 0,
-                                                      .pColorAttachments = NULL,
-                                                      .pResolveAttachments = NULL,
-                                                      .pDepthStencilAttachment = &(VkAttachmentReference) {
-                                                              .attachment = 0,
-                                                              .layout = VK_IMAGE_LAYOUT_GENERAL,
-                                                      },
-                                                      .preserveAttachmentCount = 1,
-                                                      .pPreserveAttachments = (uint32_t[]) { 0 },
-                                              },
-                                               .dependencyCount = 0,
-                                        }, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp);
-       if (result != VK_SUCCESS)
-               goto fail;
+       if (*pipeline) { /* handle already set: nothing to build */
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+       /* Build the vertex shader, then pick the fragment shader and render pass that match the requested aspect. */
+       struct radv_shader_module fs = {0};
+       struct radv_shader_module vs = {.nir = build_nir_vertex_shader()};
+       VkRenderPass rp;
 
+       switch(aspect) {
+       case VK_IMAGE_ASPECT_COLOR_BIT:
+               fs.nir = build_nir_copy_fragment_shader(tex_dim);
+               rp = device->meta_state.blit.render_pass[fs_key][0]; /* NOTE(review): layout index 0 is always used here and below; presumably the per-layout passes are pipeline-compatible - confirm */
+               break;
+       case VK_IMAGE_ASPECT_DEPTH_BIT:
+               fs.nir = build_nir_copy_fragment_shader_depth(tex_dim);
+               rp = device->meta_state.blit.depth_only_rp[0];
+               break;
+       case VK_IMAGE_ASPECT_STENCIL_BIT:
+               fs.nir = build_nir_copy_fragment_shader_stencil(tex_dim);
+               rp = device->meta_state.blit.stencil_only_rp[0];
+               break;
+       default:
+               unreachable("Unhandled aspect");
+       }
        VkPipelineVertexInputStateCreateInfo vi_create_info = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-               .vertexBindingDescriptionCount = 1,
-               .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-                       {
-                               .binding = 0,
-                               .stride = 5 * sizeof(float),
-                               .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-                       },
-               },
-               .vertexAttributeDescriptionCount = 2,
-               .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-                       {
-                               /* Position */
-                               .location = 0,
-                               .binding = 0,
-                               .format = VK_FORMAT_R32G32_SFLOAT,
-                               .offset = 0
-                       },
-                       {
-                               /* Texture Coordinate */
-                               .location = 1,
-                               .binding = 0,
-                               .format = VK_FORMAT_R32G32B32_SFLOAT,
-                               .offset = 8
-                       }
-               }
+               .vertexBindingDescriptionCount = 0,
+               .vertexAttributeDescriptionCount = 0,
        };
 
        VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
                {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                        .stage = VK_SHADER_STAGE_VERTEX_BIT,
-                       .module = radv_shader_module_to_handle(vs),
+                       .module = radv_shader_module_to_handle(&vs),
                        .pName = "main",
                        .pSpecializationInfo = NULL
                }, {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                        .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-                       .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+                       .module = radv_shader_module_to_handle(&fs),
                        .pName = "main",
                        .pSpecializationInfo = NULL
                },
        };
 
-       const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+       VkGraphicsPipelineCreateInfo vk_pipeline_info = {
                .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
                .stageCount = ARRAY_SIZE(pipeline_shader_stages),
                .pStages = pipeline_shader_stages,
@@ -934,8 +828,8 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
                },
                .pViewportState = &(VkPipelineViewportStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-                       .viewportCount = 0,
-                       .scissorCount = 0,
+                       .viewportCount = 1,
+                       .scissorCount = 1,
                },
                .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
@@ -950,276 +844,344 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
                        .sampleShadingEnable = false,
                        .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
                },
-               .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-                       .attachmentCount = 0,
-                       .pAttachments = NULL,
-               },
-               .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
-                       .depthTestEnable = true,
-                       .depthWriteEnable = true,
-                       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
-               },
                .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-                       .dynamicStateCount = 7,
+                       .dynamicStateCount = 4,
                        .pDynamicStates = (VkDynamicState[]) {
+                               VK_DYNAMIC_STATE_VIEWPORT,
+                               VK_DYNAMIC_STATE_SCISSOR,
                                VK_DYNAMIC_STATE_LINE_WIDTH,
-                               VK_DYNAMIC_STATE_DEPTH_BIAS,
                                VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-                               VK_DYNAMIC_STATE_DEPTH_BOUNDS,
-                               VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
-                               VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
-                               VK_DYNAMIC_STATE_STENCIL_REFERENCE,
                        },
                },
                .flags = 0,
                .layout = device->meta_state.blit.pipeline_layout,
-               .renderPass = device->meta_state.blit.depth_only_rp,
+               .renderPass = rp,
                .subpass = 0,
        };
 
+       VkPipelineColorBlendStateCreateInfo color_blend_info = { /* color blits: one attachment with all four channels written */
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+               .attachmentCount = 1,
+               .pAttachments = (VkPipelineColorBlendAttachmentState []) {
+                       {
+                               .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
+                                                 VK_COLOR_COMPONENT_R_BIT |
+                                                 VK_COLOR_COMPONENT_G_BIT |
+                                                 VK_COLOR_COMPONENT_B_BIT },
+                       }
+               };
+       /* Depth blits: depth test and write enabled with an always-pass compare. */
+       VkPipelineDepthStencilStateCreateInfo depth_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+               .depthTestEnable = true,
+               .depthWriteEnable = true,
+               .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+       };
+       /* Stencil blits: depth test/write off; both faces use an always-pass compare and REPLACE for every op. */
+       VkPipelineDepthStencilStateCreateInfo stencil_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+               .depthTestEnable = false,
+               .depthWriteEnable = false,
+               .stencilTestEnable = true,
+               .front = {
+                       .failOp = VK_STENCIL_OP_REPLACE,
+                       .passOp = VK_STENCIL_OP_REPLACE,
+                       .depthFailOp = VK_STENCIL_OP_REPLACE,
+                       .compareOp = VK_COMPARE_OP_ALWAYS,
+                       .compareMask = 0xff,
+                       .writeMask = 0xff,
+                       .reference = 0
+               },
+               .back = {
+                       .failOp = VK_STENCIL_OP_REPLACE,
+                       .passOp = VK_STENCIL_OP_REPLACE,
+                       .depthFailOp = VK_STENCIL_OP_REPLACE,
+                       .compareOp = VK_COMPARE_OP_ALWAYS,
+                       .compareMask = 0xff,
+                       .writeMask = 0xff,
+                       .reference = 0
+               },
+               .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+       };
+       /* Attach only the fixed-function state block that matches the aspect being written. */
+       switch(aspect) {
+       case VK_IMAGE_ASPECT_COLOR_BIT:
+               vk_pipeline_info.pColorBlendState = &color_blend_info;
+               break;
+       case VK_IMAGE_ASPECT_DEPTH_BIT:
+               vk_pipeline_info.pDepthStencilState = &depth_info;
+               break;
+       case VK_IMAGE_ASPECT_STENCIL_BIT:
+               vk_pipeline_info.pDepthStencilState = &stencil_info;
+               break;
+       default:
+               unreachable("Unhandled aspect");
+       }
+
        const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
                .use_rectlist = true
        };
 
-       pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_1d);
        result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                              &vk_pipeline_info, &radv_pipeline_info,
-                                              &device->meta_state.alloc, &device->meta_state.blit.depth_only_1d_pipeline);
+                                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
+                                              &vk_pipeline_info, &radv_pipeline_info,
+                                              &device->meta_state.alloc, pipeline);
+       ralloc_free(vs.nir); /* both NIR shaders are freed whether or not pipeline creation succeeded */
+       ralloc_free(fs.nir);
+       mtx_unlock(&device->meta_state.mtx);
+       return result;
+}
+
+static VkResult
+radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
+{
+       VkResult result;
+       /* For each exemplar format, create one color render pass per destination layout; unless on_demand, also build the 1D/2D/3D pipelines. */
+       for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
+               unsigned key = radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]);
+               for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
+                       VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
+                       result = radv_CreateRenderPass(radv_device_to_handle(device),
+                                               &(VkRenderPassCreateInfo) {
+                                                       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+                                                               .attachmentCount = 1,
+                                                               .pAttachments = &(VkAttachmentDescription) {
+                                                               .format = radv_fs_key_format_exemplars[i],
+                                                               .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+                                                               .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+                                                               .initialLayout = layout,
+                                                               .finalLayout = layout,
+                                                       },
+                                                               .subpassCount = 1,
+                                                                               .pSubpasses = &(VkSubpassDescription) {
+                                                               .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                                               .inputAttachmentCount = 0,
+                                                               .colorAttachmentCount = 1,
+                                                               .pColorAttachments = &(VkAttachmentReference) {
+                                                                       .attachment = 0,
+                                                                       .layout = layout,
+                                                               },
+                                                               .pResolveAttachments = NULL,
+                                                               .pDepthStencilAttachment = &(VkAttachmentReference) {
+                                                                       .attachment = VK_ATTACHMENT_UNUSED,
+                                                                       .layout = VK_IMAGE_LAYOUT_GENERAL,
+                                                               },
+                                                               .preserveAttachmentCount = 0,
+                                                               .pPreserveAttachments = NULL,
+                                                       },
+                                                       .dependencyCount = 2,
+                                                       .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       },
+                                               }, &device->meta_state.alloc, &device->meta_state.blit.render_pass[key][j]);
+                       if (result != VK_SUCCESS)
+                               goto fail;
+               }
+               /* on_demand: skip building the pipelines for this key here; only the render passes are created. */
+               if (on_demand)
+                       continue;
+
+               result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key, &device->meta_state.blit.pipeline_1d_src[key]);
+               if (result != VK_SUCCESS)
+                       goto fail;
+
+               result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_2D, key, &device->meta_state.blit.pipeline_2d_src[key]);
+               if (result != VK_SUCCESS)
+                       goto fail;
+
+               result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_3D, key, &device->meta_state.blit.pipeline_3d_src[key]);
+               if (result != VK_SUCCESS)
+                       goto fail;
+
+       }
+
+       result = VK_SUCCESS;
+fail:
+       return result;
+}
+
+static VkResult
+radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand)
+{
+       VkResult result;
+       /* Create one depth-only render pass (D32_SFLOAT attachment) per blit depth/stencil layout. */
+       for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+               VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+               result = radv_CreateRenderPass(radv_device_to_handle(device),
+                                              &(VkRenderPassCreateInfo) {
+                                                      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+                                                      .attachmentCount = 1,
+                                                      .pAttachments = &(VkAttachmentDescription) {
+                                                              .format = VK_FORMAT_D32_SFLOAT,
+                                                              .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+                                                              .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+                                                              .initialLayout = layout,
+                                                              .finalLayout = layout,
+                                                      },
+                                                      .subpassCount = 1,
+                                                      .pSubpasses = &(VkSubpassDescription) {
+                                                              .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                                              .inputAttachmentCount = 0,
+                                                              .colorAttachmentCount = 0,
+                                                              .pColorAttachments = NULL,
+                                                              .pResolveAttachments = NULL,
+                                                              .pDepthStencilAttachment = &(VkAttachmentReference) {
+                                                                      .attachment = 0,
+                                                                      .layout = layout,
+                                                               },
+                                                              .preserveAttachmentCount = 0,
+                                                              .pPreserveAttachments = NULL,
+                                                       },
+                                                       .dependencyCount = 2,
+                                                       .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       },
+                                               }, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
+               if (result != VK_SUCCESS)
+                       goto fail;
+       }
+       /* With on_demand set, stop after the render passes; otherwise build the 1D/2D/3D depth pipelines now. */
+       if (on_demand)
+               return VK_SUCCESS;
+
+       result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.depth_only_1d_pipeline);
        if (result != VK_SUCCESS)
                goto fail;
 
-       pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_2d);
-       result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                              &vk_pipeline_info, &radv_pipeline_info,
-                                              &device->meta_state.alloc, &device->meta_state.blit.depth_only_2d_pipeline);
+       result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.depth_only_2d_pipeline);
        if (result != VK_SUCCESS)
                goto fail;
 
-       pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_3d);
-       result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                              &vk_pipeline_info, &radv_pipeline_info,
-                                              &device->meta_state.alloc, &device->meta_state.blit.depth_only_3d_pipeline);
+       result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.depth_only_3d_pipeline);
        if (result != VK_SUCCESS)
                goto fail;
 
 fail:
-       ralloc_free(fs_1d.nir);
-       ralloc_free(fs_2d.nir);
-       ralloc_free(fs_3d.nir);
        return result;
 }
 
 static VkResult
-radv_device_init_meta_blit_stencil(struct radv_device *device,
-                                  struct radv_shader_module *vs)
+radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand)
 {
-       struct radv_shader_module fs_1d = {0}, fs_2d = {0}, fs_3d = {0};
        VkResult result;
 
-       fs_1d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_1D);
-       fs_2d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_2D);
-       fs_3d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_3D);
-
-       result = radv_CreateRenderPass(radv_device_to_handle(device),
-                                      &(VkRenderPassCreateInfo) {
-                                              .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+       for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+               VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+               result = radv_CreateRenderPass(radv_device_to_handle(device),
+                                              &(VkRenderPassCreateInfo) {
+                                                      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
                                                       .attachmentCount = 1,
                                                       .pAttachments = &(VkAttachmentDescription) {
-                                                      .format = 0,
-                                                      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-                                                      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-                                                      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-                                                      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-                                              },
+                                                              .format = VK_FORMAT_S8_UINT,
+                                                              .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+                                                              .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+                                                              .initialLayout = layout,
+                                                              .finalLayout = layout,
+                                                      },
                                                       .subpassCount = 1,
-                                               .pSubpasses = &(VkSubpassDescription) {
-                                                      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                                      .inputAttachmentCount = 0,
-                                                      .colorAttachmentCount = 0,
-                                                      .pColorAttachments = NULL,
-                                                      .pResolveAttachments = NULL,
-                                                      .pDepthStencilAttachment = &(VkAttachmentReference) {
-                                                              .attachment = 0,
-                                                              .layout = VK_IMAGE_LAYOUT_GENERAL,
+                                                      .pSubpasses = &(VkSubpassDescription) {
+                                                              .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                                              .inputAttachmentCount = 0,
+                                                              .colorAttachmentCount = 0,
+                                                              .pColorAttachments = NULL,
+                                                              .pResolveAttachments = NULL,
+                                                              .pDepthStencilAttachment = &(VkAttachmentReference) {
+                                                                      .attachment = 0,
+                                                                      .layout = layout,
+                                                              },
+                                                              .preserveAttachmentCount = 0,
+                                                              .pPreserveAttachments = NULL,
                                                       },
-                                                      .preserveAttachmentCount = 1,
-                                                      .pPreserveAttachments = (uint32_t[]) { 0 },
-                                              },
-                                               .dependencyCount = 0,
-                                        }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp);
+                                                      .dependencyCount = 2,
+                                                      .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       },
+
+                                        }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
+       }
        if (result != VK_SUCCESS)
                goto fail;
 
-       VkPipelineVertexInputStateCreateInfo vi_create_info = {
-               .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-               .vertexBindingDescriptionCount = 1,
-               .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-                       {
-                               .binding = 0,
-                               .stride = 5 * sizeof(float),
-                               .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-                       },
-               },
-               .vertexAttributeDescriptionCount = 2,
-               .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-                       {
-                               /* Position */
-                               .location = 0,
-                               .binding = 0,
-                               .format = VK_FORMAT_R32G32_SFLOAT,
-                               .offset = 0
-                       },
-                       {
-                               /* Texture Coordinate */
-                               .location = 1,
-                               .binding = 0,
-                               .format = VK_FORMAT_R32G32B32_SFLOAT,
-                               .offset = 8
-                       }
-               }
-       };
-
-       VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
-               {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-                       .stage = VK_SHADER_STAGE_VERTEX_BIT,
-                       .module = radv_shader_module_to_handle(vs),
-                       .pName = "main",
-                       .pSpecializationInfo = NULL
-               }, {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-                       .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-                       .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
-                       .pName = "main",
-                       .pSpecializationInfo = NULL
-               },
-       };
+       if (on_demand)
+               return VK_SUCCESS;
 
-       const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
-               .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-               .stageCount = ARRAY_SIZE(pipeline_shader_stages),
-               .pStages = pipeline_shader_stages,
-               .pVertexInputState = &vi_create_info,
-               .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
-                       .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
-                       .primitiveRestartEnable = false,
-               },
-               .pViewportState = &(VkPipelineViewportStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-                       .viewportCount = 0,
-                       .scissorCount = 0,
-               },
-               .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-                       .rasterizerDiscardEnable = false,
-                       .polygonMode = VK_POLYGON_MODE_FILL,
-                       .cullMode = VK_CULL_MODE_NONE,
-                       .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
-               },
-               .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-                       .rasterizationSamples = 1,
-                       .sampleShadingEnable = false,
-                       .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
-               },
-               .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-                       .attachmentCount = 0,
-                       .pAttachments = NULL,
-               },
-               .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
-                       .depthTestEnable = false,
-                       .depthWriteEnable = false,
-                       .stencilTestEnable = true,
-                       .front = {
-                               .failOp = VK_STENCIL_OP_REPLACE,
-                               .passOp = VK_STENCIL_OP_REPLACE,
-                               .depthFailOp = VK_STENCIL_OP_REPLACE,
-                               .compareOp = VK_COMPARE_OP_ALWAYS,
-                               .compareMask = 0xff,
-                               .writeMask = 0xff,
-                               .reference = 0
-                       },
-                       .back = {
-                               .failOp = VK_STENCIL_OP_REPLACE,
-                               .passOp = VK_STENCIL_OP_REPLACE,
-                               .depthFailOp = VK_STENCIL_OP_REPLACE,
-                               .compareOp = VK_COMPARE_OP_ALWAYS,
-                               .compareMask = 0xff,
-                               .writeMask = 0xff,
-                               .reference = 0
-                       },
-                       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
-               },
-
-               .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
-                       .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-                       .dynamicStateCount = 4,
-                       .pDynamicStates = (VkDynamicState[]) {
-                               VK_DYNAMIC_STATE_LINE_WIDTH,
-                               VK_DYNAMIC_STATE_DEPTH_BIAS,
-                               VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-                               VK_DYNAMIC_STATE_DEPTH_BOUNDS,
-                       },
-               },
-               .flags = 0,
-               .layout = device->meta_state.blit.pipeline_layout,
-               .renderPass = device->meta_state.blit.stencil_only_rp,
-               .subpass = 0,
-       };
-
-       const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
-               .use_rectlist = true
-       };
-
-       pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_1d);
-       result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                              &vk_pipeline_info, &radv_pipeline_info,
-                                              &device->meta_state.alloc, &device->meta_state.blit.stencil_only_1d_pipeline);
+       result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.stencil_only_1d_pipeline);
        if (result != VK_SUCCESS)
                goto fail;
 
-       pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_2d);
-       result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                              &vk_pipeline_info, &radv_pipeline_info,
-                                              &device->meta_state.alloc, &device->meta_state.blit.stencil_only_2d_pipeline);
+       result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.stencil_only_2d_pipeline);
        if (result != VK_SUCCESS)
                goto fail;
 
-       pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_3d);
-       result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-                                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
-                                              &vk_pipeline_info, &radv_pipeline_info,
-                                              &device->meta_state.alloc, &device->meta_state.blit.stencil_only_3d_pipeline);
+       result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.stencil_only_3d_pipeline);
        if (result != VK_SUCCESS)
                goto fail;
 
+
 fail:
-       ralloc_free(fs_1d.nir);
-       ralloc_free(fs_2d.nir);
-       ralloc_free(fs_3d.nir);
        return result;
 }
 
 VkResult
-radv_device_init_meta_blit_state(struct radv_device *device)
+radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand)
 {
        VkResult result;
-       struct radv_shader_module vs = {0};
-       zero(device->meta_state.blit);
 
        VkDescriptorSetLayoutCreateInfo ds_layout_info = {
                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+               .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
                .bindingCount = 1,
                .pBindings = (VkDescriptorSetLayoutBinding[]) {
                        {
@@ -1238,33 +1200,32 @@ radv_device_init_meta_blit_state(struct radv_device *device)
        if (result != VK_SUCCESS)
                goto fail;
 
+       const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, 20};
+
        result = radv_CreatePipelineLayout(radv_device_to_handle(device),
                                           &(VkPipelineLayoutCreateInfo) {
                                                   .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
                                                           .setLayoutCount = 1,
                                                           .pSetLayouts = &device->meta_state.blit.ds_layout,
+                                                          .pushConstantRangeCount = 1,
+                                                          .pPushConstantRanges = &push_constant_range,
                                                           },
                                           &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
        if (result != VK_SUCCESS)
                goto fail;
 
-       vs.nir = build_nir_vertex_shader();
-
-       result = radv_device_init_meta_blit_color(device, &vs);
+       result = radv_device_init_meta_blit_color(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail;
 
-       result = radv_device_init_meta_blit_depth(device, &vs);
+       result = radv_device_init_meta_blit_depth(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail;
 
-       result = radv_device_init_meta_blit_stencil(device, &vs);
-       if (result != VK_SUCCESS)
-               goto fail;
-       return VK_SUCCESS;
+       result = radv_device_init_meta_blit_stencil(device, on_demand);
 
 fail:
-       ralloc_free(vs.nir);
-       radv_device_finish_meta_blit_state(device);
+       if (result != VK_SUCCESS)
+               radv_device_finish_meta_blit_state(device);
        return result;
 }