anv/allocator: Correctly set the number of buckets
[mesa.git] / src / intel / vulkan / anv_meta_blit2d.c
index 144a62481b8a5140a4ceb1bfe79b955899cbfcc0..6b2222c73b5da72cefba59b325d91e28a3718430 100644 (file)
 #include "anv_meta.h"
 #include "nir/nir_builder.h"
 
+/* How the blit source is read by the fragment shader.  Used as the first
+ * index into meta_state.blit2d.pipelines[src_type][dst_type].
+ */
+enum blit2d_src_type {
+   /* We can make a "normal" image view of this source and just texture
+    * from it like you would in any other shader.
+    */
+   BLIT2D_SRC_TYPE_NORMAL,
+
+   /* The source is W-tiled and we need to detile manually in the shader.
+    * This will work on any platform but is needed for all W-tiled sources
+    * prior to Broadwell.
+    */
+   BLIT2D_SRC_TYPE_W_DETILE,
+
+   /* Number of source types; not a valid source type itself. */
+   BLIT2D_NUM_SRC_TYPES,
+};
+
+/* How the blit destination is written.  Used as the second index into
+ * meta_state.blit2d.pipelines[src_type][dst_type].
+ */
+enum blit2d_dst_type {
+   /* We can bind this destination as a "normal" render target and render
+    * to it just like you would anywhere else.
+    */
+   BLIT2D_DST_TYPE_NORMAL,
+
+   /* The destination is W-tiled and we need to do the tiling manually in
+    * the shader.  This is required for all W-tiled destinations.
+    *
+    * Sky Lake adds a feature for providing explicit stencil values in the
+    * shader but mesa doesn't support that yet so neither do we.
+    */
+   BLIT2D_DST_TYPE_W_TILE,
+
+   /* The destination has a 3-channel RGB format.  Since we can't render to
+    * non-power-of-two textures, we have to bind it as a red texture and
+    * select the correct component for the given red pixel in the shader.
+    */
+   BLIT2D_DST_TYPE_RGB,
+
+   /* Number of destination types; not a valid destination type itself. */
+   BLIT2D_NUM_DST_TYPES,
+};
+
 static VkFormat
 vk_format_for_size(int bs)
 {
@@ -54,32 +92,40 @@ vk_format_for_size(int bs)
    }
 }
 
+/* Returns the single-channel (red) format that corresponds to one component
+ * of the 3-channel RGB format vk_format_for_size() returns for the same
+ * block size.
+ *
+ * bs is the block size in bytes of the whole RGB texel: 3, 6 or 12, i.e.
+ * 1, 2 or 4 bytes per component.  Any other value is a caller bug and hits
+ * unreachable().
+ */
+static VkFormat
+vk_single_component_format_for_rgb_size(int bs)
+{
+   switch (bs) {
+   case 3: return VK_FORMAT_R8_UNORM;
+   case 6: return VK_FORMAT_R16_UNORM;
+   case 12: return VK_FORMAT_R32_UINT;
+   default:
+      unreachable("Invalid format block size");
+   }
+}
+
 static void
 create_iview(struct anv_cmd_buffer *cmd_buffer,
              struct anv_meta_blit2d_surf *surf,
-             struct anv_meta_blit2d_rect *rect,
+             uint64_t offset,
              VkImageUsageFlags usage,
+             uint32_t width,
+             uint32_t height,
+             VkFormat format,
              VkImage *img,
              struct anv_image_view *iview)
 {
-   struct isl_tile_info tile_info;
-   isl_tiling_get_info(&cmd_buffer->device->isl_dev,
-                       surf->tiling, surf->bs, &tile_info);
-   const unsigned tile_width_px = tile_info.width > surf->bs ?
-                                  tile_info.width / surf->bs : 1;
-   uint32_t *rect_y = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ?
-                      &rect->src_y : &rect->dst_y;
-   uint32_t *rect_x = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ?
-                      &rect->src_x : &rect->dst_x;
-
-   /* Define the shared state among all created image views */
    const VkImageCreateInfo image_info = {
       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
       .imageType = VK_IMAGE_TYPE_2D,
-      .format = vk_format_for_size(surf->bs),
+      /* W-tiled images must be stencil-formatted. */
+      .format = format,
       .extent = {
-         .width = rect->width + (*rect_x) % tile_width_px,
-         .height = rect->height + (*rect_y) % tile_info.height,
+         .width = width,
+         .height = height,
          .depth = 1,
       },
       .mipLevels = 1,
@@ -102,17 +148,8 @@ create_iview(struct anv_cmd_buffer *cmd_buffer,
     * creating a dummy memory object etc. so there's really no point.
     */
    anv_image_from_handle(*img)->bo = surf->bo;
-   anv_image_from_handle(*img)->offset = surf->base_offset;
+   anv_image_from_handle(*img)->offset = surf->base_offset + offset;
 
-   /* Create a VkImageView that starts at the tile aligned offset closest
-    * to the provided x/y offset into the surface.
-    */
-   uint32_t img_o = 0;
-   isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev,
-                                             &anv_image_from_handle(*img)->
-                                                color_surface.isl,
-                                             *rect_x, *rect_y,
-                                             &img_o, rect_x, rect_y);
    anv_image_view_init(iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo) {
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -120,211 +157,660 @@ create_iview(struct anv_cmd_buffer *cmd_buffer,
                           .viewType = VK_IMAGE_VIEW_TYPE_2D,
                           .format = image_info.format,
                           .subresourceRange = {
-                             .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                             .aspectMask = anv_image_from_handle(*img)->aspects,
                              .baseMipLevel = 0,
                              .levelCount = 1,
                              .baseArrayLayer = 0,
                              .layerCount = 1
                           },
-                       }, cmd_buffer, img_o, usage);
+                       }, cmd_buffer, usage);
 }
 
-static void
-meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer,
-               struct anv_image_view *src_iview,
-               VkOffset3D src_offset,
-               struct anv_image_view *dest_iview,
-               VkOffset3D dest_offset,
-               VkExtent3D extent)
-{
-   struct anv_device *device = cmd_buffer->device;
+/* Per-rectangle temporaries filled in by blit2d_bind_src() and released by
+ * blit2d_unbind_src().
+ */
+struct blit2d_src_temps {
+   /* Temporary image and view; filled for BLIT2D_SRC_TYPE_NORMAL sources. */
+   VkImage image;
+   struct anv_image_view iview;
 
-   struct blit_vb_data {
-      float pos[2];
-      float tex_coord[3];
-   } *vb_data;
+   /* Buffer and texel buffer view; filled for BLIT2D_SRC_TYPE_W_DETILE
+    * sources.
+    */
+   struct anv_buffer buffer;
+   struct anv_buffer_view bview;
 
-   unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
-
-   struct anv_state vb_state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
-   memset(vb_state.map, 0, sizeof(struct anv_vue_header));
-   vb_data = vb_state.map + sizeof(struct anv_vue_header);
-
-   vb_data[0] = (struct blit_vb_data) {
-      .pos = {
-         dest_offset.x + extent.width,
-         dest_offset.y + extent.height,
-      },
-      .tex_coord = {
-         src_offset.x + extent.width,
-         src_offset.y + extent.height,
-         src_offset.z,
-      },
-   };
-
-   vb_data[1] = (struct blit_vb_data) {
-      .pos = {
-         dest_offset.x,
-         dest_offset.y + extent.height,
-      },
-      .tex_coord = {
-         src_offset.x,
-         src_offset.y + extent.height,
-         src_offset.z,
-      },
-   };
-
-   vb_data[2] = (struct blit_vb_data) {
-      .pos = {
-         dest_offset.x,
-         dest_offset.y,
-      },
-      .tex_coord = {
-         src_offset.x,
-         src_offset.y,
-         src_offset.z,
-      },
-   };
-
-   anv_state_clflush(vb_state);
-
-   struct anv_buffer vertex_buffer = {
-      .device = device,
-      .size = vb_size,
-      .bo = &device->dynamic_state_block_pool.bo,
-      .offset = vb_state.offset,
-   };
+   /* Single-set descriptor pool and the set allocated from it; filled for
+    * both source types.
+    */
+   VkDescriptorPool desc_pool;
+   VkDescriptorSet set;
+};
 
-   anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
-      (VkBuffer[]) {
-         anv_buffer_to_handle(&vertex_buffer),
-         anv_buffer_to_handle(&vertex_buffer)
-      },
-      (VkDeviceSize[]) {
-         0,
-         sizeof(struct anv_vue_header),
-      });
+/* Binds the blit source for one rectangle and fills *tmp with the
+ * temporaries that blit2d_unbind_src() later destroys.
+ *
+ * BLIT2D_SRC_TYPE_NORMAL: wraps the source BO in a temporary image/view and
+ * binds it as a sampled image using img_ds_layout / img_p_layout.
+ * BLIT2D_SRC_TYPE_W_DETILE: wraps the source BO in an R8_UINT uniform texel
+ * buffer view and binds it using buf_ds_layout / buf_p_layout.
+ *
+ * Note that &rect->src_x and &rect->src_y are handed to
+ * isl_tiling_get_intratile_offset_el() as outputs, so the caller's rect may
+ * be rewritten (presumably to coordinates relative to the returned byte
+ * offset -- confirm against the ISL helper).
+ */
+static void
+blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer,
+                struct anv_meta_blit2d_surf *src,
+                enum blit2d_src_type src_type,
+                struct anv_meta_blit2d_rect *rect,
+                struct blit2d_src_temps *tmp)
+{
+   struct anv_device *device = cmd_buffer->device;
+   VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
 
-   VkDescriptorPool desc_pool;
-   anv_CreateDescriptorPool(anv_device_to_handle(device),
-      &(const VkDescriptorPoolCreateInfo) {
-         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
-         .pNext = NULL,
-         .flags = 0,
-         .maxSets = 1,
-         .poolSizeCount = 1,
-         .pPoolSizes = (VkDescriptorPoolSize[]) {
+   if (src_type == BLIT2D_SRC_TYPE_NORMAL) {
+      uint32_t offset = 0;
+      isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+                                         src->tiling, src->bs, src->pitch,
+                                         rect->src_x, rect->src_y,
+                                         &offset, &rect->src_x, &rect->src_y);
+
+      VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT;    
+
+      /* W-tiled images must be stencil-formatted. Outside of meta,
+       * a stencil image has this usage bit set. Adding it here
+       * ensures the ISL surface is created correctly.
+       */
+      if (src->tiling == ISL_TILING_W)
+         usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+
+      /* The image starts at base_offset + offset and only needs to be large
+       * enough to contain the far corner of the source rectangle.
+       */
+      create_iview(cmd_buffer, src, offset, usage,
+                   rect->src_x + rect->width, rect->src_y + rect->height,
+                   src->tiling == ISL_TILING_W ?
+                      VK_FORMAT_S8_UINT : vk_format_for_size(src->bs),
+                   &tmp->image, &tmp->iview);
+
+      anv_CreateDescriptorPool(vk_device,
+         &(const VkDescriptorPoolCreateInfo) {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+            .pNext = NULL,
+            .flags = 0,
+            .maxSets = 1,
+            .poolSizeCount = 1,
+            .pPoolSizes = (VkDescriptorPoolSize[]) {
+               {
+                  .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+                  .descriptorCount = 1
+               },
+            }
+         }, &cmd_buffer->pool->alloc, &tmp->desc_pool);
+
+      anv_AllocateDescriptorSets(vk_device,
+         &(VkDescriptorSetAllocateInfo) {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+            .descriptorPool = tmp->desc_pool,
+            .descriptorSetCount = 1,
+            .pSetLayouts = &device->meta_state.blit2d.img_ds_layout
+         }, &tmp->set);
+
+      anv_UpdateDescriptorSets(vk_device,
+         1, /* writeCount */
+         (VkWriteDescriptorSet[]) {
             {
-               .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
-               .descriptorCount = 1
-            },
-         }
-      }, &cmd_buffer->pool->alloc, &desc_pool);
-
-   VkDescriptorSet set;
-   anv_AllocateDescriptorSets(anv_device_to_handle(device),
-      &(VkDescriptorSetAllocateInfo) {
-         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
-         .descriptorPool = desc_pool,
-         .descriptorSetCount = 1,
-         .pSetLayouts = &device->meta_state.blit2d.ds_layout
-      }, &set);
-
-   anv_UpdateDescriptorSets(anv_device_to_handle(device),
-      1, /* writeCount */
-      (VkWriteDescriptorSet[]) {
-         {
-            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-            .dstSet = set,
-            .dstBinding = 0,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
-            .pImageInfo = (VkDescriptorImageInfo[]) {
+               .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+               .dstSet = tmp->set,
+               .dstBinding = 0,
+               .dstArrayElement = 0,
+               .descriptorCount = 1,
+               .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+               .pImageInfo = (VkDescriptorImageInfo[]) {
+                  {
+                     .sampler = NULL,
+                     .imageView = anv_image_view_to_handle(&tmp->iview),
+                     .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+                  },
+               }
+            }
+         }, 0, NULL);
+
+      anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+                                VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                device->meta_state.blit2d.img_p_layout, 0, 1,
+                                &tmp->set, 0, NULL);
+   } else {
+      assert(src_type == BLIT2D_SRC_TYPE_W_DETILE);
+      assert(src->tiling == ISL_TILING_W);
+      assert(src->bs == 1);
+
+      uint32_t tile_offset = 0;
+      isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+                                         ISL_TILING_W, 1, src->pitch,
+                                         rect->src_x, rect->src_y,
+                                         &tile_offset,
+                                         &rect->src_x, &rect->src_y);
+
+      /* Expose the raw source bytes as a buffer.  The row count is rounded
+       * up to a multiple of 64 (presumably the W-tile height -- confirm)
+       * before being multiplied by the pitch.
+       */
+      tmp->buffer = (struct anv_buffer) {
+         .device = device,
+         .size = align_u32(rect->src_y + rect->height, 64) * src->pitch,
+         .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
+         .bo = src->bo,
+         .offset = src->base_offset + tile_offset,
+      };
+
+      anv_buffer_view_init(&tmp->bview, device,
+         &(VkBufferViewCreateInfo) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+            .buffer = anv_buffer_to_handle(&tmp->buffer),
+            .format = VK_FORMAT_R8_UINT,
+            .offset = 0,
+            .range = VK_WHOLE_SIZE,
+         }, cmd_buffer);
+
+      anv_CreateDescriptorPool(vk_device,
+         &(const VkDescriptorPoolCreateInfo) {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+            .pNext = NULL,
+            .flags = 0,
+            .maxSets = 1,
+            .poolSizeCount = 1,
+            .pPoolSizes = (VkDescriptorPoolSize[]) {
                {
-                  .sampler = NULL,
-                  .imageView = anv_image_view_to_handle(src_iview),
-                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+                  .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+                  .descriptorCount = 1
                },
             }
-         }
-      }, 0, NULL);
+         }, &cmd_buffer->pool->alloc, &tmp->desc_pool);
+
+      anv_AllocateDescriptorSets(vk_device,
+         &(VkDescriptorSetAllocateInfo) {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+            .descriptorPool = tmp->desc_pool,
+            .descriptorSetCount = 1,
+            .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout
+         }, &tmp->set);
+
+      anv_UpdateDescriptorSets(vk_device,
+         1, /* writeCount */
+         (VkWriteDescriptorSet[]) {
+            {
+               .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+               .dstSet = tmp->set,
+               .dstBinding = 0,
+               .dstArrayElement = 0,
+               .descriptorCount = 1,
+               .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+               .pTexelBufferView = (VkBufferView[]) {
+                  anv_buffer_view_to_handle(&tmp->bview),
+               },
+            }
+         }, 0, NULL);
 
+      anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
+                                VK_PIPELINE_BIND_POINT_GRAPHICS,
+                                device->meta_state.blit2d.buf_p_layout, 0, 1,
+                                &tmp->set, 0, NULL);
+   }
+}
+
+/* Releases the temporaries created by blit2d_bind_src(): the descriptor pool
+ * is destroyed for both source types, and the temporary image only for
+ * BLIT2D_SRC_TYPE_NORMAL (the W-detile path never creates one).
+ */
+static void
+blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer,
+                  enum blit2d_src_type src_type,
+                  struct blit2d_src_temps *tmp)
+{
+   anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device),
+                             tmp->desc_pool, &cmd_buffer->pool->alloc);
+   if (src_type == BLIT2D_SRC_TYPE_NORMAL) {
+      anv_DestroyImage(anv_device_to_handle(cmd_buffer->device),
+                       tmp->image, &cmd_buffer->pool->alloc);
+   }
+}
+
+/* Per-rectangle temporaries filled in by blit2d_bind_dst() and released by
+ * blit2d_unbind_dst(): the destination image, its view, and the framebuffer
+ * that has the view as its only attachment.
+ */
+struct blit2d_dst_temps {
+   VkImage image;
+   struct anv_image_view iview;
    VkFramebuffer fb;
-   anv_CreateFramebuffer(anv_device_to_handle(device),
+};
+
+/* Creates the destination render target for one rectangle.
+ *
+ * A temporary image/view of the given width, height and format is created
+ * over dst at byte offset `offset` (added to dst->base_offset by
+ * create_iview()), and a single-attachment, single-layer framebuffer of the
+ * same width and height is created around the view.  The results are stored
+ * in *tmp and must be released with blit2d_unbind_dst().
+ */
+static void
+blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer,
+                struct anv_meta_blit2d_surf *dst,
+                uint64_t offset,
+                uint32_t width,
+                uint32_t height,
+                VkFormat format,
+                struct blit2d_dst_temps *tmp)
+{
+   create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+                width, height, format, &tmp->image, &tmp->iview);
+
+   anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device),
       &(VkFramebufferCreateInfo) {
          .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
          .attachmentCount = 1,
          .pAttachments = (VkImageView[]) {
-            anv_image_view_to_handle(dest_iview),
+            anv_image_view_to_handle(&tmp->iview),
          },
-         .width = dest_iview->extent.width,
-         .height = dest_iview->extent.height,
+         .width = width,
+         .height = height,
          .layers = 1
-      }, &cmd_buffer->pool->alloc, &fb);
-
-   ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
-      &(VkRenderPassBeginInfo) {
-         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-         .renderPass = device->meta_state.blit2d.render_pass,
-         .framebuffer = fb,
-         .renderArea = {
-            .offset = { dest_offset.x, dest_offset.y },
-            .extent = { extent.width, extent.height },
-         },
-         .clearValueCount = 0,
-         .pClearValues = NULL,
-      }, VK_SUBPASS_CONTENTS_INLINE);
+      }, &cmd_buffer->pool->alloc, &tmp->fb);
+}
+
+/* Releases the temporaries created by blit2d_bind_dst(): the framebuffer is
+ * destroyed first, then the image it was built on.
+ */
+static void
+blit2d_unbind_dst(struct anv_cmd_buffer *cmd_buffer,
+                  struct blit2d_dst_temps *tmp)
+{
+   VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+   anv_DestroyFramebuffer(vk_device, tmp->fb, &cmd_buffer->pool->alloc);
+   anv_DestroyImage(vk_device, tmp->image, &cmd_buffer->pool->alloc);
+}
 
-   VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src;
+/* Ends a sequence of 2D meta blits by restoring the command buffer state
+ * that anv_meta_begin_blit2d() stored in *save.
+ */
+void
+anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
+                    struct anv_meta_saved_state *save)
+{
+   anv_meta_restore(save, cmd_buffer);
+}
+
+/* Begins a sequence of 2D meta blits by saving the command buffer state into
+ * *save.  The dynamic-state mask passed to anv_meta_save() is 0, i.e. no
+ * dynamic state bits are requested here.
+ */
+void
+anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
+                      struct anv_meta_saved_state *save)
+{
+   anv_meta_save(save, cmd_buffer, 0);
+}
+
+/* Binds the graphics pipeline stored for the given source/destination type
+ * pair in meta_state.blit2d.pipelines, unless it is already the command
+ * buffer's current pipeline.
+ */
+static void
+bind_pipeline(struct anv_cmd_buffer *cmd_buffer,
+              enum blit2d_src_type src_type,
+              enum blit2d_dst_type dst_type)
+{
+   VkPipeline pipeline =
+      cmd_buffer->device->meta_state.blit2d.pipelines[src_type][dst_type];
 
    if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
       anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
                           VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
    }
+}
 
-   anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
-                      &(VkViewport) {
-                        .x = 0.0f,
-                        .y = 0.0f,
-                        .width = dest_iview->extent.width,
-                        .height = dest_iview->extent.height,
-                        .minDepth = 0.0f,
-                        .maxDepth = 1.0f,
-                      });
+/* Blits each rectangle into a destination that can be bound as an ordinary
+ * render target with format vk_format_for_size(dst->bs).
+ *
+ * For every rect: bind the source, create the destination image/framebuffer
+ * at the offset returned by isl_tiling_get_intratile_offset_el(), upload
+ * three vertices (a VUE header followed by position + texture coordinate),
+ * draw them inside a render pass, and release the temporaries.
+ *
+ * The rects are modified in place: their src/dst x/y fields are passed as
+ * outputs to the ISL offset helper.
+ */
+static void
+anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer,
+                           struct anv_meta_blit2d_surf *src,
+                           enum blit2d_src_type src_type,
+                           struct anv_meta_blit2d_surf *dst,
+                           unsigned num_rects,
+                           struct anv_meta_blit2d_rect *rects)
+{
+   struct anv_device *device = cmd_buffer->device;
 
-   anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
-                             VK_PIPELINE_BIND_POINT_GRAPHICS,
-                             device->meta_state.blit2d.pipeline_layout, 0, 1,
-                             &set, 0, NULL);
+   for (unsigned r = 0; r < num_rects; ++r) {
+      struct blit2d_src_temps src_temps;
+      blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
+
+      uint32_t offset = 0;
+      isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+                                         dst->tiling, dst->bs, dst->pitch,
+                                         rects[r].dst_x, rects[r].dst_y,
+                                         &offset,
+                                         &rects[r].dst_x, &rects[r].dst_y);
+
+      struct blit2d_dst_temps dst_temps;
+      blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width,
+                      rects[r].dst_y + rects[r].height,
+                      vk_format_for_size(dst->bs), &dst_temps);
+
+      struct blit_vb_data {
+         float pos[2];
+         float tex_coord[3];
+      } *vb_data;
+
+      unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+      struct anv_state vb_state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+      memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+      vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+      /* Three corners of the rectangle: bottom-right, bottom-left, top-left.
+       * The third texture coordinate carries the source pitch.
+       */
+      vb_data[0] = (struct blit_vb_data) {
+         .pos = {
+            rects[r].dst_x + rects[r].width,
+            rects[r].dst_y + rects[r].height,
+         },
+         .tex_coord = {
+            rects[r].src_x + rects[r].width,
+            rects[r].src_y + rects[r].height,
+            src->pitch,
+         },
+      };
 
-   ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+      vb_data[1] = (struct blit_vb_data) {
+         .pos = {
+            rects[r].dst_x,
+            rects[r].dst_y + rects[r].height,
+         },
+         .tex_coord = {
+            rects[r].src_x,
+            rects[r].src_y + rects[r].height,
+            src->pitch,
+         },
+      };
 
-   ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+      vb_data[2] = (struct blit_vb_data) {
+         .pos = {
+            rects[r].dst_x,
+            rects[r].dst_y,
+         },
+         .tex_coord = {
+            rects[r].src_x,
+            rects[r].src_y,
+            src->pitch,
+         },
+      };
+
+      if (!device->info.has_llc)
+         anv_state_clflush(vb_state);
+
+      struct anv_buffer vertex_buffer = {
+         .device = device,
+         .size = vb_size,
+         .bo = &device->dynamic_state_block_pool.bo,
+         .offset = vb_state.offset,
+      };
+
+      /* The same buffer is bound twice: binding 0 starts at the VUE header,
+       * binding 1 starts at the per-vertex data that follows it.
+       */
+      anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+         (VkBuffer[]) {
+            anv_buffer_to_handle(&vertex_buffer),
+            anv_buffer_to_handle(&vertex_buffer)
+         },
+         (VkDeviceSize[]) {
+            0,
+            sizeof(struct anv_vue_header),
+         });
+
+      ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+         &(VkRenderPassBeginInfo) {
+            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+            .renderPass = device->meta_state.blit2d.render_pass,
+            .framebuffer = dst_temps.fb,
+            .renderArea = {
+               .offset = { rects[r].dst_x, rects[r].dst_y, },
+               .extent = { rects[r].width, rects[r].height },
+            },
+            .clearValueCount = 0,
+            .pClearValues = NULL,
+         }, VK_SUBPASS_CONTENTS_INLINE);
 
-   /* At the point where we emit the draw call, all data from the
-    * descriptor sets, etc. has been used.  We are free to delete it.
-    */
-   anv_DestroyDescriptorPool(anv_device_to_handle(device),
-                             desc_pool, &cmd_buffer->pool->alloc);
-   anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
-                          &cmd_buffer->pool->alloc);
+      bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL);
+
+      ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+      ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+      /* At the point where we emit the draw call, all data from the
+       * descriptor sets, etc. has been used.  We are free to delete it.
+       */
+      blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
+      blit2d_unbind_dst(cmd_buffer, &dst_temps);
+   }
 }
 
-void
-anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
-                    struct anv_meta_saved_state *save)
+/* Blits each rectangle into a W-tiled destination.
+ *
+ * The destination memory is bound as a Y-tiled R8_UINT render target (dst_Y)
+ * and drawn with the BLIT2D_DST_TYPE_W_TILE pipeline.  The vertex buffer
+ * starts with a header holding the source-minus-destination offset, the
+ * source pitch and the destination bounds in the original (W-tiled)
+ * coordinates; the vertex positions and the render area use the rectangle
+ * converted to Y-tiled coordinates.
+ *
+ * The rects are modified in place: their src/dst x/y fields are passed as
+ * outputs to the ISL offset helper.
+ */
+static void
+anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer,
+                            struct anv_meta_blit2d_surf *src,
+                            enum blit2d_src_type src_type,
+                            struct anv_meta_blit2d_surf *dst,
+                            unsigned num_rects,
+                            struct anv_meta_blit2d_rect *rects)
 {
-   anv_meta_restore(save, cmd_buffer);
+   struct anv_device *device = cmd_buffer->device;
+
+   for (unsigned r = 0; r < num_rects; ++r) {
+      struct blit2d_src_temps src_temps;
+      blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
+
+      assert(dst->bs == 1);
+      uint32_t offset;
+      isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+                                         ISL_TILING_W, 1, dst->pitch,
+                                         rects[r].dst_x, rects[r].dst_y,
+                                         &offset,
+                                         &rects[r].dst_x, &rects[r].dst_y);
+
+      /* The original coordinates were in terms of an actual W-tiled offset
+       * but we are binding this image as Y-tiled.  We need to adjust our
+       * rectangle accordingly.
+       */
+      uint32_t xmin_Y, xmax_Y, ymin_Y, ymax_Y;
+      xmin_Y = (rects[r].dst_x / 8) * 16;
+      xmax_Y = DIV_ROUND_UP(rects[r].dst_x + rects[r].width, 8) * 16;
+      ymin_Y = (rects[r].dst_y / 4) * 2;
+      ymax_Y = DIV_ROUND_UP(rects[r].dst_y + rects[r].height, 4) * 2;
+
+      struct anv_meta_blit2d_surf dst_Y = {
+         .bo = dst->bo,
+         .tiling = ISL_TILING_Y0,
+         .base_offset = dst->base_offset,
+         .bs = 1,
+         .pitch = dst->pitch,
+      };
+
+      struct blit2d_dst_temps dst_temps;
+      blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y,
+                      VK_FORMAT_R8_UINT, &dst_temps);
+
+      struct blit_vb_header {
+         struct anv_vue_header vue;
+         int32_t tex_offset[2];
+         uint32_t tex_pitch;
+         uint32_t bounds[4];
+      } *vb_header;
+
+      struct blit_vb_data {
+         float pos[2];
+      } *vb_data;
+
+      unsigned vb_size = sizeof(*vb_header) + 3 * sizeof(*vb_data);
+
+      struct anv_state vb_state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+      vb_header = vb_state.map;
+
+      /* Fields not named here (including the VUE header) are zeroed by the
+       * compound literal.
+       */
+      *vb_header = (struct blit_vb_header) {
+         .tex_offset = {
+            rects[r].src_x - rects[r].dst_x,
+            rects[r].src_y - rects[r].dst_y,
+         },
+         .tex_pitch = src->pitch,
+         .bounds = {
+            rects[r].dst_x,
+            rects[r].dst_y,
+            rects[r].dst_x + rects[r].width,
+            rects[r].dst_y + rects[r].height,
+         },
+      };
+
+      vb_data = (void *)(vb_header + 1);
+
+      vb_data[0] = (struct blit_vb_data) {
+         .pos = {
+            xmax_Y,
+            ymax_Y,
+         },
+      };
+
+      vb_data[1] = (struct blit_vb_data) {
+         .pos = {
+            xmin_Y,
+            ymax_Y,
+         },
+      };
+
+      vb_data[2] = (struct blit_vb_data) {
+         .pos = {
+            xmin_Y,
+            ymin_Y,
+         },
+      };
+
+      if (!device->info.has_llc)
+         anv_state_clflush(vb_state);
+
+      struct anv_buffer vertex_buffer = {
+         .device = device,
+         .size = vb_size,
+         .bo = &device->dynamic_state_block_pool.bo,
+         .offset = vb_state.offset,
+      };
+
+      /* Binding 0 starts at the header; binding 1 starts at the vertex
+       * positions that follow it.
+       */
+      anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+         (VkBuffer[]) {
+            anv_buffer_to_handle(&vertex_buffer),
+            anv_buffer_to_handle(&vertex_buffer)
+         },
+         (VkDeviceSize[]) {
+            0,
+            (void *)vb_data - vb_state.map,
+         });
+
+      ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+         &(VkRenderPassBeginInfo) {
+            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+            .renderPass = device->meta_state.blit2d.render_pass,
+            .framebuffer = dst_temps.fb,
+            .renderArea = {
+               .offset = { xmin_Y, ymin_Y, },
+               .extent = { xmax_Y - xmin_Y, ymax_Y - ymin_Y },
+            },
+            .clearValueCount = 0,
+            .pClearValues = NULL,
+         }, VK_SUBPASS_CONTENTS_INLINE);
+
+      bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_W_TILE);
+
+      ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+      ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+      /* At the point where we emit the draw call, all data from the
+       * descriptor sets, etc. has been used.  We are free to delete it.
+       */
+      blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
+      blit2d_unbind_dst(cmd_buffer, &dst_temps);
+   }
 }
 
-void
-anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
-                      struct anv_meta_saved_state *save)
+/* Blits each rectangle into a linear destination with a 3-channel RGB
+ * format.
+ *
+ * The destination is bound as a single-channel (red) render target that is
+ * three times as wide as the RGB surface, and drawn with the
+ * BLIT2D_DST_TYPE_RGB pipeline.  All destination x coordinates handed to the
+ * hardware (framebuffer width, vertex positions, render area) are therefore
+ * in red-texel units, i.e. the RGB-pixel coordinate multiplied by 3.
+ *
+ * The rects are modified in place: their src/dst x/y fields are passed as
+ * outputs to the ISL offset helper.
+ */
+static void
+anv_meta_blit2d_rgb_dst(struct anv_cmd_buffer *cmd_buffer,
+                        struct anv_meta_blit2d_surf *src,
+                        enum blit2d_src_type src_type,
+                        struct anv_meta_blit2d_surf *dst,
+                        unsigned num_rects,
+                        struct anv_meta_blit2d_rect *rects)
 {
-   anv_meta_save(save, cmd_buffer,
-                 (1 << VK_DYNAMIC_STATE_VIEWPORT));
+   struct anv_device *device = cmd_buffer->device;
+
+   for (unsigned r = 0; r < num_rects; ++r) {
+      struct blit2d_src_temps src_temps;
+      blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
+
+      assert(dst->bs % 3 == 0);
+      assert(dst->tiling == ISL_TILING_LINEAR);
+
+      /* NOTE(review): the block size passed here is 1 rather than dst->bs;
+       * confirm this is what the ISL helper expects for linear RGB surfaces.
+       */
+      uint32_t offset = 0;
+      isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+                                         dst->tiling, 1, dst->pitch,
+                                         rects[r].dst_x, rects[r].dst_y,
+                                         &offset,
+                                         &rects[r].dst_x, &rects[r].dst_y);
+
+      /* A red surface three times as wide as the actual RGB destination */
+      struct anv_meta_blit2d_surf dst_R = {
+         .bo = dst->bo,
+         .tiling = dst->tiling,
+         .base_offset = dst->base_offset,
+         .bs = dst->bs / 3,
+         .pitch = dst->pitch,
+      };
+
+      struct blit2d_dst_temps dst_temps;
+      blit2d_bind_dst(cmd_buffer, &dst_R, offset,
+                      (rects[r].dst_x + rects[r].width) * 3,
+                      rects[r].dst_y + rects[r].height,
+                      vk_single_component_format_for_rgb_size(dst->bs),
+                      &dst_temps);
+
+      struct blit_vb_data {
+         float pos[2];
+         float tex_coord[3];
+      } *vb_data;
+
+      unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+      struct anv_state vb_state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+      memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+      vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+      /* Positions are in red-texel units (x * 3); texture coordinates stay
+       * in source-pixel units, with the source pitch as third component.
+       */
+      vb_data[0] = (struct blit_vb_data) {
+         .pos = {
+            (rects[r].dst_x + rects[r].width) * 3,
+            rects[r].dst_y + rects[r].height,
+         },
+         .tex_coord = {
+            rects[r].src_x + rects[r].width,
+            rects[r].src_y + rects[r].height,
+            src->pitch,
+         },
+      };
+
+      vb_data[1] = (struct blit_vb_data) {
+         .pos = {
+            rects[r].dst_x * 3,
+            rects[r].dst_y + rects[r].height,
+         },
+         .tex_coord = {
+            rects[r].src_x,
+            rects[r].src_y + rects[r].height,
+            src->pitch,
+         },
+      };
+
+      vb_data[2] = (struct blit_vb_data) {
+         .pos = {
+            rects[r].dst_x * 3,
+            rects[r].dst_y,
+         },
+         .tex_coord = {
+            rects[r].src_x,
+            rects[r].src_y,
+            src->pitch,
+         },
+      };
+
+      if (!device->info.has_llc)
+         anv_state_clflush(vb_state);
+
+      struct anv_buffer vertex_buffer = {
+         .device = device,
+         .size = vb_size,
+         .bo = &device->dynamic_state_block_pool.bo,
+         .offset = vb_state.offset,
+      };
+
+      anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+         (VkBuffer[]) {
+            anv_buffer_to_handle(&vertex_buffer),
+            anv_buffer_to_handle(&vertex_buffer)
+         },
+         (VkDeviceSize[]) {
+            0,
+            sizeof(struct anv_vue_header),
+         });
+
+      /* The render area must cover everything we draw.  Because the
+       * framebuffer is the red surface, its x offset and width are scaled
+       * by 3 to match the vertex positions above.
+       */
+      ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+         &(VkRenderPassBeginInfo) {
+            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+            .renderPass = device->meta_state.blit2d.render_pass,
+            .framebuffer = dst_temps.fb,
+            .renderArea = {
+               .offset = { rects[r].dst_x * 3, rects[r].dst_y, },
+               .extent = { rects[r].width * 3, rects[r].height },
+            },
+            .clearValueCount = 0,
+            .pClearValues = NULL,
+         }, VK_SUBPASS_CONTENTS_INLINE);
+
+      bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_RGB);
+
+      ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+      ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+      /* At the point where we emit the draw call, all data from the
+       * descriptor sets, etc. has been used.  We are free to delete it.
+       */
+      blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
+      blit2d_unbind_dst(cmd_buffer, &dst_temps);
+   }
 }
 
 void
@@ -334,32 +820,28 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
                 unsigned num_rects,
                 struct anv_meta_blit2d_rect *rects)
 {
-   VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
-   VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT;
-   VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+   /* Pick how the source is read: a W-tiled source on gen < 8 takes the
+    * manual-detile path, every other source takes the normal path.
+    */
+   enum blit2d_src_type src_type;
+   if (src->tiling == ISL_TILING_W && cmd_buffer->device->info.gen < 8) {
+      src_type = BLIT2D_SRC_TYPE_W_DETILE;
+   } else {
+      src_type = BLIT2D_SRC_TYPE_NORMAL;
+   }
 
-   for (unsigned r = 0; r < num_rects; ++r) {
-      VkImage src_img;
-      VkImage dst_img;
-      struct anv_image_view src_iview;
-      struct anv_image_view dst_iview;
-      create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview);
-      create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview);
-
-      /* Perform blit */
-      meta_emit_blit2d(cmd_buffer,
-                     &src_iview,
-                     (VkOffset3D){rects[r].src_x, rects[r].src_y, 0},
-                     &dst_iview,
-                     (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0},
-                     (VkExtent3D){rects[r].width, rects[r].height, 1});
-
-      anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc);
-      anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc);
+   /* Dispatch on the destination.  The order matters: W-tiling is checked
+    * first, then block sizes that are a multiple of 3 go to the RGB path,
+    * and whatever is left must have a power-of-two block size and goes to
+    * the normal path.
+    */
+   if (dst->tiling == ISL_TILING_W) {
+      anv_meta_blit2d_w_tiled_dst(cmd_buffer, src, src_type, dst,
+                                  num_rects, rects);
+      return;
+   } else if (dst->bs % 3 == 0) {
+      anv_meta_blit2d_rgb_dst(cmd_buffer, src, src_type, dst,
+                              num_rects, rects);
+      return;
+   } else {
+      assert(util_is_power_of_two(dst->bs));
+      anv_meta_blit2d_normal_dst(cmd_buffer, src, src_type, dst,
+                                 num_rects, rects);
    }
 }
 
-
 static nir_shader *
 build_nir_vertex_shader(void)
 {
@@ -383,55 +865,381 @@ build_nir_vertex_shader(void)
    nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                    vec4, "v_tex_pos");
    tex_pos_out->data.location = VARYING_SLOT_VAR0;
-   tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+   tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
    nir_copy_var(&b, tex_pos_out, tex_pos_in);
 
+   nir_variable *other_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                                vec4, "a_other");
+   other_in->data.location = VERT_ATTRIB_GENERIC2;
+   nir_variable *other_out = nir_variable_create(b.shader, nir_var_shader_out,
+                                                   vec4, "v_other");
+   other_out->data.location = VARYING_SLOT_VAR1;
+   other_out->data.interpolation = INTERP_MODE_FLAT;
+   nir_copy_var(&b, other_out, other_in);
+
    return b.shader;
 }
 
-static nir_shader *
-build_nir_copy_fragment_shader()
+/* Signature of a helper that emits the NIR for fetching one source texel.
+ * The arguments are, in order: the builder to emit into, the device, the
+ * integer texel position and the source pitch.  The return value is the SSA
+ * def holding the fetched texel.  build_nir_texel_fetch() and
+ * build_nir_w_tiled_fetch() both have this signature, and the fragment
+ * shader builders take one of them as their txf_func argument.
+ */
+typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
+                                               struct anv_device *,
+                                               nir_ssa_def *, nir_ssa_def *);
+
+/* Emit NIR that copies a bit-field from src into dst.
+ *
+ * The field is num_bits wide and starts at bit src_offset in src; it is
+ * placed starting at bit dst_offset in the result.  The field is OR-ed into
+ * dst, so bits already set in dst at the target position are not cleared.
+ *
+ * num_bits must be in the range [1, 32].  Returns the SSA def of the
+ * combined value.
+ */
+static nir_ssa_def *
+nir_copy_bits(struct nir_builder *b, nir_ssa_def *dst, unsigned dst_offset,
+              nir_ssa_def *src, unsigned src_offset, unsigned num_bits)
 {
-   const struct glsl_type *vec4 = glsl_vec4_type();
-   const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
-   nir_builder b;
+   /* A width of 0 would shift by 32 below, which is undefined in C. */
+   assert(num_bits >= 1 && num_bits <= 32);
+
+   /* Start from all ones.  (~1u only yields the right mask for widths up to
+    * 31 because its cleared bit 0 gets shifted out; for a full 32-bit field
+    * it would silently drop bit 0.)
+    */
+   unsigned src_mask = (~0u >> (32 - num_bits)) << src_offset;
+   nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask));
+
+   /* Move the isolated field from its source position to its destination
+    * position; the shift direction depends on which offset is larger.
+    */
+   nir_ssa_def *shifted;
+   if (dst_offset > src_offset) {
+      shifted = nir_ishl(b, masked, nir_imm_int(b, dst_offset - src_offset));
+   } else if (dst_offset < src_offset) {
+      shifted = nir_ushr(b, masked, nir_imm_int(b, src_offset - dst_offset));
+   } else {
+      assert(dst_offset == src_offset);
+      shifted = masked;
+   }
 
-   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+   return nir_ior(b, dst, shifted);
+}
 
-   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
-                                                  vec2, "v_tex_pos");
-   tex_pos_in->data.location = VARYING_SLOT_VAR0;
-   nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+/* Texel fetch for a W-tiled source that has to be detiled in the shader.
+ *
+ * Emits NIR that turns the integer (x, y) in tex_pos plus tex_pitch into a
+ * single linear offset, and then does a txf with that one-component
+ * coordinate on a buffer-dimension sampler named "s_tex" (descriptor set 0,
+ * binding 0).  Returns the 4-component, 32-bit result of the fetch.
+ */
+static nir_ssa_def *
+build_nir_w_tiled_fetch(struct nir_builder *b, struct anv_device *device,
+                        nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch)
+{
+   nir_ssa_def *x = nir_channel(b, tex_pos, 0);
+   nir_ssa_def *y = nir_channel(b, tex_pos, 1);
+
+   /* First, compute the block-aligned offset */
+   nir_ssa_def *x_major = nir_ushr(b, x, nir_imm_int(b, 6));
+   nir_ssa_def *y_major = nir_ushr(b, y, nir_imm_int(b, 6));
+   /* W tiles have physical size of 128x32 and logical size of 64x64, hence
+    * the multiplication by 32 (instead of 64). */
+   nir_ssa_def *offset =
+      nir_iadd(b, nir_imul(b, y_major,
+                              nir_imul(b, tex_pitch, nir_imm_int(b, 32))),
+                  nir_imul(b, x_major, nir_imm_int(b, 4096)));
+
+   /* Compute the bottom 12 bits of the offset */
+   /* From bit 0 upwards the result is: x0 y0 x1 y1 x2 y2 y3 y4 y5 x3 x4 x5 */
+   offset = nir_copy_bits(b, offset, 0, x, 0, 1);
+   offset = nir_copy_bits(b, offset, 1, y, 0, 1);
+   offset = nir_copy_bits(b, offset, 2, x, 1, 1);
+   offset = nir_copy_bits(b, offset, 3, y, 1, 1);
+   offset = nir_copy_bits(b, offset, 4, x, 2, 1);
+   offset = nir_copy_bits(b, offset, 5, y, 2, 4);
+   offset = nir_copy_bits(b, offset, 9, x, 3, 3);
+
+   /* When the device reports bit-6 swizzling, XOR bit 9 of the offset into
+    * bit 6 (mask 0x0200, shifted right by 3).
+    */
+   if (device->isl_dev.has_bit6_swizzling) {
+      offset = nir_ixor(b, offset,
+                        nir_ushr(b, nir_iand(b, offset, nir_imm_int(b, 0x0200)),
+                                 nir_imm_int(b, 3)));
+   }
+
+   const struct glsl_type *sampler_type =
+      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+   nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
+                                               sampler_type, "s_tex");
+   sampler->data.descriptor_set = 0;
+   sampler->data.binding = 0;
+
+   /* One source only: the computed offset is the coordinate; no LOD source
+    * is supplied for the buffer fetch.
+    */
+   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
+   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+   tex->op = nir_texop_txf;
+   tex->src[0].src_type = nir_tex_src_coord;
+   tex->src[0].src = nir_src_for_ssa(offset);
+   tex->dest_type = nir_type_float; /* TODO */
+   tex->is_array = false;
+   tex->coord_components = 1;
+   tex->texture = nir_deref_var_create(tex, sampler);
+   tex->sampler = NULL;
 
+   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+   nir_builder_instr_insert(b, &tex->instr);
+
+   return &tex->dest.ssa;
+}
+
+/* Texel fetch for a source that can be sampled as an ordinary 2D image.
+ *
+ * Emits a txf at LOD 0 with the two-component tex_pos as the coordinate, on
+ * a 2D sampler named "s_tex" (descriptor set 0, binding 0), and returns the
+ * 4-component, 32-bit result.  The device and tex_pitch arguments are not
+ * used in this function; they are part of the texel_fetch_build_func
+ * signature.
+ */
+static nir_ssa_def *
+build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device,
+                      nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch)
+{
    const struct glsl_type *sampler_type =
-      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false,
-                        glsl_get_base_type(vec4));
-   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
+      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
+   nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
                                                 sampler_type, "s_tex");
    sampler->data.descriptor_set = 0;
    sampler->data.binding = 0;
 
-   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
+   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
    tex->op = nir_texop_txf;
    tex->src[0].src_type = nir_tex_src_coord;
    tex->src[0].src = nir_src_for_ssa(tex_pos);
    tex->src[1].src_type = nir_tex_src_lod;
-   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+   tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
    tex->dest_type = nir_type_float; /* TODO */
    tex->is_array = false;
-   tex->coord_components = tex_pos->num_components;
+   tex->coord_components = 2;
    tex->texture = nir_deref_var_create(tex, sampler);
    tex->sampler = NULL;
 
    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
-   nir_builder_instr_insert(&b, &tex->instr);
+   nir_builder_instr_insert(b, &tex->instr);
+
+   return &tex->dest.ssa;
+}
+
+/* Vertex input layout for the normal-destination pipelines (the RGB
+ * pipelines reuse it through the rgb_vi_create_info alias).
+ *
+ * Binding 0 is instance-rate with a stride of 0 and supplies the VUE header.
+ * Binding 1 is per-vertex with a stride of five floats: a two-float position
+ * at offset 0 followed by a three-float texture coordinate at offset 8.
+ */
+static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
+   .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+   .vertexBindingDescriptionCount = 2,
+   .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+      {
+         .binding = 0,
+         .stride = 0,
+         .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
+      },
+      {
+         .binding = 1,
+         .stride = 5 * sizeof(float),
+         .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+      },
+   },
+   .vertexAttributeDescriptionCount = 3,
+   .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+      {
+         /* VUE Header */
+         .location = 0,
+         .binding = 0,
+         .format = VK_FORMAT_R32G32B32A32_UINT,
+         .offset = 0
+      },
+      {
+         /* Position */
+         .location = 1,
+         .binding = 1,
+         .format = VK_FORMAT_R32G32_SFLOAT,
+         .offset = 0
+      },
+      {
+         /* Texture Coordinate */
+         .location = 2,
+         .binding = 1,
+         .format = VK_FORMAT_R32G32B32_SFLOAT,
+         .offset = 8
+      },
+   },
+};
+
+/* Build the fragment shader for a normal destination.
+ *
+ * The shader reads the three-component "v_tex_pos" input (VARYING_SLOT_VAR0),
+ * converts it to integers, uses the first two components as the texel
+ * position and the third as the source pitch, fetches the texel through
+ * txf_func and writes all four channels to "f_color" (FRAG_RESULT_DATA0).
+ *
+ * Returns the shader owned by the local builder.
+ */
+static nir_shader *
+build_nir_copy_fragment_shader(struct anv_device *device,
+                               texel_fetch_build_func txf_func)
+{
+   const struct glsl_type *vec4 = glsl_vec4_type();
+   const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+   nir_builder b;
+
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+
+   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                                  vec3, "v_tex_pos");
+   tex_pos_in->data.location = VARYING_SLOT_VAR0;
 
    nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                  vec4, "f_color");
    color_out->data.location = FRAG_RESULT_DATA0;
-   nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
+
+   /* Split the integer input into position (.xy) and pitch (.z). */
+   nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+   unsigned swiz[4] = { 0, 1 };
+   nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
+   nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
+
+   nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
+   nir_store_var(&b, color_out, color, 0xf);
+
+   return b.shader;
+}
+
+/* RGB copies have the same interface as normal copies */
+#define rgb_vi_create_info normal_vi_create_info
+
+/* Build the fragment shader for a three-channel (RGB) destination.
+ *
+ * The texel is fetched exactly as in the normal copy shader: "v_tex_pos"
+ * supplies the position in .xy and the pitch in .z, and txf_func does the
+ * fetch.  Only one channel of the fetched value is written, though: the
+ * integer x coordinate of gl_FragCoord modulo 3 selects channel 0, 1 or 2,
+ * and that value is stored to the first component of "f_color".  The other
+ * three output components are left undefined.
+ *
+ * Returns the shader owned by the local builder.
+ */
+static nir_shader *
+build_nir_rgb_fragment_shader(struct anv_device *device,
+                              texel_fetch_build_func txf_func)
+{
+   const struct glsl_type *vec4 = glsl_vec4_type();
+   const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+   nir_builder b;
+
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+
+   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                                  vec3, "v_tex_pos");
+   tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+                                                 vec4, "f_color");
+   color_out->data.location = FRAG_RESULT_DATA0;
+
+   /* We need gl_FragCoord so we know our position */
+   nir_variable *frag_coord_in = nir_variable_create(b.shader,
+                                                     nir_var_shader_in,
+                                                     vec4, "gl_FragCoord");
+   frag_coord_in->data.location = VARYING_SLOT_POS;
+   frag_coord_in->data.origin_upper_left = true;
+
+   /* Split the integer input into position (.xy) and pitch (.z). */
+   nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+   unsigned swiz[4] = { 0, 1 };
+   nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
+   nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
+
+   nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
+
+   /* We figure out which component we are by the x component of FragCoord */
+   nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in));
+   nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, frag_coord_int, 0),
+                                    nir_imm_int(&b, 3));
+
+   /* Select the given channel from the texelFetch result */
+   nir_ssa_def *color_channel =
+      nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)),
+                    nir_channel(&b, color, 0),
+                    nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)),
+                                  nir_channel(&b, color, 1),
+                                  nir_channel(&b, color, 2)));
+
+   /* Write mask 0x1: only the first output component carries data. */
+   nir_ssa_def *u = nir_ssa_undef(&b, 1, 32);
+   nir_store_var(&b, color_out, nir_vec4(&b, color_channel, u, u, u), 0x1);
+
+   return b.shader;
+}
+
+/* Vertex input layout for the W-tiled-destination pipelines.
+ *
+ * Binding 0 is instance-rate with a stride of 0 and supplies three
+ * attributes: the VUE header at offset 0, the texture offset (three 32-bit
+ * uints) at offset 16 and the destination bounds (four 32-bit uints) at
+ * offset 28.  Binding 1 is per-vertex with a stride of two floats and
+ * supplies only the position.
+ */
+static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = {
+   .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+   .vertexBindingDescriptionCount = 2,
+   .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
+      {
+         .binding = 0,
+         .stride = 0,
+         .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
+      },
+      {
+         .binding = 1,
+         .stride = 2 * sizeof(float),
+         .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
+      },
+   },
+   .vertexAttributeDescriptionCount = 4,
+   .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
+      {
+         /* VUE Header */
+         .location = 0,
+         .binding = 0,
+         .format = VK_FORMAT_R32G32B32A32_UINT,
+         .offset = 0
+      },
+      {
+         /* Position */
+         .location = 1,
+         .binding = 1,
+         .format = VK_FORMAT_R32G32_SFLOAT,
+         .offset = 0
+      },
+      {
+         /* Texture Offset */
+         .location = 2,
+         .binding = 0,
+         .format = VK_FORMAT_R32G32B32_UINT,
+         .offset = 16
+      },
+      {
+         /* Destination bounds */
+         .location = 3,
+         .binding = 0,
+         .format = VK_FORMAT_R32G32B32A32_UINT,
+         .offset = 28
+      },
+   },
+};
+
+/* Build the fragment shader for a W-tiled destination.
+ *
+ * The shader takes the integer gl_FragCoord as a Y-tiled position,
+ * rearranges its bits into the matching W-tiled position, discards the
+ * fragment if that position lies outside "v_bounds", and otherwise adds the
+ * dest-to-source offset from "v_tex_off", fetches the source texel through
+ * txf_func and writes all four channels to "f_color" (FRAG_RESULT_DATA0).
+ *
+ * Returns the shader owned by the local builder.
+ */
+static nir_shader *
+build_nir_w_tiled_fragment_shader(struct anv_device *device,
+                                  texel_fetch_build_func txf_func)
+{
+   const struct glsl_type *vec4 = glsl_vec4_type();
+   const struct glsl_type *ivec3 = glsl_vector_type(GLSL_TYPE_INT, 3);
+   const struct glsl_type *uvec4 = glsl_vector_type(GLSL_TYPE_UINT, 4);
+   nir_builder b;
+
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+
+   /* We need gl_FragCoord so we know our Y-tiled position */
+   nir_variable *frag_coord_in = nir_variable_create(b.shader,
+                                                     nir_var_shader_in,
+                                                     vec4, "gl_FragCoord");
+   frag_coord_in->data.location = VARYING_SLOT_POS;
+   frag_coord_in->data.origin_upper_left = true;
+
+   /* In location 0 we have an ivec3 that has the offset from dest to
+    * source in the first two components and the stride in the third.
+    */
+   nir_variable *tex_off_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                                  ivec3, "v_tex_off");
+   tex_off_in->data.location = VARYING_SLOT_VAR0;
+   tex_off_in->data.interpolation = INTERP_MODE_FLAT;
+
+   /* In location 1 we have a uvec4 that gives us the bounds of the
+    * destination.  We need to discard if we get outside this boundary.
+    */
+   nir_variable *bounds_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                                 uvec4, "v_bounds");
+   bounds_in->data.location = VARYING_SLOT_VAR1;
+   bounds_in->data.interpolation = INTERP_MODE_FLAT;
+
+   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+                                                 vec4, "f_color");
+   color_out->data.location = FRAG_RESULT_DATA0;
+
+   nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in));
+   nir_ssa_def *x_Y = nir_channel(&b, frag_coord_int, 0);
+   nir_ssa_def *y_Y = nir_channel(&b, frag_coord_int, 1);
+
+   /* Compute the W-tiled position from the Y-tiled position */
+   /* x_W: the bits of x_Y above bit 6 move down by one; the low six bits
+    * are, from bit 0 upwards: x_Y[0], x_Y[2], y_Y[0], x_Y[4..6].
+    */
+   nir_ssa_def *x_W = nir_iand(&b, x_Y, nir_imm_int(&b, 0xffffff80));
+   x_W = nir_ushr(&b, x_W, nir_imm_int(&b, 1));
+   x_W = nir_copy_bits(&b, x_W, 0, x_Y, 0, 1);
+   x_W = nir_copy_bits(&b, x_W, 1, x_Y, 2, 1);
+   x_W = nir_copy_bits(&b, x_W, 2, y_Y, 0, 1);
+   x_W = nir_copy_bits(&b, x_W, 3, x_Y, 4, 3);
+
+   /* y_W: the bits of y_Y above bit 4 move up by one; the low six bits
+    * are, from bit 0 upwards: x_Y[1], x_Y[3], y_Y[1..4].
+    */
+   nir_ssa_def *y_W = nir_iand(&b, y_Y, nir_imm_int(&b, 0xffffffe0));
+   y_W = nir_ishl(&b, y_W, nir_imm_int(&b, 1));
+   y_W = nir_copy_bits(&b, y_W, 0, x_Y, 1, 1);
+   y_W = nir_copy_bits(&b, y_W, 1, x_Y, 3, 1);
+   y_W = nir_copy_bits(&b, y_W, 2, y_Y, 1, 4);
+
+   /* Figure out if we are out-of-bounds and discard */
+   /* Components 0/1 of the bounds act as an inclusive lower corner and
+    * components 2/3 as an exclusive upper corner (unsigned compares).
+    */
+   nir_ssa_def *bounds = nir_load_var(&b, bounds_in);
+   nir_ssa_def *oob =
+      nir_ior(&b, nir_ult(&b, x_W, nir_channel(&b, bounds, 0)),
+      nir_ior(&b, nir_ult(&b, y_W, nir_channel(&b, bounds, 1)),
+      nir_ior(&b, nir_uge(&b, x_W, nir_channel(&b, bounds, 2)),
+                  nir_uge(&b, y_W, nir_channel(&b, bounds, 3)))));
+
+   nir_intrinsic_instr *discard =
+      nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+   discard->src[0] = nir_src_for_ssa(oob);
+   nir_builder_instr_insert(&b, &discard->instr);
+
+   /* Source position = W-tiled destination position + (v_tex_off.xy);
+    * v_tex_off.z is passed on as the pitch.
+    */
+   nir_ssa_def *tex_off = nir_channels(&b, nir_load_var(&b, tex_off_in), 0x3);
+   nir_ssa_def *tex_pos = nir_iadd(&b, nir_vec2(&b, x_W, y_W), tex_off);
+   nir_ssa_def *tex_pitch = nir_channel(&b, nir_load_var(&b, tex_off_in), 2);
+
+   nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
+   nir_store_var(&b, color_out, color, 0xf);
 
    return b.shader;
 }
@@ -439,57 +1247,88 @@ build_nir_copy_fragment_shader()
+/* Tear down the blit2d meta state of a device.
+ *
+ * Destroys the render pass, the image and buffer pipeline layouts and
+ * descriptor-set layouts, and every pipeline in the [src][dst] table.  Each
+ * handle is destroyed only if it is non-null.
+ * NOTE(review): the null checks presumably let this run on a partially
+ * initialized state, since the init path zeroes the struct first -- confirm
+ * against the init function's error handling.
+ */
 void
 anv_device_finish_meta_blit2d_state(struct anv_device *device)
 {
-   anv_DestroyRenderPass(anv_device_to_handle(device),
-                         device->meta_state.blit2d.render_pass,
-                         &device->meta_state.alloc);
-   anv_DestroyPipeline(anv_device_to_handle(device),
-                       device->meta_state.blit2d.pipeline_2d_src,
-                       &device->meta_state.alloc);
-   anv_DestroyPipelineLayout(anv_device_to_handle(device),
-                             device->meta_state.blit2d.pipeline_layout,
-                             &device->meta_state.alloc);
-   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
-                                  device->meta_state.blit2d.ds_layout,
-                                  &device->meta_state.alloc);
+   if (device->meta_state.blit2d.render_pass) {
+      anv_DestroyRenderPass(anv_device_to_handle(device),
+                            device->meta_state.blit2d.render_pass,
+                            &device->meta_state.alloc);
+   }
+
+   if (device->meta_state.blit2d.img_p_layout) {
+      anv_DestroyPipelineLayout(anv_device_to_handle(device),
+                                device->meta_state.blit2d.img_p_layout,
+                                &device->meta_state.alloc);
+   }
+
+   if (device->meta_state.blit2d.img_ds_layout) {
+      anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+                                     device->meta_state.blit2d.img_ds_layout,
+                                     &device->meta_state.alloc);
+   }
+
+   if (device->meta_state.blit2d.buf_p_layout) {
+      anv_DestroyPipelineLayout(anv_device_to_handle(device),
+                                device->meta_state.blit2d.buf_p_layout,
+                                &device->meta_state.alloc);
+   }
+
+   if (device->meta_state.blit2d.buf_ds_layout) {
+      anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+                                     device->meta_state.blit2d.buf_ds_layout,
+                                     &device->meta_state.alloc);
+   }
+
+   /* One pipeline slot per (source type, destination type) pair; slots that
+    * were never filled are null and are skipped.
+    */
+   for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+      for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) {
+         if (device->meta_state.blit2d.pipelines[src][dst]) {
+            anv_DestroyPipeline(anv_device_to_handle(device),
+                                device->meta_state.blit2d.pipelines[src][dst],
+                                &device->meta_state.alloc);
+         }
+      }
+   }
 }
 
-VkResult
-anv_device_init_meta_blit2d_state(struct anv_device *device)
+static VkResult
+blit2d_init_pipeline(struct anv_device *device,
+                     enum blit2d_src_type src_type,
+                     enum blit2d_dst_type dst_type)
 {
    VkResult result;
 
-   result = anv_CreateRenderPass(anv_device_to_handle(device),
-      &(VkRenderPassCreateInfo) {
-         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-         .attachmentCount = 1,
-         .pAttachments = &(VkAttachmentDescription) {
-            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
-            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-         },
-         .subpassCount = 1,
-         .pSubpasses = &(VkSubpassDescription) {
-            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-            .inputAttachmentCount = 0,
-            .colorAttachmentCount = 1,
-            .pColorAttachments = &(VkAttachmentReference) {
-               .attachment = 0,
-               .layout = VK_IMAGE_LAYOUT_GENERAL,
-            },
-            .pResolveAttachments = NULL,
-            .pDepthStencilAttachment = &(VkAttachmentReference) {
-               .attachment = VK_ATTACHMENT_UNUSED,
-               .layout = VK_IMAGE_LAYOUT_GENERAL,
-            },
-            .preserveAttachmentCount = 1,
-            .pPreserveAttachments = (uint32_t[]) { 0 },
-         },
-         .dependencyCount = 0,
-      }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass);
-   if (result != VK_SUCCESS)
-      goto fail;
+   texel_fetch_build_func src_func;
+   switch (src_type) {
+   case BLIT2D_SRC_TYPE_NORMAL:
+      src_func = build_nir_texel_fetch;
+      break;
+   case BLIT2D_SRC_TYPE_W_DETILE:
+      src_func = build_nir_w_tiled_fetch;
+      break;
+   default:
+      unreachable("Invalid blit2d source type");
+   }
+
+   const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+   struct anv_shader_module fs = { .nir = NULL };
+   switch (dst_type) {
+   case BLIT2D_DST_TYPE_NORMAL:
+      fs.nir = build_nir_copy_fragment_shader(device, src_func);
+      vi_create_info = &normal_vi_create_info;
+      break;
+   case BLIT2D_DST_TYPE_W_TILE:
+      fs.nir = build_nir_w_tiled_fragment_shader(device, src_func);
+      vi_create_info = &w_tiled_vi_create_info;
+      break;
+   case BLIT2D_DST_TYPE_RGB:
+      /* RGB destinations and W-detiling don't mix */
+      if (src_type != BLIT2D_SRC_TYPE_NORMAL)
+         return VK_SUCCESS;
+
+      fs.nir = build_nir_rgb_fragment_shader(device, src_func);
+      vi_create_info = &rgb_vi_create_info;
+      break;
+   default:
+      return VK_SUCCESS;
+   }
 
    /* We don't use a vertex shader for blitting, but instead build and pass
     * the VUEs directly to the rasterization backend.  However, we do need
@@ -500,81 +1339,6 @@ anv_device_init_meta_blit2d_state(struct anv_device *device)
       .nir = build_nir_vertex_shader(),
    };
 
-   struct anv_shader_module fs_2d = {
-      .nir = build_nir_copy_fragment_shader(),
-   };
-
-   VkPipelineVertexInputStateCreateInfo vi_create_info = {
-      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-      .vertexBindingDescriptionCount = 2,
-      .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-         {
-            .binding = 0,
-            .stride = 0,
-            .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
-         },
-         {
-            .binding = 1,
-            .stride = 5 * sizeof(float),
-            .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-         },
-      },
-      .vertexAttributeDescriptionCount = 3,
-      .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-         {
-            /* VUE Header */
-            .location = 0,
-            .binding = 0,
-            .format = VK_FORMAT_R32G32B32A32_UINT,
-            .offset = 0
-         },
-         {
-            /* Position */
-            .location = 1,
-            .binding = 1,
-            .format = VK_FORMAT_R32G32_SFLOAT,
-            .offset = 0
-         },
-         {
-            /* Texture Coordinate */
-            .location = 2,
-            .binding = 1,
-            .format = VK_FORMAT_R32G32B32_SFLOAT,
-            .offset = 8
-         }
-      }
-   };
-
-   VkDescriptorSetLayoutCreateInfo ds_layout_info = {
-      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
-      .bindingCount = 1,
-      .pBindings = (VkDescriptorSetLayoutBinding[]) {
-         {
-            .binding = 0,
-            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
-            .pImmutableSamplers = NULL
-         },
-      }
-   };
-   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
-                                          &ds_layout_info,
-                                          &device->meta_state.alloc,
-                                          &device->meta_state.blit2d.ds_layout);
-   if (result != VK_SUCCESS)
-      goto fail_render_pass;
-
-   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
-      &(VkPipelineLayoutCreateInfo) {
-         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-         .setLayoutCount = 1,
-         .pSetLayouts = &device->meta_state.blit2d.ds_layout,
-      },
-      &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout);
-   if (result != VK_SUCCESS)
-      goto fail_descriptor_set_layout;
-
    VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
       {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@@ -585,7 +1349,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device)
       }, {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
          .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-         .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+         .module = anv_shader_module_to_handle(&fs),
          .pName = "main",
          .pSpecializationInfo = NULL
       },
@@ -595,7 +1359,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device)
       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
       .stageCount = ARRAY_SIZE(pipeline_shader_stages),
       .pStages = pipeline_shader_stages,
-      .pVertexInputState = &vi_create_info,
+      .pVertexInputState = vi_create_info,
       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
@@ -646,7 +1410,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device)
          },
       },
       .flags = 0,
-      .layout = device->meta_state.blit2d.pipeline_layout,
+      .layout = device->meta_state.blit2d.img_p_layout,
       .renderPass = device->meta_state.blit2d.render_pass,
       .subpass = 0,
    };
@@ -654,40 +1418,127 @@ anv_device_init_meta_blit2d_state(struct anv_device *device)
    const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
       .color_attachment_count = -1,
       .use_repclear = false,
-      .disable_viewport = true,
-      .disable_scissor = true,
       .disable_vs = true,
       .use_rectlist = true
    };
 
-   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
    result = anv_graphics_pipeline_create(anv_device_to_handle(device),
       VK_NULL_HANDLE,
       &vk_pipeline_info, &anv_pipeline_info,
-      &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src);
-   if (result != VK_SUCCESS)
-      goto fail_pipeline_layout;
+      &device->meta_state.alloc,
+      &device->meta_state.blit2d.pipelines[src_type][dst_type]);
 
    ralloc_free(vs.nir);
-   ralloc_free(fs_2d.nir);
+   ralloc_free(fs.nir);
 
-   return VK_SUCCESS;
+   return result;
+}
 
- fail_pipeline_layout:
-   anv_DestroyPipelineLayout(anv_device_to_handle(device),
-                             device->meta_state.blit2d.pipeline_layout,
-                             &device->meta_state.alloc);
- fail_descriptor_set_layout:
-   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
-                                  device->meta_state.blit2d.ds_layout,
-                                  &device->meta_state.alloc);
- fail_render_pass:
-   anv_DestroyRenderPass(anv_device_to_handle(device),
-                         device->meta_state.blit2d.render_pass,
-                         &device->meta_state.alloc);
+VkResult
+anv_device_init_meta_blit2d_state(struct anv_device *device)
+{  /* Build blit2d meta state: render pass, layouts, all pipelines. */
+   VkResult result; /* code of the latest create call; returned on failure */
 
-   ralloc_free(vs.nir);
-   ralloc_free(fs_2d.nir);
- fail:
+   zero(device->meta_state.blit2d); /* presumably clears all handles first */
+
+   result = anv_CreateRenderPass(anv_device_to_handle(device),
+      &(VkRenderPassCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+         .attachmentCount = 1,
+         .pAttachments = &(VkAttachmentDescription) {
+            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
+            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, /* keep prior contents */
+            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+         },
+         .subpassCount = 1,
+         .pSubpasses = &(VkSubpassDescription) {
+            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+            .inputAttachmentCount = 0,
+            .colorAttachmentCount = 1, /* the only attachment (index 0) */
+            .pColorAttachments = &(VkAttachmentReference) {
+               .attachment = 0,
+               .layout = VK_IMAGE_LAYOUT_GENERAL,
+            },
+            .pResolveAttachments = NULL,
+            .pDepthStencilAttachment = &(VkAttachmentReference) {
+               .attachment = VK_ATTACHMENT_UNUSED, /* no depth/stencil */
+               .layout = VK_IMAGE_LAYOUT_GENERAL,
+            },
+            .preserveAttachmentCount = 1, /* NOTE(review): 0 is also bound as color */
+            .pPreserveAttachments = (uint32_t[]) { 0 },
+         },
+         .dependencyCount = 0,
+      }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+      &(VkDescriptorSetLayoutCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+         .bindingCount = 1, /* lone binding: a sampled image for the FS */
+         .pBindings = (VkDescriptorSetLayoutBinding[]) {
+            {
+               .binding = 0,
+               .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+               .descriptorCount = 1,
+               .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+               .pImmutableSamplers = NULL
+            },
+         }
+      }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
+      &(VkPipelineLayoutCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+         .setLayoutCount = 1, /* pipeline layout wraps just img_ds_layout */
+         .pSetLayouts = &device->meta_state.blit2d.img_ds_layout,
+      },
+      &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+      &(VkDescriptorSetLayoutCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+         .bindingCount = 1, /* lone binding: a uniform texel buffer for the FS */
+         .pBindings = (VkDescriptorSetLayoutBinding[]) {
+            {
+               .binding = 0,
+               .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+               .descriptorCount = 1,
+               .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+               .pImmutableSamplers = NULL
+            },
+         }
+      }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
+      &(VkPipelineLayoutCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+         .setLayoutCount = 1, /* pipeline layout wraps just buf_ds_layout */
+         .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout,
+      },
+      &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+      for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) {
+         result = blit2d_init_pipeline(device, src, dst);
+         if (result != VK_SUCCESS) /* abort on the first failed pair */
+            goto fail;
+      }
+   } /* blit2d_init_pipeline() has now succeeded for every src/dst pair */
+
+   return VK_SUCCESS;
+
+fail: /* common error exit; assumes finish() copes with partial init */
+   anv_device_finish_meta_blit2d_state(device);
    return result;
 }