anv: Dirty fragment shader descriptors in meta restore
[mesa.git] / src / vulkan / anv_meta.c
index de5a037b5a859967ff01b42b4bcfc7ff1d25bb95..7034f70d6ac32e8de0e50e8f820013892676138b 100644 (file)
@@ -30,7 +30,7 @@
 #include "anv_meta.h"
 #include "anv_meta_clear.h"
 #include "anv_private.h"
-#include "anv_nir_builder.h"
+#include "glsl/nir/nir_builder.h"
 
 struct anv_render_pass anv_meta_dummy_renderpass = {0};
 
@@ -41,14 +41,15 @@ build_nir_vertex_shader(bool attr_flat)
 
    const struct glsl_type *vertex_type = glsl_vec4_type();
 
-   nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX);
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
 
    nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                               vertex_type, "a_pos");
    pos_in->data.location = VERT_ATTRIB_GENERIC0;
    nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                vertex_type, "gl_Position");
-   pos_in->data.location = VARYING_SLOT_POS;
+   pos_out->data.location = VARYING_SLOT_POS;
    nir_copy_var(&b, pos_out, pos_in);
 
    /* Add one more pass-through attribute.  For clear shaders, this is used
@@ -73,7 +74,8 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
 {
    nir_builder b;
 
-   nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT);
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");
 
    const struct glsl_type *color_type = glsl_vec4_type();
 
@@ -81,8 +83,17 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
                                                   glsl_vec4_type(), "v_attr");
    tex_pos_in->data.location = VARYING_SLOT_VAR0;
 
+   /* Swizzle the array index which comes in as Z coordinate into the right
+    * position.
+    */
+   unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
+   nir_ssa_def *const tex_pos =
+      nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
+                  (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
+
    const struct glsl_type *sampler_type =
-      glsl_sampler_type(tex_dim, false, false, glsl_get_base_type(color_type));
+      glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
+                        glsl_get_base_type(color_type));
    nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
                                                sampler_type, "s_tex");
    sampler->data.descriptor_set = 0;
@@ -92,13 +103,10 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
    tex->sampler_dim = tex_dim;
    tex->op = nir_texop_tex;
    tex->src[0].src_type = nir_tex_src_coord;
-   tex->src[0].src = nir_src_for_ssa(nir_load_var(&b, tex_pos_in));
+   tex->src[0].src = nir_src_for_ssa(tex_pos);
    tex->dest_type = nir_type_float; /* TODO */
-
-   if (tex_dim == GLSL_SAMPLER_DIM_2D)
-      tex->is_array = true;
-   tex->coord_components = 3;
-
+   tex->is_array = glsl_sampler_type_is_array(sampler_type);
+   tex->coord_components = tex_pos->num_components;
    tex->sampler = nir_deref_var_create(tex, sampler);
 
    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
@@ -107,7 +115,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
    nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                  color_type, "f_color");
    color_out->data.location = FRAG_RESULT_DATA0;
-   nir_store_var(&b, color_out, &tex->dest.ssa);
+   nir_store_var(&b, color_out, &tex->dest.ssa, 4);
 
    return b.shader;
 }
@@ -138,26 +146,27 @@ anv_meta_restore(const struct anv_meta_saved_state *state,
 
    cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1;
    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
-   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT;
+   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
 
    anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic,
                           state->dynamic_mask);
    cmd_buffer->state.dirty |= state->dynamic_mask;
+
+   /* Since we've used the pipeline with the VS disabled, set
+    * need_query_wa. See CmdBeginQuery.
+    */
+   cmd_buffer->state.need_query_wa = true;
 }
 
-static VkImageViewType
-meta_blit_get_src_image_view_type(const struct anv_image *src_image)
+VkImageViewType
+anv_meta_get_view_type(const struct anv_image *image)
 {
-   switch (src_image->type) {
-   case VK_IMAGE_TYPE_1D:
-      return VK_IMAGE_VIEW_TYPE_1D;
-   case VK_IMAGE_TYPE_2D:
-      return VK_IMAGE_VIEW_TYPE_2D;
-   case VK_IMAGE_TYPE_3D:
-      return VK_IMAGE_VIEW_TYPE_3D;
+   switch (image->type) { /* map the VkImageType to the same-dimension view */
+   case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
+   case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
+   case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
    default:
-      assert(!"bad VkImageType");
-      return 0;
+      unreachable("bad VkImageType");
    }
 }
 
@@ -182,10 +191,12 @@ meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image,
    }
 }
 
-static void
+static VkResult
 anv_device_init_meta_blit_state(struct anv_device *device)
 {
-   anv_CreateRenderPass(anv_device_to_handle(device),
+   VkResult result;
+
+   result = anv_CreateRenderPass(anv_device_to_handle(device),
       &(VkRenderPassCreateInfo) {
          .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
          .attachmentCount = 1,
@@ -211,55 +222,34 @@ anv_device_init_meta_blit_state(struct anv_device *device)
                .layout = VK_IMAGE_LAYOUT_GENERAL,
             },
             .preserveAttachmentCount = 1,
-            .pPreserveAttachments = &(VkAttachmentReference) {
-               .attachment = 0,
-               .layout = VK_IMAGE_LAYOUT_GENERAL,
-            },
+            .pPreserveAttachments = (uint32_t[]) { 0 },
          },
          .dependencyCount = 0,
-      }, &device->meta_state.blit.render_pass);
+      }, &device->meta_state.alloc, &device->meta_state.blit.render_pass);
+   if (result != VK_SUCCESS)
+      goto fail;
 
    /* We don't use a vertex shader for clearing, but instead build and pass
     * the VUEs directly to the rasterization backend.  However, we do need
     * to provide GLSL source for the vertex shader so that the compiler
     * does not dead-code our inputs.
     */
-   struct anv_shader_module vsm = {
+   struct anv_shader_module vs = {
       .nir = build_nir_vertex_shader(false),
    };
 
-   struct anv_shader_module fsm_2d = {
+   struct anv_shader_module fs_1d = {
+      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
+   };
+
+   struct anv_shader_module fs_2d = {
       .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
    };
 
-   struct anv_shader_module fsm_3d = {
+   struct anv_shader_module fs_3d = {
       .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
    };
 
-   VkShader vs;
-   anv_CreateShader(anv_device_to_handle(device),
-      &(VkShaderCreateInfo) {
-         .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO,
-         .module = anv_shader_module_to_handle(&vsm),
-         .pName = "main",
-      }, &vs);
-
-   VkShader fs_2d;
-   anv_CreateShader(anv_device_to_handle(device),
-      &(VkShaderCreateInfo) {
-         .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO,
-         .module = anv_shader_module_to_handle(&fsm_2d),
-         .pName = "main",
-      }, &fs_2d);
-
-   VkShader fs_3d;
-   anv_CreateShader(anv_device_to_handle(device),
-      &(VkShaderCreateInfo) {
-         .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO,
-         .module = anv_shader_module_to_handle(&fsm_3d),
-         .pName = "main",
-      }, &fs_3d);
-
    VkPipelineVertexInputStateCreateInfo vi_create_info = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
       .vertexBindingDescriptionCount = 2,
@@ -304,7 +294,7 @@ anv_device_init_meta_blit_state(struct anv_device *device)
    VkDescriptorSetLayoutCreateInfo ds_layout_info = {
       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
       .bindingCount = 1,
-      .pBinding = (VkDescriptorSetLayoutBinding[]) {
+      .pBindings = (VkDescriptorSetLayoutBinding[]) {
          {
             .binding = 0,
             .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
@@ -314,27 +304,35 @@ anv_device_init_meta_blit_state(struct anv_device *device)
          },
       }
    };
-   anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info,
-                                 &device->meta_state.blit.ds_layout);
-
-   anv_CreatePipelineLayout(anv_device_to_handle(device),
+   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
+                                          &ds_layout_info,
+                                          &device->meta_state.alloc,
+                                          &device->meta_state.blit.ds_layout);
+   if (result != VK_SUCCESS)
+      goto fail_render_pass;
+
+   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
       &(VkPipelineLayoutCreateInfo) {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
          .setLayoutCount = 1,
          .pSetLayouts = &device->meta_state.blit.ds_layout,
       },
-      &device->meta_state.blit.pipeline_layout);
+      &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
+   if (result != VK_SUCCESS)
+      goto fail_descriptor_set_layout;
 
    VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
       {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-         .stage = VK_SHADER_STAGE_VERTEX,
-         .shader = vs,
+         .stage = VK_SHADER_STAGE_VERTEX_BIT,
+         .module = anv_shader_module_to_handle(&vs),
+         .pName = "main",
          .pSpecializationInfo = NULL
       }, {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-         .stage = VK_SHADER_STAGE_FRAGMENT,
-         .shader = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+         .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
+         .pName = "main",
          .pSpecializationInfo = NULL
       },
    };
@@ -356,7 +354,6 @@ anv_device_init_meta_blit_state(struct anv_device *device)
       },
       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-         .depthClipEnable = true,
          .rasterizerDiscardEnable = false,
          .polygonMode = VK_POLYGON_MODE_FILL,
          .cullMode = VK_CULL_MODE_NONE,
@@ -401,6 +398,7 @@ anv_device_init_meta_blit_state(struct anv_device *device)
    };
 
    const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
+      .color_attachment_count = -1,
       .use_repclear = false,
       .disable_viewport = true,
       .disable_scissor = true,
@@ -408,22 +406,66 @@ anv_device_init_meta_blit_state(struct anv_device *device)
       .use_rectlist = true
    };
 
-   pipeline_shader_stages[1].shader = fs_2d;
-   anv_graphics_pipeline_create(anv_device_to_handle(device),
+   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d);
+   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+      VK_NULL_HANDLE,
       &vk_pipeline_info, &anv_pipeline_info,
-      &device->meta_state.blit.pipeline_2d_src);
+      &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src);
+   if (result != VK_SUCCESS)
+      goto fail_pipeline_layout;
 
-   pipeline_shader_stages[1].shader = fs_3d;
-   anv_graphics_pipeline_create(anv_device_to_handle(device),
+   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
+   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+      VK_NULL_HANDLE,
       &vk_pipeline_info, &anv_pipeline_info,
-      &device->meta_state.blit.pipeline_3d_src);
-
-   anv_DestroyShader(anv_device_to_handle(device), vs);
-   anv_DestroyShader(anv_device_to_handle(device), fs_2d);
-   anv_DestroyShader(anv_device_to_handle(device), fs_3d);
-   ralloc_free(vsm.nir);
-   ralloc_free(fsm_2d.nir);
-   ralloc_free(fsm_3d.nir);
+      &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src);
+   if (result != VK_SUCCESS)
+      goto fail_pipeline_1d;
+
+   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d);
+   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
+      VK_NULL_HANDLE,
+      &vk_pipeline_info, &anv_pipeline_info,
+      &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src);
+   if (result != VK_SUCCESS)
+      goto fail_pipeline_2d;
+
+   ralloc_free(vs.nir);
+   ralloc_free(fs_1d.nir);
+   ralloc_free(fs_2d.nir);
+   ralloc_free(fs_3d.nir);
+
+   return VK_SUCCESS;
+
+ fail_pipeline_2d:
+   anv_DestroyPipeline(anv_device_to_handle(device),
+                       device->meta_state.blit.pipeline_2d_src,
+                       &device->meta_state.alloc);
+
+ fail_pipeline_1d:
+   anv_DestroyPipeline(anv_device_to_handle(device),
+                       device->meta_state.blit.pipeline_1d_src,
+                       &device->meta_state.alloc);
+
+ fail_pipeline_layout:
+   anv_DestroyPipelineLayout(anv_device_to_handle(device),
+                             device->meta_state.blit.pipeline_layout,
+                             &device->meta_state.alloc);
+ fail_descriptor_set_layout:
+   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+                                  device->meta_state.blit.ds_layout,
+                                  &device->meta_state.alloc);
+ fail_render_pass:
+   anv_DestroyRenderPass(anv_device_to_handle(device),
+                         device->meta_state.blit.render_pass,
+                         &device->meta_state.alloc);
+
+   ralloc_free(vs.nir);
+   ralloc_free(fs_1d.nir);
+   ralloc_free(fs_2d.nir);
+   ralloc_free(fs_3d.nir);
+ fail:
+   return result;
 }
 
 static void
@@ -461,6 +503,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
       float tex_coord[3];
    } *vb_data;
 
+   assert(src_image->samples == dest_image->samples);
+
    unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
 
    struct anv_state vb_state =
@@ -504,6 +548,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
       },
    };
 
+   anv_state_clflush(vb_state);
+
    struct anv_buffer vertex_buffer = {
       .device = device,
       .size = vb_size,
@@ -527,14 +573,14 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
          .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
          .magFilter = blit_filter,
          .minFilter = blit_filter,
-      }, &sampler);
+      }, &cmd_buffer->pool->alloc, &sampler);
 
    VkDescriptorSet set;
    anv_AllocateDescriptorSets(anv_device_to_handle(device),
       &(VkDescriptorSetAllocateInfo) {
          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
          .descriptorPool = dummy_desc_pool,
-         .setLayoutCount = 1,
+         .descriptorSetCount = 1,
          .pSetLayouts = &device->meta_state.blit.ds_layout
       }, &set);
    anv_UpdateDescriptorSets(anv_device_to_handle(device),
@@ -568,7 +614,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
          .width = dest_iview->extent.width,
          .height = dest_iview->extent.height,
          .layers = 1
-      }, &fb);
+      }, &cmd_buffer->pool->alloc, &fb);
 
    ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
       &(VkRenderPassBeginInfo) {
@@ -587,8 +633,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
 
    switch (src_image->type) {
    case VK_IMAGE_TYPE_1D:
-      anv_finishme("VK_IMAGE_TYPE_1D");
-      pipeline = device->meta_state.blit.pipeline_2d_src;
+      pipeline = device->meta_state.blit.pipeline_1d_src;
       break;
    case VK_IMAGE_TYPE_2D:
       pipeline = device->meta_state.blit.pipeline_2d_src;
@@ -605,7 +650,7 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
                           VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
    }
 
-   anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 1,
+   anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                       &(VkViewport) {
                         .x = 0.0f,
                         .y = 0.0f,
@@ -628,8 +673,10 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
     * descriptor sets, etc. has been used.  We are free to delete it.
     */
    anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set));
-   anv_DestroySampler(anv_device_to_handle(device), sampler);
-   anv_DestroyFramebuffer(anv_device_to_handle(device), fb);
+   anv_DestroySampler(anv_device_to_handle(device), sampler,
+                      &cmd_buffer->pool->alloc);
+   anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
+                          &cmd_buffer->pool->alloc);
 }
 
 static void
@@ -642,6 +689,10 @@ meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
 static VkFormat
 vk_format_for_size(int bs)
 {
+   /* Note: We intentionally use the 4-channel formats whenever we can.
+    * This is so that, when we do a RGB <-> RGBX copy, the two formats will
+    * line up even though one of them is 3/4 the size of the other.
+    */
    switch (bs) {
    case 1: return VK_FORMAT_R8_UINT;
    case 2: return VK_FORMAT_R8G8_UINT;
@@ -683,11 +734,13 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer,
 
    VkImage src_image;
    image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT;
-   anv_CreateImage(vk_device, &image_info, &src_image);
+   anv_CreateImage(vk_device, &image_info,
+                   &cmd_buffer->pool->alloc, &src_image);
 
    VkImage dest_image;
    image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
-   anv_CreateImage(vk_device, &image_info, &dest_image);
+   anv_CreateImage(vk_device, &image_info,
+                   &cmd_buffer->pool->alloc, &dest_image);
 
    /* We could use a vk call to bind memory, but that would require
     * creating a dummy memory object etc. so there's really no point.
@@ -742,8 +795,8 @@ do_buffer_copy(struct anv_cmd_buffer *cmd_buffer,
                   (VkExtent3D) { width, height, 1 },
                   VK_FILTER_NEAREST);
 
-   anv_DestroyImage(vk_device, src_image);
-   anv_DestroyImage(vk_device, dest_image);
+   anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc);
+   anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc);
 }
 
 void anv_CmdCopyBuffer(
@@ -793,7 +846,7 @@ void anv_CmdCopyBuffer(
 
       /* First, we make a bunch of max-sized copies */
       uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs;
-      while (copy_size > max_copy_size) {
+      while (copy_size >= max_copy_size) {
          do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
                         dest_buffer->bo, dest_offset,
                         max_surface_dim, max_surface_dim, copy_format);
@@ -824,6 +877,106 @@ void anv_CmdCopyBuffer(
    meta_finish_blit(cmd_buffer, &saved_state);
 }
 
+/* vkCmdUpdateBuffer: stage pData in dynamic state and copy it to dstBuffer. */
+void anv_CmdUpdateBuffer(
+    VkCommandBuffer                             commandBuffer,
+    VkBuffer                                    dstBuffer,
+    VkDeviceSize                                dstOffset,
+    VkDeviceSize                                dataSize,
+    const uint32_t*                             pData)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
+   struct anv_meta_saved_state saved_state;
+
+   meta_prepare_blit(cmd_buffer, &saved_state);
+
+   /* We can't quite grab a full block because the state stream needs a
+    * little data at the top to build its linked list.
+    */
+   const uint32_t max_update_size =
+      cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
+   assert(max_update_size < (1 << 14) * 4);
+
+   while (dataSize) {
+      const uint32_t copy_size = MIN2(dataSize, max_update_size);
+      struct anv_state tmp_data =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
+      memcpy(tmp_data.map, pData, copy_size);
+      /* Use the widest texel size that divides both the size and offset. */
+      VkFormat format;
+      int bs;
+      if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) {
+         format = VK_FORMAT_R32G32B32A32_UINT;
+         bs = 16;
+      } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) {
+         format = VK_FORMAT_R32G32_UINT;
+         bs = 8;
+      } else {
+         assert((copy_size & 3) == 0 && (dstOffset & 3) == 0);
+         format = VK_FORMAT_R32_UINT;
+         bs = 4;
+      }
+
+      do_buffer_copy(cmd_buffer,
+                     &cmd_buffer->device->dynamic_state_block_pool.bo,
+                     tmp_data.offset,
+                     dst_buffer->bo, dst_buffer->offset + dstOffset,
+                     copy_size / bs, 1, format);
+
+      dataSize -= copy_size;
+      dstOffset += copy_size;
+      pData = (void *)pData + copy_size;
+   }
+
+   meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+/* Pick the UINT format whose block size matches this aspect's ISL surface. */
+static VkFormat
+choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect)
+{
+   assert(__builtin_popcount(aspect) == 1);
+   struct isl_surf *surf =
+      &anv_image_get_surface_for_aspect_mask(image, aspect)->isl;
+
+   /* vkCmdCopyImage behaves like memcpy. Therefore we choose identical UINT
+    * formats for the source and destination image views.
+    *
+    * From the Vulkan spec (2015-12-30):
+    *
+    *    vkCmdCopyImage performs image copies in a similar manner to a host
+    *    memcpy. It does not perform general-purpose conversions such as
+    *    scaling, resizing, blending, color-space conversion, or format
+    *    conversions.  Rather, it simply copies raw image data. vkCmdCopyImage
+    *    can copy between images with different formats, provided the formats
+    *    are compatible as defined below.
+    *
+    *    [The spec later defines compatibility as having the same number of
+    *    bytes per block].
+    */
+   return vk_format_for_size(isl_format_layouts[surf->format].bs);
+}
+
+/* Pick the UINT format whose block size matches the image's linear format. */
+static VkFormat
+choose_buffer_format(struct anv_image *image, VkImageAspectFlagBits aspect)
+{
+   assert(__builtin_popcount(aspect) == 1);
+   /* vkCmdCopy* commands behave like memcpy. Therefore we choose
+    * compatible UINT formats for the source and destination image views.
+    *
+    * For the buffer, we go back to the original image format and get
+    * the format as if it were linear.  This way, for RGB formats, we get
+    * an RGB format here even if the tiled image is RGBA. XXX: This doesn't
+    * work if the buffer is the destination.
+    */
+   enum isl_format linear_format = anv_get_isl_format(image->vk_format, aspect,
+                                                      VK_IMAGE_TILING_LINEAR);
+
+   return vk_format_for_size(isl_format_layouts[linear_format].bs);
+}
+
 void anv_CmdCopyImage(
     VkCommandBuffer                             commandBuffer,
     VkImage                                     srcImage,
@@ -836,24 +989,35 @@ void anv_CmdCopyImage(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_image, src_image, srcImage);
    ANV_FROM_HANDLE(anv_image, dest_image, destImage);
-
-   const VkImageViewType src_iview_type =
-      meta_blit_get_src_image_view_type(src_image);
-
    struct anv_meta_saved_state saved_state;
 
+   /* From the Vulkan 1.0 spec:
+    *
+    *    vkCmdCopyImage can be used to copy image data between multisample
+    *    images, but both images must have the same number of samples.
+    */
+   assert(src_image->samples == dest_image->samples);
+
    meta_prepare_blit(cmd_buffer, &saved_state);
 
    for (unsigned r = 0; r < regionCount; r++) {
+      assert(pRegions[r].srcSubresource.aspectMask ==
+             pRegions[r].dstSubresource.aspectMask);
+
+      VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask;
+
+      VkFormat src_format = choose_iview_format(src_image, aspect);
+      VkFormat dst_format = choose_iview_format(dest_image, aspect);
+
       struct anv_image_view src_iview;
       anv_image_view_init(&src_iview, cmd_buffer->device,
          &(VkImageViewCreateInfo) {
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
             .image = srcImage,
-            .viewType = src_iview_type,
-            .format = src_image->format->vk_format,
+            .viewType = anv_meta_get_view_type(src_image),
+            .format = src_format,
             .subresourceRange = {
-               .aspectMask = pRegions[r].srcSubresource.aspectMask,
+               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
                .levelCount = 1,
                .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
@@ -894,8 +1058,8 @@ void anv_CmdCopyImage(
             &(VkImageViewCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                .image = destImage,
-               .viewType = VK_IMAGE_VIEW_TYPE_2D,
-               .format = dest_image->format->vk_format,
+               .viewType = anv_meta_get_view_type(dest_image),
+               .format = dst_format,
                .subresourceRange = {
                   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                   .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
@@ -934,12 +1098,16 @@ void anv_CmdBlitImage(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_image, src_image, srcImage);
    ANV_FROM_HANDLE(anv_image, dest_image, destImage);
-
-   const VkImageViewType src_iview_type =
-      meta_blit_get_src_image_view_type(src_image);
-
    struct anv_meta_saved_state saved_state;
 
+   /* From the Vulkan 1.0 spec:
+    *
+    *    vkCmdBlitImage must not be used for multisampled source or
+    *    destination images. Use vkCmdResolveImage for this purpose.
+    */
+   assert(src_image->samples == 1);
+   assert(dest_image->samples == 1);
+
    anv_finishme("respect VkFilter");
 
    meta_prepare_blit(cmd_buffer, &saved_state);
@@ -950,8 +1118,8 @@ void anv_CmdBlitImage(
          &(VkImageViewCreateInfo) {
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
             .image = srcImage,
-            .viewType = src_iview_type,
-            .format = src_image->format->vk_format,
+            .viewType = anv_meta_get_view_type(src_image),
+            .format = src_image->vk_format,
             .subresourceRange = {
                .aspectMask = pRegions[r].srcSubresource.aspectMask,
                .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
@@ -963,20 +1131,37 @@ void anv_CmdBlitImage(
          cmd_buffer);
 
       const VkOffset3D dest_offset = {
-         .x = pRegions[r].dstOffset.x,
-         .y = pRegions[r].dstOffset.y,
+         .x = pRegions[r].dstOffsets[0].x,
+         .y = pRegions[r].dstOffsets[0].y,
          .z = 0,
       };
 
+      if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
+          pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
+          pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
+          pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
+         anv_finishme("FINISHME: Allow flipping in blits");
+
+      const VkExtent3D dest_extent = {
+         .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
+         .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
+      };
+
+      const VkExtent3D src_extent = {
+         .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
+         .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
+      };
+
       const uint32_t dest_array_slice =
          meta_blit_get_dest_view_base_array_slice(dest_image,
                                                   &pRegions[r].dstSubresource,
-                                                  &pRegions[r].dstOffset);
+                                                  &pRegions[r].dstOffsets[0]);
 
       if (pRegions[r].srcSubresource.layerCount > 1)
          anv_finishme("FINISHME: copy multiple array layers");
 
-      if (pRegions[r].dstExtent.depth > 1)
+      if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z ||
+          pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z)
          anv_finishme("FINISHME: copy multiple depth layers");
 
       struct anv_image_view dest_iview;
@@ -984,8 +1169,8 @@ void anv_CmdBlitImage(
          &(VkImageViewCreateInfo) {
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
             .image = destImage,
-            .viewType = VK_IMAGE_VIEW_TYPE_2D,
-            .format = dest_image->format->vk_format,
+            .viewType = anv_meta_get_view_type(dest_image),
+            .format = dest_image->vk_format,
             .subresourceRange = {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
@@ -998,11 +1183,9 @@ void anv_CmdBlitImage(
 
       meta_emit_blit(cmd_buffer,
                      src_image, &src_iview,
-                     pRegions[r].srcOffset,
-                     pRegions[r].srcExtent,
+                     pRegions[r].srcOffsets[0], src_extent,
                      dest_image, &dest_iview,
-                     dest_offset,
-                     pRegions[r].dstExtent,
+                     dest_offset, dest_extent,
                      filter);
    }
 
@@ -1013,6 +1196,7 @@ static struct anv_image *
 make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format,
                       VkImageUsageFlags usage,
                       VkImageType image_type,
+                      const VkAllocationCallbacks *alloc,
                       const VkBufferImageCopy *copy)
 {
    ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer);
@@ -1037,7 +1221,7 @@ make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format,
          .tiling = VK_IMAGE_TILING_LINEAR,
          .usage = usage,
          .flags = 0,
-      }, &vk_image);
+      }, alloc, &vk_image);
    assert(result == VK_SUCCESS);
 
    ANV_FROM_HANDLE(anv_image, image, vk_image);
@@ -1062,24 +1246,26 @@ void anv_CmdCopyBufferToImage(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_image, dest_image, destImage);
    VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
-   const VkFormat orig_format = dest_image->format->vk_format;
    struct anv_meta_saved_state saved_state;
 
+   /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
+    * VK_SAMPLE_COUNT_1_BIT."
+    */
+   assert(dest_image->samples == 1);
+
    meta_prepare_blit(cmd_buffer, &saved_state);
 
    for (unsigned r = 0; r < regionCount; r++) {
-      VkFormat proxy_format = orig_format;
-      VkImageAspectFlags proxy_aspect = pRegions[r].imageSubresource.aspectMask;
+      VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
 
-      if (orig_format == VK_FORMAT_S8_UINT) {
-         proxy_format = VK_FORMAT_R8_UINT;
-         proxy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
-      }
+      VkFormat image_format = choose_iview_format(dest_image, aspect);
+      VkFormat buffer_format = choose_buffer_format(dest_image, aspect);
 
       struct anv_image *src_image =
-         make_image_for_buffer(vk_device, srcBuffer, proxy_format,
+         make_image_for_buffer(vk_device, srcBuffer, buffer_format,
                                VK_IMAGE_USAGE_SAMPLED_BIT,
-                               dest_image->type, &pRegions[r]);
+                               dest_image->type, &cmd_buffer->pool->alloc,
+                               &pRegions[r]);
 
       const uint32_t dest_base_array_slice =
          meta_blit_get_dest_view_base_array_slice(dest_image,
@@ -1102,9 +1288,9 @@ void anv_CmdCopyBufferToImage(
                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                .image = anv_image_to_handle(src_image),
                .viewType = VK_IMAGE_VIEW_TYPE_2D,
-               .format = proxy_format,
+               .format = buffer_format,
                .subresourceRange = {
-                  .aspectMask = proxy_aspect,
+                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                   .baseMipLevel = 0,
                   .levelCount = 1,
                   .baseArrayLayer = 0,
@@ -1118,8 +1304,8 @@ void anv_CmdCopyBufferToImage(
             &(VkImageViewCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                .image = anv_image_to_handle(dest_image),
-               .viewType = VK_IMAGE_VIEW_TYPE_2D,
-               .format = proxy_format,
+               .viewType = anv_meta_get_view_type(dest_image),
+               .format = image_format,
                .subresourceRange = {
                   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                   .baseMipLevel = pRegions[r].imageSubresource.mipLevel,
@@ -1154,12 +1340,13 @@ void anv_CmdCopyBufferToImage(
           * increment the offset directly in the image effectively
           * re-binding it to different backing memory.
           */
-         /* XXX: Insert a real CPP */
          src_image->offset += src_image->extent.width *
-                              src_image->extent.height * 4;
+                              src_image->extent.height *
+                              src_image->format->isl_layout->bs;
       }
 
-      anv_DestroyImage(vk_device, anv_image_to_handle(src_image));
+      anv_DestroyImage(vk_device, anv_image_to_handle(src_image),
+                       &cmd_buffer->pool->alloc);
    }
 
    meta_finish_blit(cmd_buffer, &saved_state);
@@ -1178,21 +1365,29 @@ void anv_CmdCopyImageToBuffer(
    VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
    struct anv_meta_saved_state saved_state;
 
-   const VkImageViewType src_iview_type =
-      meta_blit_get_src_image_view_type(src_image);
+
+   /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to
+    * VK_SAMPLE_COUNT_1_BIT."
+    */
+   assert(src_image->samples == 1);
 
    meta_prepare_blit(cmd_buffer, &saved_state);
 
    for (unsigned r = 0; r < regionCount; r++) {
+      VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
+
+      VkFormat image_format = choose_iview_format(src_image, aspect);
+      VkFormat buffer_format = choose_buffer_format(src_image, aspect);
+
       struct anv_image_view src_iview;
       anv_image_view_init(&src_iview, cmd_buffer->device,
          &(VkImageViewCreateInfo) {
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
             .image = srcImage,
-            .viewType = src_iview_type,
-            .format = src_image->format->vk_format,
+            .viewType = anv_meta_get_view_type(src_image),
+            .format = image_format,
             .subresourceRange = {
-               .aspectMask = pRegions[r].imageSubresource.aspectMask,
+               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .baseMipLevel = pRegions[r].imageSubresource.mipLevel,
                .levelCount = 1,
                .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer,
@@ -1201,15 +1396,11 @@ void anv_CmdCopyImageToBuffer(
          },
          cmd_buffer);
 
-      VkFormat dest_format = src_image->format->vk_format;
-      if (dest_format == VK_FORMAT_S8_UINT) {
-         dest_format = VK_FORMAT_R8_UINT;
-      }
-
       struct anv_image *dest_image =
-         make_image_for_buffer(vk_device, destBuffer, dest_format,
+         make_image_for_buffer(vk_device, destBuffer, buffer_format,
                                VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
-                               src_image->type, &pRegions[r]);
+                               src_image->type, &cmd_buffer->pool->alloc,
+                               &pRegions[r]);
 
       unsigned num_slices;
       if (src_image->type == VK_IMAGE_TYPE_3D) {
@@ -1230,7 +1421,7 @@ void anv_CmdCopyImageToBuffer(
                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                .image = anv_image_to_handle(dest_image),
                .viewType = VK_IMAGE_VIEW_TYPE_2D,
-               .format = dest_format,
+               .format = buffer_format,
                .subresourceRange = {
                   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                   .baseMipLevel = 0,
@@ -1257,37 +1448,20 @@ void anv_CmdCopyImageToBuffer(
           * increment the offset directly in the image effectively
           * re-binding it to different backing memory.
           */
-         /* XXX: Insert a real CPP */
+         /* dest_image wraps the buffer and was created with buffer_format,
+          * so step by its own format's bs rather than the source image's. */
          dest_image->offset += dest_image->extent.width *
-                               dest_image->extent.height * 4;
+                               dest_image->extent.height *
+                               dest_image->format->isl_layout->bs;
       }
 
-      anv_DestroyImage(vk_device, anv_image_to_handle(dest_image));
+      anv_DestroyImage(vk_device, anv_image_to_handle(dest_image),
+                       &cmd_buffer->pool->alloc);
    }
 
    meta_finish_blit(cmd_buffer, &saved_state);
 }
 
-void anv_CmdUpdateBuffer(
-    VkCommandBuffer                             commandBuffer,
-    VkBuffer                                    destBuffer,
-    VkDeviceSize                                destOffset,
-    VkDeviceSize                                dataSize,
-    const uint32_t*                             pData)
-{
-   stub();
-}
-
-void anv_CmdFillBuffer(
-    VkCommandBuffer                             commandBuffer,
-    VkBuffer                                    destBuffer,
-    VkDeviceSize                                destOffset,
-    VkDeviceSize                                fillSize,
-    uint32_t                                    data)
-{
-   stub();
-}
-
 void anv_CmdResolveImage(
     VkCommandBuffer                             commandBuffer,
     VkImage                                     srcImage,
@@ -1300,11 +1472,74 @@ void anv_CmdResolveImage(
    stub();
 }
 
-void
+/* Allocation callback installed in device->meta_state.alloc.  The user-data
+ * pointer is the anv_device itself.  The scope passed by the caller is
+ * ignored: the request is forwarded to the device's own allocator with
+ * VK_SYSTEM_ALLOCATION_SCOPE_DEVICE.
+ */
+static void *
+meta_alloc(void *_device, size_t size, size_t alignment,
+           VkSystemAllocationScope allocationScope)
+{
+   struct anv_device *device = _device;
+   return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment,
+                                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+}
+
+/* Reallocation counterpart of meta_alloc(): forwards to the device
+ * allocator's pfnReallocation, again passing device scope.
+ */
+static void *
+meta_realloc(void *_device, void *original, size_t size, size_t alignment,
+             VkSystemAllocationScope allocationScope)
+{
+   struct anv_device *device = _device;
+   return device->alloc.pfnReallocation(device->alloc.pUserData, original,
+                                        size, alignment,
+                                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+}
+
+/* Free callback for device->meta_state.alloc; forwards to the device
+ * allocator's pfnFree.
+ */
+static void
+meta_free(void *_device, void *data)
+{
+   struct anv_device *device = _device;
+   /* Call as a plain statement: ISO C does not allow "return <expr>;" in a
+    * function whose return type is void.
+    */
+   device->alloc.pfnFree(device->alloc.pUserData, data);
+}
+
+/* Set up device->meta_state: install the allocator wrappers, then
+ * initialize the clear state followed by the blit state.
+ *
+ * Returns VK_SUCCESS, or the first failing VkResult.  If blit setup fails,
+ * the already-initialized clear state is torn down before returning.
+ */
+VkResult
 anv_device_init_meta(struct anv_device *device)
 {
-   anv_device_init_meta_clear_state(device);
-   anv_device_init_meta_blit_state(device);
+   device->meta_state.alloc = (VkAllocationCallbacks) {
+      .pUserData = device,
+      .pfnAllocation = meta_alloc,
+      .pfnReallocation = meta_realloc,
+      .pfnFree = meta_free,
+   };
+
+   VkResult result;
+   result = anv_device_init_meta_clear_state(device);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = anv_device_init_meta_blit_state(device);
+   if (result != VK_SUCCESS) {
+      anv_device_finish_meta_clear_state(device);
+      return result;
+   }
+
+   return VK_SUCCESS;
 }
 
 void
@@ -1314,13 +1529,21 @@ anv_device_finish_meta(struct anv_device *device)
 
    /* Blit */
    anv_DestroyRenderPass(anv_device_to_handle(device),
-                         device->meta_state.blit.render_pass);
+                         device->meta_state.blit.render_pass,
+                         &device->meta_state.alloc);
+   anv_DestroyPipeline(anv_device_to_handle(device),
+                       device->meta_state.blit.pipeline_1d_src,
+                       &device->meta_state.alloc);
    anv_DestroyPipeline(anv_device_to_handle(device),
-                       device->meta_state.blit.pipeline_2d_src);
+                       device->meta_state.blit.pipeline_2d_src,
+                       &device->meta_state.alloc);
    anv_DestroyPipeline(anv_device_to_handle(device),
-                       device->meta_state.blit.pipeline_3d_src);
+                       device->meta_state.blit.pipeline_3d_src,
+                       &device->meta_state.alloc);
    anv_DestroyPipelineLayout(anv_device_to_handle(device),
-                             device->meta_state.blit.pipeline_layout);
+                             device->meta_state.blit.pipeline_layout,
+                             &device->meta_state.alloc);
    anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
-                                  device->meta_state.blit.ds_layout);
+                                  device->meta_state.blit.ds_layout,
+                                  &device->meta_state.alloc);
 }