aco: Fix integer overflows when emitting parallel copies during RA
[mesa.git] / src / amd / vulkan / radv_meta_resolve.c
index 254861ad18a9df6c0e499b31477e4b81427277d4..0a03e62f9cbd1ee334ed6123a62ed5705f8e1916 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "radv_meta.h"
 #include "radv_private.h"
+#include "vk_format.h"
 #include "nir/nir_builder.h"
 #include "sid.h"
 
@@ -50,7 +51,7 @@ build_nir_fs(void)
 }
 
 static VkResult
-create_pass(struct radv_device *device)
+create_pass(struct radv_device *device, VkFormat vk_format, VkRenderPass *pass)
 {
        VkResult result;
        VkDevice device_h = radv_device_to_handle(device);
@@ -59,7 +60,7 @@ create_pass(struct radv_device *device)
        int i;
 
        for (i = 0; i < 2; i++) {
-               attachments[i].format = VK_FORMAT_UNDEFINED;
+               attachments[i].format = vk_format;
                attachments[i].samples = 1;
                attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
                attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
@@ -96,17 +97,39 @@ create_pass(struct radv_device *device)
                                                       .preserveAttachmentCount = 0,
                                                       .pPreserveAttachments = NULL,
                                               },
-                                                               .dependencyCount = 0,
+                                                       .dependencyCount = 2,
+                                                       .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       },
                                                                         },
                                       alloc,
-                                      &device->meta_state.resolve.pass);
+                                      pass);
 
        return result;
 }
 
 static VkResult
 create_pipeline(struct radv_device *device,
-                VkShaderModule vs_module_h)
+                VkShaderModule vs_module_h,
+                VkPipeline *pipeline,
+                VkRenderPass pass)
 {
        VkResult result;
        VkDevice device_h = radv_device_to_handle(device);
@@ -121,6 +144,23 @@ create_pipeline(struct radv_device *device,
                goto cleanup;
        }
 
+       VkPipelineLayoutCreateInfo pl_create_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+               .setLayoutCount = 0,
+               .pSetLayouts = NULL,
+               .pushConstantRangeCount = 0,
+               .pPushConstantRanges = NULL,
+       };
+
+       if (!device->meta_state.resolve.p_layout) {
+               result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+                                                  &pl_create_info,
+                                                  &device->meta_state.alloc,
+                                                  &device->meta_state.resolve.p_layout);
+               if (result != VK_SUCCESS)
+                       goto cleanup;
+       }
+
        result = radv_graphics_pipeline_create(device_h,
                                               radv_pipeline_cache_to_handle(&device->meta_state.cache),
                                               &(VkGraphicsPipelineCreateInfo) {
@@ -196,15 +236,15 @@ create_pipeline(struct radv_device *device,
                                                                VK_DYNAMIC_STATE_SCISSOR,
                                                        },
                                                },
-                                                                                                                                      .renderPass = device->meta_state.resolve.pass,
+                                               .layout = device->meta_state.resolve.p_layout,
+                                               .renderPass = pass,
                                                                                                                                       .subpass = 0,
                                                                                                                                       },
                                               &(struct radv_graphics_pipeline_create_info) {
                                                       .use_rectlist = true,
                                                       .custom_blend_mode = V_028808_CB_RESOLVE,
                                                               },
-                                              &device->meta_state.alloc,
-                                              &device->meta_state.resolve.pipeline);
+                                              &device->meta_state.alloc, pipeline);
        if (result != VK_SUCCESS)
                goto cleanup;
 
@@ -220,17 +260,25 @@ radv_device_finish_meta_resolve_state(struct radv_device *device)
 {
        struct radv_meta_state *state = &device->meta_state;
 
-       radv_DestroyRenderPass(radv_device_to_handle(device),
-                              state->resolve.pass, &state->alloc);
-       radv_DestroyPipeline(radv_device_to_handle(device),
-                            state->resolve.pipeline, &state->alloc);
+       for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
+               radv_DestroyRenderPass(radv_device_to_handle(device),
+                                      state->resolve.pass[j], &state->alloc);
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->resolve.pipeline[j], &state->alloc);
+       }
+       radv_DestroyPipelineLayout(radv_device_to_handle(device),
+                                  state->resolve.p_layout, &state->alloc);
+
 }
 
 VkResult
-radv_device_init_meta_resolve_state(struct radv_device *device)
+radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
 {
-       VkResult res = VK_SUCCESS;
+       if (on_demand)
+               return VK_SUCCESS;
 
+       VkResult res = VK_SUCCESS;
+       struct radv_meta_state *state = &device->meta_state;
        struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
        if (!vs_module.nir) {
                /* XXX: Need more accurate error */
@@ -238,14 +286,19 @@ radv_device_init_meta_resolve_state(struct radv_device *device)
                goto fail;
        }
 
-       res = create_pass(device);
-       if (res != VK_SUCCESS)
-               goto fail;
-
-       VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
-       res = create_pipeline(device, vs_module_h);
-       if (res != VK_SUCCESS)
-               goto fail;
+       for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
+               VkFormat format = radv_fs_key_format_exemplars[i];
+               unsigned fs_key = radv_format_meta_fs_key(format);
+               res = create_pass(device, format, &state->resolve.pass[fs_key]);
+               if (res != VK_SUCCESS)
+                       goto fail;
+
+               VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
+               res = create_pipeline(device, vs_module_h,
+                                     &state->resolve.pipeline[fs_key], state->resolve.pass[fs_key]);
+               if (res != VK_SUCCESS)
+                       goto fail;
+       }
 
        goto cleanup;
 
@@ -260,16 +313,18 @@ cleanup:
 
 static void
 emit_resolve(struct radv_cmd_buffer *cmd_buffer,
+            VkFormat vk_format,
              const VkOffset2D *dest_offset,
              const VkExtent2D *resolve_extent)
 {
        struct radv_device *device = cmd_buffer->device;
        VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+       unsigned fs_key = radv_format_meta_fs_key(vk_format);
 
        cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
 
        radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
-                            device->meta_state.resolve.pipeline);
+                            device->meta_state.resolve.pipeline[fs_key]);
 
        radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
                .x = dest_offset->x,
@@ -295,19 +350,76 @@ enum radv_resolve_method {
        RESOLVE_FRAGMENT,
 };
 
-static void radv_pick_resolve_method_images(struct radv_image *src_image,
+static void radv_pick_resolve_method_images(struct radv_device *device, /* Picks how src_image is resolved into dest_image; the verdict is written to *method. */
+                                           struct radv_image *src_image,
+                                           VkFormat src_format, /* format the rules below are evaluated against */
                                            struct radv_image *dest_image,
+                                           VkImageLayout dest_image_layout, /* forwarded to radv_layout_dcc_compressed() */
+                                           bool dest_render_loop, /* forwarded to radv_layout_dcc_compressed() */
+                                           struct radv_cmd_buffer *cmd_buffer, /* supplies queue_family_index for the queue mask */
                                            enum radv_resolve_method *method)
 
 {
-       if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
-               if (dest_image->surface.num_dcc_levels > 0)
+       uint32_t queue_mask = radv_image_queue_family_mask(dest_image,
+                                                          cmd_buffer->queue_family_index,
+                                                          cmd_buffer->queue_family_index); /* same family passed for both arguments; only consumed by the DCC check below */
+
+       if (vk_format_is_color(src_format)) { /* color path: *method keeps the caller's value when no rule below fires */
+               if (src_format == VK_FORMAT_R16G16_UNORM ||
+                   src_format == VK_FORMAT_R16G16_SNORM)
+                       *method = RESOLVE_COMPUTE; /* R16G16 UNORM/SNORM sources request compute */
+               else if (vk_format_is_int(src_format))
+                       *method = RESOLVE_COMPUTE; /* integer sources request compute */
+               else if (src_image->info.array_size > 1 ||
+                        dest_image->info.array_size > 1)
+                       *method = RESOLVE_COMPUTE; /* array images on either side request compute */
+       
+               if (radv_layout_dcc_compressed(device, dest_image, dest_image_layout,
+                                              dest_render_loop, queue_mask)) { /* evaluated after the chain above, so this overrides any compute choice */
                        *method = RESOLVE_FRAGMENT;
-               else
+               } else if (dest_image->planes[0].surface.micro_tile_mode !=
+                          src_image->planes[0].surface.micro_tile_mode) { /* plane-0 micro tile modes differ: use compute */
                        *method = RESOLVE_COMPUTE;
+               }
+       } else { /* non-color path: *method is always assigned here */
+               if (src_image->info.array_size > 1 ||
+                   dest_image->info.array_size > 1)
+                       *method = RESOLVE_COMPUTE; /* array images on either side: compute */
+               else
+                       *method = RESOLVE_FRAGMENT; /* single-layer source and destination: fragment */
        }
 }
 
+static VkResult /* Creates the resolve render pass and pipeline for fs_key unless the pipeline already exists; returns VK_SUCCESS or the failing creation result. */
+build_resolve_pipeline(struct radv_device *device,
+                       unsigned fs_key)
+{
+       VkResult result = VK_SUCCESS;
+
+       if (device->meta_state.resolve.pipeline[fs_key]) /* early-out taken without holding meta_state.mtx */
+               return result;
+
+       mtx_lock(&device->meta_state.mtx);
+       if (device->meta_state.resolve.pipeline[fs_key]) { /* re-check under the lock before creating anything */
+               mtx_unlock(&device->meta_state.mtx);
+               return result;
+       }
+
+       struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() }; /* NOTE(review): .nir is not NULL-checked here, unlike radv_device_init_meta_resolve_state - confirm this is safe */
+
+       result = create_pass(device, radv_fs_key_format_exemplars[fs_key], &device->meta_state.resolve.pass[fs_key]); /* pass uses the exemplar format for this key */
+       if (result != VK_SUCCESS)
+               goto fail;
+
+       VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
+       result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key], device->meta_state.resolve.pass[fs_key]); /* NOTE(review): on failure pass[fs_key] stays set while pipeline[fs_key] may not, so a later call would run create_pass again - confirm nothing leaks */
+
+fail: /* also reached on success: the code above falls through into this shared exit path */
+       ralloc_free(vs_module.nir); /* release the vertex shader NIR on every path */
+       mtx_unlock(&device->meta_state.mtx);
+       return result;
+}
+
 void radv_CmdResolveImage(
        VkCommandBuffer                             cmd_buffer_h,
        VkImage                                     src_image_h,
@@ -342,7 +454,9 @@ void radv_CmdResolveImage(
        } else
                resolve_method = RESOLVE_COMPUTE;
 
-       radv_pick_resolve_method_images(src_image, dest_image,
+       radv_pick_resolve_method_images(cmd_buffer->device, src_image,
+                                       src_image->vk_format, dest_image,
+                                       dest_image_layout, false, cmd_buffer,
                                        &resolve_method);
 
        if (resolve_method == RESOLVE_FRAGMENT) {
@@ -358,8 +472,10 @@ void radv_CmdResolveImage(
        if (resolve_method == RESOLVE_COMPUTE) {
                radv_meta_resolve_compute_image(cmd_buffer,
                                                src_image,
+                                               src_image->vk_format,
                                                src_image_layout,
                                                dest_image,
+                                               dest_image->vk_format,
                                                dest_image_layout,
                                                region_count, regions);
                return;
@@ -376,20 +492,10 @@ void radv_CmdResolveImage(
        }
        assert(dest_image->info.samples == 1);
 
-       if (src_image->info.samples >= 16) {
-               /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
-                * glBlitFramebuffer workaround for samples >= 16.
-                */
-               radv_finishme("vkCmdResolveImage: need interpolation workaround when "
-                             "samples >= 16");
-       }
-
        if (src_image->info.array_size > 1)
                radv_finishme("vkCmdResolveImage: multisample array images");
 
-       if (dest_image->surface.dcc_size) {
-               radv_initialize_dcc(cmd_buffer, dest_image, 0xffffffff);
-       }
+       unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
        for (uint32_t r = 0; r < region_count; ++r) {
                const VkImageResolve *region = &regions[r];
 
@@ -432,10 +538,27 @@ void radv_CmdResolveImage(
                const struct VkOffset3D dstOffset =
                        radv_sanitize_image_offset(dest_image->type, region->dstOffset);
 
+               if (radv_dcc_enabled(dest_image, region->dstSubresource.mipLevel)) {
+                       VkImageSubresourceRange range = {
+                               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                               .baseMipLevel = region->dstSubresource.mipLevel,
+                               .levelCount = 1,
+                               .baseArrayLayer = dest_base_layer,
+                               .layerCount = region->dstSubresource.layerCount,
+                       };
+
+                       radv_initialize_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
+               }
 
                for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
                     ++layer) {
 
+                       VkResult ret = build_resolve_pipeline(device, fs_key);
+                       if (ret != VK_SUCCESS) {
+                               cmd_buffer->record_result = ret;
+                               break;
+                       }
+
                        struct radv_image_view src_iview;
                        radv_image_view_init(&src_iview, cmd_buffer->device,
                                             &(VkImageViewCreateInfo) {
@@ -450,7 +573,7 @@ void radv_CmdResolveImage(
                                                             .baseArrayLayer = src_base_layer + layer,
                                                             .layerCount = 1,
                                                     },
-                                            });
+                                            }, NULL);
 
                        struct radv_image_view dest_iview;
                        radv_image_view_init(&dest_iview, cmd_buffer->device,
@@ -466,7 +589,7 @@ void radv_CmdResolveImage(
                                                             .baseArrayLayer = dest_base_layer + layer,
                                                             .layerCount = 1,
                                                     },
-                                             });
+                                             }, NULL);
 
                        VkFramebuffer fb_h;
                        radv_CreateFramebuffer(device_h,
@@ -486,27 +609,30 @@ void radv_CmdResolveImage(
                                               &cmd_buffer->pool->alloc,
                                               &fb_h);
 
-                       radv_CmdBeginRenderPass(cmd_buffer_h,
-                                                     &(VkRenderPassBeginInfo) {
-                                                             .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-                                                                     .renderPass = device->meta_state.resolve.pass,
-                                                                     .framebuffer = fb_h,
-                                                                     .renderArea = {
-                                                                     .offset = {
-                                                                             dstOffset.x,
-                                                                             dstOffset.y,
-                                                                     },
-                                                                     .extent = {
-                                                                             extent.width,
-                                                                             extent.height,
+                       radv_cmd_buffer_begin_render_pass(cmd_buffer,
+                                                         &(VkRenderPassBeginInfo) {
+                                                               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+                                                               .renderPass = device->meta_state.resolve.pass[fs_key],
+                                                               .framebuffer = fb_h,
+                                                               .renderArea = {
+                                                                       .offset = {
+                                                                               dstOffset.x,
+                                                                               dstOffset.y,
+                                                                       },
+                                                                       .extent = {
+                                                                               extent.width,
+                                                                               extent.height,
                                                                      }
-                                                             },
-                                                             .clearValueCount = 0,
-                                                             .pClearValues = NULL,
-                                                     },
-                                                     VK_SUBPASS_CONTENTS_INLINE);
+                                                               },
+                                                               .clearValueCount = 0,
+                                                               .pClearValues = NULL,
+                                                     });
+
+                       radv_cmd_buffer_set_subpass(cmd_buffer,
+                                                   &cmd_buffer->state.pass->subpasses[0]);
 
                        emit_resolve(cmd_buffer,
+                                    dest_iview.vk_format,
                                     &(VkOffset2D) {
                                             .x = dstOffset.x,
                                             .y = dstOffset.y,
@@ -516,7 +642,7 @@ void radv_CmdResolveImage(
                                             .height = extent.height,
                                     });
 
-                       radv_CmdEndRenderPass(cmd_buffer_h);
+                       radv_cmd_buffer_end_render_pass(cmd_buffer);
 
                        radv_DestroyFramebuffer(device_h, fb_h,
                                                &cmd_buffer->pool->alloc);
@@ -537,30 +663,78 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
        struct radv_meta_saved_state saved_state;
        enum radv_resolve_method resolve_method = RESOLVE_HW;
 
-       /* FINISHME(perf): Skip clears for resolve attachments.
-        *
-        * From the Vulkan 1.0 spec:
-        *
-        *    If the first use of an attachment in a render pass is as a resolve
-        *    attachment, then the loadOp is effectively ignored as the resolve is
-        *    guaranteed to overwrite all pixels in the render area.
-        */
+       if (subpass->ds_resolve_attachment) {
+               struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+               struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
+               struct radv_image_view *src_iview =
+                       cmd_buffer->state.attachments[src_att.attachment].iview;
+               struct radv_image_view *dst_iview =
+                       cmd_buffer->state.attachments[dst_att.attachment].iview;
+
+               /* Make sure to not clear the depth/stencil attachment after resolves. */
+               cmd_buffer->state.attachments[dst_att.attachment].pending_clear_aspects = 0;
+
+               radv_pick_resolve_method_images(cmd_buffer->device,
+                                               src_iview->image,
+                                               src_iview->vk_format,
+                                               dst_iview->image,
+                                               dst_att.layout,
+                                               dst_att.in_render_loop,
+                                               cmd_buffer,
+                                               &resolve_method);
+
+               if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+                   subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+                       if (resolve_method == RESOLVE_FRAGMENT) {
+                               radv_depth_stencil_resolve_subpass_fs(cmd_buffer,
+                                                                     VK_IMAGE_ASPECT_DEPTH_BIT,
+                                                                     subpass->depth_resolve_mode);
+                       } else {
+                               assert(resolve_method == RESOLVE_COMPUTE);
+                               radv_depth_stencil_resolve_subpass_cs(cmd_buffer,
+                                                                     VK_IMAGE_ASPECT_DEPTH_BIT,
+                                                                     subpass->depth_resolve_mode);
+                       }
+               }
 
-       if (!subpass->has_resolve)
+               if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+                   subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+                       if (resolve_method == RESOLVE_FRAGMENT) {
+                               radv_depth_stencil_resolve_subpass_fs(cmd_buffer,
+                                                                     VK_IMAGE_ASPECT_STENCIL_BIT,
+                                                                     subpass->stencil_resolve_mode);
+                       } else {
+                               assert(resolve_method == RESOLVE_COMPUTE);
+                               radv_depth_stencil_resolve_subpass_cs(cmd_buffer,
+                                                                     VK_IMAGE_ASPECT_STENCIL_BIT,
+                                                                     subpass->stencil_resolve_mode);
+                       }
+               }
+       }
+
+       if (!subpass->has_color_resolve)
                return;
 
        for (uint32_t i = 0; i < subpass->color_count; ++i) {
-               VkAttachmentReference src_att = subpass->color_attachments[i];
-               VkAttachmentReference dest_att = subpass->resolve_attachments[i];
+               struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+               struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
 
-               if (src_att.attachment == VK_ATTACHMENT_UNUSED ||
-                   dest_att.attachment == VK_ATTACHMENT_UNUSED)
+               if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
                        continue;
 
-               struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
-               struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
+               /* Make sure to not clear color attachments after resolves. */
+               cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;
+
+               struct radv_image *dst_img = cmd_buffer->state.attachments[dest_att.attachment].iview->image;
+               struct radv_image_view *src_iview= cmd_buffer->state.attachments[src_att.attachment].iview;
+               struct radv_image *src_img = src_iview->image;
+
+               radv_pick_resolve_method_images(cmd_buffer->device, src_img,
+                                               src_iview->vk_format, dst_img,
+                                               dest_att.layout,
+                                               dest_att.in_render_loop,
+                                               cmd_buffer, &resolve_method);
 
-               radv_pick_resolve_method_images(dst_img, src_img, &resolve_method);
                if (resolve_method == RESOLVE_FRAGMENT) {
                        break;
                }
@@ -578,33 +752,166 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
                       RADV_META_SAVE_GRAPHICS_PIPELINE);
 
        for (uint32_t i = 0; i < subpass->color_count; ++i) {
-               VkAttachmentReference src_att = subpass->color_attachments[i];
-               VkAttachmentReference dest_att = subpass->resolve_attachments[i];
+               struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+               struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
 
-               if (src_att.attachment == VK_ATTACHMENT_UNUSED ||
-                   dest_att.attachment == VK_ATTACHMENT_UNUSED)
+               if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
                        continue;
 
-               struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
+               struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
+               struct radv_image *dst_img = dest_iview->image;
+
+               if (radv_dcc_enabled(dst_img, dest_iview->base_mip)) {
+                       VkImageSubresourceRange range = {
+                               .aspectMask = dest_iview->aspect_mask,
+                               .baseMipLevel = dest_iview->base_mip,
+                               .levelCount = dest_iview->level_count,
+                               .baseArrayLayer = dest_iview->base_layer,
+                               .layerCount = dest_iview->layer_count,
+                       };
 
-               if (dst_img->surface.dcc_size) {
-                       radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff);
+                       radv_initialize_dcc(cmd_buffer, dst_img, &range, 0xffffffff);
                        cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
                }
 
                struct radv_subpass resolve_subpass = {
                        .color_count = 2,
-                       .color_attachments = (VkAttachmentReference[]) { src_att, dest_att },
-                       .depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
+                       .color_attachments = (struct radv_subpass_attachment[]) { src_att, dest_att },
+                       .depth_stencil_attachment = NULL,
                };
 
-               radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
+               radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+               VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dest_iview->vk_format));
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       continue;
+               }
 
                emit_resolve(cmd_buffer,
+                            dest_iview->vk_format,
                             &(VkOffset2D) { 0, 0 },
                             &(VkExtent2D) { fb->width, fb->height });
        }
 
-       cmd_buffer->state.subpass = subpass;
+       radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
        radv_meta_restore(&saved_state, cmd_buffer);
 }
+
+/**
+ * Decompress CMask/FMask before resolving a multisampled source image inside a
+ * subpass.
+ *
+ * For every color attachment of the current subpass that has a resolve
+ * attachment, and for the depth/stencil attachment when the subpass has a
+ * depth/stencil resolve attachment, this builds a single-region description
+ * of the source image view and passes it to radv_decompress_resolve_src().
+ *
+ * cmd_buffer: command buffer being recorded; its current subpass,
+ *             framebuffer and per-attachment state are read.
+ */
+void
+radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer)
+{
+       const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+       struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+       uint32_t layer_count = fb->layers;
+
+       /* A non-zero view mask overrides the framebuffer layer count. */
+       if (subpass->view_mask)
+               layer_count = util_last_bit(subpass->view_mask);
+
+       for (uint32_t i = 0; i < subpass->color_count; ++i) {
+               struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+               struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+
+               /* This color attachment is not resolved anywhere. */
+               if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+                       continue;
+
+               struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+               struct radv_image *src_image = src_iview->image;
+
+               VkImageResolve region = {0};
+               region.srcSubresource.aspectMask = src_iview->aspect_mask;
+               region.srcSubresource.mipLevel = 0;
+               region.srcSubresource.baseArrayLayer = src_iview->base_layer;
+               region.srcSubresource.layerCount = layer_count;
+
+               radv_decompress_resolve_src(cmd_buffer, src_image,
+                                           src_att.layout, 1, &region);
+       }
+
+       if (subpass->ds_resolve_attachment) {
+               struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+               /* Fetch the view from the command buffer attachment state,
+                * the same way the color loop above does, instead of from the
+                * framebuffer object.
+                * NOTE(review): presumably the framebuffer array is not always
+                * populated (e.g. imageless framebuffers) -- confirm.
+                */
+               struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+               struct radv_image *src_image = src_iview->image;
+
+               VkImageResolve region = {0};
+               region.srcSubresource.aspectMask = src_iview->aspect_mask;
+               region.srcSubresource.mipLevel = 0;
+               region.srcSubresource.baseArrayLayer = src_iview->base_layer;
+               region.srcSubresource.layerCount = layer_count;
+
+               radv_decompress_resolve_src(cmd_buffer, src_image,
+                                           src_att.layout, 1, &region);
+       }
+}
+
+/**
+ * Find the sample locations state recorded for the current subpass.
+ *
+ * Scans the command buffer's per-subpass sample locations and returns a
+ * pointer to the entry whose subpass index equals the id reported by
+ * radv_get_subpass_id(), or NULL when no entry matches.
+ */
+static struct radv_sample_locations_state *
+radv_get_resolve_sample_locations(struct radv_cmd_buffer *cmd_buffer)
+{
+       const uint32_t current_subpass = radv_get_subpass_id(cmd_buffer);
+       struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+       uint32_t idx = 0;
+
+       while (idx < cmd_state->num_subpass_sample_locs) {
+               if (cmd_state->subpass_sample_locs[idx].subpass_idx == current_subpass)
+                       return &cmd_state->subpass_sample_locs[idx].sample_location;
+               idx++;
+       }
+
+       /* No sample locations were recorded for this subpass. */
+       return NULL;
+}
+
+/**
+ * Decompress CMask/FMask before resolving a multisampled source image.
+ *
+ * For each region, records an image memory barrier that transitions the
+ * region's source subresource from src_image_layout to
+ * VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL.
+ *
+ * cmd_buffer:       command buffer the barriers are recorded into.
+ * src_image:        image that is about to be used as the resolve source.
+ * src_image_layout: layout of src_image before the barrier.
+ * region_count:     number of entries in regions; nothing is recorded for 0.
+ * regions:          resolve regions; only srcSubresource and srcOffset are
+ *                   read here.
+ */
+void
+radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer,
+                           struct radv_image *src_image,
+                           VkImageLayout src_image_layout,
+                           uint32_t region_count,
+                           const VkImageResolve *regions)
+{
+       for (uint32_t r = 0; r < region_count; ++r) {
+               const VkImageResolve *region = &regions[r];
+               const uint32_t src_base_layer =
+                       radv_meta_get_iview_layer(src_image, &region->srcSubresource,
+                                                 &region->srcOffset);
+
+               /* Lives at loop-body scope on purpose: barrier.pNext may point
+                * at it, and the barrier is consumed after the 'if' below has
+                * closed. A compound literal created inside that 'if' would be
+                * out of lifetime by the time the barrier is recorded.
+                */
+               VkSampleLocationsInfoEXT sample_loc_info;
+
+               VkImageMemoryBarrier barrier = {0};
+               barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+               barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+               barrier.oldLayout = src_image_layout;
+               barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+               barrier.image = radv_image_to_handle(src_image);
+               barrier.subresourceRange = (VkImageSubresourceRange) {
+                       .aspectMask = region->srcSubresource.aspectMask,
+                       .baseMipLevel = region->srcSubresource.mipLevel,
+                       .levelCount = 1,
+                       .baseArrayLayer = src_base_layer,
+                       .layerCount = region->srcSubresource.layerCount,
+               };
+
+               if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
+                       /* If the depth/stencil image uses different sample
+                        * locations, we need them during HTILE decompressions.
+                        */
+                       struct radv_sample_locations_state *sample_locs =
+                               radv_get_resolve_sample_locations(cmd_buffer);
+
+                       /* The lookup returns NULL when nothing was recorded
+                        * for the current subpass; chain nothing in that case
+                        * instead of dereferencing a NULL pointer.
+                        */
+                       if (sample_locs) {
+                               sample_loc_info = (VkSampleLocationsInfoEXT) {
+                                       .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
+                                       .sampleLocationsPerPixel = sample_locs->per_pixel,
+                                       .sampleLocationGridSize = sample_locs->grid_size,
+                                       .sampleLocationsCount = sample_locs->count,
+                                       .pSampleLocations = sample_locs->locations,
+                               };
+                               barrier.pNext = &sample_loc_info;
+                       }
+               }
+
+               radv_CmdPipelineBarrier(radv_cmd_buffer_to_handle(cmd_buffer),
+                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                       VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                       false, 0, NULL, 0, NULL, 1, &barrier);
+       }
+}