aco: Fix integer overflows when emitting parallel copies during RA
[mesa.git] / src / amd / vulkan / radv_meta_clear.c
index 091b73841f80be9c86d1ed435ce372630626fc7a..bfeda8e3fa9369391b638f2da761f786b6959e5b 100644 (file)
@@ -235,7 +235,27 @@ create_color_renderpass(struct radv_device *device,
                                                       .preserveAttachmentCount = 0,
                                                       .pPreserveAttachments = NULL,
                                               },
-                                                               .dependencyCount = 0,
+                                                       .dependencyCount = 2,
+                                                       .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       },
                                                                         }, &device->meta_state.alloc, pass);
        mtx_unlock(&device->meta_state.mtx);
        return result;
@@ -344,6 +364,16 @@ radv_device_finish_meta_clear_state(struct radv_device *device)
                        radv_DestroyPipeline(radv_device_to_handle(device),
                                             state->clear[i].depthstencil_pipeline[j],
                                             &state->alloc);
+
+                       radv_DestroyPipeline(radv_device_to_handle(device),
+                                            state->clear[i].depth_only_unrestricted_pipeline[j],
+                                            &state->alloc);
+                       radv_DestroyPipeline(radv_device_to_handle(device),
+                                            state->clear[i].stencil_only_unrestricted_pipeline[j],
+                                            &state->alloc);
+                       radv_DestroyPipeline(radv_device_to_handle(device),
+                                            state->clear[i].depthstencil_unrestricted_pipeline[j],
+                                            &state->alloc);
                }
                radv_DestroyRenderPass(radv_device_to_handle(device),
                                      state->clear[i].depthstencil_rp,
@@ -355,6 +385,9 @@ radv_device_finish_meta_clear_state(struct radv_device *device)
        radv_DestroyPipelineLayout(radv_device_to_handle(device),
                                   state->clear_depth_p_layout,
                                   &state->alloc);
+       radv_DestroyPipelineLayout(radv_device_to_handle(device),
+                                  state->clear_depth_unrestricted_p_layout,
+                                  &state->alloc);
 
        finish_meta_clear_htile_mask_state(device);
 }
@@ -367,10 +400,10 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 {
        struct radv_device *device = cmd_buffer->device;
        const struct radv_subpass *subpass = cmd_buffer->state.subpass;
-       const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
        const uint32_t subpass_att = clear_att->colorAttachment;
        const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
-       const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
+       const struct radv_image_view *iview = cmd_buffer->state.attachments ?
+               cmd_buffer->state.attachments[pass_att].iview : NULL;
        uint32_t samples, samples_log2;
        VkFormat format;
        unsigned fs_key;
@@ -470,15 +503,21 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 
 
 static void
-build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs)
+build_depthstencil_shader(struct nir_shader **out_vs,
+                         struct nir_shader **out_fs,
+                         bool unrestricted)
 {
        nir_builder vs_b, fs_b;
 
        nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
        nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
 
-       vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
-       fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
+       vs_b.shader->info.name = ralloc_strdup(vs_b.shader,
+                                              unrestricted ? "meta_clear_depthstencil_unrestricted_vs"
+                                                           : "meta_clear_depthstencil_vs");
+       fs_b.shader->info.name = ralloc_strdup(fs_b.shader,
+                                              unrestricted ? "meta_clear_depthstencil_unrestricted_fs"
+                                                           : "meta_clear_depthstencil_fs");
        const struct glsl_type *position_out_type = glsl_vec4_type();
 
        nir_variable *vs_out_pos =
@@ -486,15 +525,36 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
                                    "gl_Position");
        vs_out_pos->data.location = VARYING_SLOT_POS;
 
-       nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(vs_b.shader, nir_intrinsic_load_push_constant);
-       nir_intrinsic_set_base(in_color_load, 0);
-       nir_intrinsic_set_range(in_color_load, 4);
-       in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&vs_b, 0));
-       in_color_load->num_components = 1;
-       nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
-       nir_builder_instr_insert(&vs_b, &in_color_load->instr);
+       nir_ssa_def *z;
+       if (unrestricted) {
+               nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
+               nir_intrinsic_set_base(in_color_load, 0);
+               nir_intrinsic_set_range(in_color_load, 4);
+               in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
+               in_color_load->num_components = 1;
+               nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
+               nir_builder_instr_insert(&fs_b, &in_color_load->instr);
+
+               nir_variable *fs_out_depth =
+                       nir_variable_create(fs_b.shader, nir_var_shader_out,
+                                           glsl_int_type(), "f_depth");
+               fs_out_depth->data.location = FRAG_RESULT_DEPTH;
+               nir_store_var(&fs_b, fs_out_depth, &in_color_load->dest.ssa, 0x1);
+
+               z = nir_imm_float(&vs_b, 0.0);
+       } else {
+               nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(vs_b.shader, nir_intrinsic_load_push_constant);
+               nir_intrinsic_set_base(in_color_load, 0);
+               nir_intrinsic_set_range(in_color_load, 4);
+               in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&vs_b, 0));
+               in_color_load->num_components = 1;
+               nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
+               nir_builder_instr_insert(&vs_b, &in_color_load->instr);
+
+               z = &in_color_load->dest.ssa;
+       }
 
-       nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, &in_color_load->dest.ssa);
+       nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z);
        nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
 
        const struct glsl_type *layer_type = glsl_int_type();
@@ -550,7 +610,27 @@ create_depthstencil_renderpass(struct radv_device *device,
                                                       .preserveAttachmentCount = 0,
                                                       .pPreserveAttachments = NULL,
                                               },
-                                                               .dependencyCount = 0,
+                                                       .dependencyCount = 2,
+                                                       .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       }
                                                                         }, &device->meta_state.alloc, render_pass);
        mtx_unlock(&device->meta_state.mtx);
        return result;
@@ -561,6 +641,7 @@ create_depthstencil_pipeline(struct radv_device *device,
                              VkImageAspectFlags aspects,
                             uint32_t samples,
                             int index,
+                            bool unrestricted,
                             VkPipeline *pipeline,
                             VkRenderPass render_pass)
 {
@@ -573,7 +654,7 @@ create_depthstencil_pipeline(struct radv_device *device,
                return VK_SUCCESS;
        }
 
-       build_depthstencil_shader(&vs_nir, &fs_nir);
+       build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted);
 
        const VkPipelineVertexInputStateCreateInfo vi_state = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -583,11 +664,11 @@ create_depthstencil_pipeline(struct radv_device *device,
 
        const VkPipelineDepthStencilStateCreateInfo ds_state = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
-               .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+               .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
                .depthCompareOp = VK_COMPARE_OP_ALWAYS,
-               .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+               .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
                .depthBoundsTestEnable = false,
-               .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
+               .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
                .front = {
                        .passOp = VK_STENCIL_OP_REPLACE,
                        .compareOp = VK_COMPARE_OP_ALWAYS,
@@ -629,6 +710,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
                                      const struct radv_image_view *iview,
                                      VkImageAspectFlags aspects,
                                      VkImageLayout layout,
+                                     bool in_render_loop,
                                      const VkClearRect *clear_rect,
                                      VkClearDepthStencilValue clear_value)
 {
@@ -651,7 +733,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
            iview->base_mip == 0 &&
            iview->base_layer == 0 &&
            iview->layer_count == iview->image->info.array_size &&
-           radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
+           radv_layout_is_htile_compressed(iview->image, layout, in_render_loop, queue_mask) &&
            radv_image_extent_compare(iview->image, &iview->extent))
                return true;
        return false;
@@ -664,10 +746,13 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
                           int samples_log2,
                           VkImageAspectFlags aspects,
                           VkImageLayout layout,
+                          bool in_render_loop,
                           const VkClearRect *clear_rect,
                           VkClearDepthStencilValue clear_value)
 {
-       bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value);
+       bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout,
+                                             in_render_loop, clear_rect, clear_value);
+       bool unrestricted = cmd_buffer->device->enabled_extensions.EXT_depth_range_unrestricted;
        int index = DEPTH_CLEAR_SLOW;
        VkPipeline *pipeline;
 
@@ -679,13 +764,19 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
 
        switch (aspects) {
        case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
-               pipeline = &meta_state->clear[samples_log2].depthstencil_pipeline[index];
+               pipeline = unrestricted ?
+                          &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index] :
+                          &meta_state->clear[samples_log2].depthstencil_pipeline[index];
                break;
        case VK_IMAGE_ASPECT_DEPTH_BIT:
-               pipeline = &meta_state->clear[samples_log2].depth_only_pipeline[index];
+               pipeline = unrestricted ?
+                          &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index] :
+                          &meta_state->clear[samples_log2].depth_only_pipeline[index];
                break;
        case VK_IMAGE_ASPECT_STENCIL_BIT:
-               pipeline = &meta_state->clear[samples_log2].stencil_only_pipeline[index];
+               pipeline = unrestricted ?
+                          &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index] :
+                          &meta_state->clear[samples_log2].stencil_only_pipeline[index];
                break;
        default:
                unreachable("expected depth or stencil aspect");
@@ -701,7 +792,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
        }
 
        if (*pipeline == VK_NULL_HANDLE) {
-               VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index,
+               VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted,
                                                            pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
                if (ret != VK_SUCCESS) {
                        cmd_buffer->record_result = ret;
@@ -721,11 +812,11 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
        struct radv_device *device = cmd_buffer->device;
        struct radv_meta_state *meta_state = &device->meta_state;
        const struct radv_subpass *subpass = cmd_buffer->state.subpass;
-       const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
        const uint32_t pass_att = ds_att->attachment;
        VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
        VkImageAspectFlags aspects = clear_att->aspectMask;
-       const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
+       const struct radv_image_view *iview = cmd_buffer->state.attachments ?
+               cmd_buffer->state.attachments[pass_att].iview : NULL;
        uint32_t samples, samples_log2;
        VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
 
@@ -746,10 +837,17 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
        if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
                clear_value.depth = 1.0f;
 
-       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-                             device->meta_state.clear_depth_p_layout,
-                             VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
-                             &clear_value.depth);
+       if (cmd_buffer->device->enabled_extensions.EXT_depth_range_unrestricted) {
+               radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                                     device->meta_state.clear_depth_unrestricted_p_layout,
+                                     VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
+                                     &clear_value.depth);
+       } else {
+               radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                                     device->meta_state.clear_depth_p_layout,
+                                     VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
+                                     &clear_value.depth);
+       }
 
        uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
        if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
@@ -763,6 +861,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
                                                         samples_log2,
                                                         aspects,
                                                         ds_att->layout,
+                                                        ds_att->in_render_loop,
                                                         clear_rect,
                                                         clear_value);
        if (!pipeline)
@@ -780,8 +879,9 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
                             pipeline);
 
        if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
-                                     ds_att->layout, clear_rect, clear_value))
-               radv_update_ds_clear_metadata(cmd_buffer, iview->image,
+                                     ds_att->layout, ds_att->in_render_loop,
+                                     clear_rect, clear_value))
+               radv_update_ds_clear_metadata(cmd_buffer, iview,
                                              clear_value, aspects);
 
        radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
@@ -981,6 +1081,7 @@ static bool
 radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
                          const struct radv_image_view *iview,
                          VkImageLayout image_layout,
+                         bool in_render_loop,
                          VkImageAspectFlags aspects,
                          const VkClearRect *clear_rect,
                          const VkClearDepthStencilValue clear_value,
@@ -989,7 +1090,10 @@ radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
        if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
                return false;
 
-       if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
+       if (!radv_layout_is_htile_compressed(iview->image, image_layout, in_render_loop,
+                                            radv_image_queue_family_mask(iview->image,
+                                                                         cmd_buffer->queue_family_index,
+                                                                         cmd_buffer->queue_family_index)))
                return false;
 
        if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
@@ -1005,12 +1109,6 @@ radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
        if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
                return false;
 
-       if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 &&
-           (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ||
-           ((vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) &&
-            !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT))))
-               return false;
-
        if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
            !radv_is_fast_clear_depth_allowed(clear_value)) ||
            ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
@@ -1030,10 +1128,8 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
        VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
        VkImageAspectFlags aspects = clear_att->aspectMask;
        uint32_t clear_word, flush_bits;
-       uint32_t htile_mask;
 
        clear_word = radv_get_htile_fast_clear_value(iview->image, clear_value);
-       htile_mask = radv_get_htile_mask(iview->image, aspects);
 
        if (pre_flush) {
                cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
@@ -1041,21 +1137,27 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
                *pre_flush |= cmd_buffer->state.flush_bits;
        }
 
-       if (htile_mask == UINT_MAX) {
-               /* Clear the whole HTILE buffer. */
-               flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo,
-                                             iview->image->offset + iview->image->htile_offset,
-                                             iview->image->planes[0].surface.htile_size, clear_word);
-       } else {
-               /* Only clear depth or stencil bytes in the HTILE buffer. */
-               assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9);
-               flush_bits = clear_htile_mask(cmd_buffer, iview->image->bo,
-                                             iview->image->offset + iview->image->htile_offset,
-                                             iview->image->planes[0].surface.htile_size, clear_word,
-                                             htile_mask);
+       struct VkImageSubresourceRange range = {
+               .aspectMask = aspects,
+               .baseMipLevel = 0,
+               .levelCount = VK_REMAINING_MIP_LEVELS,
+               .baseArrayLayer = 0,
+               .layerCount = VK_REMAINING_ARRAY_LAYERS,
+       };
+
+       flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
+
+       if (iview->image->planes[0].surface.has_stencil &&
+           !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
+               /* Synchronize after performing a depth-only or a stencil-only
+                * fast clear because the driver uses an optimized path which
+                * performs a read-modify-write operation, and the two separate
+                * aspects might use the same HTILE memory.
+                */
+               cmd_buffer->state.flush_bits |= flush_bits;
        }
 
-       radv_update_ds_clear_metadata(cmd_buffer, iview->image, clear_value, aspects);
+       radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
        if (post_flush) {
                *post_flush |= flush_bits;
        }
@@ -1073,7 +1175,7 @@ build_clear_htile_mask_shader()
        b.shader->info.cs.local_size[2] = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -1111,6 +1213,7 @@ build_clear_htile_mask_shader()
        load->src[1] = nir_src_for_ssa(offset);
        nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
        load->num_components = 4;
+       nir_intrinsic_set_align(load, 16, 0);
        nir_builder_instr_insert(&b, &load->instr);
 
        /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
@@ -1126,6 +1229,7 @@ build_clear_htile_mask_shader()
        store->src[2] = nir_src_for_ssa(offset);
        nir_intrinsic_set_write_mask(store, 0xf);
        nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
+       nir_intrinsic_set_align(store, 16, 0);
        store->num_components = 4;
        nir_builder_instr_insert(&b, &store->instr);
 
@@ -1239,6 +1343,20 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
        if (res != VK_SUCCESS)
                goto fail;
 
+       VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+               .setLayoutCount = 0,
+               .pushConstantRangeCount = 1,
+               .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
+       };
+
+       res = radv_CreatePipelineLayout(radv_device_to_handle(device),
+                                       &pl_depth_unrestricted_create_info,
+                                       &device->meta_state.alloc,
+                                       &device->meta_state.clear_depth_unrestricted_p_layout);
+       if (res != VK_SUCCESS)
+               goto fail;
+
        res = init_meta_clear_htile_mask_state(device);
        if (res != VK_SUCCESS)
                goto fail;
@@ -1276,6 +1394,7 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
                                                           VK_IMAGE_ASPECT_DEPTH_BIT,
                                                           samples,
                                                           j,
+                                                          false,
                                                           &state->clear[i].depth_only_pipeline[j],
                                                           state->clear[i].depthstencil_rp);
                        if (res != VK_SUCCESS)
@@ -1285,6 +1404,7 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
                                                           VK_IMAGE_ASPECT_STENCIL_BIT,
                                                           samples,
                                                           j,
+                                                          false,
                                                           &state->clear[i].stencil_only_pipeline[j],
                                                           state->clear[i].depthstencil_rp);
                        if (res != VK_SUCCESS)
@@ -1295,10 +1415,42 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
                                                           VK_IMAGE_ASPECT_STENCIL_BIT,
                                                           samples,
                                                           j,
+                                                          false,
                                                           &state->clear[i].depthstencil_pipeline[j],
                                                           state->clear[i].depthstencil_rp);
                        if (res != VK_SUCCESS)
                                goto fail;
+
+                       res = create_depthstencil_pipeline(device,
+                                                          VK_IMAGE_ASPECT_DEPTH_BIT,
+                                                          samples,
+                                                          j,
+                                                          true,
+                                                          &state->clear[i].depth_only_unrestricted_pipeline[j],
+                                                          state->clear[i].depthstencil_rp);
+                       if (res != VK_SUCCESS)
+                               goto fail;
+
+                       res = create_depthstencil_pipeline(device,
+                                                          VK_IMAGE_ASPECT_STENCIL_BIT,
+                                                          samples,
+                                                          j,
+                                                          true,
+                                                          &state->clear[i].stencil_only_unrestricted_pipeline[j],
+                                                          state->clear[i].depthstencil_rp);
+                       if (res != VK_SUCCESS)
+                               goto fail;
+
+                       res = create_depthstencil_pipeline(device,
+                                                          VK_IMAGE_ASPECT_DEPTH_BIT |
+                                                          VK_IMAGE_ASPECT_STENCIL_BIT,
+                                                          samples,
+                                                          j,
+                                                          true,
+                                                          &state->clear[i].depthstencil_unrestricted_pipeline[j],
+                                                          state->clear[i].depthstencil_rp);
+                       if (res != VK_SUCCESS)
+                               goto fail;
                }
        }
        return VK_SUCCESS;
@@ -1329,15 +1481,18 @@ radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
                 struct radv_image *image,
                 const VkImageSubresourceRange *range, uint32_t value)
 {
-       uint64_t offset = image->offset + image->cmask.offset;
+       uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
        uint64_t size;
 
        if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
                /* TODO: clear layers. */
-               size = image->cmask.size;
+               size = image->planes[0].surface.cmask_size;
        } else {
-               offset += image->cmask.slice_size * range->baseArrayLayer;
-               size = image->cmask.slice_size * radv_get_layerCount(image, range);
+               unsigned cmask_slice_size =
+                       image->planes[0].surface.cmask_slice_size;
+
+               offset += cmask_slice_size * range->baseArrayLayer;
+               size = cmask_slice_size * radv_get_layerCount(image, range);
        }
 
        return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
@@ -1349,7 +1504,7 @@ radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
                 struct radv_image *image,
                 const VkImageSubresourceRange *range, uint32_t value)
 {
-       uint64_t offset = image->offset + image->fmask.offset;
+       uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;
        uint64_t size;
 
        /* MSAA images do not support mipmap levels. */
@@ -1358,36 +1513,14 @@ radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
 
        if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
                /* TODO: clear layers. */
-               size = image->fmask.size;
+               size = image->planes[0].surface.fmask_size;
        } else {
-               offset += image->fmask.slice_size * range->baseArrayLayer;
-               size = image->fmask.slice_size * radv_get_layerCount(image, range);
-       }
-
-       return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
-}
-
-uint32_t
-radv_dcc_clear_level(struct radv_cmd_buffer *cmd_buffer,
-                    const struct radv_image *image,
-                    uint32_t level, uint32_t value)
-{
-       uint64_t offset = image->offset + image->dcc_offset;
-       uint32_t size;
-
-       if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
-               /* Mipmap levels aren't implemented. */
-               assert(level == 0);
-               size = image->planes[0].surface.dcc_size;
-       } else {
-               const struct legacy_surf_level *surf_level =
-                       &image->planes[0].surface.u.legacy.level[level];
+               unsigned fmask_slice_size =
+                       image->planes[0].surface.u.legacy.fmask.slice_size;
 
-               /* If this is 0, fast clear isn't possible. */
-               assert(surf_level->dcc_fast_clear_size);
 
-               offset += surf_level->dcc_offset;
-               size = surf_level->dcc_fast_clear_size;
+               offset += fmask_slice_size * range->baseArrayLayer;
+               size = fmask_slice_size * radv_get_layerCount(image, range);
        }
 
        return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
@@ -1405,28 +1538,72 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
        radv_update_dcc_metadata(cmd_buffer, image, range, true);
 
        for (uint32_t l = 0; l < level_count; l++) {
+               uint64_t offset = image->offset + image->planes[0].surface.dcc_offset;
                uint32_t level = range->baseMipLevel + l;
+               uint64_t size;
+
+               if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+                       /* Mipmap levels aren't implemented. */
+                       assert(level == 0);
+                       size = image->planes[0].surface.dcc_size;
+               } else {
+                       const struct legacy_surf_level *surf_level =
+                               &image->planes[0].surface.u.legacy.level[level];
+
+                       /* If dcc_slice_fast_clear_size is 0 (which might
+                        * happen for mipmaps) the fill buffer operation below
+                        * is a no-op. This can only happen during
+                        * initialization as the fast clear path falls back to
+                        * slow clears if one level can't be fast cleared.
+                        */
+                       offset += surf_level->dcc_offset +
+                                 surf_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
+                       size = surf_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
+               }
 
-               flush_bits |= radv_dcc_clear_level(cmd_buffer, image,
-                                                  level, value);
+               flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, offset,
+                                              size, value);
        }
 
        return flush_bits;
 }
 
 uint32_t
-radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
-                const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
+                const struct radv_image *image,
+                const VkImageSubresourceRange *range,
+                uint32_t value)
 {
        unsigned layer_count = radv_get_layerCount(image, range);
        uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
-       uint64_t offset = image->offset + image->htile_offset +
+       uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
                          image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
+       uint32_t htile_mask, flush_bits;
 
-       return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
+       htile_mask = radv_get_htile_mask(image, range->aspectMask);
+
+       if (htile_mask == UINT_MAX) {
+               /* Clear the whole HTILE buffer. */
+               flush_bits = radv_fill_buffer(cmd_buffer, image->bo, offset,
+                                             size, value);
+       } else {
+               /* Only clear depth or stencil bytes in the HTILE buffer. */
+               flush_bits = clear_htile_mask(cmd_buffer, image->bo, offset,
+                                             size, value, htile_mask);
+       }
+
+       return flush_bits;
 }
 
-static void vi_get_fast_clear_parameters(VkFormat format,
+enum {
+       RADV_DCC_CLEAR_REG = 0x20202020U,
+       RADV_DCC_CLEAR_MAIN_1 = 0x80808080U,
+       RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U
+};
+
+static void vi_get_fast_clear_parameters(struct radv_device *device,
+                                        VkFormat image_format,
+                                        VkFormat view_format,
                                         const VkClearColorValue *clear_value,
                                         uint32_t* reset_value,
                                         bool *can_avoid_fast_clear_elim)
@@ -1435,18 +1612,20 @@ static void vi_get_fast_clear_parameters(VkFormat format,
        int extra_channel;
        bool main_value = false;
        bool extra_value = false;
+       bool has_color = false;
+       bool has_alpha = false;
        int i;
        *can_avoid_fast_clear_elim = false;
 
-       *reset_value = 0x20202020U;
+       *reset_value = RADV_DCC_CLEAR_REG;
 
-       const struct vk_format_description *desc = vk_format_description(format);
-       if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
-           format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
-           format == VK_FORMAT_B5G6R5_UNORM_PACK16)
+       const struct vk_format_description *desc = vk_format_description(view_format);
+       if (view_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
+           view_format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
+           view_format == VK_FORMAT_B5G6R5_UNORM_PACK16)
                extra_channel = -1;
        else if (desc->layout == VK_FORMAT_LAYOUT_PLAIN) {
-               if (radv_translate_colorswap(format, false) <= 1)
+               if (vi_alpha_is_on_msb(device, view_format))
                        extra_channel = desc->nr_channels - 1;
                else
                        extra_channel = 0;
@@ -1481,12 +1660,21 @@ static void vi_get_fast_clear_parameters(VkFormat format,
                                return;
                }
 
-               if (index == extra_channel)
+               if (index == extra_channel) {
                        extra_value = values[i];
-               else
+                       has_alpha = true;
+               } else {
                        main_value = values[i];
+                       has_color = true;
+               }
        }
 
+       /* If alpha isn't present, make it the same as color, and vice versa. */
+       if (!has_alpha)
+               extra_value = main_value;
+       else if (!has_color)
+               main_value = extra_value;
+
        for (int i = 0; i < 4; ++i)
                if (values[i] != main_value &&
                    desc->swizzle[i] - VK_SWIZZLE_X != extra_channel &&
@@ -1495,11 +1683,12 @@ static void vi_get_fast_clear_parameters(VkFormat format,
                        return;
 
        *can_avoid_fast_clear_elim = true;
+       *reset_value = 0;
        if (main_value)
-               *reset_value |= 0x80808080U;
+               *reset_value |= RADV_DCC_CLEAR_MAIN_1;
 
        if (extra_value)
-               *reset_value |= 0x40404040U;
+               *reset_value |= RADV_DCC_CLEAR_SECONDARY_1;
        return;
 }
 
@@ -1507,6 +1696,7 @@ static bool
 radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
                          const struct radv_image_view *iview,
                          VkImageLayout image_layout,
+                         bool in_render_loop,
                          const VkClearRect *clear_rect,
                          VkClearColorValue clear_value,
                          uint32_t view_mask)
@@ -1516,7 +1706,10 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
        if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
                return false;
 
-       if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
+       if (!radv_layout_can_fast_clear(iview->image, image_layout, in_render_loop,
+                                       radv_image_queue_family_mask(iview->image,
+                                                                    cmd_buffer->queue_family_index,
+                                                                    cmd_buffer->queue_family_index)))
                return false;
 
        if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
@@ -1541,7 +1734,9 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
                bool can_avoid_fast_clear_elim;
                uint32_t reset_value;
 
-               vi_get_fast_clear_parameters(iview->vk_format,
+               vi_get_fast_clear_parameters(cmd_buffer->device,
+                                            iview->image->vk_format,
+                                            iview->vk_format,
                                             &clear_value, &reset_value,
                                             &can_avoid_fast_clear_elim);
 
@@ -1613,7 +1808,9 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
                bool can_avoid_fast_clear_elim;
                bool need_decompress_pass = false;
 
-               vi_get_fast_clear_parameters(iview->vk_format,
+               vi_get_fast_clear_parameters(cmd_buffer->device,
+                                            iview->image->vk_format,
+                                            iview->vk_format,
                                             &clear_value, &reset_value,
                                             &can_avoid_fast_clear_elim);
 
@@ -1669,10 +1866,11 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
                        return;
 
                VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
-               const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
+               bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
+               const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
                VkClearColorValue clear_value = clear_att->clearValue.color;
 
-               if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout,
+               if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop,
                                              clear_rect, clear_value, view_mask)) {
                        radv_fast_clear_color(cmd_buffer, iview, clear_att,
                                              subpass_att, pre_flush,
@@ -1686,19 +1884,20 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
                if (ds_resolve_clear)
                        ds_att = subpass->ds_resolve_attachment;
 
-               if (ds_att->attachment == VK_ATTACHMENT_UNUSED)
+               if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
                        return;
 
                VkImageLayout image_layout = ds_att->layout;
-               const struct radv_image_view *iview = fb ? fb->attachments[ds_att->attachment].attachment : NULL;
+               bool in_render_loop = ds_att->in_render_loop;
+               const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
                VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
 
                assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                  VK_IMAGE_ASPECT_STENCIL_BIT));
 
                if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout,
-                                             aspects, clear_rect, clear_value,
-                                             view_mask)) {
+                                             in_render_loop, aspects, clear_rect,
+                                             clear_value, view_mask)) {
                        radv_fast_clear_depth(cmd_buffer, iview, clear_att,
                                              pre_flush, post_flush);
                } else {
@@ -1762,12 +1961,16 @@ radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
                .layerCount = cmd_state->framebuffer->layers,
        };
 
+       radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
+
        emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
                   view_mask & ~attachment->cleared_views, ds_resolve_clear);
        if (view_mask)
                attachment->cleared_views |= view_mask;
        else
                attachment->pending_clear_aspects = 0;
+
+       radv_describe_end_render_pass_clear(cmd_buffer);
 }
 
 /**
@@ -1871,7 +2074,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
                                             .baseArrayLayer = range->baseArrayLayer + layer,
                                             .layerCount = 1
                                     },
-                            });
+                            }, NULL);
 
        VkFramebuffer fb;
        radv_CreateFramebuffer(device_h,
@@ -1929,26 +2132,49 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
                                              .pAttachments = &att_desc,
                                              .subpassCount = 1,
                                              .pSubpasses = &subpass_desc,
-                                             },
+                                             .dependencyCount = 2,
+                                             .pDependencies = (VkSubpassDependency[]) {
+                                                       {
+                                                               .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                               .dstSubpass = 0,
+                                                               .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                               .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                               .srcAccessMask = 0,
+                                                               .dstAccessMask = 0,
+                                                               .dependencyFlags = 0
+                                                       },
+                                                       {
+                                                               .srcSubpass = 0,
+                                                               .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                               .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                               .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                               .srcAccessMask = 0,
+                                                               .dstAccessMask = 0,
+                                                               .dependencyFlags = 0
+                                                       }
+                                               }
+                                       },
                              &cmd_buffer->pool->alloc,
                              &pass);
 
-       radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-                               &(VkRenderPassBeginInfo) {
-                                       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+       radv_cmd_buffer_begin_render_pass(cmd_buffer,
+                                         &(VkRenderPassBeginInfo) {
+                                               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
                                                .renderArea = {
                                                .offset = { 0, 0, },
                                                .extent = {
                                                        .width = width,
                                                        .height = height,
+                                                       },
                                                },
-                                       },
                                                .renderPass = pass,
                                                .framebuffer = fb,
                                                .clearValueCount = 0,
                                                .pClearValues = NULL,
-                                               },
-                               VK_SUBPASS_CONTENTS_INLINE);
+                                        });
+
+       radv_cmd_buffer_set_subpass(cmd_buffer,
+                                   &cmd_buffer->state.pass->subpasses[0]);
 
        VkClearAttachment clear_att = {
                .aspectMask = range->aspectMask,
@@ -1967,7 +2193,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 
        emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
 
-       radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+       radv_cmd_buffer_end_render_pass(cmd_buffer);
        radv_DestroyRenderPass(device_h, pass,
                               &cmd_buffer->pool->alloc);
        radv_DestroyFramebuffer(device_h, fb,
@@ -1982,6 +2208,7 @@ radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
                      struct radv_image *image,
                      VkFormat format,
                      VkImageLayout image_layout,
+                     bool in_render_loop,
                      const VkImageSubresourceRange *range,
                      const VkClearValue *clear_val)
 {
@@ -2000,7 +2227,7 @@ radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
                                        .baseArrayLayer = range->baseArrayLayer,
                                        .layerCount = range->layerCount,
                                   },
-                            });
+                            }, NULL);
 
        VkClearRect clear_rect = {
                .rect = {
@@ -2021,8 +2248,8 @@ radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
        };
 
        if (vk_format_is_color(format)) {
-               if (radv_can_fast_clear_color(cmd_buffer, &iview,
-                                             image_layout, &clear_rect,
+               if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout,
+                                             in_render_loop, &clear_rect,
                                              clear_att.clearValue.color, 0)) {
                        radv_fast_clear_color(cmd_buffer, &iview, &clear_att,
                                              clear_att.colorAttachment,
@@ -2031,8 +2258,9 @@ radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
                }
        } else {
                if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout,
-                                             range->aspectMask, &clear_rect,
-                                             clear_att.clearValue.depthStencil, 0)) {
+                                             in_render_loop,range->aspectMask,
+                                             &clear_rect, clear_att.clearValue.depthStencil,
+                                             0)) {
                        radv_fast_clear_depth(cmd_buffer, &iview, &clear_att,
                                              NULL, NULL);
                        return true;
@@ -2082,7 +2310,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
                 */
                if (!cs &&
                    radv_fast_clear_range(cmd_buffer, image, format,
-                                         image_layout, range,
+                                         image_layout, false, range,
                                          &internal_clear_value)) {
                        continue;
                }