aco: consider branch definitions in spiller
[mesa.git] / src / amd / vulkan / radv_meta_clear.c
index 29905f01f1f1091184d3a6905d383e604f8a132d..bfeda8e3fa9369391b638f2da761f786b6959e5b 100644 (file)
@@ -235,7 +235,27 @@ create_color_renderpass(struct radv_device *device,
                                                       .preserveAttachmentCount = 0,
                                                       .pPreserveAttachments = NULL,
                                               },
-                                                               .dependencyCount = 0,
+                                                       .dependencyCount = 2,
+                                                       .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       },
                                                                         }, &device->meta_state.alloc, pass);
        mtx_unlock(&device->meta_state.mtx);
        return result;
@@ -492,8 +512,12 @@ build_depthstencil_shader(struct nir_shader **out_vs,
        nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
        nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
 
-       vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
-       fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
+       vs_b.shader->info.name = ralloc_strdup(vs_b.shader,
+                                              unrestricted ? "meta_clear_depthstencil_unrestricted_vs"
+                                                           : "meta_clear_depthstencil_vs");
+       fs_b.shader->info.name = ralloc_strdup(fs_b.shader,
+                                              unrestricted ? "meta_clear_depthstencil_unrestricted_fs"
+                                                           : "meta_clear_depthstencil_fs");
        const struct glsl_type *position_out_type = glsl_vec4_type();
 
        nir_variable *vs_out_pos =
@@ -586,7 +610,27 @@ create_depthstencil_renderpass(struct radv_device *device,
                                                       .preserveAttachmentCount = 0,
                                                       .pPreserveAttachments = NULL,
                                               },
-                                                               .dependencyCount = 0,
+                                                       .dependencyCount = 2,
+                                                       .pDependencies = (VkSubpassDependency[]) {
+                                                               {
+                                                                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .dstSubpass = 0,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               },
+                                                               {
+                                                                       .srcSubpass = 0,
+                                                                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                                       .srcAccessMask = 0,
+                                                                       .dstAccessMask = 0,
+                                                                       .dependencyFlags = 0
+                                                               }
+                                                       }
                                                                         }, &device->meta_state.alloc, render_pass);
        mtx_unlock(&device->meta_state.mtx);
        return result;
@@ -620,11 +664,11 @@ create_depthstencil_pipeline(struct radv_device *device,
 
        const VkPipelineDepthStencilStateCreateInfo ds_state = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
-               .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+               .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
                .depthCompareOp = VK_COMPARE_OP_ALWAYS,
-               .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+               .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
                .depthBoundsTestEnable = false,
-               .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
+               .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
                .front = {
                        .passOp = VK_STENCIL_OP_REPLACE,
                        .compareOp = VK_COMPARE_OP_ALWAYS,
@@ -1084,10 +1128,8 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
        VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
        VkImageAspectFlags aspects = clear_att->aspectMask;
        uint32_t clear_word, flush_bits;
-       uint32_t htile_mask;
 
        clear_word = radv_get_htile_fast_clear_value(iview->image, clear_value);
-       htile_mask = radv_get_htile_mask(iview->image, aspects);
 
        if (pre_flush) {
                cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
@@ -1095,17 +1137,24 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
                *pre_flush |= cmd_buffer->state.flush_bits;
        }
 
-       if (htile_mask == UINT_MAX) {
-               /* Clear the whole HTILE buffer. */
-               flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo,
-                                             iview->image->offset + iview->image->htile_offset,
-                                             iview->image->planes[0].surface.htile_size, clear_word);
-       } else {
-               /* Only clear depth or stencil bytes in the HTILE buffer. */
-               flush_bits = clear_htile_mask(cmd_buffer, iview->image->bo,
-                                             iview->image->offset + iview->image->htile_offset,
-                                             iview->image->planes[0].surface.htile_size, clear_word,
-                                             htile_mask);
+       struct VkImageSubresourceRange range = {
+               .aspectMask = aspects,
+               .baseMipLevel = 0,
+               .levelCount = VK_REMAINING_MIP_LEVELS,
+               .baseArrayLayer = 0,
+               .layerCount = VK_REMAINING_ARRAY_LAYERS,
+       };
+
+       flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
+
+       if (iview->image->planes[0].surface.has_stencil &&
+           !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
+               /* Synchronize after performing a depth-only or a stencil-only
+                * fast clear because the driver uses an optimized path which
+                * performs a read-modify-write operation, and the two separate
+                * aspects might use the same HTILE memory.
+                */
+               cmd_buffer->state.flush_bits |= flush_bits;
        }
 
        radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
@@ -1126,7 +1175,7 @@ build_clear_htile_mask_shader()
        b.shader->info.cs.local_size[2] = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -1432,7 +1481,7 @@ radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
                 struct radv_image *image,
                 const VkImageSubresourceRange *range, uint32_t value)
 {
-       uint64_t offset = image->offset + image->cmask_offset;
+       uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
        uint64_t size;
 
        if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
@@ -1455,7 +1504,7 @@ radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
                 struct radv_image *image,
                 const VkImageSubresourceRange *range, uint32_t value)
 {
-       uint64_t offset = image->offset + image->fmask_offset;
+       uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;
        uint64_t size;
 
        /* MSAA images do not support mipmap levels. */
@@ -1489,7 +1538,7 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
        radv_update_dcc_metadata(cmd_buffer, image, range, true);
 
        for (uint32_t l = 0; l < level_count; l++) {
-               uint64_t offset = image->offset + image->dcc_offset;
+               uint64_t offset = image->offset + image->planes[0].surface.dcc_offset;
                uint32_t level = range->baseMipLevel + l;
                uint64_t size;
 
@@ -1520,15 +1569,30 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
 }
 
 uint32_t
-radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
-                const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
+                const struct radv_image *image,
+                const VkImageSubresourceRange *range,
+                uint32_t value)
 {
        unsigned layer_count = radv_get_layerCount(image, range);
        uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
-       uint64_t offset = image->offset + image->htile_offset +
+       uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
                          image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
+       uint32_t htile_mask, flush_bits;
 
-       return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
+       htile_mask = radv_get_htile_mask(image, range->aspectMask);
+
+       if (htile_mask == UINT_MAX) {
+               /* Full mask: plain fill of the HTILE slices of the selected layers (offset/size above). */
+               flush_bits = radv_fill_buffer(cmd_buffer, image->bo, offset,
+                                             size, value);
+       } else {
+               /* Partial mask (depth-only or stencil-only): only update the bits selected by htile_mask. */
+               flush_bits = clear_htile_mask(cmd_buffer, image->bo, offset,
+                                             size, value, htile_mask);
+       }
+
+       return flush_bits;
 }
 
 enum {
@@ -1897,12 +1961,16 @@ radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
                .layerCount = cmd_state->framebuffer->layers,
        };
 
+       radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
+
        emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
                   view_mask & ~attachment->cleared_views, ds_resolve_clear);
        if (view_mask)
                attachment->cleared_views |= view_mask;
        else
                attachment->pending_clear_aspects = 0;
+
+       radv_describe_end_render_pass_clear(cmd_buffer);
 }
 
 /**
@@ -2064,26 +2132,49 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
                                              .pAttachments = &att_desc,
                                              .subpassCount = 1,
                                              .pSubpasses = &subpass_desc,
-                                             },
+                                             .dependencyCount = 2,
+                                             .pDependencies = (VkSubpassDependency[]) {
+                                                       {
+                                                               .srcSubpass = VK_SUBPASS_EXTERNAL,
+                                                               .dstSubpass = 0,
+                                                               .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                               .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                               .srcAccessMask = 0,
+                                                               .dstAccessMask = 0,
+                                                               .dependencyFlags = 0
+                                                       },
+                                                       {
+                                                               .srcSubpass = 0,
+                                                               .dstSubpass = VK_SUBPASS_EXTERNAL,
+                                                               .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                                               .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                                               .srcAccessMask = 0,
+                                                               .dstAccessMask = 0,
+                                                               .dependencyFlags = 0
+                                                       }
+                                               }
+                                       },
                              &cmd_buffer->pool->alloc,
                              &pass);
 
-       radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-                               &(VkRenderPassBeginInfo) {
-                                       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+       radv_cmd_buffer_begin_render_pass(cmd_buffer,
+                                         &(VkRenderPassBeginInfo) {
+                                               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
                                                .renderArea = {
                                                .offset = { 0, 0, },
                                                .extent = {
                                                        .width = width,
                                                        .height = height,
+                                                       },
                                                },
-                                       },
                                                .renderPass = pass,
                                                .framebuffer = fb,
                                                .clearValueCount = 0,
                                                .pClearValues = NULL,
-                                               },
-                               VK_SUBPASS_CONTENTS_INLINE);
+                                        });
+
+       radv_cmd_buffer_set_subpass(cmd_buffer,
+                                   &cmd_buffer->state.pass->subpasses[0]);
 
        VkClearAttachment clear_att = {
                .aspectMask = range->aspectMask,
@@ -2102,7 +2193,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 
        emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
 
-       radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+       radv_cmd_buffer_end_render_pass(cmd_buffer);
        radv_DestroyRenderPass(device_h, pass,
                               &cmd_buffer->pool->alloc);
        radv_DestroyFramebuffer(device_h, fb,