radv: implement all depth/stencil resolve modes using compute
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 22 May 2019 07:42:12 +0000 (09:42 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 21 Jun 2019 12:50:19 +0000 (14:50 +0200)
This path supports layers, but it requires decompressing HTILE
before resolving. The driver also needs to fix up HTILE after
the resolve. This path is probably slower than the graphics one.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_meta_resolve_cs.c
src/amd/vulkan/radv_private.h

index fc4bcf27bb904b19cc8da147fe216e4e3a2d067b..c06f0f2c5ce6ef10a857a4ccab9cd2ce25db1cf4 100644 (file)
@@ -139,6 +139,165 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
        return b.shader;
 }
 
+/* Aspect selector passed as the "index" argument when building the
+ * depth/stencil resolve shaders and pipelines.
+ */
+enum {
+       DEPTH_RESOLVE,
+       STENCIL_RESOLVE,
+};
+
+/* Return the short lowercase name of a resolve mode: "zero", "average",
+ * "min" or "max". Any other value reaches unreachable().
+ */
+static const char *
+get_resolve_mode_str(VkResolveModeFlagBitsKHR resolve_mode)
+{
+       switch (resolve_mode) {
+       case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+               return "zero";
+       case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+               return "average";
+       case VK_RESOLVE_MODE_MIN_BIT_KHR:
+               return "min";
+       case VK_RESOLVE_MODE_MAX_BIT_KHR:
+               return "max";
+       default:
+               unreachable("invalid resolve mode");
+       }
+}
+
+/* Build the NIR compute shader that resolves one aspect (depth or stencil)
+ * of a multisampled image into a 2D image.
+ *
+ * The shader uses a 16x16x1 workgroup. Each invocation fetches sample 0 of
+ * the source texture (set 0, binding 0) at global_id + src_offset and,
+ * unless resolve_mode is SAMPLE_ZERO, combines it with samples
+ * 1..samples-1 according to resolve_mode. The result is stored to the
+ * image at set 0, binding 1, at global_id + dst_offset. Both offsets are
+ * two-component 32-bit values loaded from push constants (offsets 0 and 8
+ * within a 16-byte range).
+ *
+ * dev is not referenced by this function. samples is the number of source
+ * samples to combine; it is also embedded in the shader name. index is
+ * DEPTH_RESOLVE or STENCIL_RESOLVE and selects float vs. unsigned fetches
+ * and min/max opcodes. AVERAGE is only accepted together with
+ * DEPTH_RESOLVE (asserted).
+ *
+ * Returns the shader owned by the local nir_builder.
+ */
+static nir_shader *
+build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
+                                          int index,
+                                          VkResolveModeFlagBitsKHR resolve_mode)
+{
+       nir_builder b;
+       char name[64];
+       const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
+                                                                false,
+                                                                false,
+                                                                GLSL_TYPE_FLOAT);
+       const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+                                                            false,
+                                                            false,
+                                                            GLSL_TYPE_FLOAT);
+       /* Shader name: meta_resolve_cs_<depth|stencil>-<mode>-<samples>. */
+       snprintf(name, 64, "meta_resolve_cs_%s-%s-%d",
+                index == DEPTH_RESOLVE ? "depth" : "stencil",
+                get_resolve_mode_str(resolve_mode), samples);
+
+       nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+       b.shader->info.name = ralloc_strdup(b.shader, name);
+       b.shader->info.cs.local_size[0] = 16;
+       b.shader->info.cs.local_size[1] = 16;
+       b.shader->info.cs.local_size[2] = 1;
+
+       nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+                                                     sampler_type, "s_tex");
+       input_img->data.descriptor_set = 0;
+       input_img->data.binding = 0;
+
+       nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+                                                      img_type, "out_img");
+       output_img->data.descriptor_set = 0;
+       output_img->data.binding = 1;
+       /* global_id = work_group_id * workgroup size + local_invocation_id */
+       nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *block_size = nir_imm_ivec4(&b,
+                                               b.shader->info.cs.local_size[0],
+                                               b.shader->info.cs.local_size[1],
+                                               b.shader->info.cs.local_size[2], 0);
+
+       nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+       /* Source offset: two 32-bit components at push-constant offset 0. */
+       nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+       nir_intrinsic_set_base(src_offset, 0);
+       nir_intrinsic_set_range(src_offset, 16);
+       src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+       src_offset->num_components = 2;
+       nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
+       nir_builder_instr_insert(&b, &src_offset->instr);
+
+       /* Destination offset: two 32-bit components at push-constant offset 8. */
+       nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+       nir_intrinsic_set_base(dst_offset, 0);
+       nir_intrinsic_set_range(dst_offset, 16);
+       dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+       dst_offset->num_components = 2;
+       nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
+       nir_builder_instr_insert(&b, &dst_offset->instr);
+
+       /* Only the first two channels (mask 0x3) are used as the fetch coordinate. */
+       nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
+
+       nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+       /* Depth is fetched as float, stencil as unsigned integer. */
+       nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;
+
+       /* Fetch sample 0. It is the final value for SAMPLE_ZERO and the
+        * initial value of the reduction for every other mode.
+        */
+       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+       tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+       tex->op = nir_texop_txf_ms;
+       tex->src[0].src_type = nir_tex_src_coord;
+       tex->src[0].src = nir_src_for_ssa(img_coord);
+       tex->src[1].src_type = nir_tex_src_ms_index;
+       tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+       tex->src[2].src_type = nir_tex_src_texture_deref;
+       tex->src[2].src = nir_src_for_ssa(input_img_deref);
+       tex->dest_type = type;
+       tex->is_array = false;
+       tex->coord_components = 2;
+
+       nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+       nir_builder_instr_insert(&b, &tex->instr);
+
+       nir_ssa_def *outval = &tex->dest.ssa;
+
+       if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+               /* Fold samples 1..samples-1 into outval. */
+               for (int i = 1; i < samples; i++) {
+                       nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+                       tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+                       tex_add->op = nir_texop_txf_ms;
+                       tex_add->src[0].src_type = nir_tex_src_coord;
+                       tex_add->src[0].src = nir_src_for_ssa(img_coord);
+                       tex_add->src[1].src_type = nir_tex_src_ms_index;
+                       tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+                       tex_add->src[2].src_type = nir_tex_src_texture_deref;
+                       tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+                       tex_add->dest_type = type;
+                       tex_add->is_array = false;
+                       tex_add->coord_components = 2;
+
+                       nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+                       nir_builder_instr_insert(&b, &tex_add->instr);
+
+                       switch (resolve_mode) {
+                       case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+                               assert(index == DEPTH_RESOLVE);
+                               outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+                               break;
+                       case VK_RESOLVE_MODE_MIN_BIT_KHR:
+                               if (index == DEPTH_RESOLVE)
+                                       outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+                               else
+                                       outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+                               break;
+                       case VK_RESOLVE_MODE_MAX_BIT_KHR:
+                               if (index == DEPTH_RESOLVE)
+                                       outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+                               else
+                                       outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+                               break;
+                       default:
+                               unreachable("invalid resolve mode");
+                       }
+               }
+
+               /* AVERAGE accumulated a sum above; divide by the sample count. */
+               if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+                       outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+       }
+
+       /* Store the resolved value at global_id + dst_offset; src[2] of the
+        * store is left undefined.
+        */
+       nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
+       nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+       store->num_components = 4;
+       store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
+       store->src[1] = nir_src_for_ssa(coord);
+       store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+       store->src[3] = nir_src_for_ssa(outval);
+       nir_builder_instr_insert(&b, &store->instr);
+       return b.shader;
+}
 
 static VkResult
 create_layout(struct radv_device *device)
@@ -248,6 +407,57 @@ fail:
        return result;
 }
 
+/* Create the compute pipeline for one (samples, index, resolve_mode)
+ * combination and store its handle in *pipeline.
+ *
+ * device->meta_state.mtx is held for the whole call. If *pipeline is
+ * already set once the lock is taken, nothing is created and VK_SUCCESS is
+ * returned. The NIR shader is released with ralloc_free() on both the
+ * success and the failure path.
+ *
+ * Returns VK_SUCCESS, or the error from radv_CreateComputePipelines().
+ */
+static VkResult
+create_depth_stencil_resolve_pipeline(struct radv_device *device,
+                                     int samples,
+                                     int index,
+                                     VkResolveModeFlagBitsKHR resolve_mode,
+                                     VkPipeline *pipeline)
+{
+       VkResult result;
+       struct radv_shader_module cs = { .nir = NULL };
+
+       mtx_lock(&device->meta_state.mtx);
+       /* Already created (checked under the lock): nothing to do. */
+       if (*pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
+       cs.nir = build_depth_stencil_resolve_compute_shader(device, samples,
+                                                           index, resolve_mode);
+
+       /* compute shader */
+       VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+               .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+               .module = radv_shader_module_to_handle(&cs),
+               .pName = "main",
+               .pSpecializationInfo = NULL,
+       };
+
+       VkComputePipelineCreateInfo vk_pipeline_info = {
+               .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+               .stage = pipeline_shader_stage,
+               .flags = 0,
+               .layout = device->meta_state.resolve_compute.p_layout,
+       };
+
+       result = radv_CreateComputePipelines(radv_device_to_handle(device),
+                                            radv_pipeline_cache_to_handle(&device->meta_state.cache),
+                                            1, &vk_pipeline_info, NULL,
+                                            pipeline);
+       if (result != VK_SUCCESS)
+               goto fail;
+
+       ralloc_free(cs.nir);
+       mtx_unlock(&device->meta_state.mtx);
+       return VK_SUCCESS;
+fail:
+       ralloc_free(cs.nir);
+       mtx_unlock(&device->meta_state.mtx);
+       return result;
+}
+
 VkResult
 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
 {
@@ -279,8 +489,56 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_
                if (res != VK_SUCCESS)
                        goto fail;
 
+               res = create_depth_stencil_resolve_pipeline(device, samples,
+                                                           DEPTH_RESOLVE,
+                                                           VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+                                                           &state->resolve_compute.depth[i].average_pipeline);
+               if (res != VK_SUCCESS)
+                       goto fail;
+
+               res = create_depth_stencil_resolve_pipeline(device, samples,
+                                                           DEPTH_RESOLVE,
+                                                           VK_RESOLVE_MODE_MAX_BIT_KHR,
+                                                           &state->resolve_compute.depth[i].max_pipeline);
+               if (res != VK_SUCCESS)
+                       goto fail;
+
+               res = create_depth_stencil_resolve_pipeline(device, samples,
+                                                           DEPTH_RESOLVE,
+                                                           VK_RESOLVE_MODE_MIN_BIT_KHR,
+                                                           &state->resolve_compute.depth[i].min_pipeline);
+               if (res != VK_SUCCESS)
+                       goto fail;
+
+               res = create_depth_stencil_resolve_pipeline(device, samples,
+                                                           STENCIL_RESOLVE,
+                                                           VK_RESOLVE_MODE_MAX_BIT_KHR,
+                                                           &state->resolve_compute.stencil[i].max_pipeline);
+               if (res != VK_SUCCESS)
+                       goto fail;
+
+               res = create_depth_stencil_resolve_pipeline(device, samples,
+                                                           STENCIL_RESOLVE,
+                                                           VK_RESOLVE_MODE_MIN_BIT_KHR,
+                                                           &state->resolve_compute.stencil[i].min_pipeline);
+               if (res != VK_SUCCESS)
+                       goto fail;
        }
 
+       res = create_depth_stencil_resolve_pipeline(device, 0,
+                                                   DEPTH_RESOLVE,
+                                                   VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+                                                   &state->resolve_compute.depth_zero_pipeline);
+       if (res != VK_SUCCESS)
+               goto fail;
+
+       res = create_depth_stencil_resolve_pipeline(device, 0,
+                                                   STENCIL_RESOLVE,
+                                                   VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+                                                   &state->resolve_compute.stencil_zero_pipeline);
+       if (res != VK_SUCCESS)
+               goto fail;
+
        return VK_SUCCESS;
 fail:
        radv_device_finish_meta_resolve_compute_state(device);
@@ -303,8 +561,36 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
                radv_DestroyPipeline(radv_device_to_handle(device),
                                     state->resolve_compute.rc[i].srgb_pipeline,
                                     &state->alloc);
+
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->resolve_compute.depth[i].average_pipeline,
+                                    &state->alloc);
+
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->resolve_compute.depth[i].max_pipeline,
+                                    &state->alloc);
+
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->resolve_compute.depth[i].min_pipeline,
+                                    &state->alloc);
+
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->resolve_compute.stencil[i].max_pipeline,
+                                    &state->alloc);
+
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->resolve_compute.stencil[i].min_pipeline,
+                                    &state->alloc);
        }
 
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->resolve_compute.depth_zero_pipeline,
+                            &state->alloc);
+
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->resolve_compute.stencil_zero_pipeline,
+                            &state->alloc);
+
        radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                        state->resolve_compute.ds_layout,
                                        &state->alloc);
@@ -411,6 +697,113 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 
 }
 
+/* Record one compute depth/stencil resolve from src_iview to dest_iview.
+ *
+ * Pushes a descriptor set with the source as a sampled image (binding 0)
+ * and the destination as a storage image (binding 1), picks the pipeline
+ * matching resolve_mode and aspects (creating it if its handle is still
+ * null), pushes the source and destination offsets as 16 bytes of push
+ * constants, and dispatches resolve_extent->width x resolve_extent->height
+ * x 1 through radv_unaligned_dispatch().
+ *
+ * aspects is compared for equality with VK_IMAGE_ASPECT_DEPTH_BIT; any
+ * other value selects the stencil pipelines. If the pipeline cannot be
+ * created, the error is stored in cmd_buffer->record_result and the
+ * function returns without binding or dispatching.
+ */
+static void
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
+                          struct radv_image_view *src_iview,
+                          struct radv_image_view *dest_iview,
+                          const VkOffset2D *src_offset,
+                          const VkOffset2D *dest_offset,
+                          const VkExtent2D *resolve_extent,
+                          VkImageAspectFlags aspects,
+                          VkResolveModeFlagBitsKHR resolve_mode)
+{
+       struct radv_device *device = cmd_buffer->device;
+       const uint32_t samples = src_iview->image->info.samples;
+       /* Index into the per-sample-count pipeline arrays. */
+       const uint32_t samples_log2 = ffs(samples) - 1;
+       VkPipeline *pipeline;
+
+       radv_meta_push_descriptor_set(cmd_buffer,
+                                     VK_PIPELINE_BIND_POINT_COMPUTE,
+                                     device->meta_state.resolve_compute.p_layout,
+                                     0, /* set */
+                                     2, /* descriptorWriteCount */
+                                     (VkWriteDescriptorSet[]) {
+                                       {
+                                               .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                                               .dstBinding = 0,
+                                               .dstArrayElement = 0,
+                                               .descriptorCount = 1,
+                                               .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+                                             .pImageInfo = (VkDescriptorImageInfo[]) {
+                                             {
+                                             .sampler = VK_NULL_HANDLE,
+                                             .imageView = radv_image_view_to_handle(src_iview),
+                                             .imageLayout = VK_IMAGE_LAYOUT_GENERAL                                  },
+                             }
+                             },
+                             {
+                                     .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                                     .dstBinding = 1,
+                                     .dstArrayElement = 0,
+                                     .descriptorCount = 1,
+                                     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                             .pImageInfo = (VkDescriptorImageInfo[]) {
+                              {
+                                      .sampler = VK_NULL_HANDLE,
+                                     .imageView = radv_image_view_to_handle(dest_iview),
+                                     .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+                              },
+                      }
+                             }
+                                     });
+
+       /* SAMPLE_ZERO uses one pipeline per aspect; the other modes use one
+        * pipeline per aspect and per sample count.
+        */
+       switch (resolve_mode) {
+       case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+               if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+                       pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
+               else
+                       pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
+               break;
+       case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+               assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+               pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
+               break;
+       case VK_RESOLVE_MODE_MIN_BIT_KHR:
+               if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+                       pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
+               else
+                       pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
+               break;
+       case VK_RESOLVE_MODE_MAX_BIT_KHR:
+               if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+                       pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
+               else
+                       pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
+               break;
+       default:
+               unreachable("invalid resolve mode");
+       }
+
+       /* The pipeline handle is still null: create it now. */
+       if (!*pipeline) {
+               int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+               VkResult ret;
+
+               ret = create_depth_stencil_resolve_pipeline(device, samples,
+                                                           index, resolve_mode,
+                                                           pipeline);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
+
+       radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+                            VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+
+       /* Layout read by the shader: source offset first, then destination
+        * offset (four 32-bit values, 16 bytes in total).
+        */
+       unsigned push_constants[4] = {
+               src_offset->x,
+               src_offset->y,
+               dest_offset->x,
+               dest_offset->y,
+       };
+       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                             device->meta_state.resolve_compute.p_layout,
+                             VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+                             push_constants);
+       radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
+
+}
+
 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
                                     struct radv_image *src_image,
                                     VkFormat src_format,
@@ -561,3 +954,116 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
        cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
                                        RADV_CMD_FLAG_INV_VMEM_L1;
 }
+
+/* Resolve the current subpass' depth/stencil attachment into its
+ * ds_resolve_attachment with compute shaders.
+ *
+ * Sequence: emit a subpass barrier, call
+ * radv_decompress_resolve_subpass_src(), save the meta state, then for
+ * every layer create single-mip/single-layer views of the source and
+ * destination restricted to "aspects" and record one resolve covering the
+ * whole framebuffer extent. Afterwards CS_PARTIAL_FLUSH and INV_VMEM_L1
+ * are requested, HTILE of the destination is rewritten through
+ * radv_clear_htile() when applicable, and the meta state is restored.
+ *
+ * The layer count is fb->layers, or util_last_bit(view_mask) when the
+ * subpass has a non-zero view mask.
+ */
+void
+radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+                                     VkImageAspectFlags aspects,
+                                     VkResolveModeFlagBitsKHR resolve_mode)
+{
+       struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+       const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+       struct radv_meta_saved_state saved_state;
+       struct radv_subpass_barrier barrier;
+       uint32_t layer_count = fb->layers;
+
+       if (subpass->view_mask)
+               layer_count = util_last_bit(subpass->view_mask);
+
+       /* Resolves happen before the end-of-subpass barriers get executed, so
+        * we have to make the attachment shader-readable.
+        */
+       /* NOTE(review): the source stage is COLOR_ATTACHMENT_OUTPUT while the
+        * source access is a depth/stencil attachment write - confirm this
+        * pairing is intended.
+        */
+       barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+       barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+       barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+       radv_subpass_barrier(cmd_buffer, &barrier);
+
+       radv_decompress_resolve_subpass_src(cmd_buffer);
+
+       radv_meta_save(&saved_state, cmd_buffer,
+                      RADV_META_SAVE_COMPUTE_PIPELINE |
+                      RADV_META_SAVE_CONSTANTS |
+                      RADV_META_SAVE_DESCRIPTORS);
+
+       struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+       struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
+
+       struct radv_image_view *src_iview =
+               cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
+       struct radv_image_view *dst_iview =
+               cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
+
+       struct radv_image *src_image = src_iview->image;
+       struct radv_image *dst_image = dst_iview->image;
+
+       /* One dispatch per layer, each through temporary views that expose a
+        * single mip level and a single array layer of the requested aspect.
+        */
+       for (uint32_t layer = 0; layer < layer_count; layer++) {
+               struct radv_image_view tsrc_iview;
+               radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+                                    &(VkImageViewCreateInfo) {
+                                       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+                                       .image = radv_image_to_handle(src_image),
+                                       .viewType = radv_meta_get_view_type(src_image),
+                                       .format = src_iview->vk_format,
+                                       .subresourceRange = {
+                                               .aspectMask = aspects,
+                                               .baseMipLevel = src_iview->base_mip,
+                                               .levelCount = 1,
+                                               .baseArrayLayer = src_iview->base_layer + layer,
+                                               .layerCount = 1,
+                                       },
+                                    });
+
+               struct radv_image_view tdst_iview;
+               radv_image_view_init(&tdst_iview, cmd_buffer->device,
+                                    &(VkImageViewCreateInfo) {
+                                       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+                                       .image = radv_image_to_handle(dst_image),
+                                       .viewType = radv_meta_get_view_type(dst_image),
+                                       .format = dst_iview->vk_format,
+                                       .subresourceRange = {
+                                               .aspectMask = aspects,
+                                               .baseMipLevel = dst_iview->base_mip,
+                                               .levelCount = 1,
+                                               .baseArrayLayer = dst_iview->base_layer + layer,
+                                               .layerCount = 1,
+                                       },
+                                    });
+
+               emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
+                                          &(VkOffset2D) { 0, 0 },
+                                          &(VkOffset2D) { 0, 0 },
+                                          &(VkExtent2D) { fb->width, fb->height },
+                                          aspects,
+                                          resolve_mode);
+       }
+
+       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+                                       RADV_CMD_FLAG_INV_VMEM_L1;
+
+       /* HTILE of the destination is only rewritten when this call resolves
+        * exactly the depth aspect; the range covers every resolved layer of
+        * the destination mip level.
+        * NOTE(review): nothing is done to HTILE when aspects is not
+        * VK_IMAGE_ASPECT_DEPTH_BIT - confirm the caller relies on that.
+        */
+       if (radv_image_has_htile(dst_image)) {
+               if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+                       VkImageSubresourceRange range = {};
+                       range.aspectMask = aspects;
+                       range.baseMipLevel = dst_iview->base_mip;
+                       range.levelCount = 1;
+                       range.baseArrayLayer = dst_iview->base_layer;
+                       range.layerCount = layer_count;
+
+                       uint32_t clear_value = 0xfffc000f;
+
+                       if (vk_format_is_stencil(dst_image->vk_format) &&
+                           subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+                               /* Only clear the stencil part of the HTILE
+                                * buffer if it's resolved, otherwise this
+                                * might break if the stencil has been cleared.
+                                */
+                               clear_value = 0xfffff30f;
+                       }
+
+                       cmd_buffer->state.flush_bits |=
+                               radv_clear_htile(cmd_buffer, dst_image, &range,
+                                                clear_value);
+               }
+       }
+
+       radv_meta_restore(&saved_state, cmd_buffer);
+}
index 66483e306d92d2e78c68c4a299d931136ee44b40..9de464944548d48496b2432dd35f835636d6a117 100644 (file)
@@ -577,6 +577,19 @@ struct radv_meta_state {
                        VkPipeline                                i_pipeline;
                        VkPipeline                                srgb_pipeline;
                } rc[MAX_SAMPLES_LOG2];
+
+               VkPipeline depth_zero_pipeline;
+               struct {
+                       VkPipeline average_pipeline;
+                       VkPipeline max_pipeline;
+                       VkPipeline min_pipeline;
+               } depth[MAX_SAMPLES_LOG2];
+
+               VkPipeline stencil_zero_pipeline;
+               struct {
+                       VkPipeline max_pipeline;
+                       VkPipeline min_pipeline;
+               } stencil[MAX_SAMPLES_LOG2];
        } resolve_compute;
 
        struct {
@@ -1256,6 +1269,9 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
+/* Compute-shader depth/stencil subpass resolve for the given aspect and
+ * resolve mode (defined in radv_meta_resolve_cs.c).
+ */
+void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
+                                          VkImageAspectFlags aspects,
+                                          VkResolveModeFlagBitsKHR resolve_mode);
 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlags aspects,