From cdc6efddf918bc07d30db779f3b0675192a77333 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 22 May 2019 09:43:39 +0200
Subject: [PATCH] radv: implement all depth/stencil resolve modes using graphics

When using graphics, the driver doesn't need to decompress HTILE
before resolving. This path currently doesn't support layers so
we have to fall back to the compute path.

Signed-off-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
---
 src/amd/vulkan/radv_meta_resolve_fs.c | 640 ++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h         |  18 +
 2 files changed, 658 insertions(+)

diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c
index d059760edf1..8ee3cc07f12 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -316,6 +316,386 @@ create_resolve_pipeline(struct radv_device *device,
 	return result;
 }
 
+/* Selects which aspect a depth/stencil resolve shader/pipeline handles. */
+enum {
+	DEPTH_RESOLVE,
+	STENCIL_RESOLVE
+};
+
+/**
+ * Return the short name of a resolve mode; it is only used to build the
+ * debug name of the resolve fragment shaders.
+ */
+static const char *
+get_resolve_mode_str(VkResolveModeFlagBitsKHR resolve_mode)
+{
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		return "zero";
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		return "average";
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		return "min";
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		return "max";
+	default:
+		unreachable("invalid resolve mode");
+	}
+}
+
+/**
+ * Build the fragment shader resolving one aspect of a multisampled
+ * depth/stencil image.
+ *
+ * Sample 0 is fetched at the fragment position plus the 2D source offset
+ * read from the push constants. For every mode except SAMPLE_ZERO, samples
+ * 1..samples-1 are fetched too and folded in with add (followed by a
+ * division by the sample count for AVERAGE), min or max. Only the first
+ * component is stored, to FRAG_RESULT_DEPTH or FRAG_RESULT_STENCIL.
+ *
+ * \param index  DEPTH_RESOLVE or STENCIL_RESOLVE
+ */
+static nir_shader *
+build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples,
+					    int index,
+					    VkResolveModeFlagBitsKHR resolve_mode)
+{
+	nir_builder b;
+	char name[64];
+	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+	const struct glsl_type *vec4 = glsl_vec4_type();
+	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+								 false,
+								 false,
+								 GLSL_TYPE_FLOAT);
+
+	snprintf(name, sizeof(name), "meta_resolve_fs_%s-%s-%d",
+		 index == DEPTH_RESOLVE ? "depth" : "stencil",
+		 get_resolve_mode_str(resolve_mode), samples);
+
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, name);
+
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      sampler_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *fs_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "fs_pos_in");
+	fs_pos_in->data.location = VARYING_SLOT_POS;
+
+	nir_variable *fs_out = nir_variable_create(b.shader,
+						   nir_var_shader_out, vec4,
+						   "f_out");
+	fs_out->data.location =
+		index == DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;
+
+	nir_ssa_def *pos_in = nir_load_var(&b, fs_pos_in);
+
+	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(src_offset, 0);
+	nir_intrinsic_set_range(src_offset, 8);
+	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	src_offset->num_components = 2;
+	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
+	nir_builder_instr_insert(&b, &src_offset->instr);
+
+	nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
+
+	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);
+
+	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+	nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float : nir_type_uint;
+
+	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+	tex->op = nir_texop_txf_ms;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(img_coord);
+	tex->src[1].src_type = nir_tex_src_ms_index;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+	tex->src[2].src_type = nir_tex_src_texture_deref;
+	tex->src[2].src = nir_src_for_ssa(input_img_deref);
+	tex->dest_type = type;
+	tex->is_array = false;
+	tex->coord_components = 2;
+
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(&b, &tex->instr);
+
+	nir_ssa_def *outval = &tex->dest.ssa;
+
+	if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+		for (int i = 1; i < samples; i++) {
+			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+			tex_add->op = nir_texop_txf_ms;
+			tex_add->src[0].src_type = nir_tex_src_coord;
+			tex_add->src[0].src = nir_src_for_ssa(img_coord);
+			tex_add->src[1].src_type = nir_tex_src_ms_index;
+			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+			tex_add->src[2].src_type = nir_tex_src_texture_deref;
+			tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+			tex_add->dest_type = type;
+			tex_add->is_array = false;
+			tex_add->coord_components = 2;
+
+			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+			nir_builder_instr_insert(&b, &tex_add->instr);
+
+			switch (resolve_mode) {
+			case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+				assert(index == DEPTH_RESOLVE);
+				outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MIN_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+				break;
+			case VK_RESOLVE_MODE_MAX_BIT_KHR:
+				if (index == DEPTH_RESOLVE)
+					outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+				else
+					outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+				break;
+			default:
+				unreachable("invalid resolve mode");
+			}
+		}
+
+		if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+			outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+	}
+
+	nir_store_var(&b, fs_out, outval, 0x1);
+
+	return b.shader;
+}
+
+/**
+ * Create the graphics pipeline for the given sample count, aspect and
+ * resolve mode unless it already exists, creating the per-aspect meta
+ * render pass first when needed. Runs with the meta state mutex held.
+ *
+ * \param index  DEPTH_RESOLVE or STENCIL_RESOLVE
+ * \return VK_SUCCESS if the pipeline exists on return, otherwise the error
+ *         of the failing render pass or pipeline creation.
+ */
+static VkResult
+create_depth_stencil_resolve_pipeline(struct radv_device *device,
+				      int samples_log2,
+				      int index,
+				      VkResolveModeFlagBitsKHR resolve_mode)
+{
+	VkRenderPass *render_pass;
+	VkPipeline *pipeline;
+	VkFormat src_format;
+	VkResult result;
+
+	mtx_lock(&device->meta_state.mtx);
+
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		if (index == DEPTH_RESOLVE)
+			pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
+		break;
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		assert(index == DEPTH_RESOLVE);
+		pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		if (index == DEPTH_RESOLVE)
+			pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		if (index == DEPTH_RESOLVE)
+			pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
+		break;
+	default:
+		unreachable("invalid resolve mode");
+	}
+
+	if (*pipeline) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
+	struct radv_shader_module fs = { .nir = NULL };
+	struct radv_shader_module vs = { .nir = NULL };
+	uint32_t samples = 1 << samples_log2;
+
+	vs.nir = build_nir_vertex_shader();
+	fs.nir = build_depth_stencil_resolve_fragment_shader(device, samples,
+							     index, resolve_mode);
+
+	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+		{
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_VERTEX_BIT,
+			.module = radv_shader_module_to_handle(&vs),
+			.pName = "main",
+			.pSpecializationInfo = NULL
+		}, {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+			.module = radv_shader_module_to_handle(&fs),
+			.pName = "main",
+			.pSpecializationInfo = NULL
+		},
+	};
+
+	if (index == DEPTH_RESOLVE) {
+		src_format = VK_FORMAT_D32_SFLOAT;
+		render_pass = &device->meta_state.resolve_fragment.depth_render_pass;
+	} else {
+		render_pass = &device->meta_state.resolve_fragment.stencil_render_pass;
+		src_format = VK_FORMAT_S8_UINT;
+	}
+
+	if (!*render_pass) {
+		result = radv_CreateRenderPass(radv_device_to_handle(device),
+					       &(VkRenderPassCreateInfo) {
+						       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+						       .attachmentCount = 1,
+						       .pAttachments = &(VkAttachmentDescription) {
+							       .format = src_format,
+							       /* Keep both aspects intact around the resolve draw. */
+							       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+							       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+							       .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+							       .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+							       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+							       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+						       },
+						       .subpassCount = 1,
+						       .pSubpasses = &(VkSubpassDescription) {
+							       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+							       .inputAttachmentCount = 0,
+							       .colorAttachmentCount = 0,
+							       .pColorAttachments = NULL,
+							       .pResolveAttachments = NULL,
+							       .pDepthStencilAttachment = &(VkAttachmentReference) {
+								       .attachment = 0,
+								       .layout = VK_IMAGE_LAYOUT_GENERAL,
+							       },
+							       .preserveAttachmentCount = 0,
+							       .pPreserveAttachments = NULL,
+						       },
+						       .dependencyCount = 0,
+					       }, &device->meta_state.alloc, render_pass);
+		if (result != VK_SUCCESS)
+			goto cleanup;
+	}
+
+	VkStencilOp stencil_op =
+		index == DEPTH_RESOLVE ? VK_STENCIL_OP_KEEP : VK_STENCIL_OP_REPLACE;
+
+	VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+		.depthTestEnable = true,
+		.depthWriteEnable = index == DEPTH_RESOLVE,
+		.stencilTestEnable = index == STENCIL_RESOLVE,
+		.depthCompareOp = VK_COMPARE_OP_ALWAYS,
+		.front = {
+			.failOp = stencil_op,
+			.passOp = stencil_op,
+			.depthFailOp = stencil_op,
+			.compareOp = VK_COMPARE_OP_ALWAYS,
+		},
+		.back = {
+			.failOp = stencil_op,
+			.passOp = stencil_op,
+			.depthFailOp = stencil_op,
+			.compareOp = VK_COMPARE_OP_ALWAYS,
+		}
+	};
+
+	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+	vi_create_info = &normal_vi_create_info;
+
+	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
+		.pStages = pipeline_shader_stages,
+		.pVertexInputState = vi_create_info,
+		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+			.primitiveRestartEnable = false,
+		},
+		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+			.viewportCount = 1,
+			.scissorCount = 1,
+		},
+		.pDepthStencilState = &depth_stencil_state,
+		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+			.rasterizerDiscardEnable = false,
+			.polygonMode = VK_POLYGON_MODE_FILL,
+			.cullMode = VK_CULL_MODE_NONE,
+			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+		},
+		.pMultisampleState = NULL,
+		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+			.attachmentCount = 0,
+			.pAttachments = (VkPipelineColorBlendAttachmentState []) {
+				{ .colorWriteMask =
+				  VK_COLOR_COMPONENT_A_BIT |
+				  VK_COLOR_COMPONENT_R_BIT |
+				  VK_COLOR_COMPONENT_G_BIT |
+				  VK_COLOR_COMPONENT_B_BIT },
+			}
+		},
+		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+			.dynamicStateCount = 9,
+			.pDynamicStates = (VkDynamicState[]) {
+				VK_DYNAMIC_STATE_VIEWPORT,
+				VK_DYNAMIC_STATE_SCISSOR,
+				VK_DYNAMIC_STATE_LINE_WIDTH,
+				VK_DYNAMIC_STATE_DEPTH_BIAS,
+				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+				VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+				VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+				VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+				VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+			},
+		},
+		.flags = 0,
+		.layout = device->meta_state.resolve_fragment.p_layout,
+		.renderPass = *render_pass,
+		.subpass = 0,
+	};
+
+	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
+		.use_rectlist = true
+	};
+
+	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
+					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					       &vk_pipeline_info, &radv_pipeline_info,
+					       &device->meta_state.alloc,
+					       pipeline);
+
+cleanup:
+	ralloc_free(vs.nir);
+	ralloc_free(fs.nir);
+
+	mtx_unlock(&device->meta_state.mtx);
+	return result;
+}
+
 VkResult
 radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand)
 {
@@ -334,8 +714,43 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on
 			if (res != VK_SUCCESS)
 				goto fail;
 		}
+
+		res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_AVERAGE_BIT_KHR);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MIN_BIT_KHR);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
+							    VK_RESOLVE_MODE_MAX_BIT_KHR);
+		if (res != VK_SUCCESS)
+			goto fail;
 	}
 
+	res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
+	if (res != VK_SUCCESS)
+		goto fail;
+
+	res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
+						    VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
+	if (res != VK_SUCCESS)
+		goto fail;
+
 	return VK_SUCCESS;
 fail:
 	radv_device_finish_meta_resolve_fragment_state(device);
@@ -357,8 +772,42 @@ radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
 					     state->resolve_fragment.rc[i].pipeline[j],
 					     &state->alloc);
 		}
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_fragment.depth[i].average_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_fragment.depth[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_fragment.depth[i].min_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_fragment.stencil[i].max_pipeline,
+				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_fragment.stencil[i].min_pipeline,
+				     &state->alloc);
 	}
 
+	radv_DestroyRenderPass(radv_device_to_handle(device),
+			       state->resolve_fragment.depth_render_pass,
+			       &state->alloc);
+	radv_DestroyRenderPass(radv_device_to_handle(device),
+			       state->resolve_fragment.stencil_render_pass,
+			       &state->alloc);
+
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_fragment.depth_zero_pipeline,
+			     &state->alloc);
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->resolve_fragment.stencil_zero_pipeline,
+			     &state->alloc);
+
 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
 					state->resolve_fragment.ds_layout,
 					&state->alloc);
@@ -461,6 +910,121 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
 }
 
+/**
+ * Record the draw resolving one aspect of \p src_iview into the
+ * depth/stencil attachment of the subpass set up by the caller.
+ *
+ * Pushes the source view descriptor and the (source - destination) offset
+ * read by the fragment shader, creates the pipeline on demand, then draws
+ * a rectangle covering \p resolve_extent at \p dst_offset. If the pipeline
+ * cannot be created, the error is stored in cmd_buffer->record_result and
+ * nothing is drawn. \p dst_iview is currently unused.
+ */
+static void
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
+			   struct radv_image_view *src_iview,
+			   struct radv_image_view *dst_iview,
+			   const VkOffset2D *src_offset,
+			   const VkOffset2D *dst_offset,
+			   const VkExtent2D *resolve_extent,
+			   VkImageAspectFlags aspects,
+			   VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_device *device = cmd_buffer->device;
+	const uint32_t samples = src_iview->image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+	VkPipeline *pipeline;
+
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_GRAPHICS,
+				      cmd_buffer->device->meta_state.resolve_fragment.p_layout,
+				      0, /* set */
+				      1, /* descriptorWriteCount */
+				      (VkWriteDescriptorSet[]) {
+					      {
+						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+						      .dstBinding = 0,
+						      .dstArrayElement = 0,
+						      .descriptorCount = 1,
+						      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+						      .pImageInfo = (VkDescriptorImageInfo[]) {
+							      {
+								      .sampler = VK_NULL_HANDLE,
+								      .imageView = radv_image_view_to_handle(src_iview),
+								      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+							      },
+						      }
+					      },
+				      });
+
+	unsigned push_constants[2] = {
+		src_offset->x - dst_offset->x,
+		src_offset->y - dst_offset->y,
+	};
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      device->meta_state.resolve_fragment.p_layout,
+			      VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8,
+			      push_constants);
+
+	switch (resolve_mode) {
+	case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
+		break;
+	case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+		assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+		pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MIN_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
+		break;
+	case VK_RESOLVE_MODE_MAX_BIT_KHR:
+		if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+			pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
+		else
+			pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
+		break;
+	default:
+		unreachable("invalid resolve mode");
+	}
+
+	if (!*pipeline) {
+		int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+		VkResult ret;
+
+		ret = create_depth_stencil_resolve_pipeline(device, samples_log2,
+							    index, resolve_mode);
+		if (ret != VK_SUCCESS) {
+			cmd_buffer->record_result = ret;
+			return;
+		}
+	}
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
+		.x = dst_offset->x,
+		.y = dst_offset->y,
+		.width = resolve_extent->width,
+		.height = resolve_extent->height,
+		.minDepth = 0.0f,
+		.maxDepth = 1.0f
+	});
+
+	radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
+		.offset = *dst_offset,
+		.extent = *resolve_extent,
+	});
+
+	radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+}
+
 void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
 				      struct radv_image *src_image,
 				      VkImageLayout src_image_layout,
@@ -653,3 +1217,79 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
 
 	radv_meta_restore(&saved_state, cmd_buffer);
 }
+
+/**
+ * Depth/stencil resolves for the current subpass.
+ *
+ * Resolves \p aspects of the subpass depth/stencil attachment into its
+ * depth/stencil resolve attachment over the whole framebuffer area. Only
+ * mip level 0 and array layer 0 of the source image are read.
+ */
+void
+radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
+				      VkImageAspectFlags aspects,
+				      VkResolveModeFlagBitsKHR resolve_mode)
+{
+	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	struct radv_meta_saved_state saved_state;
+	struct radv_subpass_barrier barrier;
+
+	/* Resolves happen before the end-of-subpass barriers get executed,
+	 * so we have to make the attachment shader-readable */
+	barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+	barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+	barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+	radv_subpass_barrier(cmd_buffer, &barrier);
+
+	radv_decompress_resolve_subpass_src(cmd_buffer);
+
+	radv_meta_save(&saved_state, cmd_buffer,
+		       RADV_META_SAVE_GRAPHICS_PIPELINE |
+		       RADV_META_SAVE_CONSTANTS |
+		       RADV_META_SAVE_DESCRIPTORS);
+
+	struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+	struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
+
+	struct radv_image_view *src_iview =
+		cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
+	struct radv_image *src_image = src_iview->image;
+	struct radv_image_view *dst_iview =
+		cmd_buffer->state.framebuffer->attachments[dst_att.attachment].attachment;
+
+	struct radv_subpass resolve_subpass = {
+		.color_count = 0,
+		.color_attachments = NULL,
+		.depth_stencil_attachment = (struct radv_subpass_attachment *) { &dst_att },
+	};
+
+	radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+	struct radv_image_view tsrc_iview;
+	radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+			     &(VkImageViewCreateInfo) {
+				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+				     .image = radv_image_to_handle(src_image),
+				     .viewType = radv_meta_get_view_type(src_image),
+				     .format = src_iview->vk_format,
+				     .subresourceRange = {
+					     .aspectMask = aspects,
+					     .baseMipLevel = 0,
+					     .levelCount = 1,
+					     .baseArrayLayer = 0,
+					     .layerCount = 1,
+				     },
+			     });
+
+	emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview,
+				   &(VkOffset2D) { 0, 0 },
+				   &(VkOffset2D) { 0, 0 },
+				   &(VkExtent2D) { fb->width, fb->height },
+				   aspects,
+				   resolve_mode);
+
+	radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
+	radv_meta_restore(&saved_state, cmd_buffer);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 1b7191c1b99..66483e306d9 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -587,6 +587,21 @@ struct radv_meta_state {
 			VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
 			VkPipeline pipeline[NUM_META_FS_KEYS];
 		} rc[MAX_SAMPLES_LOG2];
+
+		VkRenderPass depth_render_pass;
+		VkPipeline depth_zero_pipeline;
+		struct {
+			VkPipeline average_pipeline;
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} depth[MAX_SAMPLES_LOG2];
+
+		VkRenderPass stencil_render_pass;
+		VkPipeline stencil_zero_pipeline;
+		struct {
+			VkPipeline max_pipeline;
+			VkPipeline min_pipeline;
+		} stencil[MAX_SAMPLES_LOG2];
 	} resolve_fragment;
 
 	struct {
@@ -1242,6 +1257,9 @@ void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
+void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
+					   VkImageAspectFlags aspects,
+					   VkResolveModeFlagBitsKHR resolve_mode);
 void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
 unsigned radv_get_default_max_sample_dist(int log_samples);
 void radv_device_init_msaa(struct radv_device *device);
-- 
2.30.2