X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_meta_resolve.c;h=2b97c42fc6937127d5b5c4d11af5827f3a59ad3c;hb=cd247cf45648159cb2676e88108ec8d8dc97095b;hp=dd811c2514282357d6e7f2d9e300531462809dce;hpb=e5c4e107691cf5b6e96dc43a090746c0fa17152d;p=mesa.git diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c index dd811c25142..2b97c42fc69 100644 --- a/src/amd/vulkan/radv_meta_resolve.c +++ b/src/amd/vulkan/radv_meta_resolve.c @@ -26,6 +26,7 @@ #include "radv_meta.h" #include "radv_private.h" +#include "vk_format.h" #include "nir/nir_builder.h" #include "sid.h" @@ -50,7 +51,7 @@ build_nir_fs(void) } static VkResult -create_pass(struct radv_device *device) +create_pass(struct radv_device *device, VkFormat vk_format, VkRenderPass *pass) { VkResult result; VkDevice device_h = radv_device_to_handle(device); @@ -59,7 +60,7 @@ create_pass(struct radv_device *device) int i; for (i = 0; i < 2; i++) { - attachments[i].format = VK_FORMAT_UNDEFINED; + attachments[i].format = vk_format; attachments[i].samples = 1; attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; @@ -99,14 +100,16 @@ create_pass(struct radv_device *device) .dependencyCount = 0, }, alloc, - &device->meta_state.resolve.pass); + pass); return result; } static VkResult create_pipeline(struct radv_device *device, - VkShaderModule vs_module_h) + VkShaderModule vs_module_h, + VkPipeline *pipeline, + VkRenderPass pass) { VkResult result; VkDevice device_h = radv_device_to_handle(device); @@ -121,6 +124,23 @@ create_pipeline(struct radv_device *device, goto cleanup; } + VkPipelineLayoutCreateInfo pl_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 0, + .pSetLayouts = NULL, + .pushConstantRangeCount = 0, + .pPushConstantRanges = NULL, + }; + + if (!device->meta_state.resolve.p_layout) { + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + 
&pl_create_info, + &device->meta_state.alloc, + &device->meta_state.resolve.p_layout); + if (result != VK_SUCCESS) + goto cleanup; + } + result = radv_graphics_pipeline_create(device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &(VkGraphicsPipelineCreateInfo) { @@ -196,15 +216,15 @@ create_pipeline(struct radv_device *device, VK_DYNAMIC_STATE_SCISSOR, }, }, - .renderPass = device->meta_state.resolve.pass, + .layout = device->meta_state.resolve.p_layout, + .renderPass = pass, .subpass = 0, }, &(struct radv_graphics_pipeline_create_info) { .use_rectlist = true, .custom_blend_mode = V_028808_CB_RESOLVE, }, - &device->meta_state.alloc, - &device->meta_state.resolve.pipeline); + &device->meta_state.alloc, pipeline); if (result != VK_SUCCESS) goto cleanup; @@ -219,27 +239,26 @@ void radv_device_finish_meta_resolve_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - VkDevice device_h = radv_device_to_handle(device); - VkRenderPass pass_h = device->meta_state.resolve.pass; - const VkAllocationCallbacks *alloc = &device->meta_state.alloc; - - if (pass_h) - radv_DestroyRenderPass(device_h, pass_h, - &device->meta_state.alloc); - VkPipeline pipeline_h = state->resolve.pipeline; - if (pipeline_h) { - radv_DestroyPipeline(device_h, pipeline_h, alloc); + for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) { + radv_DestroyRenderPass(radv_device_to_handle(device), + state->resolve.pass[j], &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), + state->resolve.pipeline[j], &state->alloc); } + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->resolve.p_layout, &state->alloc); + } VkResult -radv_device_init_meta_resolve_state(struct radv_device *device) +radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand) { - VkResult res = VK_SUCCESS; - - zero(device->meta_state.resolve); + if (on_demand) + return VK_SUCCESS; + VkResult res = VK_SUCCESS; + struct radv_meta_state *state = 
&device->meta_state; struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() }; if (!vs_module.nir) { /* XXX: Need more accurate error */ @@ -247,14 +266,19 @@ radv_device_init_meta_resolve_state(struct radv_device *device) goto fail; } - res = create_pass(device); - if (res != VK_SUCCESS) - goto fail; - - VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); - res = create_pipeline(device, vs_module_h); - if (res != VK_SUCCESS) - goto fail; + for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) { + VkFormat format = radv_fs_key_format_exemplars[i]; + unsigned fs_key = radv_format_meta_fs_key(format); + res = create_pass(device, format, &state->resolve.pass[fs_key]); + if (res != VK_SUCCESS) + goto fail; + + VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); + res = create_pipeline(device, vs_module_h, + &state->resolve.pipeline[fs_key], state->resolve.pass[fs_key]); + if (res != VK_SUCCESS) + goto fail; + } goto cleanup; @@ -269,21 +293,18 @@ cleanup: static void emit_resolve(struct radv_cmd_buffer *cmd_buffer, + VkFormat vk_format, const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent) { struct radv_device *device = cmd_buffer->device; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); + unsigned fs_key = radv_format_meta_fs_key(vk_format); cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; - VkPipeline pipeline_h = device->meta_state.resolve.pipeline; - RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h); - - if (cmd_buffer->state.pipeline != pipeline) { - radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - } + radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.resolve.pipeline[fs_key]); radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { .x = dest_offset->x, @@ -310,18 +331,64 @@ enum radv_resolve_method { }; static void 
radv_pick_resolve_method_images(struct radv_image *src_image, + VkFormat src_format, struct radv_image *dest_image, + VkImageLayout dest_image_layout, + struct radv_cmd_buffer *cmd_buffer, enum radv_resolve_method *method) { - if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) { - if (dest_image->surface.num_dcc_levels > 0) - *method = RESOLVE_FRAGMENT; - else - *method = RESOLVE_COMPUTE; + uint32_t queue_mask = radv_image_queue_family_mask(dest_image, + cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index); + + if (src_format == VK_FORMAT_R16G16_UNORM || + src_format == VK_FORMAT_R16G16_SNORM) + *method = RESOLVE_COMPUTE; + else if (vk_format_is_int(src_format)) + *method = RESOLVE_COMPUTE; + else if (src_image->info.array_size > 1 || + dest_image->info.array_size > 1) + *method = RESOLVE_COMPUTE; + + if (radv_layout_dcc_compressed(dest_image, dest_image_layout, queue_mask)) { + *method = RESOLVE_FRAGMENT; + } else if (dest_image->planes[0].surface.micro_tile_mode != + src_image->planes[0].surface.micro_tile_mode) { + *method = RESOLVE_COMPUTE; } } +static VkResult +build_resolve_pipeline(struct radv_device *device, + unsigned fs_key) +{ + VkResult result = VK_SUCCESS; + + if (device->meta_state.resolve.pipeline[fs_key]) + return result; + + mtx_lock(&device->meta_state.mtx); + if (device->meta_state.resolve.pipeline[fs_key]) { + mtx_unlock(&device->meta_state.mtx); + return result; + } + + struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() }; + + result = create_pass(device, radv_fs_key_format_exemplars[fs_key], &device->meta_state.resolve.pass[fs_key]); + if (result != VK_SUCCESS) + goto fail; + + VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); + result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key], device->meta_state.resolve.pass[fs_key]); + +fail: + ralloc_free(vs_module.nir); + mtx_unlock(&device->meta_state.mtx); + 
return result; +} + void radv_CmdResolveImage( VkCommandBuffer cmd_buffer_h, VkImage src_image_h, @@ -356,8 +423,9 @@ void radv_CmdResolveImage( } else resolve_method = RESOLVE_COMPUTE; - radv_pick_resolve_method_images(src_image, dest_image, - &resolve_method); + radv_pick_resolve_method_images(src_image, src_image->vk_format, + dest_image, dest_image_layout, + cmd_buffer, &resolve_method); if (resolve_method == RESOLVE_FRAGMENT) { radv_meta_resolve_fragment_image(cmd_buffer, @@ -372,14 +440,17 @@ void radv_CmdResolveImage( if (resolve_method == RESOLVE_COMPUTE) { radv_meta_resolve_compute_image(cmd_buffer, src_image, + src_image->vk_format, src_image_layout, dest_image, + dest_image->vk_format, dest_image_layout, region_count, regions); return; } - radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_GRAPHICS_PIPELINE); assert(src_image->info.samples > 1); if (src_image->info.samples <= 1) { @@ -389,20 +460,13 @@ void radv_CmdResolveImage( } assert(dest_image->info.samples == 1); - if (src_image->info.samples >= 16) { - /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the - * glBlitFramebuffer workaround for samples >= 16. 
- */ - radv_finishme("vkCmdResolveImage: need interpolation workaround when " - "samples >= 16"); - } - if (src_image->info.array_size > 1) radv_finishme("vkCmdResolveImage: multisample array images"); - if (dest_image->surface.dcc_size) { + if (radv_image_has_dcc(dest_image)) { radv_initialize_dcc(cmd_buffer, dest_image, 0xffffffff); } + unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format); for (uint32_t r = 0; r < region_count; ++r) { const VkImageResolve *region = &regions[r]; @@ -449,6 +513,12 @@ void radv_CmdResolveImage( for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) { + VkResult ret = build_resolve_pipeline(device, fs_key); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + break; + } + struct radv_image_view src_iview; radv_image_view_init(&src_iview, cmd_buffer->device, &(VkImageViewCreateInfo) { @@ -502,7 +572,7 @@ void radv_CmdResolveImage( radv_CmdBeginRenderPass(cmd_buffer_h, &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.resolve.pass, + .renderPass = device->meta_state.resolve.pass[fs_key], .framebuffer = fb_h, .renderArea = { .offset = { @@ -520,6 +590,7 @@ void radv_CmdResolveImage( VK_SUBPASS_CONTENTS_INLINE); emit_resolve(cmd_buffer, + dest_iview.vk_format, &(VkOffset2D) { .x = dstOffset.x, .y = dstOffset.y, @@ -550,30 +621,27 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer) struct radv_meta_saved_state saved_state; enum radv_resolve_method resolve_method = RESOLVE_HW; - /* FINISHME(perf): Skip clears for resolve attachments. - * - * From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a resolve - * attachment, then the loadOp is effectively ignored as the resolve is - * guaranteed to overwrite all pixels in the render area. 
- */ - if (!subpass->has_resolve) return; for (uint32_t i = 0; i < subpass->color_count; ++i) { - VkAttachmentReference src_att = subpass->color_attachments[i]; - VkAttachmentReference dest_att = subpass->resolve_attachments[i]; + struct radv_subpass_attachment src_att = subpass->color_attachments[i]; + struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i]; - if (src_att.attachment == VK_ATTACHMENT_UNUSED || - dest_att.attachment == VK_ATTACHMENT_UNUSED) + if (dest_att.attachment == VK_ATTACHMENT_UNUSED) continue; + /* Make sure to not clear color attachments after resolves. */ + cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0; + struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image; - struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image; + struct radv_image_view *src_iview= cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment; + struct radv_image *src_img = src_iview->image; + + radv_pick_resolve_method_images(src_img, src_iview->vk_format, + dst_img, dest_att.layout, + cmd_buffer, &resolve_method); - radv_pick_resolve_method_images(dst_img, src_img, &resolve_method); if (resolve_method == RESOLVE_FRAGMENT) { break; } @@ -587,36 +655,112 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer) return; } - radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer); + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_GRAPHICS_PIPELINE); for (uint32_t i = 0; i < subpass->color_count; ++i) { - VkAttachmentReference src_att = subpass->color_attachments[i]; - VkAttachmentReference dest_att = subpass->resolve_attachments[i]; + struct radv_subpass_attachment src_att = subpass->color_attachments[i]; + struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i]; - if (src_att.attachment == VK_ATTACHMENT_UNUSED || - dest_att.attachment == 
VK_ATTACHMENT_UNUSED) + if (dest_att.attachment == VK_ATTACHMENT_UNUSED) continue; - struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image; + struct radv_image_view *dest_iview = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment; + struct radv_image *dst_img = dest_iview->image; - if (dst_img->surface.dcc_size) { + if (radv_image_has_dcc(dst_img)) { radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff); cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } struct radv_subpass resolve_subpass = { .color_count = 2, - .color_attachments = (VkAttachmentReference[]) { src_att, dest_att }, - .depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED }, + .color_attachments = (struct radv_subpass_attachment[]) { src_att, dest_att }, + .depth_stencil_attachment = NULL, }; - radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false); + radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); + + VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dest_iview->vk_format)); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + continue; + } emit_resolve(cmd_buffer, + dest_iview->vk_format, &(VkOffset2D) { 0, 0 }, &(VkExtent2D) { fb->width, fb->height }); } - cmd_buffer->state.subpass = subpass; + radv_cmd_buffer_set_subpass(cmd_buffer, subpass); + radv_meta_restore(&saved_state, cmd_buffer); } + +/** + * Decompress CMask/FMask before resolving a multisampled source image inside a + * subpass. 
+ */ +void +radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + struct radv_subpass_attachment src_att = subpass->color_attachments[i]; + struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i]; + + if (dest_att.attachment == VK_ATTACHMENT_UNUSED) + continue; + + struct radv_image *src_image = + fb->attachments[src_att.attachment].attachment->image; + + VkImageResolve region = {}; + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.mipLevel = 0; + region.srcSubresource.layerCount = src_image->info.array_size; + + radv_decompress_resolve_src(cmd_buffer, src_image, + src_att.layout, 1, &region); + } +} + +/** + * Decompress CMask/FMask before resolving a multisampled source image. + */ +void +radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *src_image, + VkImageLayout src_image_layout, + uint32_t region_count, + const VkImageResolve *regions) +{ + for (uint32_t r = 0; r < region_count; ++r) { + const VkImageResolve *region = &regions[r]; + const uint32_t src_base_layer = + radv_meta_get_iview_layer(src_image, &region->srcSubresource, + &region->srcOffset); + VkImageSubresourceRange range; + range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + range.baseMipLevel = region->srcSubresource.mipLevel; + range.levelCount = 1; + range.baseArrayLayer = src_base_layer; + range.layerCount = region->srcSubresource.layerCount; + + uint32_t queue_mask = + radv_image_queue_family_mask(src_image, + cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index); + + if (radv_layout_dcc_compressed(src_image, src_image_layout, + queue_mask)) { + radv_decompress_dcc(cmd_buffer, src_image, &range); + } else { + radv_fast_clear_flush_image_inplace(cmd_buffer, + src_image, &range); + } + } +}