From 3e2a6191c9e161b687eb8561f3cf8da8208811fb Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 23 Dec 2017 13:17:52 +0100 Subject: [PATCH] radv: Add compute DCC decompress. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We do an in place copy where we read compressed and write decompressed. By doing this in sizes that cover entire DCC blocks and waiting for all reads in the block before starting to write we avoid corruption. In the end we clear the DCC metadata to 0xffffffff. Reviewed-by: Dave Airlie Tested-by: Dieter Nützel --- src/amd/vulkan/radv_meta.h | 3 + src/amd/vulkan/radv_meta_fast_clear.c | 268 ++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 4 + 3 files changed, 275 insertions(+) diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h index 3edf5fa6461..9f3198e8797 100644 --- a/src/amd/vulkan/radv_meta.h +++ b/src/amd/vulkan/radv_meta.h @@ -171,6 +171,9 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange); +void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange); void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index 2603229a1f7..98e8f6ac18a 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -28,6 +28,160 @@ #include "radv_private.h" #include "sid.h" + +static nir_shader * +build_dcc_decompress_compute_shader(struct radv_device *dev) +{ + nir_builder b; + const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, + false, + false, + GLSL_TYPE_FLOAT); + const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, + false, + false, + GLSL_TYPE_FLOAT); + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "dcc_decompress_compute"); + + /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. 
*/ + b.shader->info.cs.local_size[0] = 16; + b.shader->info.cs.local_size[1] = 16; + b.shader->info.cs.local_size[2] = 1; + nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, + buf_type, "s_tex"); + input_img->data.descriptor_set = 0; + input_img->data.binding = 0; + + nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, + img_type, "out_img"); + output_img->data.descriptor_set = 0; + output_img->data.binding = 1; + + nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); + nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + tex->op = nir_texop_txf; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3)); + tex->src[1].src_type = nir_tex_src_lod; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 2; + tex->texture = nir_deref_var_create(tex, input_img); + tex->sampler = NULL; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_intrinsic_instr *membar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_memory_barrier); + nir_builder_instr_insert(&b, &membar->instr); + + nir_intrinsic_instr *bar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_barrier); + nir_builder_instr_insert(&b, &bar->instr); + + nir_ssa_def *outval = &tex->dest.ssa; + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store); + store->src[0] = nir_src_for_ssa(global_id); + store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); + store->src[2] = nir_src_for_ssa(outval); + store->variables[0] = nir_deref_var_create(store, output_img); + + nir_builder_instr_insert(&b, &store->instr); + return b.shader; +} + +static VkResult +create_dcc_compress_compute(struct radv_device *device) +{ + VkResult result = VK_SUCCESS; + struct radv_shader_module cs = { .nir = NULL }; + + cs.nir = build_dcc_decompress_compute_shader(device); + + VkDescriptorSetLayoutCreateInfo ds_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + } + }; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), + &ds_create_info, + &device->meta_state.alloc, + &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout); + if (result != VK_SUCCESS) + goto cleanup; + + + VkPipelineLayoutCreateInfo pl_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout, + .pushConstantRangeCount = 1, + 
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 8}, + }; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + &pl_create_info, + &device->meta_state.alloc, + &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout); + if (result != VK_SUCCESS) + goto cleanup; + + /* compute shader */ + + VkPipelineShaderStageCreateInfo pipeline_shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = pipeline_shader_stage, + .flags = 0, + .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&device->meta_state.cache), + 1, &vk_pipeline_info, NULL, + &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline); + if (result != VK_SUCCESS) + goto cleanup; + +cleanup: + ralloc_free(cs.nir); + return result; +} + static VkResult create_pass(struct radv_device *device) { @@ -322,6 +476,16 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device) radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout, &state->alloc); + + radv_DestroyPipeline(radv_device_to_handle(device), + state->fast_clear_flush.dcc_decompress_compute_pipeline, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->fast_clear_flush.dcc_decompress_compute_p_layout, + &state->alloc); + radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->fast_clear_flush.dcc_decompress_compute_ds_layout, + &state->alloc); } VkResult @@ -351,6 +515,10 @@ radv_device_init_meta_fast_clear_flush_state(struct radv_device *device) if (res != VK_SUCCESS) goto fail; + res = create_dcc_compress_compute(device); + if (res != VK_SUCCESS) + goto fail; + goto cleanup; fail: @@ -521,3 +689,103 @@ radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer, { radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true); } + +static void +radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange) +{ + struct radv_meta_saved_state saved_state; + struct radv_image_view iview = {0}; + struct radv_device *device = cmd_buffer->device; + + /* This assumes the image is 2d with 1 layer and 1 mipmap level */ + struct radv_cmd_state *state = &cmd_buffer->state; + + state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + + radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | + RADV_META_SAVE_COMPUTE_PIPELINE); + + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline); + + radv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }); + + radv_meta_push_descriptor_set(cmd_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, + 
device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, + 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }); + + radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1); + + /* The fill buffer below does its own saving */ + radv_meta_restore(&saved_state, cmd_buffer); + + state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_INV_VMEM_L1; + + state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, + image->offset + image->dcc_offset, + image->surface.dcc_size, 0xffffffff); + + state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; +} + +void +radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange) +{ + if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) + radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange); + else + radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange); +} diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 4fb3c218eb3..d7e9070fbb8 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -492,6 +492,10 @@ struct radv_meta_state { VkPipeline fmask_decompress_pipeline; VkPipeline dcc_decompress_pipeline; VkRenderPass pass; + + VkDescriptorSetLayout dcc_decompress_compute_ds_layout; + VkPipelineLayout dcc_decompress_compute_p_layout; + VkPipeline dcc_decompress_compute_pipeline; } fast_clear_flush; struct { -- 2.30.2
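
A short note on why the in-place copy above stays correct: per the comment in the shader builder, a 16x16x1 workgroup is large enough to cover an entire DCC block; every invocation first fetches its texel with txf, and the memory barrier plus control barrier ensure all fetches in the workgroup have completed before any invocation stores its decompressed value back to the same coordinate, so compressed and decompressed data never mix within a block. The sketch below is illustrative only and not part of the patch: the helper name and struct are made up, and it assumes radv_unaligned_dispatch rounds the image size up to whole workgroups in the usual divide-round-up way for the 16x16x1 local size.

#include <stdint.h>

struct dispatch_size {
	uint32_t x, y, z;
};

/* Sketch (hypothetical helper): how many 16x16x1 workgroups are needed so
 * that every DCC block of the image is handled entirely within one
 * workgroup. */
static inline struct dispatch_size
dcc_decompress_dispatch_size(uint32_t width, uint32_t height)
{
	const uint32_t block_w = 16, block_h = 16; /* matches cs.local_size[0]/[1] above */

	/* Round up so a partially covered block at the right/bottom edge still
	 * gets a full workgroup; all reads of a block must finish before any
	 * write back into it. */
	return (struct dispatch_size) {
		.x = (width  + block_w - 1) / block_w,
		.y = (height + block_h - 1) / block_h,
		.z = 1,
	};
}

After the dispatch, the radv_fill_buffer call writes 0xffffffff over the DCC metadata, the encoding the hardware treats as "uncompressed", so subsequent accesses see the image as plain, already-decompressed color data.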