radv: Add compute DCC decompress.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 23 Dec 2017 12:17:52 +0000 (13:17 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Fri, 29 Dec 2017 11:21:40 +0000 (12:21 +0100)
We do an in-place copy where we read the compressed data and write it back
decompressed. By doing this in sizes that cover entire DCC blocks, and waiting
for all reads in the block before starting to write, we avoid corruption.

In the end we clear the DCC metadata to 0xffffffff.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
src/amd/vulkan/radv_meta.h
src/amd/vulkan/radv_meta_fast_clear.c
src/amd/vulkan/radv_private.h

index 3edf5fa64612617becf7d17bc9061af8df4e725f..9f3198e87976c6bca7d894a640c44664381cff23 100644 (file)
@@ -171,6 +171,9 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image,
                                         const VkImageSubresourceRange *subresourceRange);
+/* Decompress the DCC data of an image. The graphics path is used when the
+ * command buffer belongs to the general queue family, the compute path
+ * otherwise. */
+void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
+                       struct radv_image *image,
+                        const VkImageSubresourceRange *subresourceRange);
 
 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
                                     struct radv_image *src_image,
index 2603229a1f7f2543ca28bbe14494c7af726b1a2b..98e8f6ac18a2e558e872207d6c076f6b2a713cb3 100644 (file)
 #include "radv_private.h"
 #include "sid.h"
 
+
+/*
+ * Build the NIR compute shader used for in-place DCC decompression.
+ *
+ * Each invocation fetches one texel (txf, LOD 0) from the image at set 0,
+ * binding 0, then, after a memory barrier and a barrier intrinsic, stores
+ * that value to the image at set 0, binding 1 at the same coordinate.
+ *
+ * The dev parameter is not used by the builder itself.
+ */
+static nir_shader *
+build_dcc_decompress_compute_shader(struct radv_device *dev)
+{
+       /* Both bindings are declared with the same 2D float sampler type. */
+       const struct glsl_type *src_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+                                                            false, false,
+                                                            GLSL_TYPE_FLOAT);
+       const struct glsl_type *dst_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+                                                            false, false,
+                                                            GLSL_TYPE_FLOAT);
+       nir_builder b;
+
+       nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+       b.shader->info.name = ralloc_strdup(b.shader, "dcc_decompress_compute");
+
+       /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
+       b.shader->info.cs.local_size[0] = 16;
+       b.shader->info.cs.local_size[1] = 16;
+       b.shader->info.cs.local_size[2] = 1;
+
+       /* Set 0, binding 0: the image that is fetched from. */
+       nir_variable *src_var = nir_variable_create(b.shader, nir_var_uniform,
+                                                   src_type, "s_tex");
+       src_var->data.descriptor_set = 0;
+       src_var->data.binding = 0;
+
+       /* Set 0, binding 1: the image that is stored to. */
+       nir_variable *dst_var = nir_variable_create(b.shader, nir_var_uniform,
+                                                   dst_type, "out_img");
+       dst_var->data.descriptor_set = 0;
+       dst_var->data.binding = 1;
+
+       /* coord = workgroup id * workgroup size + local invocation id */
+       nir_ssa_def *local_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
+       nir_ssa_def *group_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
+       nir_ssa_def *group_size = nir_imm_ivec4(&b,
+                                               b.shader->info.cs.local_size[0],
+                                               b.shader->info.cs.local_size[1],
+                                               b.shader->info.cs.local_size[2], 0);
+       nir_ssa_def *group_base = nir_imul(&b, group_id, group_size);
+       nir_ssa_def *coord = nir_iadd(&b, group_base, local_id);
+
+       /* Texel fetch at LOD 0 using two coordinate components. */
+       nir_tex_instr *fetch = nir_tex_instr_create(b.shader, 2);
+       fetch->op = nir_texop_txf;
+       fetch->sampler_dim = GLSL_SAMPLER_DIM_2D;
+       fetch->is_array = false;
+       fetch->coord_components = 2;
+       fetch->dest_type = nir_type_float;
+       fetch->src[0].src_type = nir_tex_src_coord;
+       fetch->src[0].src = nir_src_for_ssa(nir_channels(&b, coord, 3));
+       fetch->src[1].src_type = nir_tex_src_lod;
+       fetch->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+       fetch->texture = nir_deref_var_create(fetch, src_var);
+       fetch->sampler = NULL;
+
+       nir_ssa_dest_init(&fetch->instr, &fetch->dest, 4, 32, "tex");
+       nir_builder_instr_insert(&b, &fetch->instr);
+
+       /* Source and destination are the same memory, so the barriers sit
+        * between the fetch and the store. */
+       nir_intrinsic_instr *mem_barrier = nir_intrinsic_instr_create(b.shader, nir_intrinsic_memory_barrier);
+       nir_builder_instr_insert(&b, &mem_barrier->instr);
+
+       nir_intrinsic_instr *exec_barrier = nir_intrinsic_instr_create(b.shader, nir_intrinsic_barrier);
+       nir_builder_instr_insert(&b, &exec_barrier->instr);
+
+       /* Store the fetched value back at the same coordinate. */
+       nir_intrinsic_instr *img_store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
+       img_store->src[0] = nir_src_for_ssa(coord);
+       img_store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+       img_store->src[2] = nir_src_for_ssa(&fetch->dest.ssa);
+       img_store->variables[0] = nir_deref_var_create(img_store, dst_var);
+       nir_builder_instr_insert(&b, &img_store->instr);
+
+       return b.shader;
+}
+
+/*
+ * Create the descriptor set layout, pipeline layout and compute pipeline for
+ * the DCC decompress compute shader and store them in
+ * device->meta_state.fast_clear_flush.
+ *
+ * Returns VK_SUCCESS, or the result of the first creation call that failed.
+ * The NIR shader is freed before returning in either case; objects created
+ * before a failure are not destroyed here.
+ */
+static VkResult
+create_dcc_compress_compute(struct radv_device *device)
+{
+       struct radv_meta_state *meta = &device->meta_state;
+       struct radv_shader_module cs = {
+               .nir = build_dcc_decompress_compute_shader(device),
+       };
+       VkResult result;
+
+       /* Binding 0 is the sampled source, binding 1 the storage destination. */
+       const VkDescriptorSetLayoutBinding bindings[] = {
+               {
+                       .binding = 0,
+                       .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+                       .descriptorCount = 1,
+                       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+                       .pImmutableSamplers = NULL
+               },
+               {
+                       .binding = 1,
+                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                       .descriptorCount = 1,
+                       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+                       .pImmutableSamplers = NULL
+               },
+       };
+       const VkDescriptorSetLayoutCreateInfo set_layout_info = {
+               .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+               .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+               .bindingCount = 2,
+               .pBindings = bindings,
+       };
+
+       result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
+                                               &set_layout_info,
+                                               &meta->alloc,
+                                               &meta->fast_clear_flush.dcc_decompress_compute_ds_layout);
+       if (result != VK_SUCCESS)
+               goto out;
+
+       const VkPushConstantRange push_range = {
+               .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+               .offset = 0,
+               .size = 8,
+       };
+       const VkPipelineLayoutCreateInfo layout_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+               .setLayoutCount = 1,
+               .pSetLayouts = &meta->fast_clear_flush.dcc_decompress_compute_ds_layout,
+               .pushConstantRangeCount = 1,
+               .pPushConstantRanges = &push_range,
+       };
+
+       result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+                                          &layout_info,
+                                          &meta->alloc,
+                                          &meta->fast_clear_flush.dcc_decompress_compute_p_layout);
+       if (result != VK_SUCCESS)
+               goto out;
+
+       /* compute shader */
+       const VkComputePipelineCreateInfo pipeline_info = {
+               .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+               .stage = {
+                       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                       .module = radv_shader_module_to_handle(&cs),
+                       .pName = "main",
+                       .pSpecializationInfo = NULL,
+               },
+               .flags = 0,
+               .layout = meta->fast_clear_flush.dcc_decompress_compute_p_layout,
+       };
+
+       result = radv_CreateComputePipelines(radv_device_to_handle(device),
+                                            radv_pipeline_cache_to_handle(&meta->cache),
+                                            1, &pipeline_info, NULL,
+                                            &meta->fast_clear_flush.dcc_decompress_compute_pipeline);
+
+out:
+       /* The NIR is only needed while the pipeline is being created. */
+       ralloc_free(cs.nir);
+       return result;
+}
+
 static VkResult
 create_pass(struct radv_device *device)
 {
@@ -322,6 +476,16 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
        radv_DestroyPipelineLayout(radv_device_to_handle(device),
                                   state->fast_clear_flush.p_layout,
                                   &state->alloc);
+
+       radv_DestroyPipeline(radv_device_to_handle(device),
+                            state->fast_clear_flush.dcc_decompress_compute_pipeline,
+                            &state->alloc);
+       radv_DestroyPipelineLayout(radv_device_to_handle(device),
+                                  state->fast_clear_flush.dcc_decompress_compute_p_layout,
+                                  &state->alloc);
+       radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+                                       state->fast_clear_flush.dcc_decompress_compute_ds_layout,
+                                       &state->alloc);
 }
 
 VkResult
@@ -351,6 +515,10 @@ radv_device_init_meta_fast_clear_flush_state(struct radv_device *device)
        if (res != VK_SUCCESS)
                goto fail;
 
+       res = create_dcc_compress_compute(device);
+       if (res != VK_SUCCESS)
+               goto fail;
+
        goto cleanup;
 
 fail:
@@ -521,3 +689,103 @@ radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
 {
        radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
 }
+
+/*
+ * Decompress DCC in place using the compute pipeline.
+ *
+ * The same image view is bound as both the sampled source (binding 0) and
+ * the storage destination (binding 1), a dispatch sized by the image width
+ * and height is issued, and afterwards the DCC metadata range of the image
+ * is filled with 0xffffffff.
+ *
+ * This assumes the image is 2d with 1 layer and 1 mipmap level;
+ * subresourceRange is not consulted.
+ */
+static void
+radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
+                            struct radv_image *image,
+                            const VkImageSubresourceRange *subresourceRange)
+{
+       struct radv_device *device = cmd_buffer->device;
+       struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+       struct radv_meta_saved_state saved_state;
+       struct radv_image_view iview = {0};
+
+       cmd_state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+       radv_meta_save(&saved_state, cmd_buffer,
+                      RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
+
+       radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+                            VK_PIPELINE_BIND_POINT_COMPUTE,
+                            device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+
+       /* View of mip level 0, array layer 0 in the image's own format. */
+       const VkImageViewCreateInfo view_info = {
+               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+               .image = radv_image_to_handle(image),
+               .viewType = VK_IMAGE_VIEW_TYPE_2D,
+               .format = image->vk_format,
+               .subresourceRange = {
+                       .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                       .baseMipLevel = 0,
+                       .levelCount = 1,
+                       .baseArrayLayer = 0,
+                       .layerCount = 1
+               },
+       };
+       radv_image_view_init(&iview, device, &view_info);
+
+       /* Source and destination descriptors reference the same view. */
+       const VkDescriptorImageInfo view_desc = {
+               .sampler = VK_NULL_HANDLE,
+               .imageView = radv_image_view_to_handle(&iview),
+               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+       };
+       const VkWriteDescriptorSet writes[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                       .dstBinding = 0,
+                       .dstArrayElement = 0,
+                       .descriptorCount = 1,
+                       .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+                       .pImageInfo = &view_desc,
+               },
+               {
+                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                       .dstBinding = 1,
+                       .dstArrayElement = 0,
+                       .descriptorCount = 1,
+                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                       .pImageInfo = &view_desc,
+               },
+       };
+       radv_meta_push_descriptor_set(cmd_buffer,
+                                     VK_PIPELINE_BIND_POINT_COMPUTE,
+                                     device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
+                                     0, /* set */
+                                     2, /* descriptorWriteCount */
+                                     writes);
+
+       radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1);
+
+       /* The fill buffer below does its own saving */
+       radv_meta_restore(&saved_state, cmd_buffer);
+
+       cmd_state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+                                RADV_CMD_FLAG_INV_VMEM_L1;
+
+       /* Overwrite the whole DCC metadata range of the image. */
+       cmd_state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo,
+                                                 image->offset + image->dcc_offset,
+                                                 image->surface.dcc_size, 0xffffffff);
+
+       cmd_state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+}
+
+/*
+ * Decompress the DCC data of an image, picking the implementation from the
+ * queue family of the command buffer: the graphics path on the general
+ * queue, the compute path on any other queue family.
+ */
+void
+radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
+                    struct radv_image *image,
+                    const VkImageSubresourceRange *subresourceRange)
+{
+       if (cmd_buffer->queue_family_index != RADV_QUEUE_GENERAL) {
+               radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
+               return;
+       }
+
+       radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
+}
index 4fb3c218eb3efbb5384147ee73818af6356aed9b..d7e9070fbb8294249615bc5ad338a93e4c38a60c 100644 (file)
@@ -492,6 +492,10 @@ struct radv_meta_state {
                VkPipeline                                fmask_decompress_pipeline;
                VkPipeline                                dcc_decompress_pipeline;
                VkRenderPass                              pass;
+
+               VkDescriptorSetLayout                     dcc_decompress_compute_ds_layout;
+               VkPipelineLayout                          dcc_decompress_compute_p_layout;
+               VkPipeline                                dcc_decompress_compute_pipeline;
        } fast_clear_flush;
 
        struct {