From 6b976024a87c4f4138435d4e416fdcdfbed39724 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 17 Dec 2018 20:59:33 +0100 Subject: [PATCH] radv: add support for FMASK expand Original patch by Dave Airlie. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/Makefile.sources | 1 + src/amd/vulkan/meson.build | 1 + src/amd/vulkan/radv_cmd_buffer.c | 7 + src/amd/vulkan/radv_meta.c | 8 + src/amd/vulkan/radv_meta.h | 6 + src/amd/vulkan/radv_meta_fmask_expand.c | 306 ++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 6 + 7 files changed, 335 insertions(+) create mode 100644 src/amd/vulkan/radv_meta_fmask_expand.c diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources index 53a638362b8..f6327563ee9 100644 --- a/src/amd/vulkan/Makefile.sources +++ b/src/amd/vulkan/Makefile.sources @@ -50,6 +50,7 @@ VULKAN_FILES := \ radv_meta_copy.c \ radv_meta_decompress.c \ radv_meta_fast_clear.c \ + radv_meta_fmask_expand.c \ radv_meta_resolve.c \ radv_meta_resolve_cs.c \ radv_meta_resolve_fs.c \ diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index cc2aa7fd17a..06c33ca45dc 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -78,6 +78,7 @@ libradv_files = files( 'radv_meta_copy.c', 'radv_meta_decompress.c', 'radv_meta_fast_clear.c', + 'radv_meta_fmask_expand.c', 'radv_meta_resolve.c', 'radv_meta_resolve_cs.c', 'radv_meta_resolve_fs.c', diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f4dbab8171a..3361df1f345 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -4466,6 +4466,13 @@ static void radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffe !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); } + + if (radv_image_has_fmask(image)) { + if (src_layout != VK_IMAGE_LAYOUT_GENERAL && + dst_layout == VK_IMAGE_LAYOUT_GENERAL) { + radv_expand_fmask_image_inplace(cmd_buffer, image, range); + } + } } } diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c index 1ec8896afa2..ec4fc4a6d4b 100644 --- a/src/amd/vulkan/radv_meta.c +++ b/src/amd/vulkan/radv_meta.c @@ -389,8 +389,15 @@ radv_device_init_meta(struct radv_device *device) result = radv_device_init_meta_resolve_fragment_state(device, on_demand); if (result != VK_SUCCESS) goto fail_resolve_fragment; + + result = radv_device_init_meta_fmask_expand_state(device); + if (result != VK_SUCCESS) + goto fail_fmask_expand; + return VK_SUCCESS; +fail_fmask_expand: + radv_device_finish_meta_resolve_fragment_state(device); fail_resolve_fragment: radv_device_finish_meta_resolve_compute_state(device); fail_resolve_compute: @@ -431,6 +438,7 @@ radv_device_finish_meta(struct radv_device *device) radv_device_finish_meta_fast_clear_flush_state(device); radv_device_finish_meta_resolve_compute_state(device); radv_device_finish_meta_resolve_fragment_state(device); + radv_device_finish_meta_fmask_expand_state(device); radv_store_meta_pipeline(device); radv_pipeline_cache_finish(&device->meta_state.cache); diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h index 22f7ae7490a..5e0afd11a00 100644 --- a/src/amd/vulkan/radv_meta.h +++ b/src/amd/vulkan/radv_meta.h @@ -88,6 +88,9 @@ void radv_device_finish_meta_resolve_compute_state(struct radv_device *device); VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device); +VkResult radv_device_init_meta_fmask_expand_state(struct radv_device *device); +void radv_device_finish_meta_fmask_expand_state(struct radv_device *device); + void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer, uint32_t flags); @@ -174,6 +177,9 @@ void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange); +void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange); void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c new file mode 100644 index 00000000000..f4b55328929 --- /dev/null +++ b/src/amd/vulkan/radv_meta_fmask_expand.c @@ -0,0 +1,306 @@ +/* + * Copyright © 2019 Valve Corporation + * Copyright © 2018 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "radv_meta.h" +#include "radv_private.h" + +static nir_shader * +build_fmask_expand_compute_shader(struct radv_device *device, int samples) +{ + nir_builder b; + char name[64]; + const struct glsl_type *input_img_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, + GLSL_TYPE_FLOAT); + const struct glsl_type *output_img_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, + GLSL_TYPE_FLOAT); + + snprintf(name, 64, "meta_fmask_expand_cs-%d", samples); + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, name); + b.shader->info.cs.local_size[0] = 16; + b.shader->info.cs.local_size[1] = 16; + b.shader->info.cs.local_size[2] = 1; + + nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, + input_img_type, "s_tex"); + input_img->data.descriptor_set = 0; + input_img->data.binding = 0; + + nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, + output_img_type, "out_img"); + output_img->data.descriptor_set = 0; + output_img->data.binding = 1; + + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; + nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa; + + nir_tex_instr *tex_instr[8]; + for (uint32_t i = 0; i < samples; i++) { + tex_instr[i] = nir_tex_instr_create(b.shader, 3); + + nir_tex_instr *tex = tex_instr[i]; + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 0x3)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[2].src_type = nir_tex_src_texture_deref; + tex->src[2].src = nir_src_for_ssa(input_img_deref); + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 2; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + } + + for (uint32_t i = 0; i < samples; i++) { + nir_ssa_def *outval = &tex_instr[i]->dest.ssa; + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_image_deref_store); + store->num_components = 4; + store->src[0] = nir_src_for_ssa(output_img_deref); + store->src[1] = nir_src_for_ssa(global_id); + store->src[2] = nir_src_for_ssa(nir_imm_int(&b, i)); + store->src[3] = nir_src_for_ssa(outval); + nir_builder_instr_insert(&b, &store->instr); + } + + return b.shader; +} + +void +radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange) +{ + struct radv_device *device = cmd_buffer->device; + struct radv_meta_saved_state saved_state; + const uint32_t samples = image->info.samples; + const uint32_t samples_log2 = ffs(samples) - 1; + + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | + RADV_META_SAVE_DESCRIPTORS); + + VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2]; + + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + for (unsigned l = 0; l < subresourceRange->layerCount; l++) { + struct radv_image_view iview; + + radv_image_view_init(&iview, device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = subresourceRange->baseArrayLayer + l, + .layerCount = 1, + }, + }); + + radv_meta_push_descriptor_set(cmd_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, + cmd_buffer->device->meta_state.fmask_expand.p_layout, + 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + } + }); + + radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1); + } + + radv_meta_restore(&saved_state, cmd_buffer); + + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_INV_GLOBAL_L2; + + /* Re-initialize FMASK in fully expanded mode. */ + radv_initialize_fmask(cmd_buffer, image); +} + +void radv_device_finish_meta_fmask_expand_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { + radv_DestroyPipeline(radv_device_to_handle(device), + state->fmask_expand.pipeline[i], + &state->alloc); + } + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->fmask_expand.p_layout, + &state->alloc); + + radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->fmask_expand.ds_layout, + &state->alloc); +} + +static VkResult +create_fmask_expand_pipeline(struct radv_device *device, + int samples, + VkPipeline *pipeline) +{ + struct radv_meta_state *state = &device->meta_state; + struct radv_shader_module cs = { .nir = NULL }; + VkResult result; + + cs.nir = build_fmask_expand_compute_shader(device, samples); + + VkPipelineShaderStageCreateInfo pipeline_shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = pipeline_shader_stage, + .flags = 0, + .layout = state->fmask_expand.p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&state->cache), + 1, &vk_pipeline_info, NULL, + pipeline); + + ralloc_free(cs.nir); + return result; +} + +VkResult +radv_device_init_meta_fmask_expand_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + VkResult result; + + VkDescriptorSetLayoutCreateInfo ds_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + } + }; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), + &ds_create_info, &state->alloc, + &state->fmask_expand.ds_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineLayoutCreateInfo color_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &state->fmask_expand.ds_layout, + .pushConstantRangeCount = 0, + .pPushConstantRanges = NULL, + }; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + &color_create_info, &state->alloc, + &state->fmask_expand.p_layout); + if (result != VK_SUCCESS) + goto fail; + + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) { + uint32_t samples = 1 << i; + result = create_fmask_expand_pipeline(device, samples, + &state->fmask_expand.pipeline[i]); + if (result != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; +fail: + radv_device_finish_meta_fmask_expand_state(device); + return result; +} diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 5204e8761d9..b9a1f7e3b7c 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -603,6 +603,12 @@ struct radv_meta_state { VkPipeline pipeline_statistics_query_pipeline; VkPipeline tfb_query_pipeline; } query; + + struct { + VkDescriptorSetLayout ds_layout; + VkPipelineLayout p_layout; + VkPipeline pipeline[MAX_SAMPLES_LOG2]; + } fmask_expand; }; /* queue types */ -- 2.30.2