From: Samuel Pitoiset
Date: Fri, 12 Oct 2018 09:30:13 +0000 (+0200)
Subject: radv: implement buffer to image operations for R32G32B32
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=593996bc026c9e383da9683ff30e784b0ea09015;p=mesa.git

radv: implement buffer to image operations for R32G32B32

This should fix rendering issues with Batman Arkham City.

We will probably need to implement itob and itoi at some point, but
currently nothing hits these paths.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107765
Signed-off-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
---

diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c
index 9efb971638c..73a50342220 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -483,6 +483,214 @@ radv_device_finish_meta_btoi_state(struct radv_device *device)
 			     state->btoi.pipeline_3d, &state->alloc);
 }
 
+/* Buffer to image - special path for R32G32B32 */
+static nir_shader *
+build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
+{
+	nir_builder b;
+	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
+							     false,
+							     false,
+							     GLSL_TYPE_FLOAT);
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
+	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+						      buf_type, "s_tex");
+	input_img->data.descriptor_set = 0;
+	input_img->data.binding = 0;
+
+	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+						       img_type, "out_img");
+	output_img->data.descriptor_set = 0;
+	output_img->data.binding = 1;
+
+	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
+	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);
+
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(offset, 0);
+	nir_intrinsic_set_range(offset, 16);
+	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	offset->num_components = 2;
+	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
+	nir_builder_instr_insert(&b, &offset->instr);
+
+	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(pitch, 0);
+	nir_intrinsic_set_range(pitch, 16);
+	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+	pitch->num_components = 1;
+	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
+	nir_builder_instr_insert(&b, &pitch->instr);
+
+	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(stride, 0);
+	nir_intrinsic_set_range(stride, 16);
+	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
+	stride->num_components = 1;
+	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
+	nir_builder_instr_insert(&b, &stride->instr);
+
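+	/* Each source R32G32B32 texel is fetched at index y * stride + x and
+	 * written back as three consecutive R32 texels starting at index
+	 * y * pitch + x * 3 in the destination buffer view.
+	 */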
+	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+
+	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
+	tmp = nir_iadd(&b, tmp, pos_x);
+
+	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+
+	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
+
+	nir_ssa_def *global_pos =
+		nir_iadd(&b,
+			 nir_imul(&b, pos_y, &pitch->dest.ssa),
+			 nir_imul(&b, pos_x, nir_imm_int(&b, 3)));
+
+	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+	tex->op = nir_texop_txf;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
+	tex->src[1].src_type = nir_tex_src_lod;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+	tex->src[2].src_type = nir_tex_src_texture_deref;
+	tex->src[2].src = nir_src_for_ssa(input_img_deref);
+	tex->dest_type = nir_type_float;
+	tex->is_array = false;
+	tex->coord_components = 1;
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(&b, &tex->instr);
+
+	nir_ssa_def *outval = &tex->dest.ssa;
+
+	for (int chan = 0; chan < 3; chan++) {
+		nir_ssa_def *local_pos =
+			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
+
+		nir_ssa_def *coord =
+			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
+
+		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+		store->num_components = 1;
+		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
+		store->src[1] = nir_src_for_ssa(coord);
+		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
+		nir_builder_instr_insert(&b, &store->instr);
+	}
+
+	return b.shader;
+}
+
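+/* The shader above reads the source data from binding 0, a uniform texel
+ * buffer, and writes through binding 1, an R32 storage texel buffer that
+ * aliases the linear destination image memory. The descriptor set and
+ * pipeline layouts below describe that interface.
+ */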
+static VkResult
+radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
+{
+	VkResult result;
+	struct radv_shader_module cs = { .nir = NULL };
+
+	cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
+
+	VkDescriptorSetLayoutCreateInfo ds_create_info = {
+		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+		.bindingCount = 2,
+		.pBindings = (VkDescriptorSetLayoutBinding[]) {
+			{
+				.binding = 0,
+				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+				.descriptorCount = 1,
+				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+				.pImmutableSamplers = NULL
+			},
+			{
+				.binding = 1,
+				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+				.descriptorCount = 1,
+				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+				.pImmutableSamplers = NULL
+			},
+		}
+	};
+
+	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
+						&ds_create_info,
+						&device->meta_state.alloc,
+						&device->meta_state.btoi_r32g32b32.img_ds_layout);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+
+	VkPipelineLayoutCreateInfo pl_create_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 1,
+		.pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
+		.pushConstantRangeCount = 1,
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+	};
+
+	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+					   &pl_create_info,
+					   &device->meta_state.alloc,
+					   &device->meta_state.btoi_r32g32b32.img_p_layout);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	/* compute shader */
+
+	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+		.module = radv_shader_module_to_handle(&cs),
+		.pName = "main",
+		.pSpecializationInfo = NULL,
+	};
+
+	VkComputePipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+		.stage = pipeline_shader_stage,
+		.flags = 0,
+		.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
+	};
+
+	result = radv_CreateComputePipelines(radv_device_to_handle(device),
+					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					     1, &vk_pipeline_info, NULL,
+					     &device->meta_state.btoi_r32g32b32.pipeline);
+
+fail:
+	ralloc_free(cs.nir);
+	return result;
+}
+
+static void
+radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
+{
+	struct radv_meta_state *state = &device->meta_state;
+
+	radv_DestroyPipelineLayout(radv_device_to_handle(device),
+				   state->btoi_r32g32b32.img_p_layout, &state->alloc);
+	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+					state->btoi_r32g32b32.img_ds_layout,
+					&state->alloc);
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->btoi_r32g32b32.pipeline, &state->alloc);
+}
+
 static nir_shader *
 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
 {
@@ -1056,6 +1264,7 @@ radv_device_finish_meta_bufimage_state(struct radv_device *device)
 {
 	radv_device_finish_meta_itob_state(device);
 	radv_device_finish_meta_btoi_state(device);
+	radv_device_finish_meta_btoi_r32g32b32_state(device);
 	radv_device_finish_meta_itoi_state(device);
 	radv_device_finish_meta_cleari_state(device);
 	radv_device_finish_meta_cleari_r32g32b32_state(device);
@@ -1074,6 +1283,10 @@ radv_device_init_meta_bufimage_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail_btoi;
 
+	result = radv_device_init_meta_btoi_r32g32b32_state(device);
+	if (result != VK_SUCCESS)
+		goto fail_btoi_r32g32b32;
+
 	result = radv_device_init_meta_itoi_state(device);
 	if (result != VK_SUCCESS)
 		goto fail_itoi;
@@ -1093,6 +1306,8 @@ fail_cleari:
 	radv_device_finish_meta_cleari_state(device);
 fail_itoi:
 	radv_device_finish_meta_itoi_state(device);
+fail_btoi_r32g32b32:
+	radv_device_finish_meta_btoi_r32g32b32_state(device);
 fail_btoi:
 	radv_device_finish_meta_btoi_state(device);
 fail_itob:
@@ -1219,6 +1434,125 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 	}
 }
 
+static void
+btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
+				struct radv_buffer_view *src,
+				struct radv_buffer_view *dst)
+{
+	struct radv_device *device = cmd_buffer->device;
+
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      device->meta_state.btoi_r32g32b32.img_p_layout,
+				      0, /* set */
+				      2, /* descriptorWriteCount */
+				      (VkWriteDescriptorSet[]) {
+					      {
+						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+						      .dstBinding = 0,
+						      .dstArrayElement = 0,
+						      .descriptorCount = 1,
+						      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
+					      },
+					      {
+						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+						      .dstBinding = 1,
+						      .dstArrayElement = 0,
+						      .descriptorCount = 1,
+						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
+					      }
+				      });
+}
+
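+/* The 16 bytes of push constants consumed by the R32G32B32 btoi shader:
+ * dwords 0-1 hold the destination offset (x, y), dword 2 the destination
+ * row pitch (in 32-bit texels, as used by the shader) and dword 3 the
+ * source buffer row pitch in texels.
+ */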
+static void
+radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
+				       struct radv_meta_blit2d_buffer *src,
+				       struct radv_meta_blit2d_surf *dst,
+				       unsigned num_rects,
+				       struct radv_meta_blit2d_rect *rects)
+{
+	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
+	struct radv_device_memory mem = { .bo = dst->image->bo };
+	struct radv_device *device = cmd_buffer->device;
+	struct radv_buffer_view src_view, dst_view;
+	unsigned dst_offset = 0;
+	unsigned stride;
+	VkFormat dst_format;
+	VkBuffer buffer;
+
+	switch (dst->format) {
+	case VK_FORMAT_R32G32B32_UINT:
+		dst_format = VK_FORMAT_R32_UINT;
+		break;
+	case VK_FORMAT_R32G32B32_SINT:
+		dst_format = VK_FORMAT_R32_SINT;
+		break;
+	case VK_FORMAT_R32G32B32_SFLOAT:
+		dst_format = VK_FORMAT_R32_SFLOAT;
+		break;
+	default:
+		unreachable("invalid R32G32B32 format");
+	}
+
+	/* This special btoi path for R32G32B32 formats will write the linear
+	 * image as a buffer with the same underlying memory. The compute
+	 * shader will copy all components separately using a R32 format.
+	 */
+	radv_CreateBuffer(radv_device_to_handle(device),
+			  &(VkBufferCreateInfo) {
+				.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+				.flags = 0,
+				.size = dst->image->size,
+				.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
+				.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+			  }, NULL, &buffer);
+
+	radv_BindBufferMemory2(radv_device_to_handle(device), 1,
+			       (VkBindBufferMemoryInfoKHR[]) {
+				       {
+					       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+					       .buffer = buffer,
+					       .memory = radv_device_memory_to_handle(&mem),
+					       .memoryOffset = dst->image->offset,
+				       }
+			       });
+
+	create_bview(cmd_buffer, src->buffer, src->offset,
+		     src->format, &src_view);
+	create_bview(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset,
+		     dst_format, &dst_view);
+	btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+		stride = dst->image->surface.u.gfx9.surf_pitch;
+	} else {
+		stride = dst->image->surface.u.legacy.level[0].nblk_x * 3;
+	}
+
+	for (unsigned r = 0; r < num_rects; ++r) {
+		unsigned push_constants[4] = {
+			rects[r].dst_x,
+			rects[r].dst_y,
+			stride,
+			src->pitch,
+		};
+
+		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+				      device->meta_state.btoi_r32g32b32.img_p_layout,
+				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+				      push_constants);
+
+		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+	}
+
+	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
+}
+
 static void
 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 		      struct radv_buffer_view *src,
@@ -1269,6 +1603,14 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_buffer_view src_view;
 	struct radv_image_view dst_view;
 
+	if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+	    dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+	    dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+		radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
+						       num_rects, rects);
+		return;
+	}
+
 	create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
 	create_iview(cmd_buffer, dst, &dst_view);
 	btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index f4de5528edf..41da302cf84 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -195,10 +195,14 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 
 		/* Perform Blit */
-		if (cs)
+		if (cs ||
+		    (img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+		     img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+		     img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)) {
 			radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
-		else
+		} else {
 			radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
+		}
 
 		/* Once we've done the blit, all of the actual information about
 		 * the image is embedded in the command buffer so we can just
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b35aa8d818f..0464fa4a412 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -505,6 +505,11 @@ struct radv_meta_state {
 		VkPipeline pipeline;
 		VkPipeline pipeline_3d;
 	} btoi;
+	struct {
+		VkPipelineLayout img_p_layout;
+		VkDescriptorSetLayout img_ds_layout;
+		VkPipeline pipeline;
+	} btoi_r32g32b32;
 	struct {
 		VkPipelineLayout img_p_layout;
 		VkDescriptorSetLayout img_ds_layout;