X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_meta_decompress.c;h=578a287d07b689ceb954ab13e7f99f2a3c07d7a4;hb=b92d87f7f0dc2dbd5b7a3d64a1a9a2863ab7262a;hp=490747511c30d17ad2cc83989acf19ddc239df68;hpb=d8423772cad8245c21d1a63ed4fabb8d9e20b4fa;p=mesa.git diff --git a/src/amd/vulkan/radv_meta_decompress.c b/src/amd/vulkan/radv_meta_decompress.c index 490747511c3..578a287d07b 100644 --- a/src/amd/vulkan/radv_meta_decompress.c +++ b/src/amd/vulkan/radv_meta_decompress.c @@ -26,66 +26,25 @@ #include "radv_meta.h" #include "radv_private.h" -#include "nir/nir_builder.h" #include "sid.h" -/** - * Vertex attributes used by all pipelines. - */ -struct vertex_attrs { - float position[2]; /**< 3DPRIM_RECTLIST */ -}; - -/* passthrough vertex shader */ -static nir_shader * -build_nir_vs(void) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - - nir_builder b; - nir_variable *a_position; - nir_variable *v_position; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info->name = ralloc_strdup(b.shader, "meta_depth_decomp_vs"); - - a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "a_position"); - a_position->data.location = VERT_ATTRIB_GENERIC0; - - v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, - "gl_Position"); - v_position->data.location = VARYING_SLOT_POS; - - nir_copy_var(&b, v_position, a_position); - - return b.shader; -} - -/* simple passthrough shader */ -static nir_shader * -build_nir_fs(void) -{ - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info->name = ralloc_asprintf(b.shader, - "meta_depth_decomp_noop_fs"); - - return b.shader; -} static VkResult -create_pass(struct radv_device *device) +create_pass(struct radv_device *device, + uint32_t samples, + VkRenderPass *pass) { VkResult result; VkDevice device_h = radv_device_to_handle(device); const VkAllocationCallbacks *alloc = &device->meta_state.alloc; VkAttachmentDescription attachment; - attachment.format = VK_FORMAT_UNDEFINED; - attachment.samples = 1; + attachment.flags = 0; + attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT; + attachment.samples = samples; attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; @@ -111,20 +70,54 @@ create_pass(struct radv_device *device) .dependencyCount = 0, }, alloc, - &device->meta_state.depth_decomp.pass); + pass); return result; } +static VkResult +create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout) +{ + VkPipelineLayoutCreateInfo pl_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 0, + .pSetLayouts = NULL, + .pushConstantRangeCount = 0, + .pPushConstantRanges = NULL, + }; + + return radv_CreatePipelineLayout(radv_device_to_handle(device), + &pl_create_info, + &device->meta_state.alloc, + layout); +} + static VkResult create_pipeline(struct radv_device *device, - VkShaderModule vs_module_h) + VkShaderModule vs_module_h, + uint32_t samples, + VkRenderPass pass, + VkPipelineLayout layout, + VkPipeline *decompress_pipeline, + VkPipeline *resummarize_pipeline) { VkResult result; VkDevice device_h = radv_device_to_handle(device); + struct radv_shader_module vs_module = {0}; + + mtx_lock(&device->meta_state.mtx); + if (*decompress_pipeline) { + mtx_unlock(&device->meta_state.mtx); + return VK_SUCCESS; + } + + if (!vs_module_h) { + vs_module.nir = radv_meta_build_nir_vs_generate_vertices(); + vs_module_h = radv_shader_module_to_handle(&vs_module); + } struct radv_shader_module fs_module = { - .nir = build_nir_fs(), + .nir = radv_meta_build_nir_fs_noop(), }; if (!fs_module.nir) { @@ -133,6 +126,11 @@ create_pipeline(struct radv_device *device, goto cleanup; } + const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT, + .sampleLocationsEnable = false, + }; + const VkGraphicsPipelineCreateInfo pipeline_create_info = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 2, @@ -152,24 +150,8 @@ create_pipeline(struct radv_device *device, }, .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = sizeof(struct vertex_attrs), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 1, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* Position */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct vertex_attrs, position), - }, - }, + .vertexBindingDescriptionCount = 0, + .vertexAttributeDescriptionCount = 0, }, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, @@ -178,8 +160,8 @@ create_pipeline(struct radv_device *device, }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 0, - .scissorCount = 0, + .viewportCount = 1, + .scissorCount = 1, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, @@ -191,7 +173,8 @@ create_pipeline(struct radv_device *device, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, + .pNext = &sample_locs_create_info, + .rasterizationSamples = samples, .sampleShadingEnable = false, .pSampleMask = NULL, .alphaToCoverageEnable = false, @@ -210,8 +193,17 @@ create_pipeline(struct radv_device *device, .depthBoundsTestEnable = false, .stencilTestEnable = false, }, - .pDynamicState = NULL, - .renderPass = device->meta_state.depth_decomp.pass, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 3, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT, + }, + }, + .layout = layout, + .renderPass = pass, .subpass = 0, }; @@ -224,7 +216,7 @@ create_pipeline(struct radv_device *device, .db_flush_stencil_inplace = true, }, &device->meta_state.alloc, - &device->meta_state.depth_decomp.decompress_pipeline); + decompress_pipeline); if (result != VK_SUCCESS) goto cleanup; @@ -238,7 +230,7 @@ create_pipeline(struct radv_device *device, .db_resummarize = true, }, &device->meta_state.alloc, - &device->meta_state.depth_decomp.resummarize_pipeline); + resummarize_pipeline); if (result != VK_SUCCESS) goto cleanup; @@ -246,6 +238,9 @@ create_pipeline(struct radv_device *device, cleanup: ralloc_free(fs_module.nir); + if (vs_module.nir) + ralloc_free(vs_module.nir); + mtx_unlock(&device->meta_state.mtx); return result; } @@ -253,46 +248,61 @@ void radv_device_finish_meta_depth_decomp_state(struct radv_device *device) { struct radv_meta_state *state = &device->meta_state; - VkDevice device_h = radv_device_to_handle(device); - VkRenderPass pass_h = device->meta_state.depth_decomp.pass; - const VkAllocationCallbacks *alloc = &device->meta_state.alloc; - if (pass_h) - radv_DestroyRenderPass(device_h, pass_h, - &device->meta_state.alloc); - - VkPipeline pipeline_h = state->depth_decomp.decompress_pipeline; - if (pipeline_h) { - radv_DestroyPipeline(device_h, pipeline_h, alloc); - } - pipeline_h = state->depth_decomp.resummarize_pipeline; - if (pipeline_h) { - radv_DestroyPipeline(device_h, pipeline_h, alloc); + for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) { + radv_DestroyRenderPass(radv_device_to_handle(device), + state->depth_decomp[i].pass, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->depth_decomp[i].p_layout, + &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), + state->depth_decomp[i].decompress_pipeline, + &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), + state->depth_decomp[i].resummarize_pipeline, + &state->alloc); } } VkResult -radv_device_init_meta_depth_decomp_state(struct radv_device *device) +radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand) { + struct radv_meta_state *state = &device->meta_state; VkResult res = VK_SUCCESS; - zero(device->meta_state.depth_decomp); - - struct radv_shader_module vs_module = { .nir = build_nir_vs() }; + struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() }; if (!vs_module.nir) { /* XXX: Need more accurate error */ res = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail; } - res = create_pass(device); - if (res != VK_SUCCESS) - goto fail; - VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module); - res = create_pipeline(device, vs_module_h); - if (res != VK_SUCCESS) - goto fail; + + for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) { + uint32_t samples = 1 << i; + + res = create_pass(device, samples, &state->depth_decomp[i].pass); + if (res != VK_SUCCESS) + goto fail; + + res = create_pipeline_layout(device, + &state->depth_decomp[i].p_layout); + if (res != VK_SUCCESS) + goto fail; + + if (on_demand) + continue; + + res = create_pipeline(device, vs_module_h, samples, + state->depth_decomp[i].pass, + state->depth_decomp[i].p_layout, + &state->depth_decomp[i].decompress_pipeline, + &state->depth_decomp[i].resummarize_pipeline); + if (res != VK_SUCCESS) + goto fail; + } goto cleanup; @@ -305,90 +315,100 @@ cleanup: return res; } -static void -emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer, - const VkOffset2D *dest_offset, - const VkExtent2D *depth_decomp_extent, - VkPipeline pipeline_h) -{ - struct radv_device *device = cmd_buffer->device; - VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - uint32_t offset; - const struct vertex_attrs vertex_data[3] = { - { - .position = { - dest_offset->x, - dest_offset->y, - }, - }, - { - .position = { - dest_offset->x, - dest_offset->y + depth_decomp_extent->height, - }, - }, - { - .position = { - dest_offset->x + depth_decomp_extent->width, - dest_offset->y, - }, - }, - }; - - radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset); - struct radv_buffer vertex_buffer = { - .device = device, - .size = sizeof(vertex_data), - .bo = cmd_buffer->upload.upload_bo, - .offset = offset, - }; - - VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer); - - radv_CmdBindVertexBuffers(cmd_buffer_h, - /*firstBinding*/ 0, - /*bindingCount*/ 1, - (VkBuffer[]) { vertex_buffer_h }, - (VkDeviceSize[]) { 0 }); - - RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h); - - if (cmd_buffer->state.pipeline != pipeline) { - radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - } - - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); -} - +enum radv_depth_op { + DEPTH_DECOMPRESS, + DEPTH_RESUMMARIZE, +}; static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageSubresourceRange *subresourceRange, - VkPipeline pipeline_h) + struct radv_sample_locations_state *sample_locs, + enum radv_depth_op op) { struct radv_meta_saved_state saved_state; - struct radv_meta_saved_pass_state saved_pass_state; VkDevice device_h = radv_device_to_handle(cmd_buffer->device); VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - uint32_t width = radv_minify(image->extent.width, + uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel); - uint32_t height = radv_minify(image->extent.height, + uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel); + uint32_t samples = image->info.samples; + uint32_t samples_log2 = ffs(samples) - 1; + struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state; + VkPipeline pipeline_h; - if (!image->htile.size) + if (!radv_image_has_htile(image)) return; - radv_meta_save_pass(&saved_pass_state, cmd_buffer); - radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer); + if (!meta_state->depth_decomp[samples_log2].decompress_pipeline) { + VkResult ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples, + meta_state->depth_decomp[samples_log2].pass, + meta_state->depth_decomp[samples_log2].p_layout, + &meta_state->depth_decomp[samples_log2].decompress_pipeline, + &meta_state->depth_decomp[samples_log2].resummarize_pipeline); + if (ret != VK_SUCCESS) { + cmd_buffer->record_result = ret; + return; + } + } + + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_GRAPHICS_PIPELINE | + RADV_META_SAVE_SAMPLE_LOCATIONS | + RADV_META_SAVE_PASS); + + switch (op) { + case DEPTH_DECOMPRESS: + pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline; + break; + case DEPTH_RESUMMARIZE: + pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline; + break; + default: + unreachable("unknown operation"); + } + + radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + + radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) { + .x = 0, + .y = 0, + .width = width, + .height = height, + .minDepth = 0.0f, + .maxDepth = 1.0f + }); + + radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) { + .offset = { 0, 0 }, + .extent = { width, height }, + }); + + if (sample_locs) { + assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT); + + /* Set the sample locations specified during explicit or + * automatic layout transitions, otherwise the depth decompress + * pass uses the default HW locations. + */ + radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) { + .sampleLocationsPerPixel = sample_locs->per_pixel, + .sampleLocationGridSize = sample_locs->grid_size, + .sampleLocationsCount = sample_locs->count, + .pSampleLocations = sample_locs->locations, + }); + } - for (uint32_t layer = 0; layer < subresourceRange->layerCount; layer++) { + for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) { struct radv_image_view iview; radv_image_view_init(&iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), .format = image->vk_format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, @@ -397,8 +417,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, .baseArrayLayer = subresourceRange->baseArrayLayer + layer, .layerCount = 1, }, - }, - cmd_buffer, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + }); VkFramebuffer fb_h; @@ -419,7 +438,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, radv_CmdBeginRenderPass(cmd_buffer_h, &(VkRenderPassBeginInfo) { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = cmd_buffer->device->meta_state.depth_decomp.pass, + .renderPass = meta_state->depth_decomp[samples_log2].pass, .framebuffer = fb_h, .renderArea = { .offset = { @@ -436,30 +455,31 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, }, VK_SUBPASS_CONTENTS_INLINE); - emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0 }, &(VkExtent2D){width, height}, pipeline_h); + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); radv_CmdEndRenderPass(cmd_buffer_h); radv_DestroyFramebuffer(device_h, fb_h, &cmd_buffer->pool->alloc); } radv_meta_restore(&saved_state, cmd_buffer); - radv_meta_restore_pass(&saved_pass_state, cmd_buffer); } void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageSubresourceRange *subresourceRange) + VkImageSubresourceRange *subresourceRange, + struct radv_sample_locations_state *sample_locs) { assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, - cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline); + sample_locs, DEPTH_DECOMPRESS); } void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkImageSubresourceRange *subresourceRange) + VkImageSubresourceRange *subresourceRange, + struct radv_sample_locations_state *sample_locs) { assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, - cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline); + sample_locs, DEPTH_RESUMMARIZE); }