radv: Only save the descriptor set if we have one.
[mesa.git] / src / amd / vulkan / radv_meta_decompress.c
index 854b88a3622edaabdad9792286ce464965379337..fa5de24314a9ab086566d3602da2b75cd62f925c 100644 (file)
 
 #include "radv_meta.h"
 #include "radv_private.h"
-#include "nir/nir_builder.h"
 #include "sid.h"
-/**
- * Vertex attributes used by all pipelines.
- */
-struct vertex_attrs {
-       float position[2]; /**< 3DPRIM_RECTLIST */
-};
-
-/* passthrough vertex shader */
-static nir_shader *
-build_nir_vs(void)
-{
-       const struct glsl_type *vec4 = glsl_vec4_type();
-
-       nir_builder b;
-       nir_variable *a_position;
-       nir_variable *v_position;
-
-       nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-       b.shader->info->name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");
-
-       a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-                                        "a_position");
-       a_position->data.location = VERT_ATTRIB_GENERIC0;
-
-       v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
-                                        "gl_Position");
-       v_position->data.location = VARYING_SLOT_POS;
-
-       nir_copy_var(&b, v_position, a_position);
-
-       return b.shader;
-}
-
-/* simple passthrough shader */
-static nir_shader *
-build_nir_fs(void)
-{
-       nir_builder b;
-
-       nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-       b.shader->info->name = ralloc_asprintf(b.shader,
-                                              "meta_depth_decomp_noop_fs");
-
-       return b.shader;
-}
 
 static VkResult
-create_pass(struct radv_device *device)
+create_pass(struct radv_device *device,
+           uint32_t samples,
+           VkRenderPass *pass)
 {
        VkResult result;
        VkDevice device_h = radv_device_to_handle(device);
        const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
        VkAttachmentDescription attachment;
 
-       attachment.format = VK_FORMAT_UNDEFINED;
-       attachment.samples = 1;
+       attachment.flags = 0;
+       attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
+       attachment.samples = samples;
        attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
        attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+       attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+       attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
        attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
 
@@ -111,20 +70,54 @@ create_pass(struct radv_device *device)
                                                                .dependencyCount = 0,
                                                                   },
                                       alloc,
-                                      &device->meta_state.depth_decomp.pass);
+                                      pass);
 
        return result;
 }
 
+static VkResult
+create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
+{ /* Creates an empty pipeline layout and stores its handle in *layout;
+   * the VkResult of radv_CreatePipelineLayout() is returned unchanged.
+   */
+       VkPipelineLayoutCreateInfo pl_create_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+               .setLayoutCount = 0, /* no descriptor set layouts */
+               .pSetLayouts = NULL,
+               .pushConstantRangeCount = 0, /* no push constant ranges */
+               .pPushConstantRanges = NULL,
+       };
+
+       return radv_CreatePipelineLayout(radv_device_to_handle(device),
+                                        &pl_create_info,
+                                        &device->meta_state.alloc, /* meta-state allocation callbacks */
+                                        layout);
+}
+
 static VkResult
 create_pipeline(struct radv_device *device,
-                VkShaderModule vs_module_h)
+                VkShaderModule vs_module_h,
+               uint32_t samples,
+               VkRenderPass pass,
+               VkPipelineLayout layout,
+               VkPipeline *decompress_pipeline,
+               VkPipeline *resummarize_pipeline)
 {
        VkResult result;
        VkDevice device_h = radv_device_to_handle(device);
+       struct radv_shader_module vs_module = {0};
+
+       mtx_lock(&device->meta_state.mtx);
+       if (*decompress_pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
+       if (!vs_module_h) {
+               vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
+               vs_module_h = radv_shader_module_to_handle(&vs_module);
+       }
 
        struct radv_shader_module fs_module = {
-               .nir = build_nir_fs(),
+               .nir = radv_meta_build_nir_fs_noop(),
        };
 
        if (!fs_module.nir) {
@@ -133,6 +126,11 @@ create_pipeline(struct radv_device *device,
                goto cleanup;
        }
 
+       const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
+               .sampleLocationsEnable = false,
+       };
+
        const VkGraphicsPipelineCreateInfo pipeline_create_info = {
                .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
                .stageCount = 2,
@@ -152,24 +150,8 @@ create_pipeline(struct radv_device *device,
                },
                .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-                       .vertexBindingDescriptionCount = 1,
-                       .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-                               {
-                                       .binding = 0,
-                                       .stride = sizeof(struct vertex_attrs),
-                                       .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-                               },
-                       },
-                       .vertexAttributeDescriptionCount = 1,
-                       .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-                               {
-                                       /* Position */
-                                       .location = 0,
-                                       .binding = 0,
-                                       .format = VK_FORMAT_R32G32_SFLOAT,
-                                       .offset = offsetof(struct vertex_attrs, position),
-                               },
-                       },
+                       .vertexBindingDescriptionCount = 0,
+                       .vertexAttributeDescriptionCount = 0,
                },
                .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
@@ -191,7 +173,8 @@ create_pipeline(struct radv_device *device,
                },
                .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-                       .rasterizationSamples = 1,
+                       .pNext = &sample_locs_create_info,
+                       .rasterizationSamples = samples,
                        .sampleShadingEnable = false,
                        .pSampleMask = NULL,
                        .alphaToCoverageEnable = false,
@@ -212,13 +195,15 @@ create_pipeline(struct radv_device *device,
                },
                .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-                       .dynamicStateCount = 2,
+                       .dynamicStateCount = 3,
                        .pDynamicStates = (VkDynamicState[]) {
                                VK_DYNAMIC_STATE_VIEWPORT,
                                VK_DYNAMIC_STATE_SCISSOR,
+                               VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
                        },
                },
-               .renderPass = device->meta_state.depth_decomp.pass,
+               .layout = layout,
+               .renderPass = pass,
                .subpass = 0,
        };
 
@@ -231,7 +216,7 @@ create_pipeline(struct radv_device *device,
                                                        .db_flush_stencil_inplace = true,
                                               },
                                               &device->meta_state.alloc,
-                                              &device->meta_state.depth_decomp.decompress_pipeline);
+                                              decompress_pipeline);
        if (result != VK_SUCCESS)
                goto cleanup;
 
@@ -245,7 +230,7 @@ create_pipeline(struct radv_device *device,
                                                        .db_resummarize = true,
                                               },
                                               &device->meta_state.alloc,
-                                              &device->meta_state.depth_decomp.resummarize_pipeline);
+                                              resummarize_pipeline);
        if (result != VK_SUCCESS)
                goto cleanup;
 
@@ -253,6 +238,9 @@ create_pipeline(struct radv_device *device,
 
 cleanup:
        ralloc_free(fs_module.nir);
+       if (vs_module.nir)
+               ralloc_free(vs_module.nir);
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -260,46 +248,61 @@ void
 radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
 {
        struct radv_meta_state *state = &device->meta_state;
-       VkDevice device_h = radv_device_to_handle(device);
-       VkRenderPass pass_h = device->meta_state.depth_decomp.pass;
-       const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
-
-       if (pass_h)
-               radv_DestroyRenderPass(device_h, pass_h,
-                                            &device->meta_state.alloc);
 
-       VkPipeline pipeline_h = state->depth_decomp.decompress_pipeline;
-       if (pipeline_h) {
-               radv_DestroyPipeline(device_h, pipeline_h, alloc);
-       }
-       pipeline_h = state->depth_decomp.resummarize_pipeline;
-       if (pipeline_h) {
-               radv_DestroyPipeline(device_h, pipeline_h, alloc);
+       for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) { /* Release the render pass, the pipeline
+                * layout and both pipelines of every depth_decomp[] entry, using
+                * the meta-state allocation callbacks.
+                * NOTE(review): unlike the removed code, the handles are destroyed
+                * without a null check, and the pipelines may never have been
+                * created when init ran with on_demand set - presumably the
+                * radv_Destroy* entry points ignore null handles; confirm.
+                */
+               radv_DestroyRenderPass(radv_device_to_handle(device),
+                                      state->depth_decomp[i].pass,
+                                      &state->alloc);
+               radv_DestroyPipelineLayout(radv_device_to_handle(device),
+                                          state->depth_decomp[i].p_layout,
+                                          &state->alloc);
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->depth_decomp[i].decompress_pipeline,
+                                    &state->alloc);
+               radv_DestroyPipeline(radv_device_to_handle(device),
+                                    state->depth_decomp[i].resummarize_pipeline,
+                                    &state->alloc);
        }
 }
 
 VkResult
-radv_device_init_meta_depth_decomp_state(struct radv_device *device)
+radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
 {
+       struct radv_meta_state *state = &device->meta_state;
        VkResult res = VK_SUCCESS;
 
-       zero(device->meta_state.depth_decomp);
-
-       struct radv_shader_module vs_module = { .nir = build_nir_vs() };
+       struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
        if (!vs_module.nir) {
                /* XXX: Need more accurate error */
                res = VK_ERROR_OUT_OF_HOST_MEMORY;
                goto fail;
        }
 
-       res = create_pass(device);
-       if (res != VK_SUCCESS)
-               goto fail;
-
        VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
-       res = create_pipeline(device, vs_module_h);
-       if (res != VK_SUCCESS)
-               goto fail;
+
+       for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+               uint32_t samples = 1 << i;
+
+               res = create_pass(device, samples, &state->depth_decomp[i].pass);
+               if (res != VK_SUCCESS)
+                       goto fail;
+
+               res = create_pipeline_layout(device,
+                                            &state->depth_decomp[i].p_layout);
+               if (res != VK_SUCCESS)
+                       goto fail;
+
+               if (on_demand)
+                       continue;
+
+               res = create_pipeline(device, vs_module_h, samples,
+                                     state->depth_decomp[i].pass,
+                                     state->depth_decomp[i].p_layout,
+                                     &state->depth_decomp[i].decompress_pipeline,
+                                     &state->depth_decomp[i].resummarize_pipeline);
+               if (res != VK_SUCCESS)
+                       goto fail;
+       }
 
        goto cleanup;
 
@@ -312,96 +315,107 @@ cleanup:
        return res;
 }
 
-static void
-emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
-                 const VkOffset2D *dest_offset,
-                 const VkExtent2D *depth_decomp_extent,
-                 VkPipeline pipeline_h)
-{
-       struct radv_device *device = cmd_buffer->device;
-       VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-       uint32_t offset;
-       const struct vertex_attrs vertex_data[3] = {
-               {
-                       .position = {
-                               -1.0,
-                               -1.0,
-                       },
-               },
-               {
-                       .position = {
-                               -1.0,
-                               1.0,
-                       },
-               },
-               {
-                       .position = {
-                               1.0,
-                               -1.0,
-                       },
-               },
-       };
-
-       radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
-       struct radv_buffer vertex_buffer = {
-               .device = device,
-               .size = sizeof(vertex_data),
-               .bo = cmd_buffer->upload.upload_bo,
-               .offset = offset,
-       };
-
-       VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer);
-
-       radv_CmdBindVertexBuffers(cmd_buffer_h,
-                                 /*firstBinding*/ 0,
-                                 /*bindingCount*/ 1,
-                                 (VkBuffer[]) { vertex_buffer_h },
-                                 (VkDeviceSize[]) { 0 });
-
-       RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h);
+enum radv_depth_op { /* which of the two per-sample-count depth pipelines to use */
+       DEPTH_DECOMPRESS, /* selects depth_decomp[].decompress_pipeline */
+       DEPTH_RESUMMARIZE, /* selects depth_decomp[].resummarize_pipeline */
+};
 
-       if (cmd_buffer->state.pipeline != pipeline) {
-               radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                    pipeline_h);
+static VkPipeline *
+radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
+                       struct radv_image *image, enum radv_depth_op op)
+{ /* Returns the address of the pipeline handle matching `op` in the
+   * depth_decomp[] entry for the image's sample count, creating both
+   * pipelines of that entry first if the decompress one is still null.
+   * On creation failure the error is stored in cmd_buffer->record_result
+   * and NULL is returned.
+   */
+       struct radv_meta_state *state = &cmd_buffer->device->meta_state;
+       uint32_t samples = image->info.samples;
+       uint32_t samples_log2 = ffs(samples) - 1; /* ffs() is the 1-based position of the lowest set bit, so this is
+                                                  * log2(samples) for a power-of-two count; init fills
+                                                  * depth_decomp[i] for samples = 1 << i */
+       VkPipeline *pipeline;
+
+       if (!state->depth_decomp[samples_log2].decompress_pipeline) { /* only the decompress handle is tested;
+                                                                      * create_pipeline fills both handles */
+               VkResult ret;
+
+               ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples, /* null VS handle: create_pipeline
+                                                                                   * builds its own vertex shader */
+                                     state->depth_decomp[samples_log2].pass,
+                                     state->depth_decomp[samples_log2].p_layout,
+                                     &state->depth_decomp[samples_log2].decompress_pipeline,
+                                     &state->depth_decomp[samples_log2].resummarize_pipeline);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return NULL; /* NOTE(review): radv_process_depth_image_inplace dereferences the
+                                     * returned pointer without checking for NULL */
+               }
+       }
+
+       switch (op) {
+       case DEPTH_DECOMPRESS:
+               pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
+               break;
+       case DEPTH_RESUMMARIZE:
+               pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
+               break;
+       default:
+               unreachable("unknown operation");
        }
 
-       radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
-               .x = dest_offset->x,
-               .y = dest_offset->y,
-               .width = depth_decomp_extent->width,
-               .height = depth_decomp_extent->height,
-               .minDepth = 0.0f,
-               .maxDepth = 1.0f
-       });
-
-       radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
-               .offset = *dest_offset,
-               .extent = *depth_decomp_extent,
-       });
-
-       radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+       return pipeline;
 }
 
-
 static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                             struct radv_image *image,
                                             VkImageSubresourceRange *subresourceRange,
-                                            VkPipeline pipeline_h)
+                                            struct radv_sample_locations_state *sample_locs,
+                                            enum radv_depth_op op)
 {
        struct radv_meta_saved_state saved_state;
-       struct radv_meta_saved_pass_state saved_pass_state;
        VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
        VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-       uint32_t width = radv_minify(image->extent.width,
+       uint32_t width = radv_minify(image->info.width,
                                     subresourceRange->baseMipLevel);
-       uint32_t height = radv_minify(image->extent.height,
+       uint32_t height = radv_minify(image->info.height,
                                     subresourceRange->baseMipLevel);
+       uint32_t samples = image->info.samples;
+       uint32_t samples_log2 = ffs(samples) - 1;
+       struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+       VkPipeline *pipeline;
 
-       if (!image->surface.htile_size)
+       if (!radv_image_has_htile(image))
                return;
-       radv_meta_save_pass(&saved_pass_state, cmd_buffer);
 
-       radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+       radv_meta_save(&saved_state, cmd_buffer,
+                      RADV_META_SAVE_GRAPHICS_PIPELINE |
+                      RADV_META_SAVE_SAMPLE_LOCATIONS |
+                      RADV_META_SAVE_PASS);
+
+       pipeline = radv_get_depth_pipeline(cmd_buffer, image, op);
+
+       radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+                            VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+       radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) {
+               .x = 0,
+               .y = 0,
+               .width = width,
+               .height = height,
+               .minDepth = 0.0f,
+               .maxDepth = 1.0f
+       });
+
+       radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) {
+               .offset = { 0, 0 },
+               .extent = { width, height },
+       });
+
+       if (sample_locs) {
+               assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
+
+               /* Set the sample locations specified during explicit or
+                * automatic layout transitions, otherwise the depth decompress
+                * pass uses the default HW locations.
+                */
+               radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
+                       .sampleLocationsPerPixel = sample_locs->per_pixel,
+                       .sampleLocationGridSize = sample_locs->grid_size,
+                       .sampleLocationsCount = sample_locs->count,
+                       .pSampleLocations = sample_locs->locations,
+               });
+       }
 
        for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
                struct radv_image_view iview;
@@ -410,6 +424,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                     &(VkImageViewCreateInfo) {
                                             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                                             .image = radv_image_to_handle(image),
+                                            .viewType = radv_meta_get_view_type(image),
                                             .format = image->vk_format,
                                             .subresourceRange = {
                                                     .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
@@ -418,8 +433,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                                     .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
                                                     .layerCount = 1,
                                             },
-                                    },
-                                    cmd_buffer, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
+                                    });
 
 
                VkFramebuffer fb_h;
@@ -440,7 +454,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                radv_CmdBeginRenderPass(cmd_buffer_h,
                                              &(VkRenderPassBeginInfo) {
                                                      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-                                                             .renderPass = cmd_buffer->device->meta_state.depth_decomp.pass,
+                                                             .renderPass = meta_state->depth_decomp[samples_log2].pass,
                                                              .framebuffer = fb_h,
                                                              .renderArea = {
                                                              .offset = {
@@ -457,30 +471,31 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                           },
                                           VK_SUBPASS_CONTENTS_INLINE);
 
-               emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0 }, &(VkExtent2D){width, height}, pipeline_h);
+               radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
                radv_CmdEndRenderPass(cmd_buffer_h);
 
                radv_DestroyFramebuffer(device_h, fb_h,
                                        &cmd_buffer->pool->alloc);
        }
        radv_meta_restore(&saved_state, cmd_buffer);
-       radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
 }
 
 void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image,
-                                        VkImageSubresourceRange *subresourceRange)
+                                        VkImageSubresourceRange *subresourceRange,
+                                        struct radv_sample_locations_state *sample_locs) /* optional: when non-NULL,
+                                         * the processing helper sets these sample locations before drawing */
 {
        assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
        radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
-                                        cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline);
+                                        sample_locs, DEPTH_DECOMPRESS); /* use the decompress pipeline variant */
 }
 
 void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image,
-                                        VkImageSubresourceRange *subresourceRange)
+                                        VkImageSubresourceRange *subresourceRange,
+                                        struct radv_sample_locations_state *sample_locs) /* optional: when non-NULL,
+                                         * the processing helper sets these sample locations before drawing */
 {
        assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
        radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
-                                        cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline);
+                                        sample_locs, DEPTH_RESUMMARIZE); /* use the resummarize pipeline variant */
 }