radv: gather if a vertex shader needs the instance ID
[mesa.git] / src / amd / vulkan / radv_meta_decompress.c
index 7a5681414ff1a8f78c49f57b5e6b313c35ee1a40..578a287d07b689ceb954ab13e7f99f2a3c07d7a4 100644 (file)
@@ -103,6 +103,18 @@ create_pipeline(struct radv_device *device,
 {
        VkResult result;
        VkDevice device_h = radv_device_to_handle(device);
+       struct radv_shader_module vs_module = {0};
+
+       mtx_lock(&device->meta_state.mtx);
+       if (*decompress_pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
+       if (!vs_module_h) {
+               vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
+               vs_module_h = radv_shader_module_to_handle(&vs_module);
+       }
 
        struct radv_shader_module fs_module = {
                .nir = radv_meta_build_nir_fs_noop(),
@@ -114,6 +126,11 @@ create_pipeline(struct radv_device *device,
                goto cleanup;
        }
 
+       const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
+               .sampleLocationsEnable = false,
+       };
+
        const VkGraphicsPipelineCreateInfo pipeline_create_info = {
                .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
                .stageCount = 2,
@@ -156,6 +173,7 @@ create_pipeline(struct radv_device *device,
                },
                .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+                       .pNext = &sample_locs_create_info,
                        .rasterizationSamples = samples,
                        .sampleShadingEnable = false,
                        .pSampleMask = NULL,
@@ -177,10 +195,11 @@ create_pipeline(struct radv_device *device,
                },
                .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-                       .dynamicStateCount = 2,
+                       .dynamicStateCount = 3,
                        .pDynamicStates = (VkDynamicState[]) {
                                VK_DYNAMIC_STATE_VIEWPORT,
                                VK_DYNAMIC_STATE_SCISSOR,
+                               VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
                        },
                },
                .layout = layout,
@@ -219,6 +238,9 @@ create_pipeline(struct radv_device *device,
 
 cleanup:
        ralloc_free(fs_module.nir);
+       if (vs_module.nir)
+               ralloc_free(vs_module.nir);
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -244,7 +266,7 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
 }
 
 VkResult
-radv_device_init_meta_depth_decomp_state(struct radv_device *device)
+radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
 {
        struct radv_meta_state *state = &device->meta_state;
        VkResult res = VK_SUCCESS;
@@ -270,6 +292,9 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)
                if (res != VK_SUCCESS)
                        goto fail;
 
+               if (on_demand)
+                       continue;
+
                res = create_pipeline(device, vs_module_h, samples,
                                      state->depth_decomp[i].pass,
                                      state->depth_decomp[i].p_layout,
@@ -290,34 +315,6 @@ cleanup:
        return res;
 }
 
-static void
-emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
-                 const VkExtent2D *depth_decomp_extent,
-                 VkPipeline pipeline_h)
-{
-       VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-
-       radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
-                            pipeline_h);
-
-       radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
-               .x = 0,
-               .y = 0,
-               .width = depth_decomp_extent->width,
-               .height = depth_decomp_extent->height,
-               .minDepth = 0.0f,
-               .maxDepth = 1.0f
-       });
-
-       radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
-               .offset = { 0, 0 },
-               .extent = *depth_decomp_extent,
-       });
-
-       radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
-}
-
-
 enum radv_depth_op {
        DEPTH_DECOMPRESS,
        DEPTH_RESUMMARIZE,
@@ -326,6 +323,7 @@ enum radv_depth_op {
 static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                             struct radv_image *image,
                                             VkImageSubresourceRange *subresourceRange,
+                                            struct radv_sample_locations_state *sample_locs,
                                             enum radv_depth_op op)
 {
        struct radv_meta_saved_state saved_state;
@@ -340,11 +338,24 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
        struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
        VkPipeline pipeline_h;
 
-       if (!image->surface.htile_size)
+       if (!radv_image_has_htile(image))
                return;
 
+       if (!meta_state->depth_decomp[samples_log2].decompress_pipeline) {
+               VkResult ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples,
+                                              meta_state->depth_decomp[samples_log2].pass,
+                                              meta_state->depth_decomp[samples_log2].p_layout,
+                                              &meta_state->depth_decomp[samples_log2].decompress_pipeline,
+                                              &meta_state->depth_decomp[samples_log2].resummarize_pipeline);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
+
        radv_meta_save(&saved_state, cmd_buffer,
                       RADV_META_SAVE_GRAPHICS_PIPELINE |
+                      RADV_META_SAVE_SAMPLE_LOCATIONS |
                       RADV_META_SAVE_PASS);
 
        switch (op) {
@@ -358,6 +369,38 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                unreachable("unknown operation");
        }
 
+       radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                            pipeline_h);
+
+       radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) {
+               .x = 0,
+               .y = 0,
+               .width = width,
+               .height = height,
+               .minDepth = 0.0f,
+               .maxDepth = 1.0f
+       });
+
+       radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) {
+               .offset = { 0, 0 },
+               .extent = { width, height },
+       });
+
+       if (sample_locs) {
+               assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
+
+               /* Set the sample locations specified during explicit or
+                * automatic layout transitions, otherwise the depth decompress
+                * pass uses the default HW locations.
+                */
+               radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
+                       .sampleLocationsPerPixel = sample_locs->per_pixel,
+                       .sampleLocationGridSize = sample_locs->grid_size,
+                       .sampleLocationsCount = sample_locs->count,
+                       .pSampleLocations = sample_locs->locations,
+               });
+       }
+
        for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
                struct radv_image_view iview;
 
@@ -412,7 +455,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                           },
                                           VK_SUBPASS_CONTENTS_INLINE);
 
-               emit_depth_decomp(cmd_buffer, &(VkExtent2D){width, height}, pipeline_h);
+               radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
                radv_CmdEndRenderPass(cmd_buffer_h);
 
                radv_DestroyFramebuffer(device_h, fb_h,
@@ -423,16 +466,20 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 
 void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image,
-                                        VkImageSubresourceRange *subresourceRange)
+                                        VkImageSubresourceRange *subresourceRange,
+                                        struct radv_sample_locations_state *sample_locs)
 {
        assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-       radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_DECOMPRESS);
+       radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
+                                        sample_locs, DEPTH_DECOMPRESS);
 }
 
 void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image,
-                                        VkImageSubresourceRange *subresourceRange)
+                                        VkImageSubresourceRange *subresourceRange,
+                                        struct radv_sample_locations_state *sample_locs)
 {
        assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-       radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_RESUMMARIZE);
+       radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
+                                        sample_locs, DEPTH_RESUMMARIZE);
 }