radv: Add on-demand compilation of built-in shaders.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 13 Aug 2018 22:07:57 +0000 (00:07 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 14 Aug 2018 08:26:24 +0000 (10:26 +0200)
In environments where we cannot cache, e.g. Android (no homedir),
ChromeOS (read-only rootfs) or sandboxes (cannot open the cache), the
startup cost of creating a device in radv is rather high, because
all possible built-in pipelines are compiled up front. Depending on
the CPU, this meant a 1-4 second cost for creating a device.

For CTS this cost is unacceptable, and likely for starting random
apps too.

So with this patch, if there is no cache, radv will compile the
built-in shaders on demand. Once there is a cache from the first run,
even if incomplete, the driver knows that it can likely write the
cache and precompiles everything.

Note that I did not switch the buffer and itob/btoi compute pipelines
to on-demand, since you cannot really do anything in Vulkan without
them and there are only a few.

This reduces the CTS runtime for the no-cache scenario on my
Threadripper from 32 minutes to 8 minutes.

Reviewed-by: Dave Airlie <airlied@redhat.com>
13 files changed:
src/amd/vulkan/radv_meta.c
src/amd/vulkan/radv_meta.h
src/amd/vulkan/radv_meta_blit.c
src/amd/vulkan/radv_meta_blit2d.c
src/amd/vulkan/radv_meta_clear.c
src/amd/vulkan/radv_meta_decompress.c
src/amd/vulkan/radv_meta_fast_clear.c
src/amd/vulkan/radv_meta_resolve.c
src/amd/vulkan/radv_meta_resolve_cs.c
src/amd/vulkan/radv_meta_resolve_fs.c
src/amd/vulkan/radv_pipeline_cache.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_query.c

index a59f38fb21d3018a9a96c47ea50276c9e6fb69dd..ccdcb9ceb078c04852074c4c23ab59875cd0f8b0 100644 (file)
@@ -257,7 +257,7 @@ radv_builtin_cache_path(char *path)
        return true;
 }
 
-static void
+static bool
 radv_load_meta_pipeline(struct radv_device *device)
 {
        char path[PATH_MAX + 1];
@@ -265,11 +265,11 @@ radv_load_meta_pipeline(struct radv_device *device)
        void *data = NULL;
 
        if (!radv_builtin_cache_path(path))
-               return;
+               return false;
 
        int fd = open(path, O_RDONLY);
        if (fd < 0)
-               return;
+               return false;
        if (fstat(fd, &st))
                goto fail;
        data = malloc(st.st_size);
@@ -278,10 +278,11 @@ radv_load_meta_pipeline(struct radv_device *device)
        if(read(fd, data, st.st_size) == -1)
                goto fail;
 
-       radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
+       return radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
 fail:
        free(data);
        close(fd);
+       return false;
 }
 
 static void
@@ -330,6 +331,8 @@ radv_device_init_meta(struct radv_device *device)
 {
        VkResult result;
 
+       memset(&device->meta_state, 0, sizeof(device->meta_state));
+
        device->meta_state.alloc = (VkAllocationCallbacks) {
                .pUserData = device,
                .pfnAllocation = meta_alloc,
@@ -339,21 +342,24 @@ radv_device_init_meta(struct radv_device *device)
 
        device->meta_state.cache.alloc = device->meta_state.alloc;
        radv_pipeline_cache_init(&device->meta_state.cache, device);
-       radv_load_meta_pipeline(device);
+       bool loaded_cache = radv_load_meta_pipeline(device);
+       bool on_demand = !loaded_cache;
+
+       mtx_init(&device->meta_state.mtx, mtx_plain);
 
-       result = radv_device_init_meta_clear_state(device);
+       result = radv_device_init_meta_clear_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_clear;
 
-       result = radv_device_init_meta_resolve_state(device);
+       result = radv_device_init_meta_resolve_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_resolve;
 
-       result = radv_device_init_meta_blit_state(device);
+       result = radv_device_init_meta_blit_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_blit;
 
-       result = radv_device_init_meta_blit2d_state(device);
+       result = radv_device_init_meta_blit2d_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_blit2d;
 
@@ -361,7 +367,7 @@ radv_device_init_meta(struct radv_device *device)
        if (result != VK_SUCCESS)
                goto fail_bufimage;
 
-       result = radv_device_init_meta_depth_decomp_state(device);
+       result = radv_device_init_meta_depth_decomp_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_depth_decomp;
 
@@ -369,19 +375,19 @@ radv_device_init_meta(struct radv_device *device)
        if (result != VK_SUCCESS)
                goto fail_buffer;
 
-       result = radv_device_init_meta_query_state(device);
+       result = radv_device_init_meta_query_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_query;
 
-       result = radv_device_init_meta_fast_clear_flush_state(device);
+       result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_fast_clear;
 
-       result = radv_device_init_meta_resolve_compute_state(device);
+       result = radv_device_init_meta_resolve_compute_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_resolve_compute;
 
-       result = radv_device_init_meta_resolve_fragment_state(device);
+       result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail_resolve_fragment;
        return VK_SUCCESS;
@@ -407,6 +413,7 @@ fail_blit:
 fail_resolve:
        radv_device_finish_meta_clear_state(device);
 fail_clear:
+       mtx_destroy(&device->meta_state.mtx);
        radv_pipeline_cache_finish(&device->meta_state.cache);
        return result;
 }
@@ -428,6 +435,7 @@ radv_device_finish_meta(struct radv_device *device)
 
        radv_store_meta_pipeline(device);
        radv_pipeline_cache_finish(&device->meta_state.cache);
+       mtx_destroy(&device->meta_state.mtx);
 }
 
 nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
index 35067f67124b7e4db7d912c07dacef26a6898567..f8d48f4d791b6e72a421f2eefd118c4868870c4f 100644 (file)
@@ -58,34 +58,34 @@ struct radv_meta_saved_state {
        VkRect2D render_area;
 };
 
-VkResult radv_device_init_meta_clear_state(struct radv_device *device);
+VkResult radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_clear_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_resolve_state(struct radv_device *device);
+VkResult radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_resolve_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_depth_decomp_state(struct radv_device *device);
+VkResult radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_depth_decomp_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_fast_clear_flush_state(struct radv_device *device);
+VkResult radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_blit_state(struct radv_device *device);
+VkResult radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_blit_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_blit2d_state(struct radv_device *device);
+VkResult radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_blit2d_state(struct radv_device *device);
 
 VkResult radv_device_init_meta_buffer_state(struct radv_device *device);
 void radv_device_finish_meta_buffer_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_query_state(struct radv_device *device);
+VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_query_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device);
+VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_resolve_compute_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device);
+VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device);
 
 void radv_meta_save(struct radv_meta_saved_state *saved_state,
index 370d4fcd8311ec335d17242264e33d570e6aa219..a205686e55392f5f7940dcd004dc55b4e3a7b609 100644 (file)
@@ -31,6 +31,13 @@ struct blit_region {
        VkExtent3D dest_extent;
 };
 
+static VkResult
+build_pipeline(struct radv_device *device,
+               VkImageAspectFlagBits aspect,
+               enum glsl_sampler_dim tex_dim,
+               unsigned fs_key,
+               VkPipeline *pipeline);
+
 static nir_shader *
 build_nir_vertex_shader(void)
 {
@@ -273,6 +280,20 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
        return b.shader;
 }
 
+static enum glsl_sampler_dim
+translate_sampler_dim(VkImageType type) {
+       switch(type) {
+       case VK_IMAGE_TYPE_1D:
+               return GLSL_SAMPLER_DIM_1D;
+       case VK_IMAGE_TYPE_2D:
+               return GLSL_SAMPLER_DIM_2D;
+       case VK_IMAGE_TYPE_3D:
+               return GLSL_SAMPLER_DIM_3D;
+       default:
+               unreachable("Unhandled image type");
+       }
+}
+
 static void
 meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                struct radv_image *src_image,
@@ -333,11 +354,12 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                                       .height = dst_height,
                                       .layers = 1,
                                }, &cmd_buffer->pool->alloc, &fb);
-       VkPipeline pipeline;
+       VkPipeline* pipeline = NULL;
+       unsigned fs_key = 0;
        switch (src_iview->aspect_mask) {
        case VK_IMAGE_ASPECT_COLOR_BIT: {
-               unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
                unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
+               fs_key = radv_format_meta_fs_key(dest_image->vk_format);
 
                radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
                                              &(VkRenderPassBeginInfo) {
@@ -353,13 +375,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                                                       }, VK_SUBPASS_CONTENTS_INLINE);
                switch (src_image->type) {
                case VK_IMAGE_TYPE_1D:
-                       pipeline = device->meta_state.blit.pipeline_1d_src[fs_key];
+                       pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
                        break;
                case VK_IMAGE_TYPE_2D:
-                       pipeline = device->meta_state.blit.pipeline_2d_src[fs_key];
+                       pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
                        break;
                case VK_IMAGE_TYPE_3D:
-                       pipeline = device->meta_state.blit.pipeline_3d_src[fs_key];
+                       pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
                        break;
                default:
                        unreachable(!"bad VkImageType");
@@ -382,13 +404,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                                                       }, VK_SUBPASS_CONTENTS_INLINE);
                switch (src_image->type) {
                case VK_IMAGE_TYPE_1D:
-                       pipeline = device->meta_state.blit.depth_only_1d_pipeline;
+                       pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
                        break;
                case VK_IMAGE_TYPE_2D:
-                       pipeline = device->meta_state.blit.depth_only_2d_pipeline;
+                       pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
                        break;
                case VK_IMAGE_TYPE_3D:
-                       pipeline = device->meta_state.blit.depth_only_3d_pipeline;
+                       pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
                        break;
                default:
                        unreachable(!"bad VkImageType");
@@ -411,13 +433,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                                                       }, VK_SUBPASS_CONTENTS_INLINE);
                switch (src_image->type) {
                case VK_IMAGE_TYPE_1D:
-                       pipeline = device->meta_state.blit.stencil_only_1d_pipeline;
+                       pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
                        break;
                case VK_IMAGE_TYPE_2D:
-                       pipeline = device->meta_state.blit.stencil_only_2d_pipeline;
+                       pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
                        break;
                case VK_IMAGE_TYPE_3D:
-                       pipeline = device->meta_state.blit.stencil_only_3d_pipeline;
+                       pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
                        break;
                default:
                        unreachable(!"bad VkImageType");
@@ -428,8 +450,16 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
                unreachable(!"bad VkImageType");
        }
 
+       if (!*pipeline) {
+               VkResult ret = build_pipeline(device, src_iview->aspect_mask, translate_sampler_dim(src_image->type), fs_key, pipeline);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       goto fail_pipeline;
+               }
+       }
+
        radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
-                            VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+                            VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
 
        radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                      device->meta_state.blit.pipeline_layout,
@@ -471,6 +501,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 
        radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
 
+fail_pipeline:
        radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
 
        /* At the point where we emit the draw call, all data from the
@@ -722,6 +753,14 @@ build_pipeline(struct radv_device *device,
                VkPipeline *pipeline)
 {
        VkResult result = VK_SUCCESS;
+
+       mtx_lock(&device->meta_state.mtx);
+
+       if (*pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        struct radv_shader_module fs = {0};
        struct radv_shader_module vs = {.nir = build_nir_vertex_shader()};
        VkRenderPass rp;
@@ -871,11 +910,12 @@ build_pipeline(struct radv_device *device,
                                               &device->meta_state.alloc, pipeline);
        ralloc_free(vs.nir);
        ralloc_free(fs.nir);
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
 static VkResult
-radv_device_init_meta_blit_color(struct radv_device *device)
+radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
 {
        VkResult result;
 
@@ -917,6 +957,9 @@ radv_device_init_meta_blit_color(struct radv_device *device)
                                goto fail;
                }
 
+               if (on_demand)
+                       continue;
+
                result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key, &device->meta_state.blit.pipeline_1d_src[key]);
                if (result != VK_SUCCESS)
                        goto fail;
@@ -937,7 +980,7 @@ fail:
 }
 
 static VkResult
-radv_device_init_meta_blit_depth(struct radv_device *device)
+radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand)
 {
        VkResult result;
 
@@ -974,6 +1017,9 @@ radv_device_init_meta_blit_depth(struct radv_device *device)
                        goto fail;
        }
 
+       if (on_demand)
+               return VK_SUCCESS;
+
        result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.depth_only_1d_pipeline);
        if (result != VK_SUCCESS)
                goto fail;
@@ -991,7 +1037,7 @@ fail:
 }
 
 static VkResult
-radv_device_init_meta_blit_stencil(struct radv_device *device)
+radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand)
 {
        VkResult result;
 
@@ -1028,6 +1074,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device)
        if (result != VK_SUCCESS)
                goto fail;
 
+       if (on_demand)
+               return VK_SUCCESS;
 
        result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.stencil_only_1d_pipeline);
        if (result != VK_SUCCESS)
@@ -1047,7 +1095,7 @@ fail:
 }
 
 VkResult
-radv_device_init_meta_blit_state(struct radv_device *device)
+radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand)
 {
        VkResult result;
 
@@ -1086,15 +1134,15 @@ radv_device_init_meta_blit_state(struct radv_device *device)
        if (result != VK_SUCCESS)
                goto fail;
 
-       result = radv_device_init_meta_blit_color(device);
+       result = radv_device_init_meta_blit_color(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail;
 
-       result = radv_device_init_meta_blit_depth(device);
+       result = radv_device_init_meta_blit_depth(device, on_demand);
        if (result != VK_SUCCESS)
                goto fail;
 
-       result = radv_device_init_meta_blit_stencil(device);
+       result = radv_device_init_meta_blit_stencil(device, on_demand);
 
 fail:
        if (result != VK_SUCCESS)
index 7965285694231b5a89e4ce0de776c70d694bd9eb..d2975532d4bad24d709d41f549a5d974d2522f1a 100644 (file)
@@ -35,6 +35,22 @@ enum blit2d_src_type {
        BLIT2D_NUM_SRC_TYPES,
 };
 
+static VkResult
+blit2d_init_color_pipeline(struct radv_device *device,
+                          enum blit2d_src_type src_type,
+                          VkFormat format,
+                          uint32_t log2_samples);
+
+static VkResult
+blit2d_init_depth_only_pipeline(struct radv_device *device,
+                               enum blit2d_src_type src_type,
+                               uint32_t log2_samples);
+
+static VkResult
+blit2d_init_stencil_only_pipeline(struct radv_device *device,
+                                 enum blit2d_src_type src_type,
+                                 uint32_t log2_samples);
+
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
              struct radv_meta_blit2d_surf *surf,
@@ -268,6 +284,14 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
                                unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
                                unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
 
+                               if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
+                                       VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
+                                       if (ret != VK_SUCCESS) {
+                                               cmd_buffer->record_result = ret;
+                                               goto fail_pipeline;
+                                       }
+                               }
+
                                radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
                                                        &(VkRenderPassBeginInfo) {
                                                                .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -285,6 +309,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
                                bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
                        } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
                                enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+                               if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
+                                       VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
+                                       if (ret != VK_SUCCESS) {
+                                               cmd_buffer->record_result = ret;
+                                               goto fail_pipeline;
+                                       }
+                               }
+
                                radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
                                                        &(VkRenderPassBeginInfo) {
                                                                .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -303,6 +336,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 
                        } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
                                enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+                               if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
+                                       VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
+                                       if (ret != VK_SUCCESS) {
+                                               cmd_buffer->record_result = ret;
+                                               goto fail_pipeline;
+                                       }
+                               }
+
                                radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
                                                        &(VkRenderPassBeginInfo) {
                                                                .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -357,6 +399,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
                                radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
                        radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
 
+fail_pipeline:
                        /* At the point where we emit the draw call, all data from the
                        * descriptor sets, etc. has been used.  We are free to delete it.
                        */
@@ -737,6 +780,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
        unsigned fs_key = radv_format_meta_fs_key(format);
        const char *name;
 
+       mtx_lock(&device->meta_state.mtx);
+       if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        texel_fetch_build_func src_func;
        switch(src_type) {
        case BLIT2D_SRC_TYPE_IMAGE:
@@ -894,6 +943,7 @@ blit2d_init_color_pipeline(struct radv_device *device,
        ralloc_free(vs.nir);
        ralloc_free(fs.nir);
 
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -905,6 +955,12 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
        VkResult result;
        const char *name;
 
+       mtx_lock(&device->meta_state.mtx);
+       if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        texel_fetch_build_func src_func;
        switch(src_type) {
        case BLIT2D_SRC_TYPE_IMAGE:
@@ -1057,6 +1113,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
        ralloc_free(vs.nir);
        ralloc_free(fs.nir);
 
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -1068,6 +1125,12 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
        VkResult result;
        const char *name;
 
+       mtx_lock(&device->meta_state.mtx);
+       if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        texel_fetch_build_func src_func;
        switch(src_type) {
        case BLIT2D_SRC_TYPE_IMAGE:
@@ -1236,6 +1299,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
        ralloc_free(vs.nir);
        ralloc_free(fs.nir);
 
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -1287,7 +1351,7 @@ fail:
 }
 
 VkResult
-radv_device_init_meta_blit2d_state(struct radv_device *device)
+radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand)
 {
        VkResult result;
        bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
@@ -1305,6 +1369,9 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
                        if (result != VK_SUCCESS)
                                goto fail;
 
+                       if (on_demand)
+                               continue;
+
                        for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
                                result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
                                if (result != VK_SUCCESS)
index 4f77e32b83fedc83d9e8f5bb352c983224a8c9d2..0ae7191f17d8be340390c568aa1bb077dbdb756c 100644 (file)
@@ -200,7 +200,13 @@ create_color_renderpass(struct radv_device *device,
                        uint32_t samples,
                        VkRenderPass *pass)
 {
-       return radv_CreateRenderPass(radv_device_to_handle(device),
+       mtx_lock(&device->meta_state.mtx);
+       if (*pass) {
+               mtx_unlock (&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
+       VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
                                       &(VkRenderPassCreateInfo) {
                                               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
                                                       .attachmentCount = 1,
@@ -231,6 +237,8 @@ create_color_renderpass(struct radv_device *device,
                                               },
                                                                .dependencyCount = 0,
                                                                         }, &device->meta_state.alloc, pass);
+       mtx_unlock(&device->meta_state.mtx);
+       return result;
 }
 
 static VkResult
@@ -243,6 +251,13 @@ create_color_pipeline(struct radv_device *device,
        struct nir_shader *vs_nir;
        struct nir_shader *fs_nir;
        VkResult result;
+
+       mtx_lock(&device->meta_state.mtx);
+       if (*pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        build_color_shaders(&vs_nir, &fs_nir, frag_output);
 
        const VkPipelineVertexInputStateCreateInfo vi_state = {
@@ -284,6 +299,7 @@ create_color_pipeline(struct radv_device *device,
                                 device->meta_state.clear_color_p_layout,
                                 &extra, &device->meta_state.alloc, pipeline);
 
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -349,6 +365,26 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
                return;
        }
 
+       if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
+               VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key],
+                                                      samples,
+                                                      &device->meta_state.clear[samples_log2].render_pass[fs_key]);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
+
+       if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
+               VkResult ret = create_color_pipeline(device, samples, 0,
+                                                    &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
+                                                    device->meta_state.clear[samples_log2].render_pass[fs_key]);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
+
        pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
        if (!pipeline) {
                radv_finishme("color clears incomplete");
@@ -449,7 +485,13 @@ create_depthstencil_renderpass(struct radv_device *device,
                               uint32_t samples,
                               VkRenderPass *render_pass)
 {
-       return radv_CreateRenderPass(radv_device_to_handle(device),
+       mtx_lock(&device->meta_state.mtx);
+       if (*render_pass) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
+       VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
                                       &(VkRenderPassCreateInfo) {
                                               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
                                                       .attachmentCount = 1,
@@ -477,6 +519,8 @@ create_depthstencil_renderpass(struct radv_device *device,
                                               },
                                                                .dependencyCount = 0,
                                                                         }, &device->meta_state.alloc, render_pass);
+       mtx_unlock(&device->meta_state.mtx);
+       return result;
 }
 
 static VkResult
@@ -489,6 +533,13 @@ create_depthstencil_pipeline(struct radv_device *device,
 {
        struct nir_shader *vs_nir, *fs_nir;
        VkResult result;
+
+       mtx_lock(&device->meta_state.mtx);
+       if (*pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        build_depthstencil_shader(&vs_nir, &fs_nir);
 
        const VkPipelineVertexInputStateCreateInfo vi_state = {
@@ -536,6 +587,8 @@ create_depthstencil_pipeline(struct radv_device *device,
                                 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
                                 device->meta_state.clear_depth_p_layout,
                                 &extra, &device->meta_state.alloc, pipeline);
+
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -579,6 +632,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
 {
        bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value);
        int index = DEPTH_CLEAR_SLOW;
+       VkPipeline *pipeline;
 
        if (fast) {
                /* we don't know the previous clear values, so we always have
@@ -588,13 +642,36 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
 
        switch (aspects) {
        case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
-               return meta_state->clear[samples_log2].depthstencil_pipeline[index];
+               pipeline = &meta_state->clear[samples_log2].depthstencil_pipeline[index];
+               break;
        case VK_IMAGE_ASPECT_DEPTH_BIT:
-               return meta_state->clear[samples_log2].depth_only_pipeline[index];
+               pipeline = &meta_state->clear[samples_log2].depth_only_pipeline[index];
+               break;
        case VK_IMAGE_ASPECT_STENCIL_BIT:
-               return meta_state->clear[samples_log2].stencil_only_pipeline[index];
+               pipeline = &meta_state->clear[samples_log2].stencil_only_pipeline[index];
+               break;
+       default:
+               unreachable("expected depth or stencil aspect");
+       }
+
+       if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
+               VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2,
+                                                             &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return VK_NULL_HANDLE;
+               }
        }
-       unreachable("expected depth or stencil aspect");
+
+       if (*pipeline == VK_NULL_HANDLE) {
+               VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index,
+                                                           pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return VK_NULL_HANDLE;
+               }
+       }
+       return *pipeline;
 }
 
 static void
@@ -638,6 +715,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
                                                         subpass->depth_stencil_attachment.layout,
                                                         clear_rect,
                                                         clear_value);
+       if (!pipeline)
+               return;
 
        radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
                             pipeline);
@@ -758,7 +837,7 @@ fail:
 }
 
 VkResult
-radv_device_init_meta_clear_state(struct radv_device *device)
+radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
 {
        VkResult res;
        struct radv_meta_state *state = &device->meta_state;
@@ -791,6 +870,9 @@ radv_device_init_meta_clear_state(struct radv_device *device)
        if (res != VK_SUCCESS)
                goto fail;
 
+       if (on_demand)
+               return VK_SUCCESS;
+
        for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
                uint32_t samples = 1 << i;
                for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
index 1a8058c7cc53d8333a1da73032d5d330dec7f002..41ed7b6d043f89a23da551c3ec53164296beb248 100644 (file)
@@ -103,6 +103,18 @@ create_pipeline(struct radv_device *device,
 {
        VkResult result;
        VkDevice device_h = radv_device_to_handle(device);
+       struct radv_shader_module vs_module = {0};
+
+       mtx_lock(&device->meta_state.mtx);
+       if (*decompress_pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
+       if (!vs_module_h) {
+               vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
+               vs_module_h = radv_shader_module_to_handle(&vs_module);
+       }
 
        struct radv_shader_module fs_module = {
                .nir = radv_meta_build_nir_fs_noop(),
@@ -219,6 +231,9 @@ create_pipeline(struct radv_device *device,
 
 cleanup:
        ralloc_free(fs_module.nir);
+       if (vs_module.nir)
+               ralloc_free(vs_module.nir);
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
@@ -244,7 +259,7 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
 }
 
 VkResult
-radv_device_init_meta_depth_decomp_state(struct radv_device *device)
+radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
 {
        struct radv_meta_state *state = &device->meta_state;
        VkResult res = VK_SUCCESS;
@@ -270,6 +285,9 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)
                if (res != VK_SUCCESS)
                        goto fail;
 
+               if (on_demand)
+                       continue;
+
                res = create_pipeline(device, vs_module_h, samples,
                                      state->depth_decomp[i].pass,
                                      state->depth_decomp[i].p_layout,
@@ -343,6 +361,18 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
        if (!radv_image_has_htile(image))
                return;
 
+       if (!meta_state->depth_decomp[samples_log2].decompress_pipeline) {
+               VkResult ret = create_pipeline(cmd_buffer->device, NULL, samples,
+                                              meta_state->depth_decomp[samples_log2].pass,
+                                              meta_state->depth_decomp[samples_log2].p_layout,
+                                              &meta_state->depth_decomp[samples_log2].decompress_pipeline,
+                                              &meta_state->depth_decomp[samples_log2].resummarize_pipeline);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
+
        radv_meta_save(&saved_state, cmd_buffer,
                       RADV_META_SAVE_GRAPHICS_PIPELINE |
                       RADV_META_SAVE_PASS);
index b42a6783fd289a4f29ad3e081a441a69ba805242..f469a9ee8f8e57cf7c6c91085a1b892092be74ac 100644 (file)
@@ -489,11 +489,17 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
                                        &state->alloc);
 }
 
-VkResult
-radv_device_init_meta_fast_clear_flush_state(struct radv_device *device)
+static VkResult
+radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
 {
        VkResult res = VK_SUCCESS;
 
+       mtx_lock(&device->meta_state.mtx);
+       if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
        if (!vs_module.nir) {
                /* XXX: Need more accurate error */
@@ -527,10 +533,21 @@ fail:
 
 cleanup:
        ralloc_free(vs_module.nir);
+       mtx_unlock(&device->meta_state.mtx);
 
        return res;
 }
 
+
+VkResult
+radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
+{
+       /* In on-demand mode nothing is built here; radv_emit_color_decompress()
+        * calls the _internal variant when the pipeline is still missing.
+        */
+       if (!on_demand)
+               return radv_device_init_meta_fast_clear_flush_state_internal(device);
+
+       return VK_SUCCESS;
+}
+
 static void
 emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
                      const VkExtent2D *resolve_extent,
@@ -591,6 +608,14 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
 
        assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 
+       if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
+               VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
+
        radv_meta_save(&saved_state, cmd_buffer,
                       RADV_META_SAVE_GRAPHICS_PIPELINE |
                       RADV_META_SAVE_PASS);
index 30fed97441478ceebfc597b429d39721a15e72c1..309c7a5be0d9f71ab50d38c020b4b1e75cbc336b 100644 (file)
@@ -252,8 +252,11 @@ radv_device_finish_meta_resolve_state(struct radv_device *device)
 }
 
 VkResult
-radv_device_init_meta_resolve_state(struct radv_device *device)
+radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
 {
+       if (on_demand)
+               return VK_SUCCESS;
+
        VkResult res = VK_SUCCESS;
        struct radv_meta_state *state = &device->meta_state;
        struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
@@ -353,6 +356,36 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
        }
 }
 
+/**
+ * Lazily create the render pass and graphics pipeline used for resolves
+ * with the given fs_key.
+ *
+ * Returns VK_SUCCESS immediately when the pipeline already exists.
+ * Otherwise creation happens under meta_state.mtx and the existence check
+ * is repeated after taking the lock, so callers that raced to get here
+ * do not build the same pipeline twice.
+ */
+static VkResult
+build_resolve_pipeline(struct radv_device *device,
+                       unsigned fs_key)
+{
+       VkResult result = VK_SUCCESS;
+
+       if (device->meta_state.resolve.pipeline[fs_key])
+               return result;
+
+       mtx_lock(&device->meta_state.mtx);
+       if (device->meta_state.resolve.pipeline[fs_key]) {
+               mtx_unlock(&device->meta_state.mtx);
+               return result;
+       }
+
+       struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
+       if (!vs_module.nir) {
+               /* XXX: Need more accurate error */
+               result = VK_ERROR_OUT_OF_HOST_MEMORY;
+               goto fail;
+       }
+
+       /* A previous attempt may have created the pass and then failed on the
+        * pipeline; reuse that pass rather than creating another one over it.
+        */
+       if (!device->meta_state.resolve.pass[fs_key]) {
+               result = create_pass(device, radv_fs_key_format_exemplars[fs_key], &device->meta_state.resolve.pass[fs_key]);
+               if (result != VK_SUCCESS)
+                       goto fail;
+       }
+
+       VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
+       result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key], device->meta_state.resolve.pass[fs_key]);
+
+fail:
+       ralloc_free(vs_module.nir);
+       mtx_unlock(&device->meta_state.mtx);
+       return result;
+}
+
 void radv_CmdResolveImage(
        VkCommandBuffer                             cmd_buffer_h,
        VkImage                                     src_image_h,
@@ -483,6 +516,12 @@ void radv_CmdResolveImage(
                for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
                     ++layer) {
 
+                       VkResult ret = build_resolve_pipeline(device, fs_key);
+                       if (ret != VK_SUCCESS) {
+                               cmd_buffer->record_result = ret;
+                               break;
+                       }
+
                        struct radv_image_view src_iview;
                        radv_image_view_init(&src_iview, cmd_buffer->device,
                                             &(VkImageViewCreateInfo) {
@@ -648,6 +687,12 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 
                radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
 
+               VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dst_img->vk_format));
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       continue;
+               }
+
                emit_resolve(cmd_buffer,
                             dst_img->vk_format,
                             &(VkOffset2D) { 0, 0 },
index 2d79cb09fecda019346d76eb30a8351633bcfb19..fca49a01bb03ec453953700e7bc90f17e6ab1380 100644 (file)
@@ -212,6 +212,12 @@ create_resolve_pipeline(struct radv_device *device,
        VkResult result;
        struct radv_shader_module cs = { .nir = NULL };
 
+       mtx_lock(&device->meta_state.mtx);
+       if (*pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
 
        /* compute shader */
@@ -239,14 +245,16 @@ create_resolve_pipeline(struct radv_device *device,
                goto fail;
 
        ralloc_free(cs.nir);
+       mtx_unlock(&device->meta_state.mtx);
        return VK_SUCCESS;
 fail:
        ralloc_free(cs.nir);
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
 VkResult
-radv_device_init_meta_resolve_compute_state(struct radv_device *device)
+radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
 {
        struct radv_meta_state *state = &device->meta_state;
        VkResult res;
@@ -255,6 +263,9 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device)
        if (res != VK_SUCCESS)
                goto fail;
 
+       if (on_demand)
+               return VK_SUCCESS;
+
        for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
                uint32_t samples = 1 << i;
 
@@ -353,16 +364,27 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
                              }
                                      });
 
-       VkPipeline pipeline;
+       VkPipeline *pipeline;
        if (vk_format_is_int(src_iview->image->vk_format))
-               pipeline = device->meta_state.resolve_compute.rc[samples_log2].i_pipeline;
+               pipeline = &device->meta_state.resolve_compute.rc[samples_log2].i_pipeline;
        else if (vk_format_is_srgb(src_iview->image->vk_format))
-               pipeline = device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline;
+               pipeline = &device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline;
        else
-               pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline;
+               pipeline = &device->meta_state.resolve_compute.rc[samples_log2].pipeline;
+
+       if (!*pipeline) {
+               VkResult ret = create_resolve_pipeline(device, samples,
+                                                      vk_format_is_int(src_iview->image->vk_format),
+                                                      vk_format_is_srgb(src_iview->image->vk_format),
+                                                      pipeline);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
 
        radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
-                            VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+                            VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
 
        unsigned push_constants[4] = {
                src_offset->x,
index 6013503b837b9a9d3dabc8bf8cff97b7e6a0589f..21a5922f5df438c17eefe7cff5bfb42449b38b5b 100644 (file)
@@ -161,10 +161,18 @@ create_resolve_pipeline(struct radv_device *device,
                        int samples_log2,
                        VkFormat format)
 {
+       mtx_lock(&device->meta_state.mtx);
+
+       unsigned fs_key = radv_format_meta_fs_key(format);
+       VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+       if (*pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
+
        VkResult result;
        bool is_integer = false;
        uint32_t samples = 1 << samples_log2;
-       unsigned fs_key = radv_format_meta_fs_key(format);
        const VkPipelineVertexInputStateCreateInfo *vi_create_info;
        vi_create_info = &normal_vi_create_info;
        if (vk_format_is_int(format))
@@ -180,9 +188,6 @@ create_resolve_pipeline(struct radv_device *device,
 
        assert(!*rp);
 
-       VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
-       assert(!*pipeline);
-
        VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
                {
                        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@@ -307,11 +312,12 @@ create_resolve_pipeline(struct radv_device *device,
        ralloc_free(vs.nir);
        ralloc_free(fs.nir);
 
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
 VkResult
-radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
+radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand)
 {
        VkResult res;
 
@@ -319,6 +325,9 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
        if (res != VK_SUCCESS)
                goto fail;
 
+       if (on_demand)
+               return VK_SUCCESS;
+
        for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
                for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
                        res = create_resolve_pipeline(device, i, radv_fs_key_format_exemplars[j]);
@@ -404,10 +413,18 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
                              push_constants);
 
        unsigned fs_key = radv_format_meta_fs_key(dest_iview->vk_format);
-       VkPipeline pipeline_h = device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+       VkPipeline* pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+
+       if (*pipeline == VK_NULL_HANDLE) {
+               VkResult ret = create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
 
        radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
-                            pipeline_h);
+                            *pipeline);
 
        radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
                .x = dest_offset->x,
index 7e2c305b1a033d7acecc3cdb5a8eea50a3df607f..9f01191a2f67024d5a64d7cf9e82fa06c5e8640a 100644 (file)
@@ -455,7 +455,7 @@ struct cache_header {
        uint8_t  uuid[VK_UUID_SIZE];
 };
 
-void
+bool
 radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
                         const void *data, size_t size)
 {
@@ -463,18 +463,18 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
        struct cache_header header;
 
        if (size < sizeof(header))
-               return;
+               return false;
        memcpy(&header, data, sizeof(header));
        if (header.header_size < sizeof(header))
-               return;
+               return false;
        if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
-               return;
+               return false;
        if (header.vendor_id != ATI_VENDOR_ID)
-               return;
+               return false;
        if (header.device_id != device->physical_device->rad_info.pci_id)
-               return;
+               return false;
        if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
-               return;
+               return false;
 
        char *end = (void *) data + size;
        char *p = (void *) data + header.header_size;
@@ -496,6 +496,8 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
                }
                p += size;
        }
+
+       return true;
 }
 
 VkResult radv_CreatePipelineCache(
index 9374b730b5256475dfc7dbdaa99dd2ae919ed5a5..01a5a698a0dfba2587daedc015ddaf4670375c03 100644 (file)
@@ -372,7 +372,7 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
                         struct radv_device *device);
 void
 radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
-void
+bool
 radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
                         const void *data, size_t size);
 
@@ -429,6 +429,12 @@ struct radv_meta_state {
 
        struct radv_pipeline_cache cache;
 
+       /*
+        * For on-demand pipeline creation, makes sure that
+        * only one thread tries to build a pipeline at the same time.
+        */
+       mtx_t mtx;
+
        /**
         * Use array element `i` for images with `2^i` samples.
         */
index e3229ab59bbe827ab5364361cc44ce0abbaad0cf..bdfd7620cfc1ccea7cdf060c2b4d8c00ca762d0e 100644 (file)
@@ -511,12 +511,17 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
        return b.shader;
 }
 
-VkResult radv_device_init_meta_query_state(struct radv_device *device)
+static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device)
 {
        VkResult result;
        struct radv_shader_module occlusion_cs = { .nir = NULL };
        struct radv_shader_module pipeline_statistics_cs = { .nir = NULL };
 
+       mtx_lock(&device->meta_state.mtx);
+       if (device->meta_state.query.pipeline_statistics_query_pipeline) {
+               mtx_unlock(&device->meta_state.mtx);
+               return VK_SUCCESS;
+       }
        occlusion_cs.nir = build_occlusion_query_shader(device);
        pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device);
 
@@ -611,9 +616,18 @@ fail:
                radv_device_finish_meta_query_state(device);
        ralloc_free(occlusion_cs.nir);
        ralloc_free(pipeline_statistics_cs.nir);
+       mtx_unlock(&device->meta_state.mtx);
        return result;
 }
 
+VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_demand)
+{
+       /* On-demand devices skip the up-front build; radv_query_shader()
+        * invokes the _internal variant when the pipeline it was handed is
+        * still unset.
+        */
+       return on_demand ? VK_SUCCESS
+                        : radv_device_init_meta_query_state_internal(device);
+}
+
 void radv_device_finish_meta_query_state(struct radv_device *device)
 {
        if (device->meta_state.query.pipeline_statistics_query_pipeline)
@@ -638,7 +652,7 @@ void radv_device_finish_meta_query_state(struct radv_device *device)
 }
 
 static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
-                              VkPipeline pipeline,
+                              VkPipeline *pipeline,
                               struct radeon_winsys_bo *src_bo,
                               struct radeon_winsys_bo *dst_bo,
                               uint64_t src_offset, uint64_t dst_offset,
@@ -649,6 +663,14 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
        struct radv_device *device = cmd_buffer->device;
        struct radv_meta_saved_state saved_state;
 
+       if (!*pipeline) {
+               VkResult ret = radv_device_init_meta_query_state_internal(device);
+               if (ret != VK_SUCCESS) {
+                       cmd_buffer->record_result = ret;
+                       return;
+               }
+       }
+
        radv_meta_save(&saved_state, cmd_buffer,
                       RADV_META_SAVE_COMPUTE_PIPELINE |
                       RADV_META_SAVE_CONSTANTS |
@@ -667,7 +689,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
        };
 
        radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
-                            VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+                            VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
 
        radv_meta_push_descriptor_set(cmd_buffer,
                                      VK_PIPELINE_BIND_POINT_COMPUTE,
@@ -974,7 +996,7 @@ void radv_CmdCopyQueryPoolResults(
                                radeon_emit(cs, 4); /* poll interval */
                        }
                }
-               radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
+               radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
                                  pool->bo, dst_buffer->bo, firstQuery * pool->stride,
                                  dst_buffer->offset + dstOffset,
                                  get_max_db(cmd_buffer->device) * 16, stride,
@@ -993,7 +1015,7 @@ void radv_CmdCopyQueryPoolResults(
                                si_emit_wait_fence(cs, avail_va, 1, 0xffffffff);
                        }
                }
-               radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
+               radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
                                  pool->bo, dst_buffer->bo, firstQuery * pool->stride,
                                  dst_buffer->offset + dstOffset,
                                  pipelinestat_block_size * 2, stride, queryCount, flags,