radeonsi: enable shader caching for compute shaders
author: Marek Olšák <marek.olsak@amd.com>
Thu, 14 Jun 2018 06:43:19 +0000 (02:43 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Fri, 29 Jun 2018 02:27:25 +0000 (22:27 -0400)
Compute shaders were not using the shader cache.

src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 6096766f694571b1787fae62626bd010310181d7..c8f864760f614e96af8b5c8e21914c0f01799780 100644 (file)
@@ -88,15 +88,16 @@ static void si_create_compute_state_async(void *job, int thread_index)
        struct si_shader_selector sel;
        struct si_compiler *compiler;
        struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
+       struct si_screen *sscreen = program->screen;
 
        assert(!debug->debug_message || debug->async);
        assert(thread_index >= 0);
-       assert(thread_index < ARRAY_SIZE(program->screen->compiler));
-       compiler = &program->screen->compiler[thread_index];
+       assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+       compiler = &sscreen->compiler[thread_index];
 
        memset(&sel, 0, sizeof(sel));
 
-       sel.screen = program->screen;
+       sel.screen = sscreen;
 
        if (program->ir_type == PIPE_SHADER_IR_TGSI) {
                tgsi_scan_shader(program->ir.tgsi, &sel.info);
@@ -125,10 +126,36 @@ static void si_create_compute_state_async(void *job, int thread_index)
        program->uses_block_size = sel.info.uses_block_size;
        program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
        program->uses_bindless_images = sel.info.uses_bindless_images;
+       program->variable_group_size =
+               sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
 
-       if (si_shader_create(program->screen, compiler, &program->shader, debug)) {
-               program->shader.compilation_failed = true;
+       void *ir_binary = si_get_ir_binary(&sel);
+
+       /* Try to load the shader from the shader cache. */
+       mtx_lock(&sscreen->shader_cache_mutex);
+
+       if (ir_binary &&
+           si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
+               mtx_unlock(&sscreen->shader_cache_mutex);
+
+               si_shader_dump_stats_for_shader_db(shader, debug);
+               si_shader_dump(sscreen, shader, debug, PIPE_SHADER_COMPUTE,
+                              stderr, true);
+
+               if (si_shader_binary_upload(sscreen, shader))
+                       program->shader.compilation_failed = true;
        } else {
+               mtx_unlock(&sscreen->shader_cache_mutex);
+
+               if (si_shader_create(sscreen, compiler, &program->shader, debug)) {
+                       program->shader.compilation_failed = true;
+
+                       if (program->ir_type == PIPE_SHADER_IR_TGSI)
+                               FREE(program->ir.tgsi);
+                       program->shader.selector = NULL;
+                       return;
+               }
+
                bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
                unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
                                      (sel.info.uses_grid_size ? 3 : 0) +
@@ -150,8 +177,12 @@ static void si_create_compute_state_async(void *job, int thread_index)
                                                sel.info.uses_thread_id[1] ? 1 : 0) |
                        S_00B84C_LDS_SIZE(shader->config.lds_size);
 
-               program->variable_group_size =
-                       sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
+               if (ir_binary) {
+                       mtx_lock(&sscreen->shader_cache_mutex);
+                       if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
+                               FREE(ir_binary);
+                       mtx_unlock(&sscreen->shader_cache_mutex);
+               }
        }
 
        if (program->ir_type == PIPE_SHADER_IR_TGSI)
index 8fd80f73effe69af688d14c193c47da41161c1f6..f8748bdfffb028ac3501fe331cbffbe917adbf96 100644 (file)
@@ -488,6 +488,12 @@ void si_set_occlusion_query_state(struct si_context *sctx,
 void si_emit_dpbb_state(struct si_context *sctx);
 
 /* si_state_shaders.c */
+void *si_get_ir_binary(struct si_shader_selector *sel);
+bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
+                                struct si_shader *shader);
+bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary,
+                                  struct si_shader *shader,
+                                  bool insert_into_disk_cache);
 bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);
 bool si_init_shader_cache(struct si_screen *sscreen);
index 1a8b2c0852465ce9a21df7a7e6e18c201dbce84f..ddd38dabbe670e6d246f77af1a8a8407816b6510 100644 (file)
@@ -45,7 +45,7 @@
  * Return the IR binary in a buffer. For TGSI the first 4 bytes contain its
  * size as integer.
  */
-static void *si_get_ir_binary(struct si_shader_selector *sel)
+void *si_get_ir_binary(struct si_shader_selector *sel)
 {
        struct blob blob;
        unsigned ir_size;
@@ -202,10 +202,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
  *
  * Returns false on failure, in which case the ir_binary should be freed.
  */
-static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
-                                         void *ir_binary,
-                                         struct si_shader *shader,
-                                         bool insert_into_disk_cache)
+bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary,
+                                  struct si_shader *shader,
+                                  bool insert_into_disk_cache)
 {
        void *hw_binary;
        struct hash_entry *entry;
@@ -235,9 +234,8 @@ static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
        return true;
 }
 
-static bool si_shader_cache_load_shader(struct si_screen *sscreen,
-                                       void *ir_binary,
-                                       struct si_shader *shader)
+bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
+                                struct si_shader *shader)
 {
        struct hash_entry *entry =
                _mesa_hash_table_search(sscreen->shader_cache, ir_binary);