radeonsi: do compilation from si_create_shader_selector asynchronously

author Marek Olšák <marek.olsak@amd.com>
Sat, 11 Jun 2016 17:57:40 +0000 (19:57 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 4 Jul 2016 22:47:13 +0000 (00:47 +0200)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c

index 06b32db43db76feeb88fc7b17e2ac66c0dc2ba50..ee97bcfaea5cc0c22fb29eb4232d648e0c42c4cf 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -663,6 +663,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
         if (!sscreen->b.ws->unref(sscreen->b.ws))
                 return;
  
+       if (util_queue_is_initialized(&sscreen->shader_compiler_queue))
+               util_queue_destroy(&sscreen->shader_compiler_queue);
+
+       for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
+               if (sscreen->tm[i])
+                       LLVMDisposeTargetMachine(sscreen->tm[i]);
+
         /* Free shader parts. */
         for (i = 0; i < ARRAY_SIZE(parts); i++) {
                 while (parts[i]) {
@@ -710,6 +717,7 @@ static bool si_init_gs_info(struct si_screen *sscreen)
  struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
  {
         struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
+       unsigned num_cpus, num_compiler_threads, i;
  
         if (!sscreen) {
                 return NULL;
@@ -754,6 +762,16 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
         if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
                 sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
  
+       /* Only enable as many threads as we have target machines and CPUs. */
+       num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+       num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
+
+       for (i = 0; i < num_compiler_threads; i++)
+               sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
+
+       util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
+                        32, num_compiler_threads);
+
         /* Create the auxiliary context. This must be done last. */
         sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0);
  
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h

index fc7e73e01a7e831e50ca24ca6545bfc9b21e0b04..1f63c12e5b41c1600923720047f495331b40af16 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -27,6 +27,7 @@
  #define SI_PIPE_H
  
  #include "si_state.h"
+#include "util/u_queue.h"
  
  #include <llvm-c/TargetMachine.h>
  
@@ -110,6 +111,10 @@ struct si_screen {
          */
         pipe_mutex                      shader_cache_mutex;
         struct hash_table               *shader_cache;
+
+       /* Shader compiler queue for multithreaded compilation. */
+       struct util_queue               shader_compiler_queue;
+       LLVMTargetMachineRef            tm[4]; /* used by the queue only */
  };
  
  struct si_blend_color {
@@ -207,7 +212,7 @@ struct si_context {
  
         struct pipe_fence_handle        *last_gfx_fence;
         struct si_shader_ctx_state      fixed_func_tcs_shader;
-       LLVMTargetMachineRef            tm;
+       LLVMTargetMachineRef            tm; /* only non-threaded compilation */
         bool                            gfx_flush_in_progress;
  
         /* Atoms (direct states). */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h

index be75a354279bae0836454b29c3920062219f0652..8fc0a36abbcd9dff680f3d90372532da12aff68d 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -234,6 +234,7 @@ struct si_shader;
   */
  struct si_shader_selector {
         struct si_screen        *screen;
+       struct util_queue_fence ready;
  
         /* Should only be used by si_init_shader_selector_async
          * if thread_index == -1 (non-threaded). */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c

index 799aa5708c15525fd42d2e9f97e866a558386e21..117cf4be1bcc7ab9151eac683aa101af620a9a81 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -991,7 +991,8 @@ static int si_shader_select_with_key(struct si_screen *sscreen,
                                      struct si_shader_ctx_state *state,
                                      union si_shader_key *key,
                                      LLVMTargetMachineRef tm,
-                                    struct pipe_debug_callback *debug)
+                                    struct pipe_debug_callback *debug,
+                                    bool wait)
  {
         struct si_shader_selector *sel = state->cso;
         struct si_shader *current = state->current;
@@ -1005,6 +1006,13 @@ static int si_shader_select_with_key(struct si_screen *sscreen,
         if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0))
                 return 0;
  
+       /* This must be done before the mutex is locked, because async GS
+        * compilation calls this function too, and therefore must enter
+        * the mutex first.
+        */
+       if (wait)
+               util_queue_job_wait(&sel->ready);
+
         pipe_mutex_lock(sel->mutex);
  
         /* Find the shader variant. */
@@ -1057,7 +1065,7 @@ static int si_shader_select(struct pipe_context *ctx,
  
         si_shader_selector_key(ctx, state->cso, &key);
         return si_shader_select_with_key(sctx->screen, state, &key,
-                                        sctx->tm, &sctx->b.debug);
+                                        sctx->tm, &sctx->b.debug, true);
  }
  
  static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
@@ -1094,10 +1102,19 @@ void si_init_shader_selector_async(void *job, int thread_index)
  {
         struct si_shader_selector *sel = (struct si_shader_selector *)job;
         struct si_screen *sscreen = sel->screen;
-       LLVMTargetMachineRef tm = sel->tm;
-       struct pipe_debug_callback *debug = &sel->debug;
+       LLVMTargetMachineRef tm;
+       struct pipe_debug_callback *debug;
         unsigned i;
  
+       if (thread_index >= 0) {
+               assert(thread_index < ARRAY_SIZE(sscreen->tm));
+               tm = sscreen->tm[thread_index];
+               debug = NULL;
+       } else {
+               tm = sel->tm;
+               debug = &sel->debug;
+       }
+
         /* Compile the main shader part for use with a prolog and/or epilog.
          * If this fails, the driver will try to compile a monolithic shader
          * on demand.
@@ -1172,7 +1189,8 @@ void si_init_shader_selector_async(void *job, int thread_index)
                         break;
                 }
  
-               if (si_shader_select_with_key(sscreen, &state, &key, tm, debug))
+               if (si_shader_select_with_key(sscreen, &state, &key, tm, debug,
+                                             false))
                         fprintf(stderr, "radeonsi: can't create a monolithic shader\n");
         }
  }
@@ -1304,8 +1322,14 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                 sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
                                           S_02880C_EXEC_ON_NOOP(1);
         pipe_mutex_init(sel->mutex);
+       util_queue_fence_init(&sel->ready);
  
-       si_init_shader_selector_async(sel, -1);
+       if (sctx->b.debug.debug_message ||
+           !util_queue_is_initialized(&sscreen->shader_compiler_queue))
+               si_init_shader_selector_async(sel, -1);
+       else
+               util_queue_add_job(&sscreen->shader_compiler_queue, sel,
+                                   &sel->ready, si_init_shader_selector_async);
  
         return sel;
  }
@@ -1442,6 +1466,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
                 [PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
         };
  
+       util_queue_job_wait(&sel->ready);
+
         if (current_shader[sel->type]->cso == sel) {
                 current_shader[sel->type]->cso = NULL;
                 current_shader[sel->type]->current = NULL;
@@ -1456,6 +1482,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
         if (sel->main_shader_part)
                 si_delete_shader(sctx, sel->main_shader_part);
  
+       util_queue_fence_destroy(&sel->ready);
         pipe_mutex_destroy(sel->mutex);
         free(sel->tokens);
         free(sel);
author	Marek Olšák <marek.olsak@amd.com>
	Sat, 11 Jun 2016 17:57:40 +0000 (19:57 +0200)
committer	Marek Olšák <marek.olsak@amd.com>
	Mon, 4 Jul 2016 22:47:13 +0000 (00:47 +0200)
src/gallium/drivers/radeonsi/si_pipe.c		patch | blob | history
src/gallium/drivers/radeonsi/si_pipe.h		patch | blob | history
src/gallium/drivers/radeonsi/si_shader.h		patch | blob | history
src/gallium/drivers/radeonsi/si_state_shaders.c		patch | blob | history