const struct pipe_screen_config *config)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
- unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
+ unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;
if (!sscreen) {
return NULL;
si_disk_cache_create(sscreen);
- /* Only enable as many threads as we have target machines, but at most
- * the number of CPUs - 1 if there is more than one.
- */
- num_threads = sysconf(_SC_NPROCESSORS_ONLN);
- num_threads = MAX2(1, num_threads - 1);
- num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->compiler));
- num_compiler_threads_lowprio =
- MIN2(num_threads, ARRAY_SIZE(sscreen->compiler_lowp));
+ /* Determine the number of shader compiler threads. */
+ hw_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ if (hw_threads >= 12) {
+ num_comp_hi_threads = hw_threads * 3 / 4;
+ num_comp_lo_threads = hw_threads / 3;
+ } else if (hw_threads >= 6) {
+ num_comp_hi_threads = hw_threads - 2;
+ num_comp_lo_threads = hw_threads / 2;
+ } else if (hw_threads >= 2) {
+ num_comp_hi_threads = hw_threads - 1;
+ num_comp_lo_threads = hw_threads / 2;
+ } else {
+ num_comp_hi_threads = 1;
+ num_comp_lo_threads = 1;
+ }
+
+ num_comp_hi_threads = MIN2(num_comp_hi_threads,
+ ARRAY_SIZE(sscreen->compiler));
+ num_comp_lo_threads = MIN2(num_comp_lo_threads,
+ ARRAY_SIZE(sscreen->compiler_lowp));
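+
+ /* For example, 16 hw_threads map to 12 high-priority and 5 low-priority
+ * compiler threads, and 8 hw_threads map to 6 and 4. */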
if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
- 32, num_compiler_threads,
+ 64, num_comp_hi_threads,
UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
si_destroy_shader_cache(sscreen);
FREE(sscreen);
if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
"si_shader_low",
- 32, num_compiler_threads_lowprio,
+ 64, num_comp_lo_threads,
UTIL_QUEUE_INIT_RESIZE_IF_FULL |
UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
si_destroy_shader_cache(sscreen);
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
sscreen->debug_flags |= DBG_ALL_SHADERS;
- for (i = 0; i < num_compiler_threads; i++)
+ for (i = 0; i < num_comp_hi_threads; i++)
si_init_compiler(sscreen, &sscreen->compiler[i]);
- for (i = 0; i < num_compiler_threads_lowprio; i++)
+ for (i = 0; i < num_comp_lo_threads; i++)
si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
/* Create the auxiliary context. This must be done last. */
struct util_queue shader_compiler_queue;
- /* Use at most 3 normal compiler threads on quadcore and better.
- * Hyperthreaded CPUs report the number of threads, but we want
- * the number of cores. */
- struct si_compiler compiler[3]; /* used by the queue only */
+ /* The number of high-priority compiler threads is derived from the
+ * CPU thread count at screen creation and clamped to this array size.
+ * Only shader-db needs this many threads. */
+ struct si_compiler compiler[24]; /* used by the queue only */
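+ /* The bound of 24 is only reached on CPUs with 32 or more hardware
+ * threads (32 * 3 / 4 = 24). */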
struct util_queue shader_compiler_queue_low_priority;
- /* Use at most 2 low priority threads on quadcore and better.
- * We want to minimize the impact on multithreaded Mesa. */
- struct si_compiler compiler_lowp[2]; /* at most 2 threads */
+ /* The low-priority thread count is likewise derived from the CPU
+ * thread count and clamped to this array size. These threads run at
+ * minimum priority to minimize the impact on multithreaded Mesa. */
+ struct si_compiler compiler_lowp[10];
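+ /* 10 low-priority threads are first reached at 30 hardware threads
+ * (30 / 3 = 10). */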
};
struct si_blend_color {