if (!sscreen->b.ws->unref(sscreen->b.ws))
return;
+ if (util_queue_is_initialized(&sscreen->shader_compiler_queue))
+ util_queue_destroy(&sscreen->shader_compiler_queue);
+
+ for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
+ if (sscreen->tm[i])
+ LLVMDisposeTargetMachine(sscreen->tm[i]);
+
/* Free shader parts. */
for (i = 0; i < ARRAY_SIZE(parts); i++) {
while (parts[i]) {
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
+ unsigned num_cpus, num_compiler_threads, i;
if (!sscreen) {
return NULL;
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
+ /* Only enable as many threads as we have target machines and CPUs. */
+ num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
+
+ for (i = 0; i < num_compiler_threads; i++)
+ sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
+
+ util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
+ 32, num_compiler_threads);
+
/* Create the auxiliary context. This must be done last. */
sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0);
#define SI_PIPE_H
#include "si_state.h"
+#include "util/u_queue.h"
#include <llvm-c/TargetMachine.h>
*/
pipe_mutex shader_cache_mutex;
struct hash_table *shader_cache;
+
+ /* Shader compiler queue for multithreaded compilation. */
+ struct util_queue shader_compiler_queue;
+ LLVMTargetMachineRef tm[4]; /* used by the queue only */
};
struct si_blend_color {
struct pipe_fence_handle *last_gfx_fence;
struct si_shader_ctx_state fixed_func_tcs_shader;
- LLVMTargetMachineRef tm;
+ LLVMTargetMachineRef tm; /* only non-threaded compilation */
bool gfx_flush_in_progress;
/* Atoms (direct states). */
struct si_shader_ctx_state *state,
union si_shader_key *key,
LLVMTargetMachineRef tm,
- struct pipe_debug_callback *debug)
+ struct pipe_debug_callback *debug,
+ bool wait)
{
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0))
return 0;
+ /* This must be done before the mutex is locked, because async GS
+ * compilation calls this function too, and therefore must enter
+ * the mutex first.
+ */
+ if (wait)
+ util_queue_job_wait(&sel->ready);
+
pipe_mutex_lock(sel->mutex);
/* Find the shader variant. */
si_shader_selector_key(ctx, state->cso, &key);
return si_shader_select_with_key(sctx->screen, state, &key,
- sctx->tm, &sctx->b.debug);
+ sctx->tm, &sctx->b.debug, true);
}
static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
{
struct si_shader_selector *sel = (struct si_shader_selector *)job;
struct si_screen *sscreen = sel->screen;
- LLVMTargetMachineRef tm = sel->tm;
- struct pipe_debug_callback *debug = &sel->debug;
+ LLVMTargetMachineRef tm;
+ struct pipe_debug_callback *debug;
unsigned i;
+ if (thread_index >= 0) {
+ assert(thread_index < ARRAY_SIZE(sscreen->tm));
+ tm = sscreen->tm[thread_index];
+ debug = NULL;
+ } else {
+ tm = sel->tm;
+ debug = &sel->debug;
+ }
+
/* Compile the main shader part for use with a prolog and/or epilog.
* If this fails, the driver will try to compile a monolithic shader
* on demand.
break;
}
- if (si_shader_select_with_key(sscreen, &state, &key, tm, debug))
+ if (si_shader_select_with_key(sscreen, &state, &key, tm, debug,
+ false))
fprintf(stderr, "radeonsi: can't create a monolithic shader\n");
}
}
sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
S_02880C_EXEC_ON_NOOP(1);
pipe_mutex_init(sel->mutex);
+ util_queue_fence_init(&sel->ready);
- si_init_shader_selector_async(sel, -1);
+ if (sctx->b.debug.debug_message ||
+ !util_queue_is_initialized(&sscreen->shader_compiler_queue))
+ si_init_shader_selector_async(sel, -1);
+ else
+ util_queue_add_job(&sscreen->shader_compiler_queue, sel,
+ &sel->ready, si_init_shader_selector_async);
return sel;
}
[PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
};
+ util_queue_job_wait(&sel->ready);
+
if (current_shader[sel->type]->cso == sel) {
current_shader[sel->type]->cso = NULL;
current_shader[sel->type]->current = NULL;
if (sel->main_shader_part)
si_delete_shader(sctx, sel->main_shader_part);
+ util_queue_fence_destroy(&sel->ready);
pipe_mutex_destroy(sel->mutex);
free(sel->tokens);
free(sel);