radeonsi: always use async compiles when creating shader/compute states
author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Sun, 22 Oct 2017 15:38:43 +0000 (17:38 +0200)
committer: Nicolai Hähnle <nicolai.haehnle@amd.com>
Thu, 9 Nov 2017 10:53:20 +0000 (11:53 +0100)
With Gallium threaded contexts, creating shader/compute states is
effectively a screen operation, so we should not use context state.

In particular, this allows us to avoid using the context's LLVM
TargetMachine.

This isn't an issue yet because u_threaded_context filters out non-async
debug callbacks, and we disable threaded contexts for debug contexts.
However, we may want to change that in the future.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index e55988af4cc2c1f9a0ff0093d2e5b44bcaf87a42..3eee907d44becca6d181b5c934080472d74dcd17 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include "tgsi/tgsi_parse.h"
+#include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 
@@ -84,14 +85,10 @@ static void si_create_compute_state_async(void *job, int thread_index)
        LLVMTargetMachineRef tm;
        struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
 
-       if (thread_index >= 0) {
-               assert(thread_index < ARRAY_SIZE(program->screen->tm));
-               tm = program->screen->tm[thread_index];
-               if (!debug->async)
-                       debug = NULL;
-       } else {
-               tm = program->compiler_ctx_state.tm;
-       }
+       assert(!debug->debug_message || debug->async);
+       assert(thread_index >= 0);
+       assert(thread_index < ARRAY_SIZE(program->screen->tm));
+       tm = program->screen->tm[thread_index];
 
        memset(&sel, 0, sizeof(sel));
 
@@ -167,20 +164,31 @@ static void *si_create_compute_state(
                        return NULL;
                }
 
-               program->compiler_ctx_state.tm = sctx->tm;
                program->compiler_ctx_state.debug = sctx->debug;
                program->compiler_ctx_state.is_debug_context = sctx->is_debug;
                p_atomic_inc(&sscreen->b.num_shaders_created);
                util_queue_fence_init(&program->ready);
 
-               if ((sctx->debug.debug_message && !sctx->debug.async) ||
-                   sctx->is_debug ||
-                   si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
-                       si_create_compute_state_async(program, -1);
-               else
-                       util_queue_add_job(&sscreen->shader_compiler_queue,
-                                          program, &program->ready,
-                                          si_create_compute_state_async, NULL);
+               struct util_async_debug_callback async_debug;
+               bool wait =
+                       (sctx->debug.debug_message && !sctx->debug.async) ||
+                       sctx->is_debug ||
+                       si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE);
+
+               if (wait) {
+                       u_async_debug_init(&async_debug);
+                       program->compiler_ctx_state.debug = async_debug.base;
+               }
+
+               util_queue_add_job(&sscreen->shader_compiler_queue,
+                                  program, &program->ready,
+                                  si_create_compute_state_async, NULL);
+
+               if (wait) {
+                       util_queue_fence_wait(&program->ready);
+                       u_async_debug_drain(&async_debug, &sctx->debug);
+                       u_async_debug_cleanup(&async_debug);
+               }
        } else {
                const struct pipe_llvm_program_header *header;
                const char *code;
index 757e889c3b730d91cc24632a1e424013351311ed..3edc340f01f497ae440804c30e979b715e2f88c3 100644 (file)
@@ -30,6 +30,7 @@
 #include "tgsi/tgsi_ureg.h"
 #include "util/hash_table.h"
 #include "util/crc32.h"
+#include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 
@@ -1840,14 +1841,10 @@ static void si_init_shader_selector_async(void *job, int thread_index)
        struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
        unsigned i;
 
-       if (thread_index >= 0) {
-               assert(thread_index < ARRAY_SIZE(sscreen->tm));
-               tm = sscreen->tm[thread_index];
-               if (!debug->async)
-                       debug = NULL;
-       } else {
-               tm = sel->compiler_ctx_state.tm;
-       }
+       assert(!debug->debug_message || debug->async);
+       assert(thread_index >= 0);
+       assert(thread_index < ARRAY_SIZE(sscreen->tm));
+       tm = sscreen->tm[thread_index];
 
        /* Compile the main shader part for use with a prolog and/or epilog.
         * If this fails, the driver will try to compile a monolithic shader
@@ -2042,7 +2039,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
        pipe_reference_init(&sel->reference, 1);
        sel->screen = sscreen;
-       sel->compiler_ctx_state.tm = sctx->tm;
        sel->compiler_ctx_state.debug = sctx->debug;
        sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
 
@@ -2272,14 +2268,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
        (void) mtx_init(&sel->mutex, mtx_plain);
        util_queue_fence_init(&sel->ready);
 
-       if ((sctx->debug.debug_message && !sctx->debug.async) ||
-           sctx->is_debug ||
-           si_can_dump_shader(&sscreen->b, sel->info.processor))
-               si_init_shader_selector_async(sel, -1);
-       else
-               util_queue_add_job(&sscreen->shader_compiler_queue, sel,
-                                   &sel->ready, si_init_shader_selector_async,
-                                   NULL);
+       struct util_async_debug_callback async_debug;
+       bool wait =
+               (sctx->debug.debug_message && !sctx->debug.async) ||
+               sctx->is_debug ||
+               si_can_dump_shader(&sscreen->b, sel->info.processor);
+
+       if (wait) {
+               u_async_debug_init(&async_debug);
+               sel->compiler_ctx_state.debug = async_debug.base;
+       }
+
+       util_queue_add_job(&sscreen->shader_compiler_queue, sel,
+                          &sel->ready, si_init_shader_selector_async,
+                          NULL);
+
+       if (wait) {
+               util_queue_fence_wait(&sel->ready);
+               u_async_debug_drain(&async_debug, &sctx->debug);
+               u_async_debug_cleanup(&async_debug);
+       }
 
        return sel;
 }