radeonsi: compile non-GS middle parts of shaders immediately if enabled
author: Marek Olšák <marek.olsak@amd.com>
Thu, 28 Jan 2016 01:53:13 +0000 (02:53 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Sun, 21 Feb 2016 20:08:58 +0000 (21:08 +0100)
This code path is still disabled.

Only prologs & epilogs are compiled in draw calls, but each variant of those
is compiled only once per process.

VS is always compiled as hw VS.
TES is always compiled as hw VS.

LS and ES stages are always compiled on demand.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 2f27da4f4209d5adc3bc2f84006dbe3022b55b14..e3ba1f97252646fe19a5b3bfc1d13691bda9d26a 100644 (file)
@@ -4815,11 +4815,11 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
        bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
 }
 
-static int si_compile_tgsi_shader(struct si_screen *sscreen,
-                                 LLVMTargetMachineRef tm,
-                                 struct si_shader *shader,
-                                 bool is_monolithic,
-                                 struct pipe_debug_callback *debug)
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+                          LLVMTargetMachineRef tm,
+                          struct si_shader *shader,
+                          bool is_monolithic,
+                          struct pipe_debug_callback *debug)
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_shader_context ctx;
@@ -5854,15 +5854,48 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
 {
+       struct si_shader *mainp = shader->selector->main_shader_part;
        int r;
 
-       /* Compile TGSI. */
-       r = si_compile_tgsi_shader(sscreen, tm, shader,
-                                  sscreen->use_monolithic_shaders, debug);
-       if (r)
-               return r;
+       /* LS and ES are always compiled on demand. */
+       if (!mainp ||
+           (shader->selector->type == PIPE_SHADER_VERTEX &&
+            (shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+           (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
+            shader->key.tes.as_es)) {
+               /* Monolithic shader (compiled as a whole, has many variants,
+                * may take a long time to compile).
+                */
+               r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
+               if (r)
+                       return r;
+       } else {
+               /* The shader consists of 2-3 parts:
+                *
+                * - the middle part is the user shader, it has 1 variant only
+                *   and it was compiled during the creation of the shader
+                *   selector
+                * - the prolog part is inserted at the beginning
+                * - the epilog part is inserted at the end
+                *
+                * The prolog and epilog have many (but simple) variants.
+                */
 
-       if (!sscreen->use_monolithic_shaders) {
+               /* Copy the compiled TGSI shader data over. */
+               shader->is_binary_shared = true;
+               shader->binary = mainp->binary;
+               shader->config = mainp->config;
+               shader->num_input_sgprs = mainp->num_input_sgprs;
+               shader->num_input_vgprs = mainp->num_input_vgprs;
+               shader->face_vgpr_index = mainp->face_vgpr_index;
+               memcpy(shader->vs_output_param_offset,
+                      mainp->vs_output_param_offset,
+                      sizeof(mainp->vs_output_param_offset));
+               shader->uses_instanceid = mainp->uses_instanceid;
+               shader->nr_pos_exports = mainp->nr_pos_exports;
+               shader->nr_param_exports = mainp->nr_param_exports;
+
+               /* Select prologs and/or epilogs. */
                switch (shader->selector->type) {
                case PIPE_SHADER_VERTEX:
                        if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
@@ -5927,5 +5960,6 @@ void si_shader_destroy(struct si_shader *shader)
 
        r600_resource_reference(&shader->bo, NULL);
 
-       radeon_shader_binary_clean(&shader->binary);
+       if (!shader->is_binary_shared)
+               radeon_shader_binary_clean(&shader->binary);
 }
index 196fa3e9086637c082a0f2488fdbcf48ba2117f4..ee81621a702e52013f4d314652e5bca8ad6d70f4 100644 (file)
@@ -181,6 +181,11 @@ struct si_shader_selector {
        struct si_shader        *first_variant; /* immutable after the first variant */
        struct si_shader        *last_variant; /* mutable */
 
+       /* The compiled TGSI shader expecting a prolog and/or epilog (not
+        * uploaded to a buffer).
+        */
+       struct si_shader        *main_shader_part;
+
        struct tgsi_token       *tokens;
        struct pipe_stream_output_info  so;
        struct tgsi_shader_info         info;
@@ -347,6 +352,7 @@ struct si_shader {
        struct r600_resource            *scratch_bo;
        union si_shader_key             key;
        struct radeon_shader_binary     binary;
+       bool                            is_binary_shared;
        struct si_shader_config         config;
 
        ubyte                   num_input_sgprs;
@@ -399,6 +405,11 @@ static inline bool si_vs_exports_prim_id(struct si_shader *shader)
 }
 
 /* si_shader.c */
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+                          LLVMTargetMachineRef tm,
+                          struct si_shader *shader,
+                          bool is_monolithic,
+                          struct pipe_debug_callback *debug);
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug);
index fbc377a6a4eb0cf69a308aa1d1dd70b60d61b2dd..dc813437f1481a74579bb91f012f5ceb1f7d4b67 100644 (file)
@@ -824,6 +824,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                                       const struct pipe_shader_state *state)
 {
        struct si_screen *sscreen = (struct si_screen *)ctx->screen;
+       struct si_context *sctx = (struct si_context*)ctx;
        struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
        int i;
 
@@ -931,6 +932,24 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                break;
        }
 
+       /* Compile the main shader part for use with a prolog and/or epilog. */
+       if (sel->type != PIPE_SHADER_GEOMETRY &&
+           !sscreen->use_monolithic_shaders) {
+               struct si_shader *shader = CALLOC_STRUCT(si_shader);
+
+               if (!shader)
+                       goto error;
+
+               shader->selector = sel;
+
+               if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false,
+                                          &sctx->b.debug) != 0) {
+                       FREE(shader);
+                       goto error;
+               }
+               sel->main_shader_part = shader;
+       }
+
        /* Pre-compilation. */
        if (sel->type == PIPE_SHADER_GEOMETRY ||
            sscreen->b.debug_flags & DBG_PRECOMPILE) {
@@ -955,16 +974,18 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                        break;
                }
 
-               if (si_shader_select_with_key(ctx, &state, &key)) {
-                       fprintf(stderr, "radeonsi: can't create a shader\n");
-                       tgsi_free_tokens(sel->tokens);
-                       FREE(sel);
-                       return NULL;
-               }
+               if (si_shader_select_with_key(ctx, &state, &key))
+                       goto error;
        }
 
        pipe_mutex_init(sel->mutex);
        return sel;
+
+error:
+       fprintf(stderr, "radeonsi: can't create a shader\n");
+       tgsi_free_tokens(sel->tokens);
+       FREE(sel);
+       return NULL;
 }
 
 /**
@@ -1129,6 +1150,9 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
                p = c;
        }
 
+       if (sel->main_shader_part)
+               si_delete_shader(sctx, sel->main_shader_part);
+
        pipe_mutex_destroy(sel->mutex);
        free(sel->tokens);
        free(sel);