radeonsi: keep serialized NIR instead of nir_shader in si_shader_selector
author Marek Olšák <marek.olsak@amd.com>
Sat, 2 Nov 2019 03:55:58 +0000 (23:55 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 6 Nov 2019 04:28:45 +0000 (23:28 -0500)
This decreases memory usage, because serialized NIR is more compact.

The main shader part is compiled from nir_shader.
Monolithic shader variants are compiled from nir_binary.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 446b3e393ee81986835940b982637c79704fb562..fba7187204d6788c6bbe15ca67562f1ef2dfbc0f 100644 (file)
@@ -42,6 +42,7 @@
 #include "sid.h"
 
 #include "compiler/nir/nir.h"
+#include "compiler/nir/nir_serialize.h"
 
 static const char scratch_rsrc_dword0_symbol[] =
        "SCRATCH_RSRC_DWORD0";
@@ -6069,7 +6070,8 @@ static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
        return sel->vs_needs_prolog || key->ls_vgpr_fix;
 }
 
-static bool si_compile_tgsi_main(struct si_shader_context *ctx)
+static bool si_compile_tgsi_main(struct si_shader_context *ctx,
+                                struct nir_shader *nir, bool free_nir)
 {
        struct si_shader *shader = ctx->shader;
        struct si_shader_selector *sel = shader->selector;
@@ -6304,7 +6306,10 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
                        return false;
                }
        } else {
-               if (!si_nir_build_llvm(ctx, sel->nir)) {
+               bool success = si_nir_build_llvm(ctx, nir);
+               if (free_nir)
+                       ralloc_free(nir);
+               if (!success) {
                        fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
                        return false;
                }
@@ -6924,6 +6929,27 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler,
               sel->info.num_memory_instructions > 1000;
 }
 
+static struct nir_shader *get_nir_shader(struct si_shader_selector *sel,
+                                        bool *free_nir)
+{ /* Get sel's NIR: the live shader, or a copy deserialized from nir_binary. */
+       *free_nir = false; /* default: the returned shader is not the caller's to free */
+
+       if (sel->nir) {
+               return sel->nir; /* selector still holds the nir_shader itself */
+       } else if (sel->nir_binary) {
+               struct pipe_screen *screen = &sel->screen->b;
+               const void *options =
+                       screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR,
+                                                    sel->type);
+
+               struct blob_reader blob_reader;
+               blob_reader_init(&blob_reader, sel->nir_binary, sel->nir_size);
+               *free_nir = true; /* fresh copy with no ralloc parent: caller must ralloc_free() it */
+               return nir_deserialize(NULL, options, &blob_reader);
+       }
+       return NULL; /* neither sel->nir nor sel->nir_binary is set */
+}
+
 int si_compile_tgsi_shader(struct si_screen *sscreen,
                           struct ac_llvm_compiler *compiler,
                           struct si_shader *shader,
@@ -6931,6 +6957,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_shader_context ctx;
+       bool free_nir;
+       struct nir_shader *nir = get_nir_shader(sel, &free_nir);
        int r = -1;
 
        /* Dump TGSI code before doing TGSI->LLVM conversion in case the
@@ -6940,20 +6968,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                if (sel->tokens)
                        tgsi_dump(sel->tokens, 0);
                else
-                       nir_print_shader(sel->nir, stderr);
+                       nir_print_shader(nir, stderr);
                si_dump_streamout(&sel->so);
        }
 
        si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader),
-                          sel->nir != NULL);
-       si_llvm_context_set_ir(&ctx, shader);
+                          nir != NULL);
+       si_llvm_context_set_ir(&ctx, shader, nir);
 
        memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
               sizeof(shader->info.vs_output_param_offset));
 
        shader->info.uses_instanceid = sel->info.uses_instanceid;
 
-       if (!si_compile_tgsi_main(&ctx)) {
+       if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
                si_llvm_dispose(&ctx);
                return -1;
        }
@@ -6997,15 +7025,16 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        parts[3] = ctx.main_fn;
 
                        /* VS as LS main part */
+                       nir = get_nir_shader(ls, &free_nir);
                        struct si_shader shader_ls = {};
                        shader_ls.selector = ls;
                        shader_ls.key.as_ls = 1;
                        shader_ls.key.mono = shader->key.mono;
                        shader_ls.key.opt = shader->key.opt;
                        shader_ls.is_monolithic = true;
-                       si_llvm_context_set_ir(&ctx, &shader_ls);
+                       si_llvm_context_set_ir(&ctx, &shader_ls, nir);
 
-                       if (!si_compile_tgsi_main(&ctx)) {
+                       if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
                                si_llvm_dispose(&ctx);
                                return -1;
                        }
@@ -7063,6 +7092,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        gs_prolog = ctx.main_fn;
 
                        /* ES main part */
+                       nir = get_nir_shader(es, &free_nir);
                        struct si_shader shader_es = {};
                        shader_es.selector = es;
                        shader_es.key.as_es = 1;
@@ -7070,9 +7100,9 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        shader_es.key.mono = shader->key.mono;
                        shader_es.key.opt = shader->key.opt;
                        shader_es.is_monolithic = true;
-                       si_llvm_context_set_ir(&ctx, &shader_es);
+                       si_llvm_context_set_ir(&ctx, &shader_es, nir);
 
-                       if (!si_compile_tgsi_main(&ctx)) {
+                       if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
                                si_llvm_dispose(&ctx);
                                return -1;
                        }
index 1d41b7aa042cb25d2affe6a12f2435adb573b715..20f2c812c21ed87807acb920797a70352f16cf48 100644 (file)
@@ -328,6 +328,9 @@ struct si_shader_selector {
 
        struct tgsi_token       *tokens;
        struct nir_shader       *nir;
+       void                    *nir_binary;
+       unsigned                nir_size;
+
        struct pipe_stream_output_info  so;
        struct tgsi_shader_info         info;
        struct tgsi_tessctrl_info       tcs_info;
index 98abbdfc6939855606c1ac0c54eee3c0110f7ffc..8f904471d1fa6a51a8b60e7b642f9e7c0d8e15da 100644 (file)
@@ -282,7 +282,8 @@ void si_llvm_context_init(struct si_shader_context *ctx,
                          unsigned wave_size,
                          unsigned ballot_mask_bits);
 void si_llvm_context_set_ir(struct si_shader_context *ctx,
-                           struct si_shader *shader);
+                           struct si_shader *shader,
+                           struct nir_shader *nir);
 
 void si_llvm_create_func(struct si_shader_context *ctx,
                         const char *name,
index 91e9bd3dd68d93b17f8cd9c24dcb4a53a5326f0e..cb965cc8eb3f3f8ff8122c1b69af9982b99ee56e 100644 (file)
@@ -1029,7 +1029,8 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 /* Set the context to a certain TGSI shader. Can be called repeatedly
  * to change the shader. */
 void si_llvm_context_set_ir(struct si_shader_context *ctx,
-                           struct si_shader *shader)
+                           struct si_shader *shader,
+                           struct nir_shader *nir)
 {
        struct si_shader_selector *sel = shader->selector;
        const struct tgsi_shader_info *info = &sel->info;
@@ -1058,7 +1059,7 @@ void si_llvm_context_set_ir(struct si_shader_context *ctx,
        ctx->num_samplers = util_last_bit(info->samplers_declared);
        ctx->num_images = util_last_bit(info->images_declared);
 
-       if (sel->nir)
+       if (nir)
                return;
 
        if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
index 10b95ff0e78a6e3bbf85ef4539f92b3c9fcbd890..ddd27fb8176f31985db4f98e0882fb97a3f3ddfc 100644 (file)
@@ -2581,6 +2581,22 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 
                si_shader_vs(sscreen, sel->gs_copy_shader, sel);
        }
+
+       if (sel->nir) {
+               /* Serialize NIR to save memory. Monolithic shader variants
+                * have to deserialize NIR before compilation.
+                */
+               struct blob blob;
+               blob_init(&blob);
+               nir_serialize(&blob, sel->nir, false);
+               sel->nir_binary = malloc(blob.size);
+               memcpy(sel->nir_binary, blob.data, blob.size);
+               sel->nir_size = blob.size;
+               blob_finish(&blob);
+
+               ralloc_free(sel->nir);
+               sel->nir = NULL;
+       }
 }
 
 void si_schedule_initial_compile(struct si_context *sctx, unsigned processor,
@@ -3281,6 +3297,7 @@ void si_destroy_shader_selector(struct si_context *sctx,
        simple_mtx_destroy(&sel->mutex);
        free(sel->tokens);
        ralloc_free(sel->nir);
+       free(sel->nir_binary);
        free(sel);
 }