From 442ef8c3e35e8e3e907ed2505344bcd745913b99 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 1 Nov 2019 23:55:58 -0400 Subject: [PATCH] radeonsi: keep serialized NIR instead of nir_shader in si_shader_selector This decreases memory usage, because serialized NIR is more compact. The main shader part is compiled from nir_shader. Monolithic shader variants are compiled from nir_binary. Reviewed-by: Timothy Arceri --- src/gallium/drivers/radeonsi/si_shader.c | 50 +++++++++++++++---- src/gallium/drivers/radeonsi/si_shader.h | 3 ++ .../drivers/radeonsi/si_shader_internal.h | 3 +- .../drivers/radeonsi/si_shader_tgsi_setup.c | 5 +- .../drivers/radeonsi/si_state_shaders.c | 17 +++++++ 5 files changed, 65 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 446b3e393ee..fba7187204d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -42,6 +42,7 @@ #include "sid.h" #include "compiler/nir/nir.h" +#include "compiler/nir/nir_serialize.h" static const char scratch_rsrc_dword0_symbol[] = "SCRATCH_RSRC_DWORD0"; @@ -6069,7 +6070,8 @@ static bool si_vs_needs_prolog(const struct si_shader_selector *sel, return sel->vs_needs_prolog || key->ls_vgpr_fix; } -static bool si_compile_tgsi_main(struct si_shader_context *ctx) +static bool si_compile_tgsi_main(struct si_shader_context *ctx, + struct nir_shader *nir, bool free_nir) { struct si_shader *shader = ctx->shader; struct si_shader_selector *sel = shader->selector; @@ -6304,7 +6306,10 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) return false; } } else { - if (!si_nir_build_llvm(ctx, sel->nir)) { + bool success = si_nir_build_llvm(ctx, nir); + if (free_nir) + ralloc_free(nir); + if (!success) { fprintf(stderr, "Failed to translate shader from NIR to LLVM\n"); return false; } @@ -6924,6 +6929,27 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler, sel->info.num_memory_instructions > 1000; } +static struct nir_shader *get_nir_shader(struct si_shader_selector *sel, + bool *free_nir) +{ + *free_nir = false; + + if (sel->nir) { + return sel->nir; + } else if (sel->nir_binary) { + struct pipe_screen *screen = &sel->screen->b; + const void *options = + screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, + sel->type); + + struct blob_reader blob_reader; + blob_reader_init(&blob_reader, sel->nir_binary, sel->nir_size); + *free_nir = true; + return nir_deserialize(NULL, options, &blob_reader); + } + return NULL; +} + int si_compile_tgsi_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader *shader, @@ -6931,6 +6957,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, { struct si_shader_selector *sel = shader->selector; struct si_shader_context ctx; + bool free_nir; + struct nir_shader *nir = get_nir_shader(sel, &free_nir); int r = -1; /* Dump TGSI code before doing TGSI->LLVM conversion in case the @@ -6940,20 +6968,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if (sel->tokens) tgsi_dump(sel->tokens, 0); else - nir_print_shader(sel->nir, stderr); + nir_print_shader(nir, stderr); si_dump_streamout(&sel->so); } si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader), - sel->nir != NULL); - si_llvm_context_set_ir(&ctx, shader); + nir != NULL); + si_llvm_context_set_ir(&ctx, shader, nir); memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(shader->info.vs_output_param_offset)); shader->info.uses_instanceid = sel->info.uses_instanceid; - if (!si_compile_tgsi_main(&ctx)) { + if (!si_compile_tgsi_main(&ctx, nir, free_nir)) { si_llvm_dispose(&ctx); return -1; } @@ -6997,15 +7025,16 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, parts[3] = ctx.main_fn; /* VS as LS main part */ + nir = get_nir_shader(ls, &free_nir); struct si_shader shader_ls = {}; shader_ls.selector = ls; shader_ls.key.as_ls = 1; shader_ls.key.mono = shader->key.mono; shader_ls.key.opt = shader->key.opt; shader_ls.is_monolithic = true; - si_llvm_context_set_ir(&ctx, &shader_ls); + si_llvm_context_set_ir(&ctx, &shader_ls, nir); - if (!si_compile_tgsi_main(&ctx)) { + if (!si_compile_tgsi_main(&ctx, nir, free_nir)) { si_llvm_dispose(&ctx); return -1; } @@ -7063,6 +7092,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, gs_prolog = ctx.main_fn; /* ES main part */ + nir = get_nir_shader(es, &free_nir); struct si_shader shader_es = {}; shader_es.selector = es; shader_es.key.as_es = 1; @@ -7070,9 +7100,9 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, shader_es.key.mono = shader->key.mono; shader_es.key.opt = shader->key.opt; shader_es.is_monolithic = true; - si_llvm_context_set_ir(&ctx, &shader_es); + si_llvm_context_set_ir(&ctx, &shader_es, nir); - if (!si_compile_tgsi_main(&ctx)) { + if (!si_compile_tgsi_main(&ctx, nir, free_nir)) { si_llvm_dispose(&ctx); return -1; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1d41b7aa042..20f2c812c21 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -328,6 +328,9 @@ struct si_shader_selector { struct tgsi_token *tokens; struct nir_shader *nir; + void *nir_binary; + unsigned nir_size; + struct pipe_stream_output_info so; struct tgsi_shader_info info; struct tgsi_tessctrl_info tcs_info; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 98abbdfc693..8f904471d1f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -282,7 +282,8 @@ void si_llvm_context_init(struct si_shader_context *ctx, unsigned wave_size, unsigned ballot_mask_bits); void si_llvm_context_set_ir(struct si_shader_context *ctx, - struct si_shader *shader); + struct si_shader *shader, + struct nir_shader *nir); void si_llvm_create_func(struct si_shader_context *ctx, const char *name, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 91e9bd3dd68..cb965cc8eb3 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -1029,7 +1029,8 @@ void si_llvm_context_init(struct si_shader_context *ctx, /* Set the context to a certain TGSI shader. Can be called repeatedly * to change the shader. */ void si_llvm_context_set_ir(struct si_shader_context *ctx, - struct si_shader *shader) + struct si_shader *shader, + struct nir_shader *nir) { struct si_shader_selector *sel = shader->selector; const struct tgsi_shader_info *info = &sel->info; @@ -1058,7 +1059,7 @@ void si_llvm_context_set_ir(struct si_shader_context *ctx, ctx->num_samplers = util_last_bit(info->samplers_declared); ctx->num_images = util_last_bit(info->images_declared); - if (sel->nir) + if (nir) return; if (info->array_max[TGSI_FILE_TEMPORARY] > 0) { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 10b95ff0e78..ddd27fb8176 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2581,6 +2581,22 @@ static void si_init_shader_selector_async(void *job, int thread_index) si_shader_vs(sscreen, sel->gs_copy_shader, sel); } + + if (sel->nir) { + /* Serialize NIR to save memory. Monolithic shader variants + * have to deserialize NIR before compilation. + */ + struct blob blob; + blob_init(&blob); + nir_serialize(&blob, sel->nir, false); + sel->nir_binary = malloc(blob.size); + memcpy(sel->nir_binary, blob.data, blob.size); + sel->nir_size = blob.size; + blob_finish(&blob); + + ralloc_free(sel->nir); + sel->nir = NULL; + } } void si_schedule_initial_compile(struct si_context *sctx, unsigned processor, @@ -3281,6 +3297,7 @@ void si_destroy_shader_selector(struct si_context *sctx, simple_mtx_destroy(&sel->mutex); free(sel->tokens); ralloc_free(sel->nir); + free(sel->nir_binary); free(sel); } -- 2.30.2