From 761e36b4caab4e8e09a4c2b1409a825902fc7d2c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 15 Oct 2014 12:24:30 -0400 Subject: [PATCH] radeonsi: Cache LLVMTargetMachine object in si_screen Rather than building a new one every compile. This should reduce some of the overhead of compiling shaders. One consequence of this change is that we lose the MachineInstrs dumps when dumping the shaders via R600_DEBUG. The LLVM IR and assembly is still dumped, and if you still want to see the MachineInstr dump, you can run the dumped LLVM IR through llc. --- src/gallium/drivers/r600/r600_llvm.c | 2 +- src/gallium/drivers/radeon/radeon_llvm_emit.c | 47 ++++++++++--------- src/gallium/drivers/radeon/radeon_llvm_emit.h | 6 ++- src/gallium/drivers/radeonsi/si_pipe.c | 17 +++++++ src/gallium/drivers/radeonsi/si_pipe.h | 3 ++ src/gallium/drivers/radeonsi/si_shader.c | 2 +- 6 files changed, 51 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index a928fb81262..af4604435a3 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -881,7 +881,7 @@ unsigned r600_llvm_compile( const char * gpu_family = r600_get_llvm_processor_name(family); memset(&binary, 0, sizeof(struct radeon_shader_binary)); - r = radeon_llvm_compile(mod, &binary, gpu_family, dump); + r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL); r = r600_create_shader(bc, &binary, use_kill); diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index dc871d79717..4f288e42b1f 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -98,9 +98,10 @@ static void init_r600_target() } } -static LLVMTargetRef get_r600_target() +LLVMTargetRef radeon_llvm_get_r600_target() { LLVMTargetRef target = NULL; + init_r600_target(); for (target = LLVMGetFirstTarget(); target; target = LLVMGetNextTarget(target)) { @@ -138,14 +139,13 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) * @returns 0 for success, 1 for failure */ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, - const char *gpu_family, unsigned dump) + const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm) { - LLVMTargetRef target; - LLVMTargetMachineRef tm; char cpu[CPU_STRING_LEN]; char fs[FS_STRING_LEN]; char *err; + bool dispose_tm = false; LLVMContextRef llvm_ctx; unsigned rval = 0; LLVMMemoryBufferRef out_buffer; @@ -154,22 +154,23 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar char triple[TRIPLE_STRING_LEN]; LLVMBool mem_err; - /* initialise */ - init_r600_target(); - - target = get_r600_target(); - if (!target) { - return 1; - } - - strncpy(cpu, gpu_family, CPU_STRING_LEN); - memset(fs, 0, sizeof(fs)); - if (dump) { - LLVMDumpModule(M); - strncpy(fs, "+DumpCode", FS_STRING_LEN); + if (!tm) { + LLVMTargetRef target = radeon_llvm_get_r600_target(); + if (!target) { + return 1; + } + strncpy(cpu, gpu_family, CPU_STRING_LEN); + memset(fs, 0, sizeof(fs)); + if (dump) { + LLVMDumpModule(M); + strncpy(fs, "+DumpCode", FS_STRING_LEN); + } + strncpy(triple, "r600--", TRIPLE_STRING_LEN); + tm = LLVMCreateTargetMachine(target, triple, cpu, fs, + LLVMCodeGenLevelDefault, LLVMRelocDefault, + LLVMCodeModelDefault); + dispose_tm = true; } - strncpy(triple, "r600--", TRIPLE_STRING_LEN); - /* Setup Diagnostic Handler*/ llvm_ctx = LLVMGetModuleContext(M); @@ -179,9 +180,6 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar rval = 0; /* Compile IR*/ - tm = LLVMCreateTargetMachine(target, triple, cpu, fs, - LLVMCodeGenLevelDefault, LLVMRelocDefault, - LLVMCodeModelDefault); mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err, &out_buffer); @@ -205,6 +203,9 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar /* Clean up */ LLVMDisposeMemoryBuffer(out_buffer); - LLVMDisposeTargetMachine(tm); + + if (dispose_tm) { + LLVMDisposeTargetMachine(tm); + } return rval; } diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h index 780ff5f67f2..66217b5ce5f 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.h +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h @@ -28,15 +28,19 @@ #define RADEON_LLVM_EMIT_H #include +#include struct radeon_shader_binary; void radeon_llvm_shader_type(LLVMValueRef F, unsigned type); +LLVMTargetRef radeon_llvm_get_r600_target(void); + unsigned radeon_llvm_compile( LLVMModuleRef M, struct radeon_shader_binary *binary, const char * gpu_family, - unsigned dump); + unsigned dump, + LLVMTargetMachineRef tm); #endif /* RADEON_LLVM_EMIT_H */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index afb6364a776..4b7156140b7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -25,10 +25,14 @@ #include "si_public.h" #include "sid.h" +#include "radeon/radeon_llvm_emit.h" #include "radeon/radeon_uvd.h" #include "util/u_memory.h" #include "vl/vl_decoder.h" +#include +#include + /* * pipe_context */ @@ -420,6 +424,10 @@ static void si_destroy_screen(struct pipe_screen* pscreen) return; r600_destroy_common_screen(&sscreen->b); + +#if HAVE_LLVM >= 0x0306 + LLVMDisposeTargetMachine(sscreen->tm); +#endif } #define SI_TILE_MODE_COLOR_2D_8BPP 14 @@ -476,6 +484,7 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen) struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); + LLVMTargetRef r600_target; if (sscreen == NULL) { return NULL; } @@ -503,5 +512,13 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) /* Create the auxiliary context. This must be done last. */ sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL); +#if HAVE_LLVM >= 0x0306 + /* Initialize LLVM TargetMachine */ + r600_target = radeon_llvm_get_r600_target(); + sscreen->tm = LLVMCreateTargetMachine(r600_target, "r600--", + r600_get_llvm_processor_name(sscreen->b.family), + "+DumpCode", LLVMCodeGenLevelDefault, LLVMRelocDefault, + LLVMCodeModelDefault); +#endif return &sscreen->b.b; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 9ba4970b3ff..b2caef02eb1 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -28,6 +28,8 @@ #include "si_state.h" +#include + #ifdef PIPE_ARCH_BIG_ENDIAN #define SI_BIG_ENDIAN 1 #else @@ -50,6 +52,7 @@ struct si_compute; struct si_screen { struct r600_common_screen b; + LLVMTargetMachineRef tm; }; struct si_sampler_view { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ce59f0ebb66..ba42dab78bb 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2590,7 +2590,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, shader->selector ? shader->selector->tokens : NULL); memset(&binary, 0, sizeof(binary)); r = radeon_llvm_compile(mod, &binary, - r600_get_llvm_processor_name(sscreen->b.family), dump); + r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm); if (r) { return r; -- 2.30.2