radeonsi: Cache LLVMTargetMachine object in si_screen
author Tom Stellard <thomas.stellard@amd.com>
Wed, 15 Oct 2014 16:24:30 +0000 (12:24 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 6 Jan 2015 20:53:21 +0000 (12:53 -0800)
Cache the LLVMTargetMachine in si_screen rather than building a new one
for every compile.  This should reduce some of the overhead of compiling
shaders.

One consequence of this change is that we lose the MachineInstr dumps
when dumping shaders via R600_DEBUG.  The LLVM IR and assembly are
still dumped, and if you still want to see the MachineInstr dump, you
can run the dumped LLVM IR through llc.

src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/radeon/radeon_llvm_emit.c
src/gallium/drivers/radeon/radeon_llvm_emit.h
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c

index a928fb812621d86895e81bb008b3ec9488dce568..af4604435a3bed498f66be0364185ee0b61da661 100644 (file)
@@ -881,7 +881,7 @@ unsigned r600_llvm_compile(
        const char * gpu_family = r600_get_llvm_processor_name(family);
 
        memset(&binary, 0, sizeof(struct radeon_shader_binary));
-       r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
+       r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL);
 
        r = r600_create_shader(bc, &binary, use_kill);
 
index dc871d79717bd6292800298bffe1980f8030742c..4f288e42b1f24d83a3c543f9eb6f660d36947dbb 100644 (file)
@@ -98,9 +98,10 @@ static void init_r600_target()
        }
 }
 
-static LLVMTargetRef get_r600_target()
+LLVMTargetRef radeon_llvm_get_r600_target()
 {
        LLVMTargetRef target = NULL;
+       init_r600_target();
 
        for (target = LLVMGetFirstTarget(); target;
                                        target = LLVMGetNextTarget(target)) {
@@ -138,14 +139,13 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
  * @returns 0 for success, 1 for failure
  */
 unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
-                                         const char *gpu_family, unsigned dump)
+                         const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
 {
 
-       LLVMTargetRef target;
-       LLVMTargetMachineRef tm;
        char cpu[CPU_STRING_LEN];
        char fs[FS_STRING_LEN];
        char *err;
+       bool dispose_tm = false;
        LLVMContextRef llvm_ctx;
        unsigned rval = 0;
        LLVMMemoryBufferRef out_buffer;
@@ -154,22 +154,23 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
        char triple[TRIPLE_STRING_LEN];
        LLVMBool mem_err;
 
-       /* initialise */
-       init_r600_target();
-
-       target = get_r600_target();
-       if (!target) {
-               return 1;
-       }
-
-       strncpy(cpu, gpu_family, CPU_STRING_LEN);
-       memset(fs, 0, sizeof(fs));
-       if (dump) {
-               LLVMDumpModule(M);
-               strncpy(fs, "+DumpCode", FS_STRING_LEN);
+       if (!tm) {
+               LLVMTargetRef target = radeon_llvm_get_r600_target();
+               if (!target) {
+                       return 1;
+               }
+               strncpy(cpu, gpu_family, CPU_STRING_LEN);
+               memset(fs, 0, sizeof(fs));
+               if (dump) {
+                       LLVMDumpModule(M);
+                       strncpy(fs, "+DumpCode", FS_STRING_LEN);
+               }
+               strncpy(triple, "r600--", TRIPLE_STRING_LEN);
+               tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
+                                 LLVMCodeGenLevelDefault, LLVMRelocDefault,
+                                                 LLVMCodeModelDefault);
+               dispose_tm = true;
        }
-       strncpy(triple, "r600--", TRIPLE_STRING_LEN);
-
        /* Setup Diagnostic Handler*/
        llvm_ctx = LLVMGetModuleContext(M);
 
@@ -179,9 +180,6 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
        rval = 0;
 
        /* Compile IR*/
-       tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
-                                 LLVMCodeGenLevelDefault, LLVMRelocDefault,
-                                                 LLVMCodeModelDefault);
        mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
                                                                 &out_buffer);
 
@@ -205,6 +203,9 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
 
        /* Clean up */
        LLVMDisposeMemoryBuffer(out_buffer);
-       LLVMDisposeTargetMachine(tm);
+
+       if (dispose_tm) {
+               LLVMDisposeTargetMachine(tm);
+       }
        return rval;
 }
index 780ff5f67f206282da17c946c1f7f73f50698f9f..66217b5ce5f5d5d7471de010280e869d67c93f61 100644 (file)
 #define RADEON_LLVM_EMIT_H
 
 #include <llvm-c/Core.h>
+#include <llvm-c/TargetMachine.h>
 
 struct radeon_shader_binary;
 
 void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
 
+LLVMTargetRef radeon_llvm_get_r600_target(void);
+
 unsigned  radeon_llvm_compile(
        LLVMModuleRef M,
        struct radeon_shader_binary *binary,
        const char * gpu_family,
-       unsigned dump);
+       unsigned dump,
+       LLVMTargetMachineRef tm);
 
 #endif /* RADEON_LLVM_EMIT_H */
index afb6364a776b3f31ae4c8fbe89c1e3c7bcb459d5..4b7156140b787f1966e9c5750941d41e358966d4 100644 (file)
 #include "si_public.h"
 #include "sid.h"
 
+#include "radeon/radeon_llvm_emit.h"
 #include "radeon/radeon_uvd.h"
 #include "util/u_memory.h"
 #include "vl/vl_decoder.h"
 
+#include <llvm-c/Target.h>
+#include <llvm-c/TargetMachine.h>
+
 /*
  * pipe_context
  */
@@ -420,6 +424,10 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
                return;
 
        r600_destroy_common_screen(&sscreen->b);
+
+#if HAVE_LLVM >= 0x0306
+       LLVMDisposeTargetMachine(sscreen->tm);
+#endif
 }
 
 #define SI_TILE_MODE_COLOR_2D_8BPP  14
@@ -476,6 +484,7 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen)
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 {
        struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
+       LLVMTargetRef r600_target;
        if (sscreen == NULL) {
                return NULL;
        }
@@ -503,5 +512,13 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
        /* Create the auxiliary context. This must be done last. */
        sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL);
 
+#if HAVE_LLVM >= 0x0306
+       /* Initialize LLVM TargetMachine */
+       r600_target = radeon_llvm_get_r600_target();
+       sscreen->tm = LLVMCreateTargetMachine(r600_target, "r600--",
+                               r600_get_llvm_processor_name(sscreen->b.family),
+                               "+DumpCode", LLVMCodeGenLevelDefault, LLVMRelocDefault,
+                               LLVMCodeModelDefault);
+#endif
        return &sscreen->b.b;
 }
index 9ba4970b3ff0534c4f9e297e11b2b2d308df42ff..b2caef02eb18abaacc3bf17c71eba65ffe071775 100644 (file)
@@ -28,6 +28,8 @@
 
 #include "si_state.h"
 
+#include <llvm-c/TargetMachine.h>
+
 #ifdef PIPE_ARCH_BIG_ENDIAN
 #define SI_BIG_ENDIAN 1
 #else
@@ -50,6 +52,7 @@ struct si_compute;
 
 struct si_screen {
        struct r600_common_screen       b;
+       LLVMTargetMachineRef            tm;
 };
 
 struct si_sampler_view {
index ce59f0ebb6644888f362a1671ed4c5968b77ce04..ba42dab78bbf051f3cb1d622bb63c7dff7a815bd 100644 (file)
@@ -2590,7 +2590,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
                        shader->selector ? shader->selector->tokens : NULL);
        memset(&binary, 0, sizeof(binary));
        r = radeon_llvm_compile(mod, &binary,
-               r600_get_llvm_processor_name(sscreen->b.family), dump);
+               r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
 
        if (r) {
                return r;