Reorder LLVM passes, running mem2reg earlier.
author: Török Edwin <edwintorok@gmail.com>
Mon, 3 May 2010 14:43:03 +0000 (07:43 -0700)
committer: José Fonseca <jfonseca@vmware.com>
Mon, 3 May 2010 16:01:20 +0000 (17:01 +0100)
This gives a ~30% improvement in shader optimization time in Blender.
Tested by comparing the dumped LLVM modules.
Current ordering:
time ~/llvm-git/obj/Release-Asserts/bin/opt l.bc -constprop -instcombine -mem2reg -gvn -simplifycfg
real    0m1.126s
user    0m1.108s
sys     0m0.012s

With this patch:
time ~/llvm-git/obj/Release-Asserts/bin/opt l.bc -mem2reg -constprop -instcombine   -gvn  -simplifycfg
real    0m0.885s
user    0m0.880s
sys     0m0.000s

The overall improvement in blender is ~15%.
Blender without the patch takes 1m13s:
edwin     5934 87.6 11.5 729440 458296 pts/5   SLl+ 17:35   1:13 blender

Blender with the patch takes 1m3s:
edwin     5726 94.2 11.2 716424 446168 pts/5   SLl+ 17:32   1:03 blender

It is still slow with the patch, but better (most of the optimization time is
taken up by GVN, see LLVM PR7023).

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: José Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/drivers/llvmpipe/lp_jit.c

index 2c234285b5e0b968b201d754ca08bd38c7dae889..ea9b7c90a51d16bfb468d68d1eac27db9548cf48 100644 (file)
@@ -182,6 +182,8 @@ draw_llvm_create(struct draw_context *draw)
    /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
     * but there are more on SVN. */
    /* TODO: Add more passes */
+   LLVMAddCFGSimplificationPass(llvm->pass);
+   LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
    LLVMAddConstantPropagationPass(llvm->pass);
    if(util_cpu_caps.has_sse4_1) {
       /* FIXME: There is a bug in this pass, whereby the combination of fptosi
@@ -190,9 +192,7 @@ draw_llvm_create(struct draw_context *draw)
        */
       LLVMAddInstructionCombiningPass(llvm->pass);
    }
-   LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
    LLVMAddGVNPass(llvm->pass);
-   LLVMAddCFGSimplificationPass(llvm->pass);
 
    init_globals(llvm);
 
index 466a2f54fbe29f8ea4e11c9eca599e2c8de55f7f..30e206a2b42001d428ff5ed35fc2475d506766dd 100644 (file)
@@ -185,6 +185,8 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
       /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
        * but there are more on SVN. */
       /* TODO: Add more passes */
+      LLVMAddCFGSimplificationPass(screen->pass);
+      LLVMAddPromoteMemoryToRegisterPass(screen->pass);
       LLVMAddConstantPropagationPass(screen->pass);
       if(util_cpu_caps.has_sse4_1) {
          /* FIXME: There is a bug in this pass, whereby the combination of fptosi
@@ -193,9 +195,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
           */
          LLVMAddInstructionCombiningPass(screen->pass);
       }
-      LLVMAddPromoteMemoryToRegisterPass(screen->pass);
       LLVMAddGVNPass(screen->pass);
-      LLVMAddCFGSimplificationPass(screen->pass);
    }
 
    lp_jit_init_globals(screen);