radeon/llvm: Use amdgcn triple for SI+ on LLVM >= 3.6
[mesa.git] / src / gallium / drivers / radeon / radeon_llvm_util.c
index f2b3e136d4734d2e2e9c58541e18a5cce5a8f0e8..0dfd9ad4867988e840c1ee0dc142375361c7dd90 100644 (file)
 #include <llvm-c/BitReader.h>
 #include <llvm-c/Core.h>
 #include <llvm-c/Target.h>
+#include <llvm-c/Transforms/IPO.h>
 #include <llvm-c/Transforms/PassManagerBuilder.h>
 
-LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
-                                                       unsigned bitcode_len)
+LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
+                                                       const char * bitcode, unsigned bitcode_len)
 {
        LLVMMemoryBufferRef buf;
-       LLVMContextRef ctx = LLVMContextCreate();
        LLVMModuleRef module;
 
        buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
@@ -46,10 +46,10 @@ LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
        return module;
 }
 
-unsigned radeon_llvm_get_num_kernels(const unsigned char *bitcode,
-                               unsigned bitcode_len)
+unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
+                               const char *bitcode, unsigned bitcode_len)
 {
-       LLVMModuleRef mod = radeon_llvm_parse_bitcode(bitcode, bitcode_len);
+       LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
        return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
 }
 
@@ -59,36 +59,58 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
        LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
        LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
        LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
-       LLVMAddTargetData(TD, pass_manager);
 
-       LLVMPassManagerBuilderUseInlinerWithThreshold(builder, 1000000000);
+       /* Function calls are not supported yet, so we need to inline
+        * everything.  The most efficient way to do this is to add
+        * the always_inline attribute to all non-kernel functions
+        * and then run the Always Inline pass.  The Always Inline
+        * pass will automatically inline functions with this attribute
+        * and does not perform the expensive cost analysis that the normal
+        * inliner does.
+        */
+
+       LLVMValueRef fn;
+       for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
+               /* All the non-kernel functions have internal linkage */
+               if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
+                       LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
+               }
+       }
+
+       LLVMAddTargetData(TD, pass_manager);
+       LLVMAddAlwaysInlinerPass(pass_manager);
        LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
 
        LLVMRunPassManager(pass_manager, mod);
        LLVMPassManagerBuilderDispose(builder);
        LLVMDisposePassManager(pass_manager);
+       LLVMDisposeTargetData(TD);
 }
 
-LLVMModuleRef radeon_llvm_get_kernel_module(unsigned index,
-               const unsigned char *bitcode, unsigned bitcode_len)
+LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
+               const char *bitcode, unsigned bitcode_len)
 {
        LLVMModuleRef mod;
        unsigned num_kernels;
        LLVMValueRef *kernel_metadata;
        unsigned i;
 
-       mod = radeon_llvm_parse_bitcode(bitcode, bitcode_len);
+       mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
        num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
        kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
        LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
        for (i = 0; i < num_kernels; i++) {
-               LLVMValueRef kernel_signature, kernel_function;
+               LLVMValueRef kernel_signature, *kernel_function;
+               unsigned num_kernel_md_operands;
                if (i == index) {
                        continue;
                }
                kernel_signature = kernel_metadata[i];
-               LLVMGetMDNodeOperands(kernel_signature, &kernel_function);
-               LLVMDeleteFunction(kernel_function);
+               num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature);
+               kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef));
+               LLVMGetMDNodeOperands(kernel_signature, kernel_function);
+               LLVMDeleteFunction(*kernel_function);
+               FREE(kernel_function);
        }
        FREE(kernel_metadata);
        radeon_llvm_optimize(mod);