#include <llvm-c/BitReader.h>
#include <llvm-c/Core.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/IPO.h>
+#include <llvm-c/Transforms/PassManagerBuilder.h>
-LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
-							unsigned bitcode_len)
+/**
+ * Parse a blob of LLVM bitcode into a module created in \p ctx.
+ *
+ * The bytes are copied into a temporary LLVM memory buffer which is
+ * disposed again before returning; \p bitcode itself is neither kept nor
+ * freed by this function.
+ *
+ * \param ctx          LLVM context the module is parsed into.
+ * \param bitcode      Pointer to the bitcode bytes.
+ * \param bitcode_len  Number of bytes at \p bitcode.
+ * \return The parsed module, or NULL when LLVM reports a parse failure.
+ */
+LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
+			const char * bitcode, unsigned bitcode_len)
 {
 	LLVMMemoryBufferRef buf;
-	LLVMContextRef ctx = LLVMContextCreate();
-	LLVMModuleRef module;
+	LLVMModuleRef module = NULL;
 	buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
 							bitcode_len, "radeon");
-	LLVMParseBitcodeInContext(ctx, buf, &module, NULL);
+	/* A non-zero result means the bitcode could not be parsed; make sure
+	 * the caller then sees NULL rather than whatever is left in 'module'.
+	 */
+	if (LLVMParseBitcodeInContext(ctx, buf, &module, NULL)) {
+		module = NULL;
+	}
+	LLVMDisposeMemoryBuffer(buf);
 	return module;
 }
-unsigned radeon_llvm_get_num_kernels(const unsigned char *bitcode,
-				unsigned bitcode_len)
+/**
+ * Count the operands of the "opencl.kernels" named metadata node found in
+ * a bitcode blob.
+ *
+ * The bitcode is parsed into a temporary module in \p ctx; that module is
+ * disposed again before returning so repeated calls do not leak modules.
+ *
+ * \param ctx          LLVM context used for the temporary module.
+ * \param bitcode      Pointer to the bitcode bytes.
+ * \param bitcode_len  Number of bytes at \p bitcode.
+ * \return Number of "opencl.kernels" operands, or 0 if the bitcode could
+ *         not be parsed.
+ */
+unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
+				const char *bitcode, unsigned bitcode_len)
 {
-	LLVMModuleRef mod = radeon_llvm_parse_bitcode(bitcode, bitcode_len);
+	LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
-	return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
+	unsigned num_kernels;
+
+	if (!mod) {
+		return 0;
+	}
+	num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
+	/* The module was only needed for the count. */
+	LLVMDisposeModule(mod);
+	return num_kernels;
 }
-LLVMModuleRef radeon_llvm_get_kernel_module(unsigned index,
- const unsigned char *bitcode, unsigned bitcode_len)
+/**
+ * Mark every internal-linkage function of \p mod as always_inline, then
+ * run the always-inliner followed by the module passes that an
+ * unconfigured PassManagerBuilder populates.
+ *
+ * When HAVE_LLVM < 0x0309, target data built from the module's data
+ * layout string is added to the pass manager first.
+ */
+static void radeon_llvm_optimize(LLVMModuleRef mod)
+{
+#if HAVE_LLVM < 0x0309
+	LLVMTargetDataRef target_data =
+		LLVMCreateTargetData(LLVMGetDataLayout(mod));
+#endif
+	LLVMPassManagerBuilderRef pmb = LLVMPassManagerBuilderCreate();
+	LLVMPassManagerRef passes = LLVMCreatePassManager();
+	LLVMValueRef func = LLVMGetFirstFunction(mod);
+
+	/* Function calls are not supported yet, so everything has to be
+	 * inlined. The cheapest way to get there is to tag all non-kernel
+	 * functions with the always_inline attribute and run the Always
+	 * Inline pass: it automatically inlines functions carrying that
+	 * attribute and skips the expensive cost analysis performed by the
+	 * normal inliner.
+	 */
+	while (func) {
+		/* All the non-kernel functions have internal linkage */
+		if (LLVMGetLinkage(func) == LLVMInternalLinkage) {
+			LLVMAddFunctionAttr(func, LLVMAlwaysInlineAttribute);
+		}
+		func = LLVMGetNextFunction(func);
+	}
+
+#if HAVE_LLVM < 0x0309
+	LLVMAddTargetData(target_data, passes);
+#endif
+	LLVMAddAlwaysInlinerPass(passes);
+	LLVMPassManagerBuilderPopulateModulePassManager(pmb, passes);
+	LLVMRunPassManager(passes, mod);
+
+	/* Tear everything down in the same order as before. */
+	LLVMPassManagerBuilderDispose(pmb);
+	LLVMDisposePassManager(passes);
+#if HAVE_LLVM < 0x0309
+	LLVMDisposeTargetData(target_data);
+#endif
+}
+
+/**
+ * Build a module that keeps only one kernel from a bitcode blob.
+ *
+ * The bitcode is parsed into \p ctx, and for every "opencl.kernels"
+ * operand other than the one at position \p index the function referenced
+ * by the first operand of that metadata node is deleted. The remaining
+ * module is then passed through radeon_llvm_optimize().
+ *
+ * \param ctx          LLVM context the module is parsed into.
+ * \param index        Position in "opencl.kernels" of the kernel to keep.
+ * \param bitcode      Pointer to the bitcode bytes.
+ * \param bitcode_len  Number of bytes at \p bitcode.
+ * \return The reduced, optimized module, or NULL if parsing or a scratch
+ *         allocation fails (the module is disposed in that case).
+ */
+LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
+		const char *bitcode, unsigned bitcode_len)
 {
 	LLVMModuleRef mod;
 	unsigned num_kernels;
 	LLVMValueRef *kernel_metadata;
 	unsigned i;
-	mod = radeon_llvm_parse_bitcode(bitcode, bitcode_len);
+	mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
+	if (!mod) {
+		return NULL;
+	}
 	num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
 	kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
+	/* A NULL result is only an error when something was requested. */
+	if (!kernel_metadata && num_kernels > 0) {
+		LLVMDisposeModule(mod);
+		return NULL;
+	}
 	LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
 	for (i = 0; i < num_kernels; i++) {
-		LLVMValueRef kernel_signature, kernel_function;
+		LLVMValueRef kernel_signature, *kernel_function;
+		unsigned num_kernel_md_operands;
 		if (i == index) {
 			continue;
 		}
 		kernel_signature = kernel_metadata[i];
-		LLVMGetMDNodeOperands(kernel_signature, &kernel_function);
-		LLVMDeleteFunction(kernel_function);
+		num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature);
+		/* Without operands there is no first element to read below. */
+		if (num_kernel_md_operands == 0) {
+			continue;
+		}
+		kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef));
+		if (!kernel_function) {
+			FREE(kernel_metadata);
+			LLVMDisposeModule(mod);
+			return NULL;
+		}
+		/* The node's operands are all fetched, but only the first one
+		 * is used as the function to delete.
+		 */
+		LLVMGetMDNodeOperands(kernel_signature, kernel_function);
+		LLVMDeleteFunction(*kernel_function);
+		FREE(kernel_function);
 	}
 	FREE(kernel_metadata);
+	radeon_llvm_optimize(mod);
 	return mod;
 }