2 // Copyright 2012 Francisco Jerez
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
23 #include "core/compiler.hpp"
25 #include <clang/Frontend/CompilerInstance.h>
26 #include <clang/Frontend/TextDiagnosticBuffer.h>
27 #include <clang/Frontend/TextDiagnosticPrinter.h>
28 #include <clang/CodeGen/CodeGenAction.h>
29 #include <clang/Basic/TargetInfo.h>
30 #include <llvm/Bitcode/BitstreamWriter.h>
31 #include <llvm/Bitcode/ReaderWriter.h>
32 #if HAVE_LLVM < 0x0305
33 #include <llvm/Linker.h>
35 #include <llvm/Linker/Linker.h>
36 #include <llvm/IR/DiagnosticInfo.h>
37 #include <llvm/IR/DiagnosticPrinter.h>
39 #if HAVE_LLVM < 0x0303
40 #include <llvm/DerivedTypes.h>
41 #include <llvm/LLVMContext.h>
42 #include <llvm/Module.h>
44 #include <llvm/IR/DerivedTypes.h>
45 #include <llvm/IR/LLVMContext.h>
46 #include <llvm/IR/Module.h>
47 #include <llvm/Support/SourceMgr.h>
48 #include <llvm/IRReader/IRReader.h>
50 #if HAVE_LLVM < 0x0305
51 #include <llvm/ADT/OwningPtr.h>
53 #include <llvm/PassManager.h>
54 #include <llvm/Support/CodeGen.h>
55 #include <llvm/Support/TargetSelect.h>
56 #include <llvm/Support/MemoryBuffer.h>
57 #if HAVE_LLVM < 0x0303
58 #include <llvm/Support/PathV1.h>
60 #include <llvm/Support/FormattedStream.h>
61 #include <llvm/Support/TargetRegistry.h>
62 #include <llvm/Transforms/IPO.h>
63 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
65 #if HAVE_LLVM < 0x0302
66 #include <llvm/Target/TargetData.h>
67 #elif HAVE_LLVM < 0x0303
68 #include <llvm/DataLayout.h>
70 #include <llvm/IR/DataLayout.h>
72 #include <llvm/Target/TargetLibraryInfo.h>
73 #include <llvm/Target/TargetMachine.h>
74 #include <llvm/Target/TargetOptions.h>
76 #include <llvm-c/Target.h>
77 #include <llvm-c/TargetMachine.h>
78 #include <llvm-c/Core.h>
80 #include "pipe/p_state.h"
81 #include "util/u_memory.h"
82 #include "util/u_math.h"
92 using namespace clover
;
// Compiles the OpenCL program text `source` for the target triple `target`
// using clang::EmitObjAction, after registering the TGSI LLVM target.
// `source` is handed to clang as an in-memory buffer remapped to the file
// name `name`.  Diagnostics are printed into the string stream built on
// `log`; build_error(log) is thrown when ExecuteAction() reports failure.
// NOTE(review): the declaration of `log` and the function's return are not
// visible in this excerpt.
97 build_binary(const std::string
&source
, const std::string
&target
,
98 const std::string
&name
) {
99 clang::CompilerInstance c
;
100 clang::EmitObjAction
act(&llvm::getGlobalContext());
102 llvm::raw_string_ostream
s_log(log
);
// Register the TGSI target, its target info, MC layer and asm printer.
104 LLVMInitializeTGSITarget();
105 LLVMInitializeTGSITargetInfo();
106 LLVMInitializeTGSITargetMC();
107 LLVMInitializeTGSIAsmPrinter();
// Declare `name` as the single OpenCL input, disable builtin and standard
// include lookup as well as builtins, and select the requested triple.
109 c
.getFrontendOpts().Inputs
.push_back(
110 std::make_pair(clang::IK_OpenCL
, name
));
111 c
.getHeaderSearchOpts().UseBuiltinIncludes
= false;
112 c
.getHeaderSearchOpts().UseStandardIncludes
= false;
113 c
.getLangOpts().NoBuiltin
= true;
114 c
.getTargetOpts().Triple
= target
;
115 c
.getInvocation().setLangDefaults(clang::IK_OpenCL
);
// Route diagnostics through a TextDiagnosticPrinter writing to s_log.
116 c
.createDiagnostics(0, NULL
, new clang::TextDiagnosticPrinter(
117 s_log
, c
.getDiagnosticOpts()));
// Serve `source` from memory under the file name `name`.
119 c
.getPreprocessorOpts().addRemappedFile(
120 name
, llvm::MemoryBuffer::getMemBuffer(source
));
122 if (!c
.ExecuteAction(act
))
123 throw build_error(log
);
// Reads the entire contents of the file `name` into a byte vector and
// returns the result of module::deserialize() on a compat::istream that
// wraps that vector.
127 load_binary(const char *name
) {
128 std::ifstream
fs((name
));
// Slurp the stream from its current position up to end-of-stream.
129 std::vector
<unsigned char> str((std::istreambuf_iterator
<char>(fs
)),
130 (std::istreambuf_iterator
<char>()));
131 compat::istream
cs(str
);
132 return module::deserialize(cs
);
// Runs clang's EmitLLVMOnlyAction on the OpenCL text `source` inside the
// LLVM context `llvm_ctx` and returns the module taken from the action.
//
//   source             program text, remapped in memory to file `name`
//   name               file name presented to clang; also appended to the
//                      argument array built from `opts`
//   triple, processor  stored into the target options (Triple / CPU);
//                      `processor` is also used to build libclc_path
//   opts               build options, tokenised with ' ' as the delimiter
//   address_spaces     out: receives a memcpy of the target's address
//                      space map
//   optimization_level out: receives CodeGenOpts.OptimizationLevel
//   r_log              build log string
//
// error(CL_INVALID_BUILD_OPTIONS) is thrown on the option-parsing path.
// NOTE(review): several lines of this function (among them the
// declarations of `log`, `opt` and `Success`, and the conditions guarding
// the throw) are not visible in this excerpt.
137 compile_llvm(llvm::LLVMContext
&llvm_ctx
, const std::string
&source
,
138 const std::string
&name
, const std::string
&triple
,
139 const std::string
&processor
, const std::string
&opts
,
140 clang::LangAS::Map
& address_spaces
, unsigned &optimization_level
,
141 compat::string
&r_log
) {
143 clang::CompilerInstance c
;
144 clang::EmitLLVMOnlyAction
act(&llvm_ctx
);
146 llvm::raw_string_ostream
s_log(log
);
147 std::string libclc_path
= LIBCLC_LIBEXECDIR
+ processor
+ "-"
150 // Parse the compiler options:
151 std::vector
<std::string
> opts_array
;
152 std::istringstream
ss(opts
);
156 getline(ss
, opt
, ' ');
157 opts_array
.push_back(opt
);
// The input file name goes last in the argument array.
160 opts_array
.push_back(name
);
// Build a parallel array of C strings pointing into opts_array's elements.
162 std::vector
<const char *> opts_carray
;
163 for (unsigned i
= 0; i
< opts_array
.size(); i
++) {
164 opts_carray
.push_back(opts_array
.at(i
).c_str());
// Set up a DiagnosticsEngine backed by a TextDiagnosticBuffer.
167 llvm::IntrusiveRefCntPtr
<clang::DiagnosticIDs
> DiagID
;
168 llvm::IntrusiveRefCntPtr
<clang::DiagnosticOptions
> DiagOpts
;
169 clang::TextDiagnosticBuffer
*DiagsBuffer
;
171 DiagID
= new clang::DiagnosticIDs();
172 DiagOpts
= new clang::DiagnosticOptions();
173 DiagsBuffer
= new clang::TextDiagnosticBuffer();
175 clang::DiagnosticsEngine
Diags(DiagID
, &*DiagOpts
, DiagsBuffer
);
// Let clang populate the invocation from the argument array.
178 Success
= clang::CompilerInvocation::CreateFromArgs(c
.getInvocation(),
180 opts_carray
.data() + opts_carray
.size(),
183 throw error(CL_INVALID_BUILD_OPTIONS
);
// Request LLVM-only emission and enable builtin / standard system include
// lookup relative to CLANG_RESOURCE_DIR.
185 c
.getFrontendOpts().ProgramAction
= clang::frontend::EmitLLVMOnly
;
186 c
.getHeaderSearchOpts().UseBuiltinIncludes
= true;
187 c
.getHeaderSearchOpts().UseStandardSystemIncludes
= true;
188 c
.getHeaderSearchOpts().ResourceDir
= CLANG_RESOURCE_DIR
;
190 // Add libclc generic search path
191 c
.getHeaderSearchOpts().AddPath(LIBCLC_INCLUDEDIR
,
192 clang::frontend::Angled
,
194 #if HAVE_LLVM < 0x0303
199 // Add libclc include
200 c
.getPreprocessorOpts().Includes
.push_back("clc/clc.h");
202 // clc.h requires that this macro be defined:
203 c
.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers");
204 c
.getPreprocessorOpts().addMacroDef("cl_khr_fp64");
206 c
.getLangOpts().NoBuiltin
= true;
207 c
.getTargetOpts().Triple
= triple
;
208 c
.getTargetOpts().CPU
= processor
;
210 // This is a workaround for a Clang bug which causes the number
211 // of warnings and errors to be printed to stderr.
212 // http://www.llvm.org/bugs/show_bug.cgi?id=19735
213 c
.getDiagnosticOpts().ShowCarets
= false;
// setLangDefaults() takes different arguments depending on HAVE_LLVM.
214 #if HAVE_LLVM <= 0x0301
215 c
.getInvocation().setLangDefaults(clang::IK_OpenCL
);
217 c
.getInvocation().setLangDefaults(c
.getLangOpts(), clang::IK_OpenCL
,
218 clang::LangStandard::lang_opencl11
);
221 #if HAVE_LLVM < 0x0303
224 new clang::TextDiagnosticPrinter(
226 #if HAVE_LLVM <= 0x0301
227 c
.getDiagnosticOpts()));
229 &c
.getDiagnosticOpts()));
// Serve `source` from memory under `name`.  The LLVM >= 3.6 variant calls
// release() on the object returned by getMemBuffer() before passing it on.
232 #if HAVE_LLVM >= 0x0306
233 c
.getPreprocessorOpts().addRemappedFile(name
,
234 llvm::MemoryBuffer::getMemBuffer(source
).release());
236 c
.getPreprocessorOpts().addRemappedFile(name
,
237 llvm::MemoryBuffer::getMemBuffer(source
));
240 // Setting this attribute tells clang to link this file before
241 // performing any optimizations. This is required so that
242 // we can replace calls to the OpenCL C barrier() builtin
243 // with calls to target intrinsics that have the noduplicate
244 // attribute. This attribute will prevent Clang from creating
245 // illegal uses of barrier() (e.g. Moving barrier() inside a conditional
246 // that is not executed by all threads) during its optimization passes.
247 c
.getCodeGenOpts().LinkBitcodeFile
= libclc_path
;
// Hand the CodeGenOpts optimization level back through the out-parameter.
249 optimization_level
= c
.getCodeGenOpts().OptimizationLevel
;
252 bool ExecSuccess
= c
.ExecuteAction(act
);
258 // Get address spaces map to be able to find kernel argument address space
259 memcpy(address_spaces
, c
.getTarget().getAddressSpaceMap(),
260 sizeof(address_spaces
));
// For LLVM >= 3.6 the result of takeModule() is release()d before being
// returned; otherwise it is returned directly.
262 #if HAVE_LLVM >= 0x0306
263 return act
.takeModule().release();
265 return act
.takeModule();
// Appends to `kernels` one entry per operand of the module's
// "opencl.kernels" named metadata node: operand 0 of each entry,
// dyn_cast to llvm::Function.
// NOTE(review): in the visible lines the dyn_cast result is pushed without
// a null check, and the handling of a missing metadata node (described by
// the comment below) is not visible in this excerpt.
270 find_kernels(llvm::Module
*mod
, std::vector
<llvm::Function
*> &kernels
) {
271 const llvm::NamedMDNode
*kernel_node
=
272 mod
->getNamedMetadata("opencl.kernels");
273 // This means there are no kernels in the program. The spec does not
274 // require that we return an error here, but there will be an error if
275 // the user tries to pass this program to a clCreateKernel() call.
280 for (unsigned i
= 0; i
< kernel_node
->getNumOperands(); ++i
) {
281 kernels
.push_back(llvm::dyn_cast
<llvm::Function
>(
282 kernel_node
->getOperand(i
)->getOperand(0)));
// Populates an llvm::PassManager for `mod`: a DataLayoutPass, an
// internalize pass whose export list holds the names of `kernels`, and the
// module passes PassManagerBuilder selects for `optimization_level`.
// NOTE(review): the statement that runs the pass manager is not visible in
// this excerpt.
287 optimize(llvm::Module
*mod
, unsigned optimization_level
,
288 const std::vector
<llvm::Function
*> &kernels
) {
290 llvm::PassManager PM
;
291 // Add a function internalizer pass.
293 // By default, the function internalizer pass will look for a function
294 // called "main" and then mark all other functions as internal. Marking
295 // functions as internal enables the optimizer to perform optimizations
296 // like function inlining and global dead-code elimination.
298 // When there is no "main" function in a module, the internalize pass will
299 // treat the module like a library, and it won't internalize any functions.
300 // Since there is no "main" function in our kernels, we need to tell
301 // the internalizer pass that this module is not a library by passing a
302 // list of kernel functions to the internalizer. The internalizer will
303 // treat the functions in the list as "main" functions and internalize
304 // all of the other functions.
305 std::vector
<const char*> export_list
;
// Collect the name of every kernel as a C string for the export list.
306 for (std::vector
<llvm::Function
*>::const_iterator I
= kernels
.begin(),
309 llvm::Function
*kernel
= *I
;
310 export_list
.push_back(kernel
->getName().data());
// DataLayoutPass is constructed from `mod` before LLVM 3.6 and without
// arguments otherwise.
312 #if HAVE_LLVM < 0x0306
313 PM
.add(new llvm::DataLayoutPass(mod
));
315 PM
.add(new llvm::DataLayoutPass());
317 PM
.add(llvm::createInternalizePass(export_list
));
// Let PassManagerBuilder add the passes for the requested level, with
// library info derived from the module's target triple.
319 llvm::PassManagerBuilder PMB
;
320 PMB
.OptLevel
= optimization_level
;
321 PMB
.LibraryInfo
= new llvm::TargetLibraryInfo(
322 llvm::Triple(mod
->getTargetTriple()));
323 PMB
.populateModulePassManager(PM
);
// Builds the list of module::argument descriptors for the function named
// `kernel_name` in `mod`.
//
// For every formal argument the store size of its LLVM type is taken from
// the data layout, and the API-visible size is that store size rounded up
// with util_next_power_of_two().  Pointer arguments whose address space
// equals the opencl_local entry of `address_spaces` are recorded as
// module::argument::local; the other visible pointer path records
// module::argument::global.  The remaining visible path records
// module::argument::scalar, sign-extended when the function's attribute set
// has SExt at index getArgNo() + 1 and zero-extended otherwise.
//
// Two implicit scalar cl_uint arguments (grid_dimension and grid_offset)
// are described after the explicit ones.
// NOTE(review): some lines (else branches, the push_back calls wrapping a
// few module::argument constructions, the return) are not visible in this
// excerpt, and `kernel_func` is dereferenced without a null check in the
// visible lines.
327 compat::vector
<module::argument
>
328 get_kernel_args(const llvm::Module
*mod
, const std::string
&kernel_name
,
329 const clang::LangAS::Map
&address_spaces
) {
331 compat::vector
<module::argument
> args
;
332 llvm::Function
*kernel_func
= mod
->getFunction(kernel_name
);
// The data layout object is obtained differently depending on HAVE_LLVM.
334 #if HAVE_LLVM < 0x0302
335 llvm::TargetData
TD(kernel_func
->getParent());
336 #elif HAVE_LLVM < 0x0305
337 llvm::DataLayout
TD(kernel_func
->getParent()->getDataLayout());
339 llvm::DataLayout
TD(mod
);
342 for (llvm::Function::const_arg_iterator I
= kernel_func
->arg_begin(),
343 E
= kernel_func
->arg_end(); I
!= E
; ++I
) {
344 const llvm::Argument
&arg
= *I
;
346 llvm::Type
*arg_type
= arg
.getType();
347 const unsigned arg_store_size
= TD
.getTypeStoreSize(arg_type
);
349 // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
350 // type that is not a power of two bytes in size must be
351 // aligned to the next larger power of two". We need this
352 // alignment for three element vectors, which have
353 // non-power-of-2 store size.
354 const unsigned arg_api_size
= util_next_power_of_two(arg_store_size
);
// Integer arguments use the smallest legal integer type that holds
// arg_store_size * 8 bits as their target type.
356 llvm::Type
*target_type
= arg_type
->isIntegerTy() ?
357 TD
.getSmallestLegalIntType(mod
->getContext(), arg_store_size
* 8)
359 unsigned target_size
= TD
.getTypeStoreSize(target_type
);
360 unsigned target_align
= TD
.getABITypeAlignment(target_type
);
// By-value pointer arguments: look at the pointee (element) type.
362 if (llvm::isa
<llvm::PointerType
>(arg_type
) && arg
.hasByValAttr()) {
364 llvm::dyn_cast
<llvm::PointerType
>(arg_type
)->getElementType();
367 if (arg_type
->isPointerTy()) {
368 unsigned address_space
= llvm::cast
<llvm::PointerType
>(arg_type
)->getAddressSpace();
// address_spaces is indexed by the clang LangAS id minus LangAS::Offset.
369 if (address_space
== address_spaces
[clang::LangAS::opencl_local
370 - clang::LangAS::Offset
]) {
371 args
.push_back(module::argument(module::argument::local
,
372 arg_api_size
, target_size
,
374 module::argument::zero_ext
));
376 // XXX: Correctly handle constant address space. There is no
377 // way for r600g to pass a handle for constant buffers back
378 // to clover like it can for global buffers, so
379 // creating constant arguments will break r600g. For now,
380 // continue treating constant buffers as global buffers
381 // until we can come up with a way to create handles for
383 args
.push_back(module::argument(module::argument::global
,
384 arg_api_size
, target_size
,
386 module::argument::zero_ext
));
// Scalar path: pick sign- or zero-extension from the SExt attribute.
390 llvm::AttributeSet attrs
= kernel_func
->getAttributes();
391 enum module::argument::ext_type ext_type
=
392 (attrs
.hasAttribute(arg
.getArgNo() + 1,
393 llvm::Attribute::SExt
) ?
394 module::argument::sign_ext
:
395 module::argument::zero_ext
);
398 module::argument(module::argument::scalar
, arg_api_size
,
399 target_size
, target_align
, ext_type
));
403 // Append implicit arguments. XXX - The types, ordering and
404 // vector size of the implicit arguments should depend on the
405 // target according to the selected calling convention.
406 llvm::Type
*size_type
=
407 TD
.getSmallestLegalIntType(mod
->getContext(), sizeof(cl_uint
) * 8);
410 module::argument(module::argument::scalar
, sizeof(cl_uint
),
411 TD
.getTypeStoreSize(size_type
),
412 TD
.getABITypeAlignment(size_type
),
413 module::argument::zero_ext
,
414 module::argument::grid_dimension
));
417 module::argument(module::argument::scalar
, sizeof(cl_uint
),
418 TD
.getTypeStoreSize(size_type
),
419 TD
.getABITypeAlignment(size_type
),
420 module::argument::zero_ext
,
421 module::argument::grid_offset
));
// Packages `mod` as LLVM bitcode into a clover module `m`.
//
// The module is written as bitcode into an in-memory buffer.  For each
// entry of `kernels` a module::symbol(name, 0, index, args) is added, where
// `args` comes from get_kernel_args().  The text section payload is a
// pipe_llvm_program_header (num_bytes = bitcode size) followed by the
// bitcode bytes.
// NOTE(review): the declarations of `m` and `data` and the return statement
// are not visible in this excerpt; `writer` is not referenced again in the
// visible lines.
427 build_module_llvm(llvm::Module
*mod
,
428 const std::vector
<llvm::Function
*> &kernels
,
429 clang::LangAS::Map
& address_spaces
) {
432 struct pipe_llvm_program_header header
;
// Serialize the module into llvm_bitcode and flush the stream.
434 llvm::SmallVector
<char, 1024> llvm_bitcode
;
435 llvm::raw_svector_ostream
bitcode_ostream(llvm_bitcode
);
436 llvm::BitstreamWriter
writer(llvm_bitcode
);
437 llvm::WriteBitcodeToFile(mod
, bitcode_ostream
);
438 bitcode_ostream
.flush();
// One symbol per kernel; the kernel's index in `kernels` is the third
// constructor argument.
440 for (unsigned i
= 0; i
< kernels
.size(); ++i
) {
441 std::string kernel_name
= kernels
[i
]->getName();
442 compat::vector
<module::argument
> args
=
443 get_kernel_args(mod
, kernel_name
, address_spaces
);
445 m
.syms
.push_back(module::symbol(kernel_name
, 0, i
, args
));
// Section payload: raw header bytes first, then the bitcode.
448 header
.num_bytes
= llvm_bitcode
.size();
450 data
.insert(0, (char*)(&header
), sizeof(header
));
451 data
.insert(data
.end(), llvm_bitcode
.begin(),
453 m
.secs
.push_back(module::section(0, module::section::text
,
454 header
.num_bytes
, data
));
// Generates an object file for `mod` through the LLVM-C target machine API
// and copies it into a std::vector<char>.
//
// The target is looked up from `triple`, a target machine is created for
// (`triple`, `processor`) with default codegen level, relocation and code
// model, and the module is emitted as LLVMObjectFile into a memory buffer.
// On the visible error paths `r_log` receives either the LLVM error message
// (which is then disposed) or "Could not create TargetMachine: " + triple.
// The memory buffer and the target machine are disposed after the copy.
// NOTE(review): the declarations of `error_message` and `err`, the
// conditions/throws on the error paths and the return statement are not
// visible in this excerpt.
460 compile_native(const llvm::Module
*mod
, const std::string
&triple
,
461 const std::string
&processor
, compat::string
&r_log
) {
464 LLVMTargetRef target
;
466 LLVMMemoryBufferRef out_buffer
;
467 unsigned buffer_size
;
468 const char *buffer_data
;
470 LLVMModuleRef mod_ref
= wrap(mod
);
// A non-zero return from LLVMGetTargetFromTriple() enters the error path.
472 if (LLVMGetTargetFromTriple(triple
.c_str(), &target
, &error_message
)) {
473 r_log
= std::string(error_message
);
474 LLVMDisposeMessage(error_message
);
478 LLVMTargetMachineRef tm
= LLVMCreateTargetMachine(
479 target
, triple
.c_str(), processor
.c_str(), "",
480 LLVMCodeGenLevelDefault
, LLVMRelocDefault
, LLVMCodeModelDefault
);
483 r_log
= "Could not create TargetMachine: " + triple
;
487 err
= LLVMTargetMachineEmitToMemoryBuffer(tm
, mod_ref
, LLVMObjectFile
,
488 &error_message
, &out_buffer
);
// Emission error path: release the target machine and the message.
491 LLVMDisposeTargetMachine(tm
);
492 r_log
= std::string(error_message
);
493 LLVMDisposeMessage(error_message
);
// Copy the emitted bytes out before the LLVM buffer is disposed.
497 buffer_size
= LLVMGetBufferSize(out_buffer
);
498 buffer_data
= LLVMGetBufferStart(out_buffer
);
500 std::vector
<char> code(buffer_data
, buffer_data
+ buffer_size
);
502 LLVMDisposeMemoryBuffer(out_buffer
);
503 LLVMDisposeTargetMachine(tm
);
// Maps each kernel name to the st_value of the ELF symbol with the same
// name.
//
//   code     ELF object image; opened in place with elf_memory()
//   kernels  functions whose names are looked up in the symbol table
//   r_log    receives "Failed to read ELF section header." or
//            "Unable to find symbol table." on the visible error paths
//
// The section headers are scanned for the one named ".symtab"; every symbol
// in it is then compared against every kernel name, and matches are stored
// in the returned map.
// NOTE(review): parts of this function (e.g. the `try` matching the visible
// `catch`, the declarations of `symbol` and `s`, and the statements on the
// error paths) are not visible in this excerpt.
508 std::map
<std::string
, unsigned>
509 get_kernel_offsets(std::vector
<char> &code
,
510 const std::vector
<llvm::Function
*> &kernels
,
511 compat::string
&r_log
) {
513 // One of the libelf implementations
514 // (http://www.mr511.de/software/english.htm) requires calling
515 // elf_version() before elf_memory().
517 elf_version(EV_CURRENT
);
519 Elf
*elf
= elf_memory(&code
[0], code
.size());
// section_str_index receives the index of the section-name string table;
// it is passed by address (this argument was previously mis-encoded).
520 size_t section_str_index
;
521 elf_getshdrstrndx(elf
, &section_str_index
);
522 Elf_Scn
*section
= NULL
;
523 Elf_Scn
*symtab
= NULL
;
524 GElf_Shdr symtab_header
;
526 // Find the symbol table
528 while ((section
= elf_nextscn(elf
, section
))) {
530 if (gelf_getshdr(section
, &symtab_header
) != &symtab_header
) {
531 r_log
= "Failed to read ELF section header.";
534 name
= elf_strptr(elf
, section_str_index
, symtab_header
.sh_name
);
535 if (!strcmp(name
, ".symtab")) {
541 r_log
= "Unable to find symbol table.";
544 } catch (build_error
&e
) {
550 // Extract symbol information from the table
551 Elf_Data
*symtab_data
= NULL
;
555 std::map
<std::string
, unsigned> kernel_offsets
;
556 symtab_data
= elf_getdata(symtab
, symtab_data
);
558 // Determine the offsets for each kernel
// Symbol names are resolved through the string table referenced by the
// symbol table header's sh_link.
559 for (int i
= 0; (symbol
= gelf_getsym(symtab_data
, i
, &s
)); i
++) {
560 char *name
= elf_strptr(elf
, symtab_header
.sh_link
, symbol
->st_name
);
561 for (std::vector
<llvm::Function
*>::const_iterator it
= kernels
.begin(),
562 e
= kernels
.end(); it
!= e
; ++it
) {
563 llvm::Function
*f
= *it
;
564 if (f
->getName() == std::string(name
))
565 kernel_offsets
[f
->getName()] = symbol
->st_value
;
569 return kernel_offsets
;
// Packages a native ELF image `code` into a clover module `m`.
//
// Kernel entry offsets are obtained from get_kernel_offsets().  The text
// section payload is a pipe_llvm_program_header (num_bytes = code.size())
// followed by the bytes of `code`.  For each (name, offset) pair a
// module::symbol(name, 0, offset, args) is added, where `args` comes from
// get_kernel_args().
// NOTE(review): the declarations of `m` and `data` and the return statement
// are not visible in this excerpt.
573 build_module_native(std::vector
<char> &code
,
574 const llvm::Module
*mod
,
575 const std::vector
<llvm::Function
*> &kernels
,
576 const clang::LangAS::Map
&address_spaces
,
577 compat::string
&r_log
) {
579 std::map
<std::string
, unsigned> kernel_offsets
=
580 get_kernel_offsets(code
, kernels
, r_log
);
582 // Begin building the clover module
584 struct pipe_llvm_program_header header
;
586 // Store the generated ELF binary in the module's text section.
587 header
.num_bytes
= code
.size();
// Section payload: raw header bytes first, then the ELF image.
589 data
.append((char*)(&header
), sizeof(header
));
590 data
.append(code
.begin(), code
.end());
591 m
.secs
.push_back(module::section(0, module::section::text
,
592 header
.num_bytes
, data
));
// One symbol per kernel found in the ELF symbol table; i->second is the
// offset recorded by get_kernel_offsets().
594 for (std::map
<std::string
, unsigned>::iterator i
= kernel_offsets
.begin(),
595 e
= kernel_offsets
.end(); i
!= e
; ++i
) {
596 compat::vector
<module::argument
> args
=
597 get_kernel_args(mod
, i
->first
, address_spaces
);
598 m
.syms
.push_back(module::symbol(i
->first
, 0, i
->second
, args
));
604 #if HAVE_LLVM >= 0x0305
// LLVM diagnostic callback.  `data` is treated as a pointer to a
// compat::string.  For diagnostics of severity DS_Error the current
// contents of that string are copied into a std::string, a
// DiagnosticPrinterRawOStream is set up on a stream over that copy, and the
// copy is assigned back to the compat::string.  Other severities are
// ignored in the visible lines.
// NOTE(review): the statements that print `di` and flush the stream are not
// visible in this excerpt.
607 diagnostic_handler(const llvm::DiagnosticInfo
&di
, void *data
) {
608 if (di
.getSeverity() == llvm::DS_Error
) {
609 std::string message
= *(compat::string
*)data
;
610 llvm::raw_string_ostream
stream(message
);
611 llvm::DiagnosticPrinterRawOStream
dp(stream
);
614 *(compat::string
*)data
= message
;
// One-time LLVM target registration: the function-local static flag makes
// the LLVMInitializeAll*() calls run only while the flag is still false,
// after which it is set to true.
// NOTE(review): the enclosing function's signature is not visible in this
// excerpt.
624 static bool targets_initialized
= false;
625 if (!targets_initialized
) {
626 LLVMInitializeAllTargets();
627 LLVMInitializeAllTargetInfos();
628 LLVMInitializeAllTargetMCs();
629 LLVMInitializeAllAsmPrinters();
630 targets_initialized
= true;
633 } // End anonymous namespace
636 clover::compile_program_llvm(const compat::string
&source
,
637 enum pipe_shader_ir ir
,
638 const compat::string
&target
,
639 const compat::string
&opts
,
640 compat::string
&r_log
) {
644 std::vector
<llvm::Function
*> kernels
;
645 size_t processor_str_len
= std::string(target
.begin()).find_first_of("-");
646 std::string
processor(target
.begin(), 0, processor_str_len
);
647 std::string
triple(target
.begin(), processor_str_len
+ 1,
648 target
.size() - processor_str_len
- 1);
649 clang::LangAS::Map address_spaces
;
650 llvm::LLVMContext llvm_ctx
;
651 unsigned optimization_level
;
653 #if HAVE_LLVM >= 0x0305
654 llvm_ctx
.setDiagnosticHandler(diagnostic_handler
, &r_log
);
657 // The input file name must have the .cl extension in order for the
658 // CompilerInvocation class to recognize it as an OpenCL source file.
659 llvm::Module
*mod
= compile_llvm(llvm_ctx
, source
, "input.cl", triple
,
660 processor
, opts
, address_spaces
,
661 optimization_level
, r_log
);
663 find_kernels(mod
, kernels
);
665 optimize(mod
, optimization_level
, kernels
);
668 // Build the clover::module
670 case PIPE_SHADER_IR_TGSI
:
675 case PIPE_SHADER_IR_LLVM
:
676 m
= build_module_llvm(mod
, kernels
, address_spaces
);
678 case PIPE_SHADER_IR_NATIVE
: {
679 std::vector
<char> code
= compile_native(mod
, triple
, processor
, r_log
);
680 m
= build_module_native(code
, mod
, kernels
, address_spaces
, r_log
);
684 #if HAVE_LLVM >= 0x0306
685 // LLVM 3.6 and newer, the user takes ownership of the module.