//
// Copyright 2012-2016 Francisco Jerez
// Copyright 2012-2016 Advanced Micro Devices, Inc.
// Copyright 2014-2016 Jan Vesely
// Copyright 2014-2015 Serge Martin
// Copyright 2015 Zoltan Gilian
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#include "core/compiler.hpp"

#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/TextDiagnosticBuffer.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/TargetInfo.h>
#include <llvm/Bitcode/BitstreamWriter.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Linker/Linker.h>
#include <llvm/IR/DiagnosticInfo.h>
#include <llvm/IR/DiagnosticPrinter.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/IRReader/IRReader.h>
#if HAVE_LLVM >= 0x0307
#include <llvm/IR/LegacyPassManager.h>
#else
#include <llvm/PassManager.h>
#endif
#include <llvm/Support/CodeGen.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/FormattedStream.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/Utils/Cloning.h>

#include <llvm/IR/DataLayout.h>
#if HAVE_LLVM >= 0x0307
#include <llvm/Analysis/TargetLibraryInfo.h>
#else
#include <llvm/Target/TargetLibraryInfo.h>
#endif
#include <llvm/Target/TargetMachine.h>
#include <llvm/Target/TargetOptions.h>

#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Core.h>

#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_math.h"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

// ELF parsing (elf_memory(), gelf_getsym(), ...) used by get_kernel_offsets().
#include <libelf.h>
#include <gelf.h>
84 using namespace clover
;
// Legacy helper: compile the OpenCL C text in \a source (registered with
// clang under the virtual file name \a name) to an object file for the
// target triple \a target.  Throws compile_error carrying the diagnostic
// log when the clang action fails.
//
// Disabled with "#if 0": it relies on the TGSI LLVM target initializers and
// on the old one-argument setLangDefaults() / three-argument
// createDiagnostics() signatures, which do not match the clang API that
// compile_llvm() in this file is written against.
#if 0
void
build_binary(const std::string &source, const std::string &target,
             const std::string &name) {
   clang::CompilerInstance c;
   clang::EmitObjAction act(&llvm::getGlobalContext());
   std::string log;
   llvm::raw_string_ostream s_log(log);

   LLVMInitializeTGSITarget();
   LLVMInitializeTGSITargetInfo();
   LLVMInitializeTGSITargetMC();
   LLVMInitializeTGSIAsmPrinter();

   c.getFrontendOpts().Inputs.push_back(
      std::make_pair(clang::IK_OpenCL, name));
   c.getHeaderSearchOpts().UseBuiltinIncludes = false;
   c.getHeaderSearchOpts().UseStandardIncludes = false;
   c.getLangOpts().NoBuiltin = true;
   c.getTargetOpts().Triple = target;
   c.getInvocation().setLangDefaults(clang::IK_OpenCL);
   c.createDiagnostics(0, NULL, new clang::TextDiagnosticPrinter(
                          s_log, c.getDiagnosticOpts()));

   // Serve the in-memory source to clang under the virtual file name.
   c.getPreprocessorOpts().addRemappedFile(
      name, llvm::MemoryBuffer::getMemBuffer(source));

   if (!c.ExecuteAction(act))
      throw compile_error(log);
}
#endif
// Legacy helper: read the whole file \a name into memory and deserialize it
// as a clover module.
//
// NOTE(review): kept under "#if 0" like build_binary(); it has no caller in
// the visible part of this file -- confirm before re-enabling.
#if 0
module
load_binary(const char *name) {
   std::ifstream fs((name));
   // The extra parentheses stop the declaration from being parsed as a
   // function declaration.
   std::vector<unsigned char> str((std::istreambuf_iterator<char>(fs)),
                                  (std::istreambuf_iterator<char>()));
   compat::istream cs(str);
   return module::deserialize(cs);
}
#endif
127 void debug_log(const std::string
&msg
, const std::string
&suffix
) {
128 const char *dbg_file
= debug_get_option("CLOVER_DEBUG_FILE", "stderr");
129 if (!strcmp("stderr", dbg_file
)) {
132 std::ofstream
file(dbg_file
+ suffix
, std::ios::app
);
138 compile_llvm(llvm::LLVMContext
&llvm_ctx
, const std::string
&source
,
139 const header_map
&headers
,
140 const std::string
&name
, const std::string
&triple
,
141 const std::string
&processor
, const std::string
&opts
,
142 clang::LangAS::Map
& address_spaces
, unsigned &optimization_level
,
143 std::string
&r_log
) {
145 clang::CompilerInstance c
;
146 clang::EmitLLVMOnlyAction
act(&llvm_ctx
);
148 llvm::raw_string_ostream
s_log(log
);
149 std::string libclc_path
= LIBCLC_LIBEXECDIR
+ processor
+ "-"
152 // Parse the compiler options:
153 std::vector
<std::string
> opts_array
;
154 std::istringstream
ss(opts
);
158 getline(ss
, opt
, ' ');
159 opts_array
.push_back(opt
);
162 opts_array
.push_back(name
);
164 std::vector
<const char *> opts_carray
;
165 for (unsigned i
= 0; i
< opts_array
.size(); i
++) {
166 opts_carray
.push_back(opts_array
.at(i
).c_str());
169 llvm::IntrusiveRefCntPtr
<clang::DiagnosticIDs
> DiagID
;
170 llvm::IntrusiveRefCntPtr
<clang::DiagnosticOptions
> DiagOpts
;
171 clang::TextDiagnosticBuffer
*DiagsBuffer
;
173 DiagID
= new clang::DiagnosticIDs();
174 DiagOpts
= new clang::DiagnosticOptions();
175 DiagsBuffer
= new clang::TextDiagnosticBuffer();
177 clang::DiagnosticsEngine
Diags(DiagID
, &*DiagOpts
, DiagsBuffer
);
180 Success
= clang::CompilerInvocation::CreateFromArgs(c
.getInvocation(),
182 opts_carray
.data() + opts_carray
.size(),
185 throw error(CL_INVALID_COMPILER_OPTIONS
);
187 c
.getFrontendOpts().ProgramAction
= clang::frontend::EmitLLVMOnly
;
188 c
.getHeaderSearchOpts().UseBuiltinIncludes
= true;
189 c
.getHeaderSearchOpts().UseStandardSystemIncludes
= true;
190 c
.getHeaderSearchOpts().ResourceDir
= CLANG_RESOURCE_DIR
;
192 // Add libclc generic search path
193 c
.getHeaderSearchOpts().AddPath(LIBCLC_INCLUDEDIR
,
194 clang::frontend::Angled
,
198 // Add libclc include
199 c
.getPreprocessorOpts().Includes
.push_back("clc/clc.h");
201 // clc.h requires that this macro be defined:
202 c
.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers");
204 c
.getLangOpts().NoBuiltin
= true;
205 c
.getTargetOpts().Triple
= triple
;
206 c
.getTargetOpts().CPU
= processor
;
208 // This is a workaround for a Clang bug which causes the number
209 // of warnings and errors to be printed to stderr.
210 // http://www.llvm.org/bugs/show_bug.cgi?id=19735
211 c
.getDiagnosticOpts().ShowCarets
= false;
212 c
.getInvocation().setLangDefaults(c
.getLangOpts(), clang::IK_OpenCL
,
213 #if HAVE_LLVM >= 0x0309
214 llvm::Triple(triple
), c
.getPreprocessorOpts(),
216 clang::LangStandard::lang_opencl11
);
218 new clang::TextDiagnosticPrinter(
220 &c
.getDiagnosticOpts()));
222 #if HAVE_LLVM >= 0x0306
223 c
.getPreprocessorOpts().addRemappedFile(name
,
224 llvm::MemoryBuffer::getMemBuffer(source
).release());
226 c
.getPreprocessorOpts().addRemappedFile(name
,
227 llvm::MemoryBuffer::getMemBuffer(source
));
230 if (headers
.size()) {
231 const std::string tmp_header_path
= "/tmp/clover/";
233 c
.getHeaderSearchOpts().AddPath(tmp_header_path
,
234 clang::frontend::Angled
,
238 for (header_map::const_iterator it
= headers
.begin();
239 it
!= headers
.end(); ++it
) {
240 const std::string path
= tmp_header_path
+ std::string(it
->first
);
241 c
.getPreprocessorOpts().addRemappedFile(path
,
242 #if HAVE_LLVM >= 0x0306
243 llvm::MemoryBuffer::getMemBuffer(it
->second
.c_str()).release());
245 llvm::MemoryBuffer::getMemBuffer(it
->second
.c_str()));
250 // Setting this attribute tells clang to link this file before
251 // performing any optimizations. This is required so that
252 // we can replace calls to the OpenCL C barrier() builtin
253 // with calls to target intrinsics that have the noduplicate
254 // attribute. This attribute will prevent Clang from creating
255 // illegal uses of barrier() (e.g. Moving barrier() inside a conditional
256 // that is no executed by all threads) during its optimizaton passes.
257 #if HAVE_LLVM >= 0x0308
258 c
.getCodeGenOpts().LinkBitcodeFiles
.emplace_back(llvm::Linker::Flags::None
,
261 c
.getCodeGenOpts().LinkBitcodeFile
= libclc_path
;
263 optimization_level
= c
.getCodeGenOpts().OptimizationLevel
;
266 bool ExecSuccess
= c
.ExecuteAction(act
);
270 throw compile_error();
272 // Get address spaces map to be able to find kernel argument address space
273 memcpy(address_spaces
, c
.getTarget().getAddressSpaceMap(),
274 sizeof(address_spaces
));
276 #if HAVE_LLVM >= 0x0306
277 return act
.takeModule().release();
279 return act
.takeModule();
283 std::vector
<llvm::Function
*>
284 find_kernels(llvm::Module
*mod
) {
285 std::vector
<llvm::Function
*> kernels
;
286 #if HAVE_LLVM >= 0x0309
287 auto &list
= mod
->getFunctionList();
288 for_each(list
.begin(), list
.end(), [&](llvm::Function
&f
){
289 if (f
.getMetadata("kernel_arg_type"))
290 kernels
.push_back(&f
);
294 const llvm::NamedMDNode
*kernel_node
=
295 mod
->getNamedMetadata("opencl.kernels");
296 // This means there are no kernels in the program. The spec does not
297 // require that we return an error here, but there will be an error if
298 // the user tries to pass this program to a clCreateKernel() call.
300 return std::vector
<llvm::Function
*>();
303 kernels
.reserve(kernel_node
->getNumOperands());
304 for (unsigned i
= 0; i
< kernel_node
->getNumOperands(); ++i
) {
305 #if HAVE_LLVM >= 0x0306
306 kernels
.push_back(llvm::mdconst::dyn_extract
<llvm::Function
>(
308 kernels
.push_back(llvm::dyn_cast
<llvm::Function
>(
310 kernel_node
->getOperand(i
)->getOperand(0)));
316 optimize(llvm::Module
*mod
, unsigned optimization_level
) {
318 #if HAVE_LLVM >= 0x0307
319 llvm::legacy::PassManager PM
;
321 llvm::PassManager PM
;
324 const std::vector
<llvm::Function
*> kernels
= find_kernels(mod
);
326 // Add a function internalizer pass.
328 // By default, the function internalizer pass will look for a function
329 // called "main" and then mark all other functions as internal. Marking
330 // functions as internal enables the optimizer to perform optimizations
331 // like function inlining and global dead-code elimination.
333 // When there is no "main" function in a module, the internalize pass will
334 // treat the module like a library, and it won't internalize any functions.
335 // Since there is no "main" function in our kernels, we need to tell
336 // the internalizer pass that this module is not a library by passing a
337 // list of kernel functions to the internalizer. The internalizer will
338 // treat the functions in the list as "main" functions and internalize
339 // all of the other functions.
340 #if HAVE_LLVM >= 0x0309
341 auto preserve_kernels
= [=](const llvm::GlobalValue
&GV
) {
342 for (const auto &kernel
: kernels
) {
343 if (GV
.getName() == kernel
->getName())
349 std::vector
<const char*> export_list
;
350 for (std::vector
<llvm::Function
*>::const_iterator I
= kernels
.begin(),
353 llvm::Function
*kernel
= *I
;
354 export_list
.push_back(kernel
->getName().data());
357 #if HAVE_LLVM < 0x0306
358 PM
.add(new llvm::DataLayoutPass(mod
));
359 #elif HAVE_LLVM < 0x0307
360 PM
.add(new llvm::DataLayoutPass());
362 #if HAVE_LLVM >= 0x0309
363 PM
.add(llvm::createInternalizePass(preserve_kernels
));
365 PM
.add(llvm::createInternalizePass(export_list
));
368 llvm::PassManagerBuilder PMB
;
369 PMB
.OptLevel
= optimization_level
;
370 #if HAVE_LLVM < 0x0307
371 PMB
.LibraryInfo
= new llvm::TargetLibraryInfo(
373 PMB
.LibraryInfo
= new llvm::TargetLibraryInfoImpl(
375 llvm::Triple(mod
->getTargetTriple()));
376 PMB
.populateModulePassManager(PM
);
382 struct kernel_arg_md
{
383 llvm::StringRef type_name
;
384 llvm::StringRef access_qual
;
385 kernel_arg_md(llvm::StringRef type_name_
, llvm::StringRef access_qual_
):
386 type_name(type_name_
), access_qual(access_qual_
) {}
388 #if HAVE_LLVM >= 0x0309
389 std::vector
<kernel_arg_md
>
390 get_kernel_arg_md(const llvm::Function
*kernel_func
) {
392 size_t num_args
= kernel_func
->getArgumentList().size();
394 auto aq
= kernel_func
->getMetadata("kernel_arg_access_qual");
395 auto ty
= kernel_func
->getMetadata("kernel_arg_type");
397 std::vector
<kernel_arg_md
> res
;
398 res
.reserve(num_args
);
399 for (size_t i
= 0; i
< num_args
; ++i
) {
400 res
.push_back(kernel_arg_md(
401 llvm::cast
<llvm::MDString
>(ty
->getOperand(i
))->getString(),
402 llvm::cast
<llvm::MDString
>(aq
->getOperand(i
))->getString()));
408 #elif HAVE_LLVM >= 0x0306
411 get_kernel_metadata(const llvm::Function
*kernel_func
) {
412 auto mod
= kernel_func
->getParent();
413 auto kernels_node
= mod
->getNamedMetadata("opencl.kernels");
418 const llvm::MDNode
*kernel_node
= nullptr;
419 for (unsigned i
= 0; i
< kernels_node
->getNumOperands(); ++i
) {
420 auto func
= llvm::mdconst::dyn_extract
<llvm::Function
>(
421 kernels_node
->getOperand(i
)->getOperand(0));
422 if (func
== kernel_func
) {
423 kernel_node
= kernels_node
->getOperand(i
);
432 node_from_op_checked(const llvm::MDOperand
&md_operand
,
433 llvm::StringRef expect_name
,
434 unsigned expect_num_args
)
436 auto node
= llvm::cast
<llvm::MDNode
>(md_operand
);
437 assert(node
->getNumOperands() == expect_num_args
&&
438 "Wrong number of operands.");
440 auto str_node
= llvm::cast
<llvm::MDString
>(node
->getOperand(0));
441 assert(str_node
->getString() == expect_name
&&
442 "Wrong metadata node name.");
447 std::vector
<kernel_arg_md
>
448 get_kernel_arg_md(const llvm::Function
*kernel_func
) {
449 auto num_args
= kernel_func
->getArgumentList().size();
451 auto kernel_node
= get_kernel_metadata(kernel_func
);
452 auto aq
= node_from_op_checked(kernel_node
->getOperand(2),
453 "kernel_arg_access_qual", num_args
+ 1);
454 auto ty
= node_from_op_checked(kernel_node
->getOperand(3),
455 "kernel_arg_type", num_args
+ 1);
457 std::vector
<kernel_arg_md
> res
;
458 res
.reserve(num_args
);
459 for (unsigned i
= 0; i
< num_args
; ++i
) {
460 res
.push_back(kernel_arg_md(
461 llvm::cast
<llvm::MDString
>(ty
->getOperand(i
+1))->getString(),
462 llvm::cast
<llvm::MDString
>(aq
->getOperand(i
+1))->getString()));
470 std::vector
<kernel_arg_md
>
471 get_kernel_arg_md(const llvm::Function
*kernel_func
) {
472 return std::vector
<kernel_arg_md
>(
473 kernel_func
->getArgumentList().size(),
474 kernel_arg_md("", ""));
477 #endif // HAVE_LLVM >= 0x0306
479 std::vector
<module::argument
>
480 get_kernel_args(const llvm::Module
*mod
, const std::string
&kernel_name
,
481 const clang::LangAS::Map
&address_spaces
) {
483 std::vector
<module::argument
> args
;
484 llvm::Function
*kernel_func
= mod
->getFunction(kernel_name
);
485 assert(kernel_func
&& "Kernel name not found in module.");
486 auto arg_md
= get_kernel_arg_md(kernel_func
);
488 llvm::DataLayout
TD(mod
);
489 llvm::Type
*size_type
=
490 TD
.getSmallestLegalIntType(mod
->getContext(), sizeof(cl_uint
) * 8);
492 for (const auto &arg
: kernel_func
->args()) {
494 llvm::Type
*arg_type
= arg
.getType();
495 const unsigned arg_store_size
= TD
.getTypeStoreSize(arg_type
);
497 // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
498 // type that is not a power of two bytes in size must be
499 // aligned to the next larger power of two". We need this
500 // alignment for three element vectors, which have
501 // non-power-of-2 store size.
502 const unsigned arg_api_size
= util_next_power_of_two(arg_store_size
);
504 llvm::Type
*target_type
= arg_type
->isIntegerTy() ?
505 TD
.getSmallestLegalIntType(mod
->getContext(), arg_store_size
* 8)
507 unsigned target_size
= TD
.getTypeStoreSize(target_type
);
508 unsigned target_align
= TD
.getABITypeAlignment(target_type
);
510 llvm::StringRef type_name
= arg_md
[arg
.getArgNo()].type_name
;
511 llvm::StringRef access_qual
= arg_md
[arg
.getArgNo()].access_qual
;
514 const bool is_image2d
= type_name
== "image2d_t";
515 const bool is_image3d
= type_name
== "image3d_t";
516 if (is_image2d
|| is_image3d
) {
517 const bool is_write_only
= access_qual
== "write_only";
518 const bool is_read_only
= access_qual
== "read_only";
520 enum module::argument::type marg_type
;
521 if (is_image2d
&& is_read_only
) {
522 marg_type
= module::argument::image2d_rd
;
523 } else if (is_image2d
&& is_write_only
) {
524 marg_type
= module::argument::image2d_wr
;
525 } else if (is_image3d
&& is_read_only
) {
526 marg_type
= module::argument::image3d_rd
;
527 } else if (is_image3d
&& is_write_only
) {
528 marg_type
= module::argument::image3d_wr
;
530 assert(0 && "Wrong image access qualifier");
533 args
.push_back(module::argument(marg_type
,
534 arg_store_size
, target_size
,
536 module::argument::zero_ext
));
540 // Image size implicit argument
541 if (type_name
== "__llvm_image_size") {
542 args
.push_back(module::argument(module::argument::scalar
,
544 TD
.getTypeStoreSize(size_type
),
545 TD
.getABITypeAlignment(size_type
),
546 module::argument::zero_ext
,
547 module::argument::image_size
));
551 // Image format implicit argument
552 if (type_name
== "__llvm_image_format") {
553 args
.push_back(module::argument(module::argument::scalar
,
555 TD
.getTypeStoreSize(size_type
),
556 TD
.getABITypeAlignment(size_type
),
557 module::argument::zero_ext
,
558 module::argument::image_format
));
563 if (llvm::isa
<llvm::PointerType
>(arg_type
) && arg
.hasByValAttr()) {
565 llvm::dyn_cast
<llvm::PointerType
>(arg_type
)->getElementType();
568 if (arg_type
->isPointerTy()) {
569 unsigned address_space
= llvm::cast
<llvm::PointerType
>(arg_type
)->getAddressSpace();
570 if (address_space
== address_spaces
[clang::LangAS::opencl_local
571 - clang::LangAS::Offset
]) {
572 args
.push_back(module::argument(module::argument::local
,
573 arg_api_size
, target_size
,
575 module::argument::zero_ext
));
577 // XXX: Correctly handle constant address space. There is no
578 // way for r600g to pass a handle for constant buffers back
579 // to clover like it can for global buffers, so
580 // creating constant arguments will break r600g. For now,
581 // continue treating constant buffers as global buffers
582 // until we can come up with a way to create handles for
584 args
.push_back(module::argument(module::argument::global
,
585 arg_api_size
, target_size
,
587 module::argument::zero_ext
));
591 llvm::AttributeSet attrs
= kernel_func
->getAttributes();
592 enum module::argument::ext_type ext_type
=
593 (attrs
.hasAttribute(arg
.getArgNo() + 1,
594 llvm::Attribute::SExt
) ?
595 module::argument::sign_ext
:
596 module::argument::zero_ext
);
599 module::argument(module::argument::scalar
, arg_api_size
,
600 target_size
, target_align
, ext_type
));
604 // Append implicit arguments. XXX - The types, ordering and
605 // vector size of the implicit arguments should depend on the
606 // target according to the selected calling convention.
608 module::argument(module::argument::scalar
, sizeof(cl_uint
),
609 TD
.getTypeStoreSize(size_type
),
610 TD
.getABITypeAlignment(size_type
),
611 module::argument::zero_ext
,
612 module::argument::grid_dimension
));
615 module::argument(module::argument::scalar
, sizeof(cl_uint
),
616 TD
.getTypeStoreSize(size_type
),
617 TD
.getABITypeAlignment(size_type
),
618 module::argument::zero_ext
,
619 module::argument::grid_offset
));
625 build_module_llvm(llvm::Module
*mod
,
626 clang::LangAS::Map
& address_spaces
) {
629 struct pipe_llvm_program_header header
;
631 llvm::SmallVector
<char, 1024> llvm_bitcode
;
632 llvm::raw_svector_ostream
bitcode_ostream(llvm_bitcode
);
633 llvm::BitstreamWriter
writer(llvm_bitcode
);
634 llvm::WriteBitcodeToFile(mod
, bitcode_ostream
);
635 #if HAVE_LLVM < 0x0308
636 bitcode_ostream
.flush();
639 const std::vector
<llvm::Function
*> kernels
= find_kernels(mod
);
640 for (unsigned i
= 0; i
< kernels
.size(); ++i
) {
641 std::string kernel_name
= kernels
[i
]->getName();
642 std::vector
<module::argument
> args
=
643 get_kernel_args(mod
, kernel_name
, address_spaces
);
645 m
.syms
.push_back(module::symbol(kernel_name
, 0, i
, args
));
648 header
.num_bytes
= llvm_bitcode
.size();
649 std::vector
<char> data
;
650 data
.insert(data
.end(), (char*)(&header
),
651 (char*)(&header
) + sizeof(header
));
652 data
.insert(data
.end(), llvm_bitcode
.begin(),
654 m
.secs
.push_back(module::section(0, module::section::text
,
655 header
.num_bytes
, data
));
661 emit_code(LLVMTargetMachineRef tm
, LLVMModuleRef mod
,
662 LLVMCodeGenFileType file_type
,
663 LLVMMemoryBufferRef
*out_buffer
,
664 std::string
&r_log
) {
666 char *err_message
= NULL
;
668 err
= LLVMTargetMachineEmitToMemoryBuffer(tm
, mod
, file_type
,
669 &err_message
, out_buffer
);
672 r_log
= std::string(err_message
);
675 LLVMDisposeMessage(err_message
);
678 throw compile_error();
683 compile_native(const llvm::Module
*mod
, const std::string
&triple
,
684 const std::string
&processor
, unsigned dump_asm
,
685 std::string
&r_log
) {
688 LLVMTargetRef target
;
690 LLVMMemoryBufferRef out_buffer
;
691 unsigned buffer_size
;
692 const char *buffer_data
;
693 LLVMModuleRef mod_ref
= wrap(mod
);
695 if (LLVMGetTargetFromTriple(triple
.c_str(), &target
, &error_message
)) {
696 r_log
= std::string(error_message
);
697 LLVMDisposeMessage(error_message
);
698 throw compile_error();
701 LLVMTargetMachineRef tm
= LLVMCreateTargetMachine(
702 target
, triple
.c_str(), processor
.c_str(), "",
703 LLVMCodeGenLevelDefault
, LLVMRelocDefault
, LLVMCodeModelDefault
);
706 r_log
= "Could not create TargetMachine: " + triple
;
707 throw compile_error();
711 LLVMSetTargetMachineAsmVerbosity(tm
, true);
712 #if HAVE_LLVM >= 0x0308
713 LLVMModuleRef debug_mod
= wrap(llvm::CloneModule(mod
).release());
715 LLVMModuleRef debug_mod
= wrap(llvm::CloneModule(mod
));
717 emit_code(tm
, debug_mod
, LLVMAssemblyFile
, &out_buffer
, r_log
);
718 buffer_size
= LLVMGetBufferSize(out_buffer
);
719 buffer_data
= LLVMGetBufferStart(out_buffer
);
720 debug_log(std::string(buffer_data
, buffer_size
), ".asm");
722 LLVMSetTargetMachineAsmVerbosity(tm
, false);
723 LLVMDisposeMemoryBuffer(out_buffer
);
724 LLVMDisposeModule(debug_mod
);
727 emit_code(tm
, mod_ref
, LLVMObjectFile
, &out_buffer
, r_log
);
729 buffer_size
= LLVMGetBufferSize(out_buffer
);
730 buffer_data
= LLVMGetBufferStart(out_buffer
);
732 std::vector
<char> code(buffer_data
, buffer_data
+ buffer_size
);
734 LLVMDisposeMemoryBuffer(out_buffer
);
735 LLVMDisposeTargetMachine(tm
);
740 std::map
<std::string
, unsigned>
741 get_kernel_offsets(std::vector
<char> &code
,
742 const std::vector
<llvm::Function
*> &kernels
,
743 std::string
&r_log
) {
745 // One of the libelf implementations
746 // (http://www.mr511.de/software/english.htm) requires calling
747 // elf_version() before elf_memory().
749 elf_version(EV_CURRENT
);
751 Elf
*elf
= elf_memory(&code
[0], code
.size());
752 size_t section_str_index
;
753 elf_getshdrstrndx(elf
, §ion_str_index
);
754 Elf_Scn
*section
= NULL
;
755 Elf_Scn
*symtab
= NULL
;
756 GElf_Shdr symtab_header
;
758 // Find the symbol table
760 while ((section
= elf_nextscn(elf
, section
))) {
762 if (gelf_getshdr(section
, &symtab_header
) != &symtab_header
) {
763 r_log
= "Failed to read ELF section header.";
764 throw compile_error();
766 name
= elf_strptr(elf
, section_str_index
, symtab_header
.sh_name
);
767 if (!strcmp(name
, ".symtab")) {
773 r_log
= "Unable to find symbol table.";
774 throw compile_error();
776 } catch (compile_error
&e
) {
782 // Extract symbol information from the table
783 Elf_Data
*symtab_data
= NULL
;
787 std::map
<std::string
, unsigned> kernel_offsets
;
788 symtab_data
= elf_getdata(symtab
, symtab_data
);
790 // Determine the offsets for each kernel
791 for (int i
= 0; (symbol
= gelf_getsym(symtab_data
, i
, &s
)); i
++) {
792 char *name
= elf_strptr(elf
, symtab_header
.sh_link
, symbol
->st_name
);
793 for (std::vector
<llvm::Function
*>::const_iterator it
= kernels
.begin(),
794 e
= kernels
.end(); it
!= e
; ++it
) {
795 llvm::Function
*f
= *it
;
796 if (f
->getName() == std::string(name
))
797 kernel_offsets
[f
->getName()] = symbol
->st_value
;
801 return kernel_offsets
;
805 build_module_native(std::vector
<char> &code
,
807 const clang::LangAS::Map
&address_spaces
,
808 std::string
&r_log
) {
810 const std::vector
<llvm::Function
*> kernels
= find_kernels(mod
);
812 std::map
<std::string
, unsigned> kernel_offsets
=
813 get_kernel_offsets(code
, kernels
, r_log
);
815 // Begin building the clover module
817 struct pipe_llvm_program_header header
;
819 // Store the generated ELF binary in the module's text section.
820 header
.num_bytes
= code
.size();
821 std::vector
<char> data
;
822 data
.insert(data
.end(), (char*)(&header
),
823 (char*)(&header
) + sizeof(header
));
824 data
.insert(data
.end(), code
.begin(), code
.end());
825 m
.secs
.push_back(module::section(0, module::section::text
,
826 header
.num_bytes
, data
));
828 for (std::map
<std::string
, unsigned>::iterator i
= kernel_offsets
.begin(),
829 e
= kernel_offsets
.end(); i
!= e
; ++i
) {
830 std::vector
<module::argument
> args
=
831 get_kernel_args(mod
, i
->first
, address_spaces
);
832 m
.syms
.push_back(module::symbol(i
->first
, 0, i
->second
, args
));
839 diagnostic_handler(const llvm::DiagnosticInfo
&di
, void *data
) {
840 if (di
.getSeverity() == llvm::DS_Error
) {
841 std::string message
= *(std::string
*)data
;
842 llvm::raw_string_ostream
stream(message
);
843 llvm::DiagnosticPrinterRawOStream
dp(stream
);
846 *(std::string
*)data
= message
;
848 throw compile_error();
854 static bool targets_initialized
= false;
855 if (!targets_initialized
) {
856 LLVMInitializeAllTargets();
857 LLVMInitializeAllTargetInfos();
858 LLVMInitializeAllTargetMCs();
859 LLVMInitializeAllAsmPrinters();
860 targets_initialized
= true;
864 #define DBG_CLC (1 << 0)
865 #define DBG_LLVM (1 << 1)
866 #define DBG_ASM (1 << 2)
870 static const struct debug_named_value debug_options
[] = {
871 {"clc", DBG_CLC
, "Dump the OpenCL C code for all kernels."},
872 {"llvm", DBG_LLVM
, "Dump the generated LLVM IR for all kernels."},
873 {"asm", DBG_ASM
, "Dump kernel assembly code for targets specifying "
874 "PIPE_SHADER_IR_NATIVE"},
875 DEBUG_NAMED_VALUE_END
// must be last
877 static const unsigned debug_flags
=
878 debug_get_flags_option("CLOVER_DEBUG", debug_options
, 0);
883 } // End anonymous namespace
886 clover::compile_program_llvm(const std::string
&source
,
887 const header_map
&headers
,
888 enum pipe_shader_ir ir
,
889 const std::string
&target
,
890 const std::string
&opts
,
891 std::string
&r_log
) {
895 size_t processor_str_len
= std::string(target
).find_first_of("-");
896 std::string
processor(target
, 0, processor_str_len
);
897 std::string
triple(target
, processor_str_len
+ 1,
898 target
.size() - processor_str_len
- 1);
899 clang::LangAS::Map address_spaces
;
900 llvm::LLVMContext llvm_ctx
;
901 unsigned optimization_level
;
903 llvm_ctx
.setDiagnosticHandler(diagnostic_handler
, &r_log
);
905 if (get_debug_flags() & DBG_CLC
)
906 debug_log("// Build options: " + opts
+ '\n' + source
, ".cl");
908 // The input file name must have the .cl extension in order for the
909 // CompilerInvocation class to recognize it as an OpenCL source file.
910 llvm::Module
*mod
= compile_llvm(llvm_ctx
, source
, headers
, "input.cl",
911 triple
, processor
, opts
, address_spaces
,
912 optimization_level
, r_log
);
914 optimize(mod
, optimization_level
);
916 if (get_debug_flags() & DBG_LLVM
) {
918 llvm::raw_string_ostream
s_log(log
);
919 mod
->print(s_log
, NULL
);
921 debug_log(log
, ".ll");
925 // Build the clover::module
927 case PIPE_SHADER_IR_NIR
:
928 case PIPE_SHADER_IR_TGSI
:
929 //XXX: Handle TGSI, NIR
933 case PIPE_SHADER_IR_LLVM
:
934 m
= build_module_llvm(mod
, address_spaces
);
936 case PIPE_SHADER_IR_NATIVE
: {
937 std::vector
<char> code
= compile_native(mod
, triple
, processor
,
938 get_debug_flags() & DBG_ASM
,
940 m
= build_module_native(code
, mod
, address_spaces
, r_log
);
944 #if HAVE_LLVM >= 0x0306
945 // LLVM 3.6 and newer, the user takes ownership of the module.