clover: Fix kernel metadata retrieval after clang r273425
[mesa.git] / src / gallium / state_trackers / clover / llvm / invocation.cpp
1 //
2 // Copyright 2012-2016 Francisco Jerez
3 // Copyright 2012-2016 Advanced Micro Devices, Inc.
4 // Copyright 2014-2016 Jan Vesely
5 // Copyright 2014-2015 Serge Martin
6 // Copyright 2015 Zoltan Gilian
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 // OTHER DEALINGS IN THE SOFTWARE.
25 //
26
27 #include "core/compiler.hpp"
28
29 #include <clang/Frontend/CompilerInstance.h>
30 #include <clang/Frontend/TextDiagnosticBuffer.h>
31 #include <clang/Frontend/TextDiagnosticPrinter.h>
32 #include <clang/CodeGen/CodeGenAction.h>
33 #include <clang/Basic/TargetInfo.h>
34 #include <llvm/Bitcode/BitstreamWriter.h>
35 #include <llvm/Bitcode/ReaderWriter.h>
36 #include <llvm/Linker/Linker.h>
37 #include <llvm/IR/DiagnosticInfo.h>
38 #include <llvm/IR/DiagnosticPrinter.h>
39 #include <llvm/IR/DerivedTypes.h>
40 #include <llvm/IR/LLVMContext.h>
41 #include <llvm/IR/Module.h>
42 #include <llvm/Support/SourceMgr.h>
43 #include <llvm/IRReader/IRReader.h>
44 #if HAVE_LLVM >= 0x0307
45 #include <llvm/IR/LegacyPassManager.h>
46 #else
47 #include <llvm/PassManager.h>
48 #endif
49 #include <llvm/Support/CodeGen.h>
50 #include <llvm/Support/TargetSelect.h>
51 #include <llvm/Support/MemoryBuffer.h>
52 #include <llvm/Support/FormattedStream.h>
53 #include <llvm/Support/TargetRegistry.h>
54 #include <llvm/Transforms/IPO.h>
55 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
56 #include <llvm/Transforms/Utils/Cloning.h>
57
58
59 #include <llvm/IR/DataLayout.h>
60 #if HAVE_LLVM >= 0x0307
61 #include <llvm/Analysis/TargetLibraryInfo.h>
62 #else
63 #include <llvm/Target/TargetLibraryInfo.h>
64 #endif
65 #include <llvm/Target/TargetMachine.h>
66 #include <llvm/Target/TargetOptions.h>
67
68 #include <llvm-c/Target.h>
69 #include <llvm-c/TargetMachine.h>
70 #include <llvm-c/Core.h>
71
72 #include "pipe/p_state.h"
73 #include "util/u_memory.h"
74 #include "util/u_math.h"
75
76 #include <iostream>
77 #include <iomanip>
78 #include <fstream>
79 #include <cstdio>
80 #include <sstream>
81 #include <libelf.h>
82 #include <gelf.h>
83
84 using namespace clover;
85
86 namespace {
87 #if 0
88 void
89 build_binary(const std::string &source, const std::string &target,
90 const std::string &name) {
91 clang::CompilerInstance c;
92 clang::EmitObjAction act(&llvm::getGlobalContext());
93 std::string log;
94 llvm::raw_string_ostream s_log(log);
95
96 LLVMInitializeTGSITarget();
97 LLVMInitializeTGSITargetInfo();
98 LLVMInitializeTGSITargetMC();
99 LLVMInitializeTGSIAsmPrinter();
100
101 c.getFrontendOpts().Inputs.push_back(
102 std::make_pair(clang::IK_OpenCL, name));
103 c.getHeaderSearchOpts().UseBuiltinIncludes = false;
104 c.getHeaderSearchOpts().UseStandardIncludes = false;
105 c.getLangOpts().NoBuiltin = true;
106 c.getTargetOpts().Triple = target;
107 c.getInvocation().setLangDefaults(clang::IK_OpenCL);
108 c.createDiagnostics(0, NULL, new clang::TextDiagnosticPrinter(
109 s_log, c.getDiagnosticOpts()));
110
111 c.getPreprocessorOpts().addRemappedFile(
112 name, llvm::MemoryBuffer::getMemBuffer(source));
113
114 if (!c.ExecuteAction(act))
115 throw compile_error(log);
116 }
117
118 module
119 load_binary(const char *name) {
120 std::ifstream fs((name));
121 std::vector<unsigned char> str((std::istreambuf_iterator<char>(fs)),
122 (std::istreambuf_iterator<char>()));
123 compat::istream cs(str);
124 return module::deserialize(cs);
125 }
126 #endif
127 void debug_log(const std::string &msg, const std::string &suffix) {
128 const char *dbg_file = debug_get_option("CLOVER_DEBUG_FILE", "stderr");
129 if (!strcmp("stderr", dbg_file)) {
130 std::cerr << msg;
131 } else {
132 std::ofstream file(dbg_file + suffix, std::ios::app);
133 file << msg;
134 }
135 }
136
137 llvm::Module *
138 compile_llvm(llvm::LLVMContext &llvm_ctx, const std::string &source,
139 const header_map &headers,
140 const std::string &name, const std::string &triple,
141 const std::string &processor, const std::string &opts,
142 clang::LangAS::Map& address_spaces, unsigned &optimization_level,
143 std::string &r_log) {
144
145 clang::CompilerInstance c;
146 clang::EmitLLVMOnlyAction act(&llvm_ctx);
147 std::string log;
148 llvm::raw_string_ostream s_log(log);
149 std::string libclc_path = LIBCLC_LIBEXECDIR + processor + "-"
150 + triple + ".bc";
151
152 // Parse the compiler options:
153 std::vector<std::string> opts_array;
154 std::istringstream ss(opts);
155
156 while (!ss.eof()) {
157 std::string opt;
158 getline(ss, opt, ' ');
159 opts_array.push_back(opt);
160 }
161
162 opts_array.push_back(name);
163
164 std::vector<const char *> opts_carray;
165 for (unsigned i = 0; i < opts_array.size(); i++) {
166 opts_carray.push_back(opts_array.at(i).c_str());
167 }
168
169 llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> DiagID;
170 llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> DiagOpts;
171 clang::TextDiagnosticBuffer *DiagsBuffer;
172
173 DiagID = new clang::DiagnosticIDs();
174 DiagOpts = new clang::DiagnosticOptions();
175 DiagsBuffer = new clang::TextDiagnosticBuffer();
176
177 clang::DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer);
178 bool Success;
179
180 Success = clang::CompilerInvocation::CreateFromArgs(c.getInvocation(),
181 opts_carray.data(),
182 opts_carray.data() + opts_carray.size(),
183 Diags);
184 if (!Success) {
185 throw error(CL_INVALID_COMPILER_OPTIONS);
186 }
187 c.getFrontendOpts().ProgramAction = clang::frontend::EmitLLVMOnly;
188 c.getHeaderSearchOpts().UseBuiltinIncludes = true;
189 c.getHeaderSearchOpts().UseStandardSystemIncludes = true;
190 c.getHeaderSearchOpts().ResourceDir = CLANG_RESOURCE_DIR;
191
192 // Add libclc generic search path
193 c.getHeaderSearchOpts().AddPath(LIBCLC_INCLUDEDIR,
194 clang::frontend::Angled,
195 false, false
196 );
197
198 // Add libclc include
199 c.getPreprocessorOpts().Includes.push_back("clc/clc.h");
200
201 // clc.h requires that this macro be defined:
202 c.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers");
203
204 c.getLangOpts().NoBuiltin = true;
205 c.getTargetOpts().Triple = triple;
206 c.getTargetOpts().CPU = processor;
207
208 // This is a workaround for a Clang bug which causes the number
209 // of warnings and errors to be printed to stderr.
210 // http://www.llvm.org/bugs/show_bug.cgi?id=19735
211 c.getDiagnosticOpts().ShowCarets = false;
212 c.getInvocation().setLangDefaults(c.getLangOpts(), clang::IK_OpenCL,
213 #if HAVE_LLVM >= 0x0309
214 llvm::Triple(triple), c.getPreprocessorOpts(),
215 #endif
216 clang::LangStandard::lang_opencl11);
217 c.createDiagnostics(
218 new clang::TextDiagnosticPrinter(
219 s_log,
220 &c.getDiagnosticOpts()));
221
222 #if HAVE_LLVM >= 0x0306
223 c.getPreprocessorOpts().addRemappedFile(name,
224 llvm::MemoryBuffer::getMemBuffer(source).release());
225 #else
226 c.getPreprocessorOpts().addRemappedFile(name,
227 llvm::MemoryBuffer::getMemBuffer(source));
228 #endif
229
230 if (headers.size()) {
231 const std::string tmp_header_path = "/tmp/clover/";
232
233 c.getHeaderSearchOpts().AddPath(tmp_header_path,
234 clang::frontend::Angled,
235 false, false
236 );
237
238 for (header_map::const_iterator it = headers.begin();
239 it != headers.end(); ++it) {
240 const std::string path = tmp_header_path + std::string(it->first);
241 c.getPreprocessorOpts().addRemappedFile(path,
242 #if HAVE_LLVM >= 0x0306
243 llvm::MemoryBuffer::getMemBuffer(it->second.c_str()).release());
244 #else
245 llvm::MemoryBuffer::getMemBuffer(it->second.c_str()));
246 #endif
247 }
248 }
249
250 // Setting this attribute tells clang to link this file before
251 // performing any optimizations. This is required so that
252 // we can replace calls to the OpenCL C barrier() builtin
253 // with calls to target intrinsics that have the noduplicate
254 // attribute. This attribute will prevent Clang from creating
255 // illegal uses of barrier() (e.g. Moving barrier() inside a conditional
256 // that is no executed by all threads) during its optimizaton passes.
257 #if HAVE_LLVM >= 0x0308
258 c.getCodeGenOpts().LinkBitcodeFiles.emplace_back(llvm::Linker::Flags::None,
259 libclc_path);
260 #else
261 c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
262 #endif
263 optimization_level = c.getCodeGenOpts().OptimizationLevel;
264
265 // Compile the code
266 bool ExecSuccess = c.ExecuteAction(act);
267 r_log = log;
268
269 if (!ExecSuccess)
270 throw compile_error();
271
272 // Get address spaces map to be able to find kernel argument address space
273 memcpy(address_spaces, c.getTarget().getAddressSpaceMap(),
274 sizeof(address_spaces));
275
276 #if HAVE_LLVM >= 0x0306
277 return act.takeModule().release();
278 #else
279 return act.takeModule();
280 #endif
281 }
282
283 std::vector<llvm::Function *>
284 find_kernels(llvm::Module *mod) {
285 std::vector<llvm::Function *> kernels;
286 #if HAVE_LLVM >= 0x0309
287 auto &list = mod->getFunctionList();
288 for_each(list.begin(), list.end(), [&](llvm::Function &f){
289 if (f.getMetadata("kernel_arg_type"))
290 kernels.push_back(&f);
291 });
292 return kernels;
293 #endif
294 const llvm::NamedMDNode *kernel_node =
295 mod->getNamedMetadata("opencl.kernels");
296 // This means there are no kernels in the program. The spec does not
297 // require that we return an error here, but there will be an error if
298 // the user tries to pass this program to a clCreateKernel() call.
299 if (!kernel_node) {
300 return std::vector<llvm::Function *>();
301 }
302
303 kernels.reserve(kernel_node->getNumOperands());
304 for (unsigned i = 0; i < kernel_node->getNumOperands(); ++i) {
305 #if HAVE_LLVM >= 0x0306
306 kernels.push_back(llvm::mdconst::dyn_extract<llvm::Function>(
307 #else
308 kernels.push_back(llvm::dyn_cast<llvm::Function>(
309 #endif
310 kernel_node->getOperand(i)->getOperand(0)));
311 }
312 return kernels;
313 }
314
315 void
316 optimize(llvm::Module *mod, unsigned optimization_level) {
317
318 #if HAVE_LLVM >= 0x0307
319 llvm::legacy::PassManager PM;
320 #else
321 llvm::PassManager PM;
322 #endif
323
324 const std::vector<llvm::Function *> kernels = find_kernels(mod);
325
326 // Add a function internalizer pass.
327 //
328 // By default, the function internalizer pass will look for a function
329 // called "main" and then mark all other functions as internal. Marking
330 // functions as internal enables the optimizer to perform optimizations
331 // like function inlining and global dead-code elimination.
332 //
333 // When there is no "main" function in a module, the internalize pass will
334 // treat the module like a library, and it won't internalize any functions.
335 // Since there is no "main" function in our kernels, we need to tell
336 // the internalizer pass that this module is not a library by passing a
337 // list of kernel functions to the internalizer. The internalizer will
338 // treat the functions in the list as "main" functions and internalize
339 // all of the other functions.
340 #if HAVE_LLVM >= 0x0309
341 auto preserve_kernels = [=](const llvm::GlobalValue &GV) {
342 for (const auto &kernel : kernels) {
343 if (GV.getName() == kernel->getName())
344 return true;
345 }
346 return false;
347 };
348 #else
349 std::vector<const char*> export_list;
350 for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(),
351 E = kernels.end();
352 I != E; ++I) {
353 llvm::Function *kernel = *I;
354 export_list.push_back(kernel->getName().data());
355 }
356 #endif
357 #if HAVE_LLVM < 0x0306
358 PM.add(new llvm::DataLayoutPass(mod));
359 #elif HAVE_LLVM < 0x0307
360 PM.add(new llvm::DataLayoutPass());
361 #endif
362 #if HAVE_LLVM >= 0x0309
363 PM.add(llvm::createInternalizePass(preserve_kernels));
364 #else
365 PM.add(llvm::createInternalizePass(export_list));
366 #endif
367
368 llvm::PassManagerBuilder PMB;
369 PMB.OptLevel = optimization_level;
370 #if HAVE_LLVM < 0x0307
371 PMB.LibraryInfo = new llvm::TargetLibraryInfo(
372 #else
373 PMB.LibraryInfo = new llvm::TargetLibraryInfoImpl(
374 #endif
375 llvm::Triple(mod->getTargetTriple()));
376 PMB.populateModulePassManager(PM);
377 PM.run(*mod);
378 }
379
380 // Kernel metadata
381
382 struct kernel_arg_md {
383 llvm::StringRef type_name;
384 llvm::StringRef access_qual;
385 kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_):
386 type_name(type_name_), access_qual(access_qual_) {}
387 };
388 #if HAVE_LLVM >= 0x0309
389 std::vector<kernel_arg_md>
390 get_kernel_arg_md(const llvm::Function *kernel_func) {
391
392 size_t num_args = kernel_func->getArgumentList().size();
393
394 auto aq = kernel_func->getMetadata("kernel_arg_access_qual");
395 auto ty = kernel_func->getMetadata("kernel_arg_type");
396
397 std::vector<kernel_arg_md> res;
398 res.reserve(num_args);
399 for (size_t i = 0; i < num_args; ++i) {
400 res.push_back(kernel_arg_md(
401 llvm::cast<llvm::MDString>(ty->getOperand(i))->getString(),
402 llvm::cast<llvm::MDString>(aq->getOperand(i))->getString()));
403 }
404
405 return res;
406 }
407
408 #elif HAVE_LLVM >= 0x0306
409
410 const llvm::MDNode *
411 get_kernel_metadata(const llvm::Function *kernel_func) {
412 auto mod = kernel_func->getParent();
413 auto kernels_node = mod->getNamedMetadata("opencl.kernels");
414 if (!kernels_node) {
415 return nullptr;
416 }
417
418 const llvm::MDNode *kernel_node = nullptr;
419 for (unsigned i = 0; i < kernels_node->getNumOperands(); ++i) {
420 auto func = llvm::mdconst::dyn_extract<llvm::Function>(
421 kernels_node->getOperand(i)->getOperand(0));
422 if (func == kernel_func) {
423 kernel_node = kernels_node->getOperand(i);
424 break;
425 }
426 }
427
428 return kernel_node;
429 }
430
431 llvm::MDNode*
432 node_from_op_checked(const llvm::MDOperand &md_operand,
433 llvm::StringRef expect_name,
434 unsigned expect_num_args)
435 {
436 auto node = llvm::cast<llvm::MDNode>(md_operand);
437 assert(node->getNumOperands() == expect_num_args &&
438 "Wrong number of operands.");
439
440 auto str_node = llvm::cast<llvm::MDString>(node->getOperand(0));
441 assert(str_node->getString() == expect_name &&
442 "Wrong metadata node name.");
443
444 return node;
445 }
446
447 std::vector<kernel_arg_md>
448 get_kernel_arg_md(const llvm::Function *kernel_func) {
449 auto num_args = kernel_func->getArgumentList().size();
450
451 auto kernel_node = get_kernel_metadata(kernel_func);
452 auto aq = node_from_op_checked(kernel_node->getOperand(2),
453 "kernel_arg_access_qual", num_args + 1);
454 auto ty = node_from_op_checked(kernel_node->getOperand(3),
455 "kernel_arg_type", num_args + 1);
456
457 std::vector<kernel_arg_md> res;
458 res.reserve(num_args);
459 for (unsigned i = 0; i < num_args; ++i) {
460 res.push_back(kernel_arg_md(
461 llvm::cast<llvm::MDString>(ty->getOperand(i+1))->getString(),
462 llvm::cast<llvm::MDString>(aq->getOperand(i+1))->getString()));
463 }
464
465 return res;
466 }
467
468 #else
469
470 std::vector<kernel_arg_md>
471 get_kernel_arg_md(const llvm::Function *kernel_func) {
472 return std::vector<kernel_arg_md>(
473 kernel_func->getArgumentList().size(),
474 kernel_arg_md("", ""));
475 }
476
477 #endif // HAVE_LLVM >= 0x0306
478
479 std::vector<module::argument>
480 get_kernel_args(const llvm::Module *mod, const std::string &kernel_name,
481 const clang::LangAS::Map &address_spaces) {
482
483 std::vector<module::argument> args;
484 llvm::Function *kernel_func = mod->getFunction(kernel_name);
485 assert(kernel_func && "Kernel name not found in module.");
486 auto arg_md = get_kernel_arg_md(kernel_func);
487
488 llvm::DataLayout TD(mod);
489 llvm::Type *size_type =
490 TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_uint) * 8);
491
492 for (const auto &arg: kernel_func->args()) {
493
494 llvm::Type *arg_type = arg.getType();
495 const unsigned arg_store_size = TD.getTypeStoreSize(arg_type);
496
497 // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
498 // type that is not a power of two bytes in size must be
499 // aligned to the next larger power of two". We need this
500 // alignment for three element vectors, which have
501 // non-power-of-2 store size.
502 const unsigned arg_api_size = util_next_power_of_two(arg_store_size);
503
504 llvm::Type *target_type = arg_type->isIntegerTy() ?
505 TD.getSmallestLegalIntType(mod->getContext(), arg_store_size * 8)
506 : arg_type;
507 unsigned target_size = TD.getTypeStoreSize(target_type);
508 unsigned target_align = TD.getABITypeAlignment(target_type);
509
510 llvm::StringRef type_name = arg_md[arg.getArgNo()].type_name;
511 llvm::StringRef access_qual = arg_md[arg.getArgNo()].access_qual;
512
513 // Image
514 const bool is_image2d = type_name == "image2d_t";
515 const bool is_image3d = type_name == "image3d_t";
516 if (is_image2d || is_image3d) {
517 const bool is_write_only = access_qual == "write_only";
518 const bool is_read_only = access_qual == "read_only";
519
520 enum module::argument::type marg_type;
521 if (is_image2d && is_read_only) {
522 marg_type = module::argument::image2d_rd;
523 } else if (is_image2d && is_write_only) {
524 marg_type = module::argument::image2d_wr;
525 } else if (is_image3d && is_read_only) {
526 marg_type = module::argument::image3d_rd;
527 } else if (is_image3d && is_write_only) {
528 marg_type = module::argument::image3d_wr;
529 } else {
530 assert(0 && "Wrong image access qualifier");
531 }
532
533 args.push_back(module::argument(marg_type,
534 arg_store_size, target_size,
535 target_align,
536 module::argument::zero_ext));
537 continue;
538 }
539
540 // Image size implicit argument
541 if (type_name == "__llvm_image_size") {
542 args.push_back(module::argument(module::argument::scalar,
543 sizeof(cl_uint),
544 TD.getTypeStoreSize(size_type),
545 TD.getABITypeAlignment(size_type),
546 module::argument::zero_ext,
547 module::argument::image_size));
548 continue;
549 }
550
551 // Image format implicit argument
552 if (type_name == "__llvm_image_format") {
553 args.push_back(module::argument(module::argument::scalar,
554 sizeof(cl_uint),
555 TD.getTypeStoreSize(size_type),
556 TD.getABITypeAlignment(size_type),
557 module::argument::zero_ext,
558 module::argument::image_format));
559 continue;
560 }
561
562 // Other types
563 if (llvm::isa<llvm::PointerType>(arg_type) && arg.hasByValAttr()) {
564 arg_type =
565 llvm::dyn_cast<llvm::PointerType>(arg_type)->getElementType();
566 }
567
568 if (arg_type->isPointerTy()) {
569 unsigned address_space = llvm::cast<llvm::PointerType>(arg_type)->getAddressSpace();
570 if (address_space == address_spaces[clang::LangAS::opencl_local
571 - clang::LangAS::Offset]) {
572 args.push_back(module::argument(module::argument::local,
573 arg_api_size, target_size,
574 target_align,
575 module::argument::zero_ext));
576 } else {
577 // XXX: Correctly handle constant address space. There is no
578 // way for r600g to pass a handle for constant buffers back
579 // to clover like it can for global buffers, so
580 // creating constant arguments will break r600g. For now,
581 // continue treating constant buffers as global buffers
582 // until we can come up with a way to create handles for
583 // constant buffers.
584 args.push_back(module::argument(module::argument::global,
585 arg_api_size, target_size,
586 target_align,
587 module::argument::zero_ext));
588 }
589
590 } else {
591 llvm::AttributeSet attrs = kernel_func->getAttributes();
592 enum module::argument::ext_type ext_type =
593 (attrs.hasAttribute(arg.getArgNo() + 1,
594 llvm::Attribute::SExt) ?
595 module::argument::sign_ext :
596 module::argument::zero_ext);
597
598 args.push_back(
599 module::argument(module::argument::scalar, arg_api_size,
600 target_size, target_align, ext_type));
601 }
602 }
603
604 // Append implicit arguments. XXX - The types, ordering and
605 // vector size of the implicit arguments should depend on the
606 // target according to the selected calling convention.
607 args.push_back(
608 module::argument(module::argument::scalar, sizeof(cl_uint),
609 TD.getTypeStoreSize(size_type),
610 TD.getABITypeAlignment(size_type),
611 module::argument::zero_ext,
612 module::argument::grid_dimension));
613
614 args.push_back(
615 module::argument(module::argument::scalar, sizeof(cl_uint),
616 TD.getTypeStoreSize(size_type),
617 TD.getABITypeAlignment(size_type),
618 module::argument::zero_ext,
619 module::argument::grid_offset));
620
621 return args;
622 }
623
624 module
625 build_module_llvm(llvm::Module *mod,
626 clang::LangAS::Map& address_spaces) {
627
628 module m;
629 struct pipe_llvm_program_header header;
630
631 llvm::SmallVector<char, 1024> llvm_bitcode;
632 llvm::raw_svector_ostream bitcode_ostream(llvm_bitcode);
633 llvm::BitstreamWriter writer(llvm_bitcode);
634 llvm::WriteBitcodeToFile(mod, bitcode_ostream);
635 #if HAVE_LLVM < 0x0308
636 bitcode_ostream.flush();
637 #endif
638
639 const std::vector<llvm::Function *> kernels = find_kernels(mod);
640 for (unsigned i = 0; i < kernels.size(); ++i) {
641 std::string kernel_name = kernels[i]->getName();
642 std::vector<module::argument> args =
643 get_kernel_args(mod, kernel_name, address_spaces);
644
645 m.syms.push_back(module::symbol(kernel_name, 0, i, args ));
646 }
647
648 header.num_bytes = llvm_bitcode.size();
649 std::vector<char> data;
650 data.insert(data.end(), (char*)(&header),
651 (char*)(&header) + sizeof(header));
652 data.insert(data.end(), llvm_bitcode.begin(),
653 llvm_bitcode.end());
654 m.secs.push_back(module::section(0, module::section::text,
655 header.num_bytes, data));
656
657 return m;
658 }
659
660 void
661 emit_code(LLVMTargetMachineRef tm, LLVMModuleRef mod,
662 LLVMCodeGenFileType file_type,
663 LLVMMemoryBufferRef *out_buffer,
664 std::string &r_log) {
665 LLVMBool err;
666 char *err_message = NULL;
667
668 err = LLVMTargetMachineEmitToMemoryBuffer(tm, mod, file_type,
669 &err_message, out_buffer);
670
671 if (err) {
672 r_log = std::string(err_message);
673 }
674
675 LLVMDisposeMessage(err_message);
676
677 if (err) {
678 throw compile_error();
679 }
680 }
681
682 std::vector<char>
683 compile_native(const llvm::Module *mod, const std::string &triple,
684 const std::string &processor, unsigned dump_asm,
685 std::string &r_log) {
686
687 std::string log;
688 LLVMTargetRef target;
689 char *error_message;
690 LLVMMemoryBufferRef out_buffer;
691 unsigned buffer_size;
692 const char *buffer_data;
693 LLVMModuleRef mod_ref = wrap(mod);
694
695 if (LLVMGetTargetFromTriple(triple.c_str(), &target, &error_message)) {
696 r_log = std::string(error_message);
697 LLVMDisposeMessage(error_message);
698 throw compile_error();
699 }
700
701 LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
702 target, triple.c_str(), processor.c_str(), "",
703 LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);
704
705 if (!tm) {
706 r_log = "Could not create TargetMachine: " + triple;
707 throw compile_error();
708 }
709
710 if (dump_asm) {
711 LLVMSetTargetMachineAsmVerbosity(tm, true);
712 #if HAVE_LLVM >= 0x0308
713 LLVMModuleRef debug_mod = wrap(llvm::CloneModule(mod).release());
714 #else
715 LLVMModuleRef debug_mod = wrap(llvm::CloneModule(mod));
716 #endif
717 emit_code(tm, debug_mod, LLVMAssemblyFile, &out_buffer, r_log);
718 buffer_size = LLVMGetBufferSize(out_buffer);
719 buffer_data = LLVMGetBufferStart(out_buffer);
720 debug_log(std::string(buffer_data, buffer_size), ".asm");
721
722 LLVMSetTargetMachineAsmVerbosity(tm, false);
723 LLVMDisposeMemoryBuffer(out_buffer);
724 LLVMDisposeModule(debug_mod);
725 }
726
727 emit_code(tm, mod_ref, LLVMObjectFile, &out_buffer, r_log);
728
729 buffer_size = LLVMGetBufferSize(out_buffer);
730 buffer_data = LLVMGetBufferStart(out_buffer);
731
732 std::vector<char> code(buffer_data, buffer_data + buffer_size);
733
734 LLVMDisposeMemoryBuffer(out_buffer);
735 LLVMDisposeTargetMachine(tm);
736
737 return code;
738 }
739
740 std::map<std::string, unsigned>
741 get_kernel_offsets(std::vector<char> &code,
742 const std::vector<llvm::Function *> &kernels,
743 std::string &r_log) {
744
745 // One of the libelf implementations
746 // (http://www.mr511.de/software/english.htm) requires calling
747 // elf_version() before elf_memory().
748 //
749 elf_version(EV_CURRENT);
750
751 Elf *elf = elf_memory(&code[0], code.size());
752 size_t section_str_index;
753 elf_getshdrstrndx(elf, &section_str_index);
754 Elf_Scn *section = NULL;
755 Elf_Scn *symtab = NULL;
756 GElf_Shdr symtab_header;
757
758 // Find the symbol table
759 try {
760 while ((section = elf_nextscn(elf, section))) {
761 const char *name;
762 if (gelf_getshdr(section, &symtab_header) != &symtab_header) {
763 r_log = "Failed to read ELF section header.";
764 throw compile_error();
765 }
766 name = elf_strptr(elf, section_str_index, symtab_header.sh_name);
767 if (!strcmp(name, ".symtab")) {
768 symtab = section;
769 break;
770 }
771 }
772 if (!symtab) {
773 r_log = "Unable to find symbol table.";
774 throw compile_error();
775 }
776 } catch (compile_error &e) {
777 elf_end(elf);
778 throw e;
779 }
780
781
782 // Extract symbol information from the table
783 Elf_Data *symtab_data = NULL;
784 GElf_Sym *symbol;
785 GElf_Sym s;
786
787 std::map<std::string, unsigned> kernel_offsets;
788 symtab_data = elf_getdata(symtab, symtab_data);
789
790 // Determine the offsets for each kernel
791 for (int i = 0; (symbol = gelf_getsym(symtab_data, i, &s)); i++) {
792 char *name = elf_strptr(elf, symtab_header.sh_link, symbol->st_name);
793 for (std::vector<llvm::Function*>::const_iterator it = kernels.begin(),
794 e = kernels.end(); it != e; ++it) {
795 llvm::Function *f = *it;
796 if (f->getName() == std::string(name))
797 kernel_offsets[f->getName()] = symbol->st_value;
798 }
799 }
800 elf_end(elf);
801 return kernel_offsets;
802 }
803
804 module
805 build_module_native(std::vector<char> &code,
806 llvm::Module *mod,
807 const clang::LangAS::Map &address_spaces,
808 std::string &r_log) {
809
810 const std::vector<llvm::Function *> kernels = find_kernels(mod);
811
812 std::map<std::string, unsigned> kernel_offsets =
813 get_kernel_offsets(code, kernels, r_log);
814
815 // Begin building the clover module
816 module m;
817 struct pipe_llvm_program_header header;
818
819 // Store the generated ELF binary in the module's text section.
820 header.num_bytes = code.size();
821 std::vector<char> data;
822 data.insert(data.end(), (char*)(&header),
823 (char*)(&header) + sizeof(header));
824 data.insert(data.end(), code.begin(), code.end());
825 m.secs.push_back(module::section(0, module::section::text,
826 header.num_bytes, data));
827
828 for (std::map<std::string, unsigned>::iterator i = kernel_offsets.begin(),
829 e = kernel_offsets.end(); i != e; ++i) {
830 std::vector<module::argument> args =
831 get_kernel_args(mod, i->first, address_spaces);
832 m.syms.push_back(module::symbol(i->first, 0, i->second, args ));
833 }
834
835 return m;
836 }
837
838 void
839 diagnostic_handler(const llvm::DiagnosticInfo &di, void *data) {
840 if (di.getSeverity() == llvm::DS_Error) {
841 std::string message = *(std::string*)data;
842 llvm::raw_string_ostream stream(message);
843 llvm::DiagnosticPrinterRawOStream dp(stream);
844 di.print(dp);
845 stream.flush();
846 *(std::string*)data = message;
847
848 throw compile_error();
849 }
850 }
851
852 void
853 init_targets() {
854 static bool targets_initialized = false;
855 if (!targets_initialized) {
856 LLVMInitializeAllTargets();
857 LLVMInitializeAllTargetInfos();
858 LLVMInitializeAllTargetMCs();
859 LLVMInitializeAllAsmPrinters();
860 targets_initialized = true;
861 }
862 }
863
864 #define DBG_CLC (1 << 0)
865 #define DBG_LLVM (1 << 1)
866 #define DBG_ASM (1 << 2)
867
868 unsigned
869 get_debug_flags() {
870 static const struct debug_named_value debug_options[] = {
871 {"clc", DBG_CLC, "Dump the OpenCL C code for all kernels."},
872 {"llvm", DBG_LLVM, "Dump the generated LLVM IR for all kernels."},
873 {"asm", DBG_ASM, "Dump kernel assembly code for targets specifying "
874 "PIPE_SHADER_IR_NATIVE"},
875 DEBUG_NAMED_VALUE_END // must be last
876 };
877 static const unsigned debug_flags =
878 debug_get_flags_option("CLOVER_DEBUG", debug_options, 0);
879
880 return debug_flags;
881 }
882
883 } // End anonymous namespace
884
885 module
886 clover::compile_program_llvm(const std::string &source,
887 const header_map &headers,
888 enum pipe_shader_ir ir,
889 const std::string &target,
890 const std::string &opts,
891 std::string &r_log) {
892
893 init_targets();
894
895 size_t processor_str_len = std::string(target).find_first_of("-");
896 std::string processor(target, 0, processor_str_len);
897 std::string triple(target, processor_str_len + 1,
898 target.size() - processor_str_len - 1);
899 clang::LangAS::Map address_spaces;
900 llvm::LLVMContext llvm_ctx;
901 unsigned optimization_level;
902
903 llvm_ctx.setDiagnosticHandler(diagnostic_handler, &r_log);
904
905 if (get_debug_flags() & DBG_CLC)
906 debug_log("// Build options: " + opts + '\n' + source, ".cl");
907
908 // The input file name must have the .cl extension in order for the
909 // CompilerInvocation class to recognize it as an OpenCL source file.
910 llvm::Module *mod = compile_llvm(llvm_ctx, source, headers, "input.cl",
911 triple, processor, opts, address_spaces,
912 optimization_level, r_log);
913
914 optimize(mod, optimization_level);
915
916 if (get_debug_flags() & DBG_LLVM) {
917 std::string log;
918 llvm::raw_string_ostream s_log(log);
919 mod->print(s_log, NULL);
920 s_log.flush();
921 debug_log(log, ".ll");
922 }
923
924 module m;
925 // Build the clover::module
926 switch (ir) {
927 case PIPE_SHADER_IR_NIR:
928 case PIPE_SHADER_IR_TGSI:
929 //XXX: Handle TGSI, NIR
930 assert(0);
931 m = module();
932 break;
933 case PIPE_SHADER_IR_LLVM:
934 m = build_module_llvm(mod, address_spaces);
935 break;
936 case PIPE_SHADER_IR_NATIVE: {
937 std::vector<char> code = compile_native(mod, triple, processor,
938 get_debug_flags() & DBG_ASM,
939 r_log);
940 m = build_module_native(code, mod, address_spaces, r_log);
941 break;
942 }
943 }
944 #if HAVE_LLVM >= 0x0306
945 // LLVM 3.6 and newer, the user takes ownership of the module.
946 delete mod;
947 #endif
948
949 return m;
950 }