a75be648898669a8f81c87c7c6a388642e399521
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_misc.cpp
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
/**
 * The purpose of this module is to expose LLVM functionality not available
 * through the C bindings.
 */
33
34
35 // Undef these vars just to silence warnings
36 #undef PACKAGE_BUGREPORT
37 #undef PACKAGE_NAME
38 #undef PACKAGE_STRING
39 #undef PACKAGE_TARNAME
40 #undef PACKAGE_VERSION
41
42
43 #include <stddef.h>
44
45 #include <llvm/Config/llvm-config.h>
46
47 // Workaround http://llvm.org/PR23628
48 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
49 # pragma push_macro("DEBUG")
50 # undef DEBUG
51 #endif
52
53 #include <llvm/Config/llvm-config.h>
54 #include <llvm-c/Core.h>
55 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6)
56 #include <llvm-c/Support.h>
57 #endif
58 #include <llvm-c/ExecutionEngine.h>
59 #include <llvm/Target/TargetOptions.h>
60 #include <llvm/ExecutionEngine/ExecutionEngine.h>
61 #include <llvm/ADT/Triple.h>
62 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
63 #include <llvm/Analysis/TargetLibraryInfo.h>
64 #else
65 #include <llvm/Target/TargetLibraryInfo.h>
66 #endif
67 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
68 #include <llvm/ExecutionEngine/JITMemoryManager.h>
69 #else
70 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
71 #endif
72 #include <llvm/Support/CommandLine.h>
73 #include <llvm/Support/Host.h>
74 #include <llvm/Support/PrettyStackTrace.h>
75
76 #include <llvm/Support/TargetSelect.h>
77
78 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
79 #include <llvm/IR/CallSite.h>
80 #endif
81 #include <llvm/IR/IRBuilder.h>
82 #include <llvm/IR/Module.h>
83 #include <llvm/Support/CBindingWrapping.h>
84
85 #include <llvm/Config/llvm-config.h>
86 #if LLVM_USE_INTEL_JITEVENTS
87 #include <llvm/ExecutionEngine/JITEventListener.h>
88 #endif
89
90 // Workaround http://llvm.org/PR23628
91 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
92 # pragma pop_macro("DEBUG")
93 #endif
94
95 #include "c11/threads.h"
96 #include "os/os_thread.h"
97 #include "pipe/p_config.h"
98 #include "util/u_debug.h"
99 #include "util/u_cpu_detect.h"
100
101 #include "lp_bld_misc.h"
102 #include "lp_bld_debug.h"
103
104 namespace {
105
106 class LLVMEnsureMultithreaded {
107 public:
108 LLVMEnsureMultithreaded()
109 {
110 if (!LLVMIsMultithreaded()) {
111 LLVMStartMultithreaded();
112 }
113 }
114 };
115
116 static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;
117
118 }
119
120 static once_flag init_native_targets_once_flag = ONCE_FLAG_INIT;
121
122 static void init_native_targets()
123 {
124 // If we have a native target, initialize it to ensure it is linked in and
125 // usable by the JIT.
126 llvm::InitializeNativeTarget();
127
128 llvm::InitializeNativeTargetAsmPrinter();
129
130 llvm::InitializeNativeTargetDisassembler();
131 #if DEBUG && (LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6))
132 {
133 char *env_llc_options = getenv("GALLIVM_LLC_OPTIONS");
134 if (env_llc_options) {
135 char *option;
136 char *options[64] = {(char *) "llc"}; // Warning without cast
137 int n;
138 for (n = 0, option = strtok(env_llc_options, " "); option; n++, option = strtok(NULL, " ")) {
139 options[n + 1] = option;
140 }
141 if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
142 debug_printf("llc additional options (%d):\n", n);
143 for (int i = 1; i <= n; i++)
144 debug_printf("\t%s\n", options[i]);
145 debug_printf("\n");
146 }
147 LLVMParseCommandLineOptions(n + 1, options, NULL);
148 }
149 }
150 #endif
151 }
152
153 extern "C" void
154 lp_set_target_options(void)
155 {
156 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
157 /*
158 * By default LLVM adds a signal handler to output a pretty stack trace.
159 * This signal handler is never removed, causing problems when unloading the
160 * shared object where the gallium driver resides.
161 */
162 llvm::DisablePrettyStackTrace = true;
163 #endif
164
165 /* The llvm target registry is not thread-safe, so drivers and state-trackers
166 * that want to initialize targets should use the lp_set_target_options()
167 * function to safely initialize targets.
168 *
169 * LLVM targets should be initialized before the driver or state-tracker tries
170 * to access the registry.
171 */
172 call_once(&init_native_targets_once_flag, init_native_targets);
173 }
174
175 extern "C"
176 LLVMTargetLibraryInfoRef
177 gallivm_create_target_library_info(const char *triple)
178 {
179 return reinterpret_cast<LLVMTargetLibraryInfoRef>(
180 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
181 new llvm::TargetLibraryInfo(
182 #else
183 new llvm::TargetLibraryInfoImpl(
184 #endif
185 llvm::Triple(triple)));
186 }
187
188 extern "C"
189 void
190 gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
191 {
192 delete reinterpret_cast<
193 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
194 llvm::TargetLibraryInfo
195 #else
196 llvm::TargetLibraryInfoImpl
197 #endif
198 *>(library_info);
199 }
200
201
202 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
203
204 extern "C"
205 void
206 LLVMSetAlignmentBackport(LLVMValueRef V,
207 unsigned Bytes)
208 {
209 switch (LLVMGetInstructionOpcode(V)) {
210 case LLVMLoad:
211 llvm::unwrap<llvm::LoadInst>(V)->setAlignment(Bytes);
212 break;
213 case LLVMStore:
214 llvm::unwrap<llvm::StoreInst>(V)->setAlignment(Bytes);
215 break;
216 default:
217 assert(0);
218 break;
219 }
220 }
221
222 #endif
223
224
/*
 * Base class used by the memory managers in this file:
 * llvm::JITMemoryManager before LLVM 3.6, llvm::RTDyldMemoryManager from
 * LLVM 3.6 on.
 */
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
typedef llvm::JITMemoryManager BaseMemoryManager;
#else
typedef llvm::RTDyldMemoryManager BaseMemoryManager;
#endif
230
231
/*
 * Delegating is tedious but the default manager class is hidden in an
 * anonymous namespace in LLVM, so we cannot just derive from it to change
 * its behavior.
 *
 * Every virtual method below forwards its arguments unchanged to the
 * manager returned by mgr() and returns that manager's result.  The set of
 * methods and their signatures are selected per LLVM version by the
 * preprocessor conditions.
 */
class DelegatingJITMemoryManager : public BaseMemoryManager {

   protected:
      /* The manager all calls are forwarded to; provided by the subclass. */
      virtual BaseMemoryManager *mgr() const = 0;

   public:
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
      /*
       * From JITMemoryManager
       */
      virtual void setMemoryWritable() {
         mgr()->setMemoryWritable();
      }
      virtual void setMemoryExecutable() {
         mgr()->setMemoryExecutable();
      }
      virtual void setPoisonMemory(bool poison) {
         mgr()->setPoisonMemory(poison);
      }
      virtual void AllocateGOT() {
         mgr()->AllocateGOT();
         /*
          * isManagingGOT() is not virtual in base class so we can't delegate.
          * Instead we mirror the value of HasGOT in our instance.
          */
         HasGOT = mgr()->isManagingGOT();
      }
      virtual uint8_t *getGOTBase() const {
         return mgr()->getGOTBase();
      }
      virtual uint8_t *startFunctionBody(const llvm::Function *F,
                                         uintptr_t &ActualSize) {
         return mgr()->startFunctionBody(F, ActualSize);
      }
      virtual uint8_t *allocateStub(const llvm::GlobalValue *F,
                                    unsigned StubSize,
                                    unsigned Alignment) {
         return mgr()->allocateStub(F, StubSize, Alignment);
      }
      virtual void endFunctionBody(const llvm::Function *F,
                                   uint8_t *FunctionStart,
                                   uint8_t *FunctionEnd) {
         mgr()->endFunctionBody(F, FunctionStart, FunctionEnd);
      }
      virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
         return mgr()->allocateSpace(Size, Alignment);
      }
      virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
         return mgr()->allocateGlobal(Size, Alignment);
      }
      virtual void deallocateFunctionBody(void *Body) {
         mgr()->deallocateFunctionBody(Body);
      }
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
      /* Exception table hooks are only delegated for LLVM older than 3.4. */
      virtual uint8_t *startExceptionTable(const llvm::Function *F,
                                           uintptr_t &ActualSize) {
         return mgr()->startExceptionTable(F, ActualSize);
      }
      virtual void endExceptionTable(const llvm::Function *F,
                                     uint8_t *TableStart,
                                     uint8_t *TableEnd,
                                     uint8_t *FrameRegister) {
         mgr()->endExceptionTable(F, TableStart, TableEnd,
                                  FrameRegister);
      }
      virtual void deallocateExceptionTable(void *ET) {
         mgr()->deallocateExceptionTable(ET);
      }
#endif
      virtual bool CheckInvariants(std::string &s) {
         return mgr()->CheckInvariants(s);
      }
      virtual size_t GetDefaultCodeSlabSize() {
         return mgr()->GetDefaultCodeSlabSize();
      }
      virtual size_t GetDefaultDataSlabSize() {
         return mgr()->GetDefaultDataSlabSize();
      }
      virtual size_t GetDefaultStubSlabSize() {
         return mgr()->GetDefaultStubSlabSize();
      }
      virtual unsigned GetNumCodeSlabs() {
         return mgr()->GetNumCodeSlabs();
      }
      virtual unsigned GetNumDataSlabs() {
         return mgr()->GetNumDataSlabs();
      }
      virtual unsigned GetNumStubSlabs() {
         return mgr()->GetNumStubSlabs();
      }
#endif

      /*
       * From RTDyldMemoryManager
       */
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
      /* LLVM 3.4 and later pass the section name as an extra argument. */
      virtual uint8_t *allocateCodeSection(uintptr_t Size,
                                           unsigned Alignment,
                                           unsigned SectionID,
                                           llvm::StringRef SectionName) {
         return mgr()->allocateCodeSection(Size, Alignment, SectionID,
                                           SectionName);
      }
#else
      virtual uint8_t *allocateCodeSection(uintptr_t Size,
                                           unsigned Alignment,
                                           unsigned SectionID) {
         return mgr()->allocateCodeSection(Size, Alignment, SectionID);
      }
#endif
      /* Same story as above: SectionName only exists for LLVM >= 3.4. */
      virtual uint8_t *allocateDataSection(uintptr_t Size,
                                           unsigned Alignment,
                                           unsigned SectionID,
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
                                           llvm::StringRef SectionName,
#endif
                                           bool IsReadOnly) {
         return mgr()->allocateDataSection(Size, Alignment, SectionID,
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
                                           SectionName,
#endif
                                           IsReadOnly);
      }
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
      virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
         mgr()->registerEHFrames(Addr, LoadAddr, Size);
      }
#else
      virtual void registerEHFrames(llvm::StringRef SectionData) {
         mgr()->registerEHFrames(SectionData);
      }
#endif
      /* deregisterEHFrames() takes no arguments from LLVM 5 on, three
       * arguments for 3.4 up to (but excluding) 5, and is not delegated for
       * anything older. */
#if LLVM_VERSION_MAJOR >= 5
      virtual void deregisterEHFrames() {
         mgr()->deregisterEHFrames();
      }
#elif LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
      virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
         mgr()->deregisterEHFrames(Addr, LoadAddr, Size);
      }
#endif
      virtual void *getPointerToNamedFunction(const std::string &Name,
                                              bool AbortOnFailure=true) {
         return mgr()->getPointerToNamedFunction(Name, AbortOnFailure);
      }
      /* The finalization hook is called applyPermissions() up to LLVM 3.3
       * and finalizeMemory() afterwards. */
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 3)
      virtual bool applyPermissions(std::string *ErrMsg = 0) {
         return mgr()->applyPermissions(ErrMsg);
      }
#else
      virtual bool finalizeMemory(std::string *ErrMsg = 0) {
         return mgr()->finalizeMemory(ErrMsg);
      }
#endif
};
392
393
394 /*
395 * Delegate memory management to one shared manager for more efficient use
396 * of memory than creating a separate pool for each LLVM engine.
397 * Keep generated code until freeGeneratedCode() is called, instead of when
398 * memory manager is destroyed, which happens during engine destruction.
399 * This allows additional memory savings as we don't have to keep the engine
400 * around in order to use the code.
401 * All methods are delegated to the shared manager except destruction and
402 * deallocating code. For the latter we just remember what needs to be
403 * deallocated later. The shared manager is deleted once it is empty.
404 */
405 class ShaderMemoryManager : public DelegatingJITMemoryManager {
406
407 BaseMemoryManager *TheMM;
408
409 struct GeneratedCode {
410 typedef std::vector<void *> Vec;
411 Vec FunctionBody, ExceptionTable;
412 BaseMemoryManager *TheMM;
413
414 GeneratedCode(BaseMemoryManager *MM) {
415 TheMM = MM;
416 }
417
418 ~GeneratedCode() {
419 /*
420 * Deallocate things as previously requested and
421 * free shared manager when no longer used.
422 */
423 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
424 Vec::iterator i;
425
426 assert(TheMM);
427 for ( i = FunctionBody.begin(); i != FunctionBody.end(); ++i )
428 TheMM->deallocateFunctionBody(*i);
429 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
430 for ( i = ExceptionTable.begin(); i != ExceptionTable.end(); ++i )
431 TheMM->deallocateExceptionTable(*i);
432 #endif /* LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4) */
433 #endif /* LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6) */
434 }
435 };
436
437 GeneratedCode *code;
438
439 BaseMemoryManager *mgr() const {
440 return TheMM;
441 }
442
443 public:
444
445 ShaderMemoryManager(BaseMemoryManager* MM) {
446 TheMM = MM;
447 code = new GeneratedCode(MM);
448 }
449
450 virtual ~ShaderMemoryManager() {
451 /*
452 * 'code' is purposely not deleted. It is the user's responsibility
453 * to call getGeneratedCode() and freeGeneratedCode().
454 */
455 }
456
457 struct lp_generated_code *getGeneratedCode() {
458 return (struct lp_generated_code *) code;
459 }
460
461 static void freeGeneratedCode(struct lp_generated_code *code) {
462 delete (GeneratedCode *) code;
463 }
464
465 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
466 virtual void deallocateExceptionTable(void *ET) {
467 // remember for later deallocation
468 code->ExceptionTable.push_back(ET);
469 }
470 #endif
471
472 virtual void deallocateFunctionBody(void *Body) {
473 // remember for later deallocation
474 code->FunctionBody.push_back(Body);
475 }
476 };
477
478
479 /**
480 * Same as LLVMCreateJITCompilerForModule, but:
481 * - allows using MCJIT and enabling AVX feature where available.
482 * - set target options
483 *
484 * See also:
485 * - llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
486 * - llvm/tools/lli/lli.cpp
487 * - http://markmail.org/message/ttkuhvgj4cxxy2on#query:+page:1+mid:aju2dggerju3ivd3+state:results
488 */
489 extern "C"
490 LLVMBool
491 lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
492 lp_generated_code **OutCode,
493 LLVMModuleRef M,
494 LLVMMCJITMemoryManagerRef CMM,
495 unsigned OptLevel,
496 int useMCJIT,
497 char **OutError)
498 {
499 using namespace llvm;
500
501 std::string Error;
502 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6)
503 EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
504 #else
505 EngineBuilder builder(unwrap(M));
506 #endif
507
508 /**
509 * LLVM 3.1+ haven't more "extern unsigned llvm::StackAlignmentOverride" and
510 * friends for configuring code generation options, like stack alignment.
511 */
512 TargetOptions options;
513 #if defined(PIPE_ARCH_X86)
514 options.StackAlignmentOverride = 4;
515 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
516 options.RealignStack = true;
517 #endif
518 #endif
519
520 #if defined(DEBUG) && (LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7))
521 options.JITEmitDebugInfo = true;
522 #endif
523
524 /* XXX: Workaround http://llvm.org/PR21435 */
525 #if defined(DEBUG) || defined(PROFILE) || \
526 ((LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 3)) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)))
527 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
528 options.NoFramePointerElimNonLeaf = true;
529 #endif
530 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
531 options.NoFramePointerElim = true;
532 #endif
533 #endif
534
535 builder.setEngineKind(EngineKind::JIT)
536 .setErrorStr(&Error)
537 .setTargetOptions(options)
538 .setOptLevel((CodeGenOpt::Level)OptLevel);
539
540 if (useMCJIT) {
541 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
542 builder.setUseMCJIT(true);
543 #endif
544 #ifdef _WIN32
545 /*
546 * MCJIT works on Windows, but currently only through ELF object format.
547 *
548 * XXX: We could use `LLVM_HOST_TRIPLE "-elf"` but LLVM_HOST_TRIPLE has
549 * different strings for MinGW/MSVC, so better play it safe and be
550 * explicit.
551 */
552 # ifdef _WIN64
553 LLVMSetTarget(M, "x86_64-pc-win32-elf");
554 # else
555 LLVMSetTarget(M, "i686-pc-win32-elf");
556 # endif
557 #endif
558 }
559
560 llvm::SmallVector<std::string, 16> MAttrs;
561
562 #if LLVM_VERSION_MAJOR >= 4 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_ARM))
563 /* llvm-3.3+ implements sys::getHostCPUFeatures for Arm
564 * and llvm-3.7+ for x86, which allows us to enable/disable
565 * code generation based on the results of cpuid on these
566 * architectures.
567 */
568 llvm::StringMap<bool> features;
569 llvm::sys::getHostCPUFeatures(features);
570
571 for (StringMapIterator<bool> f = features.begin();
572 f != features.end();
573 ++f) {
574 MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
575 }
576 #elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
577 /*
578 * We need to unset attributes because sometimes LLVM mistakenly assumes
579 * certain features are present given the processor name.
580 *
581 * https://bugs.freedesktop.org/show_bug.cgi?id=92214
582 * http://llvm.org/PR25021
583 * http://llvm.org/PR19429
584 * http://llvm.org/PR16721
585 */
586 MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" );
587 MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" );
588 MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" );
589 MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" );
590 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
591 MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
592 #else
593 MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41" : "-sse41" );
594 #endif
595 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
596 MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
597 #else
598 MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42" : "-sse42" );
599 #endif
600 /*
601 * AVX feature is not automatically detected from CPUID by the X86 target
602 * yet, because the old (yet default) JIT engine is not capable of
603 * emitting the opcodes. On newer llvm versions it is and at least some
604 * versions (tested with 3.3) will emit avx opcodes without this anyway.
605 */
606 MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx");
607 MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
608 if (LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)) {
609 MAttrs.push_back(util_cpu_caps.has_fma ? "+fma" : "-fma");
610 } else {
611 /*
612 * The old JIT in LLVM 3.3 has a bug encoding llvm.fmuladd.f32 and
613 * llvm.fmuladd.v2f32 intrinsics when FMA is available.
614 */
615 MAttrs.push_back("-fma");
616 }
617 MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
618 /* disable avx512 and all subvariants */
619 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
620 MAttrs.push_back("-avx512cd");
621 MAttrs.push_back("-avx512er");
622 MAttrs.push_back("-avx512f");
623 MAttrs.push_back("-avx512pf");
624 #endif
625 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
626 MAttrs.push_back("-avx512bw");
627 MAttrs.push_back("-avx512dq");
628 MAttrs.push_back("-avx512vl");
629 #endif
630 #endif
631 #if defined(PIPE_ARCH_ARM)
632 if (!util_cpu_caps.has_neon) {
633 MAttrs.push_back("-neon");
634 MAttrs.push_back("-crypto");
635 MAttrs.push_back("-vfp2");
636 }
637 #endif
638
639 #if defined(PIPE_ARCH_PPC)
640 MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
641 #if (LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4))
642 #if (LLVM_VERSION_MAJOR < 4)
643 /*
644 * Make sure VSX instructions are disabled
645 * See LLVM bugs:
646 * https://llvm.org/bugs/show_bug.cgi?id=25503#c7 (fixed in 3.8.1)
647 * https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1)
648 * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)
649 * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0)
650 */
651 if (util_cpu_caps.has_altivec) {
652 MAttrs.push_back("-vsx");
653 }
654 #else
655 /*
656 * Bug 25503 is fixed, by the same fix that fixed
657 * bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1).
658 * BZ 33531 actually comprises more than one bug, all of
659 * which are fixed in LLVM 4.0.
660 *
661 * With LLVM 4.0 or higher:
662 * Make sure VSX instructions are ENABLED (if supported), unless
663 * VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
664 */
665 if (util_cpu_caps.has_altivec) {
666 MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
667 }
668 #endif
669 #endif
670 #endif
671
672 builder.setMAttrs(MAttrs);
673
674 if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
675 int n = MAttrs.size();
676 if (n > 0) {
677 debug_printf("llc -mattr option(s): ");
678 for (int i = 0; i < n; i++)
679 debug_printf("%s%s", MAttrs[i].c_str(), (i < n - 1) ? "," : "");
680 debug_printf("\n");
681 }
682 }
683
684 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
685 StringRef MCPU = llvm::sys::getHostCPUName();
686 /*
687 * The cpu bits are no longer set automatically, so need to set mcpu manually.
688 * Note that the MAttrs set above will be sort of ignored (since we should
689 * not set any which would not be set by specifying the cpu anyway).
690 * It ought to be safe though since getHostCPUName() should include bits
691 * not only from the cpu but environment as well (for instance if it's safe
692 * to use avx instructions which need OS support). According to
693 * http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this
694 * right it may be necessary to specify older cpu (or disable mattrs) though
695 * when not using MCJIT so no instructions are generated which the old JIT
696 * can't handle. Not entirely sure if we really need to do anything yet.
697 */
698 #if defined(PIPE_ARCH_LITTLE_ENDIAN) && defined(PIPE_ARCH_PPC_64)
699 /*
700 * Versions of LLVM prior to 4.0 lacked a table entry for "POWER8NVL",
701 * resulting in (big-endian) "generic" being returned on
702 * little-endian Power8NVL systems. The result was that code that
703 * attempted to load the least significant 32 bits of a 64-bit quantity
704 * from memory loaded the wrong half. This resulted in failures in some
705 * Piglit tests, e.g.
706 * .../arb_gpu_shader_fp64/execution/conversion/frag-conversion-explicit-double-uint
707 */
708 if (MCPU == "generic")
709 MCPU = "pwr8";
710 #endif
711 builder.setMCPU(MCPU);
712 if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
713 debug_printf("llc -mcpu option: %s\n", MCPU.str().c_str());
714 }
715 #endif
716
717 ShaderMemoryManager *MM = NULL;
718 if (useMCJIT) {
719 BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
720 MM = new ShaderMemoryManager(JMM);
721 *OutCode = MM->getGeneratedCode();
722
723 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6)
724 builder.setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>(MM));
725 MM = NULL; // ownership taken by std::unique_ptr
726 #elif LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 3)
727 builder.setMCJITMemoryManager(MM);
728 #else
729 builder.setJITMemoryManager(MM);
730 #endif
731 } else {
732 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
733 BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
734 MM = new ShaderMemoryManager(JMM);
735 *OutCode = MM->getGeneratedCode();
736
737 builder.setJITMemoryManager(MM);
738 #else
739 assert(0);
740 #endif
741 }
742
743 ExecutionEngine *JIT;
744
745 JIT = builder.create();
746 #if LLVM_USE_INTEL_JITEVENTS
747 JITEventListener *JEL = JITEventListener::createIntelJITEventListener();
748 JIT->RegisterJITEventListener(JEL);
749 #endif
750 if (JIT) {
751 *OutJIT = wrap(JIT);
752 return 0;
753 }
754 lp_free_generated_code(*OutCode);
755 *OutCode = 0;
756 delete MM;
757 *OutError = strdup(Error.c_str());
758 return 1;
759 }
760
761
762 extern "C"
763 void
764 lp_free_generated_code(struct lp_generated_code *code)
765 {
766 ShaderMemoryManager::freeGeneratedCode(code);
767 }
768
769 extern "C"
770 LLVMMCJITMemoryManagerRef
771 lp_get_default_memory_manager()
772 {
773 BaseMemoryManager *mm;
774 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
775 mm = llvm::JITMemoryManager::CreateDefaultMemManager();
776 #else
777 mm = new llvm::SectionMemoryManager();
778 #endif
779 return reinterpret_cast<LLVMMCJITMemoryManagerRef>(mm);
780 }
781
782 extern "C"
783 void
784 lp_free_memory_manager(LLVMMCJITMemoryManagerRef memorymgr)
785 {
786 delete reinterpret_cast<BaseMemoryManager*>(memorymgr);
787 }
788
789 extern "C" LLVMValueRef
790 lp_get_called_value(LLVMValueRef call)
791 {
792 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
793 return LLVMGetCalledValue(call);
794 #elif LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
795 return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue());
796 #else
797 return NULL; /* radeonsi doesn't support so old LLVM. */
798 #endif
799 }
800
801 extern "C" bool
802 lp_is_function(LLVMValueRef v)
803 {
804 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
805 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
806 #else
807 return llvm::isa<llvm::Function>(llvm::unwrap(v));
808 #endif
809 }
810
811 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
812 static llvm::AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) {
813 switch (Ordering) {
814 case LLVMAtomicOrderingNotAtomic: return llvm::AtomicOrdering::NotAtomic;
815 case LLVMAtomicOrderingUnordered: return llvm::AtomicOrdering::Unordered;
816 case LLVMAtomicOrderingMonotonic: return llvm::AtomicOrdering::Monotonic;
817 case LLVMAtomicOrderingAcquire: return llvm::AtomicOrdering::Acquire;
818 case LLVMAtomicOrderingRelease: return llvm::AtomicOrdering::Release;
819 case LLVMAtomicOrderingAcquireRelease:
820 return llvm::AtomicOrdering::AcquireRelease;
821 case LLVMAtomicOrderingSequentiallyConsistent:
822 return llvm::AtomicOrdering::SequentiallyConsistent;
823 }
824
825 llvm_unreachable("Invalid LLVMAtomicOrdering value!");
826 }
827
828 LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr,
829 LLVMValueRef Cmp, LLVMValueRef New,
830 LLVMAtomicOrdering SuccessOrdering,
831 LLVMAtomicOrdering FailureOrdering,
832 LLVMBool SingleThread)
833 {
834 return llvm::wrap(llvm::unwrap(B)->CreateAtomicCmpXchg(llvm::unwrap(Ptr), llvm::unwrap(Cmp),
835 llvm::unwrap(New), mapFromLLVMOrdering(SuccessOrdering),
836 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
837 mapFromLLVMOrdering(FailureOrdering),
838 #endif
839 SingleThread ? llvm::SynchronizationScope::SingleThread : llvm::SynchronizationScope::CrossThread));
840 }
841 #endif
842
843 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5)
844 LLVMValueRef LLVMBuildFence(LLVMBuilderRef B,
845 LLVMAtomicOrdering ordering,
846 LLVMBool singleThread,
847 const char *Name)
848 {
849 return llvm::wrap(llvm::unwrap(B)->CreateFence(mapFromLLVMOrdering(ordering),
850 singleThread ? llvm::SynchronizationScope::SingleThread : llvm::SynchronizationScope::CrossThread));
851 }
852 #endif