1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
26 **************************************************************************/
30 * The purpose of this module is to expose LLVM functionality not available
31 * through the C bindings.
35 // Undef these vars just to silence warnings
36 #undef PACKAGE_BUGREPORT
39 #undef PACKAGE_TARNAME
40 #undef PACKAGE_VERSION
45 #include <llvm/Config/llvm-config.h>
47 // Workaround http://llvm.org/PR23628
48 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
49 # pragma push_macro("DEBUG")
53 #include <llvm/Config/llvm-config.h>
54 #include <llvm-c/Core.h>
55 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6)
56 #include <llvm-c/Support.h>
58 #include <llvm-c/ExecutionEngine.h>
59 #include <llvm/Target/TargetOptions.h>
60 #include <llvm/ExecutionEngine/ExecutionEngine.h>
61 #include <llvm/ADT/Triple.h>
62 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
63 #include <llvm/Analysis/TargetLibraryInfo.h>
65 #include <llvm/Target/TargetLibraryInfo.h>
67 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
68 #include <llvm/ExecutionEngine/JITMemoryManager.h>
70 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
72 #include <llvm/Support/CommandLine.h>
73 #include <llvm/Support/Host.h>
74 #include <llvm/Support/PrettyStackTrace.h>
76 #include <llvm/Support/TargetSelect.h>
78 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
79 #include <llvm/IR/CallSite.h>
81 #include <llvm/IR/IRBuilder.h>
82 #include <llvm/IR/Module.h>
83 #include <llvm/Support/CBindingWrapping.h>
85 #include <llvm/Config/llvm-config.h>
86 #if LLVM_USE_INTEL_JITEVENTS
87 #include <llvm/ExecutionEngine/JITEventListener.h>
90 // Workaround http://llvm.org/PR23628
91 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
92 # pragma pop_macro("DEBUG")
95 #include "c11/threads.h"
96 #include "os/os_thread.h"
97 #include "pipe/p_config.h"
98 #include "util/u_debug.h"
99 #include "util/u_cpu_detect.h"
101 #include "lp_bld_misc.h"
102 #include "lp_bld_debug.h"
// Helper whose constructor makes sure LLVM's multithreaded mode is on: it
// calls LLVMStartMultithreaded() only when LLVMIsMultithreaded() reports
// that the mode is not already enabled.
106 class LLVMEnsureMultithreaded
{
108 LLVMEnsureMultithreaded()
110 if (!LLVMIsMultithreaded()) {
111 LLVMStartMultithreaded();
// File-scope instance of the helper, so its constructor runs as part of
// static initialization.
116 static LLVMEnsureMultithreaded lLVMEnsureMultithreaded
;
// Flag handed to call_once() in lp_set_target_options() so that
// init_native_targets() is executed a single time.
120 static once_flag init_native_targets_once_flag
= ONCE_FLAG_INIT
;
// Initializes the native LLVM target together with its assembly printer
// and disassembler.
//
// Under the DEBUG / LLVM >= 3.6 check below it also reads the
// GALLIVM_LLC_OPTIONS environment variable, splits it on spaces and passes
// the words, preceded by a fake "llc" program name, to
// LLVMParseCommandLineOptions().
//
// NOTE(review): options[] has 64 slots and the tokenizing loop shows no
// upper bound on n -- confirm that a long GALLIVM_LLC_OPTIONS value cannot
// write past the end of the array.
122 static void init_native_targets()
124 // If we have a native target, initialize it to ensure it is linked in and
125 // usable by the JIT.
126 llvm::InitializeNativeTarget();
128 llvm::InitializeNativeTargetAsmPrinter();
130 llvm::InitializeNativeTargetDisassembler();
131 #if DEBUG && (LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6))
133 char *env_llc_options
= getenv("GALLIVM_LLC_OPTIONS");
134 if (env_llc_options
) {
136 char *options
[64] = {(char *) "llc"}; // Warning without cast
// strtok() cuts env_llc_options in place; slot 0 keeps "llc" and the
// words are stored from slot 1 onwards.
138 for (n
= 0, option
= strtok(env_llc_options
, " "); option
; n
++, option
= strtok(NULL
, " ")) {
139 options
[n
+ 1] = option
;
// Echo the parsed options when any of the IR / ASM / DUMP_BC debug flags
// is set.
141 if (gallivm_debug
& (GALLIVM_DEBUG_IR
| GALLIVM_DEBUG_ASM
| GALLIVM_DEBUG_DUMP_BC
)) {
142 debug_printf("llc additional options (%d):\n", n
);
143 for (int i
= 1; i
<= n
; i
++)
144 debug_printf("\t%s\n", options
[i
]);
// Argument count is the n parsed words plus the leading "llc" entry.
147 LLVMParseCommandLineOptions(n
+ 1, options
, NULL
);
// Runs init_native_targets() through call_once() on
// init_native_targets_once_flag, so repeated calls initialize the native
// targets only once.  For LLVM older than 3.4 it sets
// llvm::DisablePrettyStackTrace to true before doing so.
154 lp_set_target_options(void)
156 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
158 * By default LLVM adds a signal handler to output a pretty stack trace.
159 * This signal handler is never removed, causing problems when unloading the
160 * shared object where the gallium driver resides.
162 llvm::DisablePrettyStackTrace
= true;
165 /* The llvm target registry is not thread-safe, so drivers and state-trackers
166 * that want to initialize targets should use the lp_set_target_options()
167 * function to safely initialize targets.
169 * LLVM targets should be initialized before the driver or state-tracker tries
170 * to access the registry.
172 call_once(&init_native_targets_once_flag
, init_native_targets
);
// Allocates target library info for the given triple string and returns it
// as an opaque LLVMTargetLibraryInfoRef.  LLVM older than 3.7 allocates an
// llvm::TargetLibraryInfo; the other visible alternative allocates an
// llvm::TargetLibraryInfoImpl.  Both are constructed from
// llvm::Triple(triple).  The matching release function is
// gallivm_dispose_target_library_info().
176 LLVMTargetLibraryInfoRef
177 gallivm_create_target_library_info(const char *triple
)
179 return reinterpret_cast<LLVMTargetLibraryInfoRef
>(
180 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
181 new llvm::TargetLibraryInfo(
183 new llvm::TargetLibraryInfoImpl(
185 llvm::Triple(triple
)));
// Deletes a library info object.  The opaque handle is cast back to
// llvm::TargetLibraryInfo (LLVM < 3.7) or llvm::TargetLibraryInfoImpl
// before the delete, i.e. the same types that
// gallivm_create_target_library_info() allocates.
190 gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info
)
192 delete reinterpret_cast<
193 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
194 llvm::TargetLibraryInfo
196 llvm::TargetLibraryInfoImpl
// Only built for LLVM older than 3.4.  Dispatches on the opcode of
// instruction V and calls setAlignment(Bytes) on it, treating V as an
// llvm::LoadInst or an llvm::StoreInst.
202 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
206 LLVMSetAlignmentBackport(LLVMValueRef V
,
209 switch (LLVMGetInstructionOpcode(V
)) {
211 llvm::unwrap
<llvm::LoadInst
>(V
)->setAlignment(Bytes
);
214 llvm::unwrap
<llvm::StoreInst
>(V
)->setAlignment(Bytes
);
// BaseMemoryManager names the memory manager base class used by the rest
// of this file: llvm::JITMemoryManager for LLVM older than 3.6, with
// llvm::RTDyldMemoryManager as the other visible alternative.
225 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
226 typedef llvm::JITMemoryManager BaseMemoryManager
;
228 typedef llvm::RTDyldMemoryManager BaseMemoryManager
;
233 * Delegating is tedious but the default manager class is hidden in an
234 * anonymous namespace in LLVM, so we cannot just derive from it to change
// Abstract memory manager: every BaseMemoryManager member it overrides
// simply forwards its arguments to the manager returned by the pure
// virtual mgr() and returns that manager's result.
//
// Which overrides exist depends on the LLVM version checks below:
//  - the group introduced by the "From JITMemoryManager" marker sits under
//    an LLVM < 3.6 check (memory permissions, GOT, function bodies, stubs,
//    globals, exception tables for LLVM < 3.4, slab statistics);
//  - the group introduced by the "From RTDyldMemoryManager" marker covers
//    allocateCodeSection / allocateDataSection (with a SectionName argument
//    on LLVM >= 3.4), registerEHFrames / deregisterEHFrames (signature
//    varies with the LLVM version), getPointerToNamedFunction, and
//    applyPermissions (LLVM <= 3.3) or finalizeMemory.
//
// HasGOT is assigned from mgr()->isManagingGOT() because, as the inline
// note says, isManagingGOT() is not virtual in the base class and so
// cannot be delegated.
237 class DelegatingJITMemoryManager
: public BaseMemoryManager
{
// Subclasses supply the manager that receives all delegated calls.
240 virtual BaseMemoryManager
*mgr() const = 0;
243 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
245 * From JITMemoryManager
247 virtual void setMemoryWritable() {
248 mgr()->setMemoryWritable();
250 virtual void setMemoryExecutable() {
251 mgr()->setMemoryExecutable();
253 virtual void setPoisonMemory(bool poison
) {
254 mgr()->setPoisonMemory(poison
);
256 virtual void AllocateGOT() {
257 mgr()->AllocateGOT();
259 * isManagingGOT() is not virtual in base class so we can't delegate.
260 * Instead we mirror the value of HasGOT in our instance.
262 HasGOT
= mgr()->isManagingGOT();
264 virtual uint8_t *getGOTBase() const {
265 return mgr()->getGOTBase();
267 virtual uint8_t *startFunctionBody(const llvm::Function
*F
,
268 uintptr_t &ActualSize
) {
269 return mgr()->startFunctionBody(F
, ActualSize
);
271 virtual uint8_t *allocateStub(const llvm::GlobalValue
*F
,
273 unsigned Alignment
) {
274 return mgr()->allocateStub(F
, StubSize
, Alignment
);
276 virtual void endFunctionBody(const llvm::Function
*F
,
277 uint8_t *FunctionStart
,
278 uint8_t *FunctionEnd
) {
279 mgr()->endFunctionBody(F
, FunctionStart
, FunctionEnd
);
281 virtual uint8_t *allocateSpace(intptr_t Size
, unsigned Alignment
) {
282 return mgr()->allocateSpace(Size
, Alignment
);
284 virtual uint8_t *allocateGlobal(uintptr_t Size
, unsigned Alignment
) {
285 return mgr()->allocateGlobal(Size
, Alignment
);
287 virtual void deallocateFunctionBody(void *Body
) {
288 mgr()->deallocateFunctionBody(Body
);
290 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
291 virtual uint8_t *startExceptionTable(const llvm::Function
*F
,
292 uintptr_t &ActualSize
) {
293 return mgr()->startExceptionTable(F
, ActualSize
);
295 virtual void endExceptionTable(const llvm::Function
*F
,
298 uint8_t *FrameRegister
) {
299 mgr()->endExceptionTable(F
, TableStart
, TableEnd
,
302 virtual void deallocateExceptionTable(void *ET
) {
303 mgr()->deallocateExceptionTable(ET
);
306 virtual bool CheckInvariants(std::string
&s
) {
307 return mgr()->CheckInvariants(s
);
309 virtual size_t GetDefaultCodeSlabSize() {
310 return mgr()->GetDefaultCodeSlabSize();
312 virtual size_t GetDefaultDataSlabSize() {
313 return mgr()->GetDefaultDataSlabSize();
315 virtual size_t GetDefaultStubSlabSize() {
316 return mgr()->GetDefaultStubSlabSize();
318 virtual unsigned GetNumCodeSlabs() {
319 return mgr()->GetNumCodeSlabs();
321 virtual unsigned GetNumDataSlabs() {
322 return mgr()->GetNumDataSlabs();
324 virtual unsigned GetNumStubSlabs() {
325 return mgr()->GetNumStubSlabs();
330 * From RTDyldMemoryManager
332 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
333 virtual uint8_t *allocateCodeSection(uintptr_t Size
,
336 llvm::StringRef SectionName
) {
337 return mgr()->allocateCodeSection(Size
, Alignment
, SectionID
,
341 virtual uint8_t *allocateCodeSection(uintptr_t Size
,
343 unsigned SectionID
) {
344 return mgr()->allocateCodeSection(Size
, Alignment
, SectionID
);
347 virtual uint8_t *allocateDataSection(uintptr_t Size
,
350 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
351 llvm::StringRef SectionName
,
354 return mgr()->allocateDataSection(Size
, Alignment
, SectionID
,
355 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
360 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
361 virtual void registerEHFrames(uint8_t *Addr
, uint64_t LoadAddr
, size_t Size
) {
362 mgr()->registerEHFrames(Addr
, LoadAddr
, Size
);
365 virtual void registerEHFrames(llvm::StringRef SectionData
) {
366 mgr()->registerEHFrames(SectionData
);
369 #if LLVM_VERSION_MAJOR >= 5
370 virtual void deregisterEHFrames() {
371 mgr()->deregisterEHFrames();
373 #elif LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
374 virtual void deregisterEHFrames(uint8_t *Addr
, uint64_t LoadAddr
, size_t Size
) {
375 mgr()->deregisterEHFrames(Addr
, LoadAddr
, Size
);
378 virtual void *getPointerToNamedFunction(const std::string
&Name
,
379 bool AbortOnFailure
=true) {
380 return mgr()->getPointerToNamedFunction(Name
, AbortOnFailure
);
382 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 3)
383 virtual bool applyPermissions(std::string
*ErrMsg
= 0) {
384 return mgr()->applyPermissions(ErrMsg
);
387 virtual bool finalizeMemory(std::string
*ErrMsg
= 0) {
388 return mgr()->finalizeMemory(ErrMsg
);
395 * Delegate memory management to one shared manager for more efficient use
396 * of memory than creating a separate pool for each LLVM engine.
397 * Keep generated code until freeGeneratedCode() is called, instead of when
398 * memory manager is destroyed, which happens during engine destruction.
399 * This allows additional memory savings as we don't have to keep the engine
400 * around in order to use the code.
401 * All methods are delegated to the shared manager except destruction and
402 * deallocating code. For the latter we just remember what needs to be
403 * deallocated later. The shared manager is deleted once it is empty.
// DelegatingJITMemoryManager subclass that is constructed from a
// BaseMemoryManager and defers the release of generated code.
//
// deallocateFunctionBody() and, for LLVM < 3.4, deallocateExceptionTable()
// do not free anything: they only append the pointer to the vectors of the
// GeneratedCode record.  The loops under the LLVM < 3.6 check hand those
// pointers to TheMM later on.
//
// The constructor allocates the GeneratedCode record with new;
// getGeneratedCode() exposes it as an opaque lp_generated_code pointer and
// the static freeGeneratedCode() deletes it.  The destructor purposely
// leaves the record alive (see the note inside it).
405 class ShaderMemoryManager
: public DelegatingJITMemoryManager
{
407 BaseMemoryManager
*TheMM
;
// Bookkeeping record: the pointers queued for later deallocation plus the
// manager they are handed back to.
409 struct GeneratedCode
{
410 typedef std::vector
<void *> Vec
;
411 Vec FunctionBody
, ExceptionTable
;
412 BaseMemoryManager
*TheMM
;
414 GeneratedCode(BaseMemoryManager
*MM
) {
420 * Deallocate things as previously requested and
421 * free shared manager when no longer used.
423 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
427 for ( i
= FunctionBody
.begin(); i
!= FunctionBody
.end(); ++i
)
428 TheMM
->deallocateFunctionBody(*i
);
429 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
430 for ( i
= ExceptionTable
.begin(); i
!= ExceptionTable
.end(); ++i
)
431 TheMM
->deallocateExceptionTable(*i
);
432 #endif /* LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4) */
433 #endif /* LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6) */
439 BaseMemoryManager
*mgr() const {
445 ShaderMemoryManager(BaseMemoryManager
* MM
) {
447 code
= new GeneratedCode(MM
);
450 virtual ~ShaderMemoryManager() {
452 * 'code' is purposely not deleted. It is the user's responsibility
453 * to call getGeneratedCode() and freeGeneratedCode().
457 struct lp_generated_code
*getGeneratedCode() {
458 return (struct lp_generated_code
*) code
;
461 static void freeGeneratedCode(struct lp_generated_code
*code
) {
462 delete (GeneratedCode
*) code
;
465 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
466 virtual void deallocateExceptionTable(void *ET
) {
467 // remember for later deallocation
468 code
->ExceptionTable
.push_back(ET
);
472 virtual void deallocateFunctionBody(void *Body
) {
473 // remember for later deallocation
474 code
->FunctionBody
.push_back(Body
);
480 * Same as LLVMCreateJITCompilerForModule, but:
481 * - allows using MCJIT and enabling AVX feature where available.
482 * - set target options
485 * - llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
486 * - llvm/tools/lli/lli.cpp
487 * - http://markmail.org/message/ttkuhvgj4cxxy2on#query:+page:1+mid:aju2dggerju3ivd3+state:results
// Builds an LLVM execution engine for module M through llvm::EngineBuilder.
//
// Visible steps, in order:
//  - construct the builder from the unwrapped module (wrapped in a
//    std::unique_ptr on LLVM >= 3.6);
//  - fill a TargetOptions: StackAlignmentOverride = 4 on PIPE_ARCH_X86,
//    plus frame pointer and JIT debug info settings that depend on DEBUG,
//    PROFILE, the architecture and the LLVM version;
//  - select EngineKind::JIT, the target options and OptLevel on the
//    builder, and request MCJIT explicitly on LLVM < 3.6;
//  - set an explicit "...-pc-win32-elf" target triple on M (see the note
//    about MCJIT on Windows);
//  - collect the -mattr strings in MAttrs, either from
//    llvm::sys::getHostCPUFeatures() or from util_cpu_caps, depending on
//    the LLVM version and the architecture, and pass them to setMAttrs();
//  - on LLVM >= 3.5, pass llvm::sys::getHostCPUName() to setMCPU(), with a
//    special case for "generic" on little-endian PPC64;
//  - wrap CMM in a new ShaderMemoryManager, publish its generated-code
//    record through *OutCode and install the manager on the builder (on
//    LLVM >= 3.6 ownership moves into a std::unique_ptr);
//  - create the engine and, when LLVM_USE_INTEL_JITEVENTS is set, register
//    the Intel JIT event listener on it.
//
// The MAttrs and MCPU choices are printed with debug_printf() when any of
// the IR / ASM / DUMP_BC gallivm debug flags is set.
//
// The final statements release *OutCode through lp_free_generated_code()
// and store a strdup() copy of the error text in *OutError (presumably only
// when engine creation fails -- confirm).
491 lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef
*OutJIT
,
492 lp_generated_code
**OutCode
,
494 LLVMMCJITMemoryManagerRef CMM
,
499 using namespace llvm
;
// LLVM >= 3.6 hands the module to the builder through std::unique_ptr.
502 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6)
503 EngineBuilder
builder(std::unique_ptr
<Module
>(unwrap(M
)));
505 EngineBuilder
builder(unwrap(M
));
509 * LLVM 3.1+ haven't more "extern unsigned llvm::StackAlignmentOverride" and
510 * friends for configuring code generation options, like stack alignment.
512 TargetOptions options
;
513 #if defined(PIPE_ARCH_X86)
514 options
.StackAlignmentOverride
= 4;
515 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
516 options
.RealignStack
= true;
520 #if defined(DEBUG) && (LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7))
521 options
.JITEmitDebugInfo
= true;
524 /* XXX: Workaround http://llvm.org/PR21435 */
525 #if defined(DEBUG) || defined(PROFILE) || \
526 ((LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 3)) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)))
527 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 4)
528 options
.NoFramePointerElimNonLeaf
= true;
530 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
531 options
.NoFramePointerElim
= true;
535 builder
.setEngineKind(EngineKind::JIT
)
537 .setTargetOptions(options
)
538 .setOptLevel((CodeGenOpt::Level
)OptLevel
);
541 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
542 builder
.setUseMCJIT(true);
546 * MCJIT works on Windows, but currently only through ELF object format.
548 * XXX: We could use `LLVM_HOST_TRIPLE "-elf"` but LLVM_HOST_TRIPLE has
549 * different strings for MinGW/MSVC, so better play it safe and be
553 LLVMSetTarget(M
, "x86_64-pc-win32-elf");
555 LLVMSetTarget(M
, "i686-pc-win32-elf");
560 llvm::SmallVector
<std::string
, 16> MAttrs
;
562 #if LLVM_VERSION_MAJOR >= 4 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_ARM))
563 /* llvm-3.3+ implements sys::getHostCPUFeatures for Arm
564 * and llvm-3.7+ for x86, which allows us to enable/disable
565 * code generation based on the results of cpuid on these
568 llvm::StringMap
<bool> features
;
569 llvm::sys::getHostCPUFeatures(features
);
571 for (StringMapIterator
<bool> f
= features
.begin();
574 MAttrs
.push_back(((*f
).second
? "+" : "-") + (*f
).first().str());
576 #elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
578 * We need to unset attributes because sometimes LLVM mistakenly assumes
579 * certain features are present given the processor name.
581 * https://bugs.freedesktop.org/show_bug.cgi?id=92214
582 * http://llvm.org/PR25021
583 * http://llvm.org/PR19429
584 * http://llvm.org/PR16721
586 MAttrs
.push_back(util_cpu_caps
.has_sse
? "+sse" : "-sse" );
587 MAttrs
.push_back(util_cpu_caps
.has_sse2
? "+sse2" : "-sse2" );
588 MAttrs
.push_back(util_cpu_caps
.has_sse3
? "+sse3" : "-sse3" );
589 MAttrs
.push_back(util_cpu_caps
.has_ssse3
? "+ssse3" : "-ssse3" );
590 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
591 MAttrs
.push_back(util_cpu_caps
.has_sse4_1
? "+sse4.1" : "-sse4.1");
593 MAttrs
.push_back(util_cpu_caps
.has_sse4_1
? "+sse41" : "-sse41" );
595 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
596 MAttrs
.push_back(util_cpu_caps
.has_sse4_2
? "+sse4.2" : "-sse4.2");
598 MAttrs
.push_back(util_cpu_caps
.has_sse4_2
? "+sse42" : "-sse42" );
601 * AVX feature is not automatically detected from CPUID by the X86 target
602 * yet, because the old (yet default) JIT engine is not capable of
603 * emitting the opcodes. On newer llvm versions it is and at least some
604 * versions (tested with 3.3) will emit avx opcodes without this anyway.
606 MAttrs
.push_back(util_cpu_caps
.has_avx
? "+avx" : "-avx");
607 MAttrs
.push_back(util_cpu_caps
.has_f16c
? "+f16c" : "-f16c");
608 if (LLVM_VERSION_MAJOR
> 3 || (LLVM_VERSION_MAJOR
== 3 && LLVM_VERSION_MINOR
>= 4)) {
609 MAttrs
.push_back(util_cpu_caps
.has_fma
? "+fma" : "-fma");
612 * The old JIT in LLVM 3.3 has a bug encoding llvm.fmuladd.f32 and
613 * llvm.fmuladd.v2f32 intrinsics when FMA is available.
615 MAttrs
.push_back("-fma");
617 MAttrs
.push_back(util_cpu_caps
.has_avx2
? "+avx2" : "-avx2");
618 /* disable avx512 and all subvariants */
619 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4)
620 MAttrs
.push_back("-avx512cd");
621 MAttrs
.push_back("-avx512er");
622 MAttrs
.push_back("-avx512f");
623 MAttrs
.push_back("-avx512pf");
625 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
626 MAttrs
.push_back("-avx512bw");
627 MAttrs
.push_back("-avx512dq");
628 MAttrs
.push_back("-avx512vl");
631 #if defined(PIPE_ARCH_ARM)
632 if (!util_cpu_caps
.has_neon
) {
633 MAttrs
.push_back("-neon");
634 MAttrs
.push_back("-crypto");
635 MAttrs
.push_back("-vfp2");
639 #if defined(PIPE_ARCH_PPC)
640 MAttrs
.push_back(util_cpu_caps
.has_altivec
? "+altivec" : "-altivec");
641 #if (LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 4))
642 #if (LLVM_VERSION_MAJOR < 4)
644 * Make sure VSX instructions are disabled
646 * https://llvm.org/bugs/show_bug.cgi?id=25503#c7 (fixed in 3.8.1)
647 * https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1)
648 * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)
649 * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0)
651 if (util_cpu_caps
.has_altivec
) {
652 MAttrs
.push_back("-vsx");
656 * Bug 25503 is fixed, by the same fix that fixed
657 * bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1).
658 * BZ 33531 actually comprises more than one bug, all of
659 * which are fixed in LLVM 4.0.
661 * With LLVM 4.0 or higher:
662 * Make sure VSX instructions are ENABLED (if supported), unless
663 * VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
665 if (util_cpu_caps
.has_altivec
) {
666 MAttrs
.push_back(util_cpu_caps
.has_vsx
? "+vsx" : "-vsx");
672 builder
.setMAttrs(MAttrs
);
674 if (gallivm_debug
& (GALLIVM_DEBUG_IR
| GALLIVM_DEBUG_ASM
| GALLIVM_DEBUG_DUMP_BC
)) {
675 int n
= MAttrs
.size();
677 debug_printf("llc -mattr option(s): ");
678 for (int i
= 0; i
< n
; i
++)
679 debug_printf("%s%s", MAttrs
[i
].c_str(), (i
< n
- 1) ? "," : "");
684 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
685 StringRef MCPU
= llvm::sys::getHostCPUName();
687 * The cpu bits are no longer set automatically, so need to set mcpu manually.
688 * Note that the MAttrs set above will be sort of ignored (since we should
689 * not set any which would not be set by specifying the cpu anyway).
690 * It ought to be safe though since getHostCPUName() should include bits
691 * not only from the cpu but environment as well (for instance if it's safe
692 * to use avx instructions which need OS support). According to
693 * http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this
694 * right it may be necessary to specify older cpu (or disable mattrs) though
695 * when not using MCJIT so no instructions are generated which the old JIT
696 * can't handle. Not entirely sure if we really need to do anything yet.
698 #if defined(PIPE_ARCH_LITTLE_ENDIAN) && defined(PIPE_ARCH_PPC_64)
700 * Versions of LLVM prior to 4.0 lacked a table entry for "POWER8NVL",
701 * resulting in (big-endian) "generic" being returned on
702 * little-endian Power8NVL systems. The result was that code that
703 * attempted to load the least significant 32 bits of a 64-bit quantity
704 * from memory loaded the wrong half. This resulted in failures in some
706 * .../arb_gpu_shader_fp64/execution/conversion/frag-conversion-explicit-double-uint
708 if (MCPU
== "generic")
711 builder
.setMCPU(MCPU
);
712 if (gallivm_debug
& (GALLIVM_DEBUG_IR
| GALLIVM_DEBUG_ASM
| GALLIVM_DEBUG_DUMP_BC
)) {
713 debug_printf("llc -mcpu option: %s\n", MCPU
.str().c_str());
717 ShaderMemoryManager
*MM
= NULL
;
719 BaseMemoryManager
* JMM
= reinterpret_cast<BaseMemoryManager
*>(CMM
);
720 MM
= new ShaderMemoryManager(JMM
);
721 *OutCode
= MM
->getGeneratedCode();
723 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6)
724 builder
.setMCJITMemoryManager(std::unique_ptr
<RTDyldMemoryManager
>(MM
));
725 MM
= NULL
; // ownership taken by std::unique_ptr
726 #elif LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 3)
727 builder
.setMCJITMemoryManager(MM
);
729 builder
.setJITMemoryManager(MM
);
732 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
733 BaseMemoryManager
* JMM
= reinterpret_cast<BaseMemoryManager
*>(CMM
);
734 MM
= new ShaderMemoryManager(JMM
);
735 *OutCode
= MM
->getGeneratedCode();
737 builder
.setJITMemoryManager(MM
);
743 ExecutionEngine
*JIT
;
745 JIT
= builder
.create();
746 #if LLVM_USE_INTEL_JITEVENTS
747 JITEventListener
*JEL
= JITEventListener::createIntelJITEventListener();
748 JIT
->RegisterJITEventListener(JEL
);
754 lp_free_generated_code(*OutCode
);
757 *OutError
= strdup(Error
.c_str());
// Releases a generated-code record, such as the one stored in *OutCode by
// lp_build_create_jit_compiler_for_module(), by forwarding it to
// ShaderMemoryManager::freeGeneratedCode().
764 lp_free_generated_code(struct lp_generated_code
*code
)
766 ShaderMemoryManager::freeGeneratedCode(code
);
// Creates a memory manager and returns it as an opaque
// LLVMMCJITMemoryManagerRef.  LLVM older than 3.6 uses
// llvm::JITMemoryManager::CreateDefaultMemManager(); the other visible
// branch allocates a new llvm::SectionMemoryManager.  The handle is
// released with lp_free_memory_manager().
770 LLVMMCJITMemoryManagerRef
771 lp_get_default_memory_manager()
773 BaseMemoryManager
*mm
;
774 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 6)
775 mm
= llvm::JITMemoryManager::CreateDefaultMemManager();
777 mm
= new llvm::SectionMemoryManager();
779 return reinterpret_cast<LLVMMCJITMemoryManagerRef
>(mm
);
// Deletes a memory manager handle after casting it back to a
// BaseMemoryManager pointer, the type lp_get_default_memory_manager()
// casts from.
784 lp_free_memory_manager(LLVMMCJITMemoryManagerRef memorymgr
)
786 delete reinterpret_cast<BaseMemoryManager
*>(memorymgr
);
// Returns the value that the call instruction `call` invokes.
// LLVM >= 3.9 uses the C API function LLVMGetCalledValue(); LLVM 3.5 up to
// 3.8 goes through llvm::CallSite on the unwrapped instruction and wraps
// the result; the remaining branch returns NULL.
789 extern "C" LLVMValueRef
790 lp_get_called_value(LLVMValueRef call
)
792 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
793 return LLVMGetCalledValue(call
);
794 #elif LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
795 return llvm::wrap(llvm::CallSite(llvm::unwrap
<llvm::Instruction
>(call
)).getCalledValue());
797 return NULL
; /* radeonsi doesn't support so old LLVM. */
// Tells whether v is a function value.  LLVM >= 3.9 compares
// LLVMGetValueKind(v) against LLVMFunctionValueKind; the other visible
// branch uses llvm::isa<llvm::Function>() on the unwrapped value.
802 lp_is_function(LLVMValueRef v
)
804 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
805 return LLVMGetValueKind(v
) == LLVMFunctionValueKind
;
807 return llvm::isa
<llvm::Function
>(llvm::unwrap(v
));
// Only built for LLVM older than 3.9.  Translates a C API
// LLVMAtomicOrdering enumerator into the llvm::AtomicOrdering enumerator of
// the same name.  llvm_unreachable() flags any value that no case handles.
811 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
812 static llvm::AtomicOrdering
mapFromLLVMOrdering(LLVMAtomicOrdering Ordering
) {
814 case LLVMAtomicOrderingNotAtomic
: return llvm::AtomicOrdering::NotAtomic
;
815 case LLVMAtomicOrderingUnordered
: return llvm::AtomicOrdering::Unordered
;
816 case LLVMAtomicOrderingMonotonic
: return llvm::AtomicOrdering::Monotonic
;
817 case LLVMAtomicOrderingAcquire
: return llvm::AtomicOrdering::Acquire
;
818 case LLVMAtomicOrderingRelease
: return llvm::AtomicOrdering::Release
;
819 case LLVMAtomicOrderingAcquireRelease
:
820 return llvm::AtomicOrdering::AcquireRelease
;
821 case LLVMAtomicOrderingSequentiallyConsistent
:
822 return llvm::AtomicOrdering::SequentiallyConsistent
;
825 llvm_unreachable("Invalid LLVMAtomicOrdering value!");
// Emits an atomic compare-and-exchange through the unwrapped builder B
// (CreateAtomicCmpXchg) and returns the wrapped result.  Ptr, Cmp and New
// are unwrapped and passed through.  Both orderings are converted with
// mapFromLLVMOrdering(); the failure ordering is passed only on
// LLVM >= 3.5.  A non-zero SingleThread selects
// llvm::SynchronizationScope::SingleThread, otherwise CrossThread is used.
828 LLVMValueRef
LLVMBuildAtomicCmpXchg(LLVMBuilderRef B
, LLVMValueRef Ptr
,
829 LLVMValueRef Cmp
, LLVMValueRef New
,
830 LLVMAtomicOrdering SuccessOrdering
,
831 LLVMAtomicOrdering FailureOrdering
,
832 LLVMBool SingleThread
)
834 return llvm::wrap(llvm::unwrap(B
)->CreateAtomicCmpXchg(llvm::unwrap(Ptr
), llvm::unwrap(Cmp
),
835 llvm::unwrap(New
), mapFromLLVMOrdering(SuccessOrdering
),
836 #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5)
837 mapFromLLVMOrdering(FailureOrdering
),
839 SingleThread
? llvm::SynchronizationScope::SingleThread
: llvm::SynchronizationScope::CrossThread
));
843 #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5)
844 LLVMValueRef
LLVMBuildFence(LLVMBuilderRef B
,
845 LLVMAtomicOrdering ordering
,
846 LLVMBool singleThread
,
849 return llvm::wrap(llvm::unwrap(B
)->CreateFence(mapFromLLVMOrdering(ordering
),
850 singleThread
? llvm::SynchronizationScope::SingleThread
: llvm::SynchronizationScope::CrossThread
));