gallivm: Translate all util_cpu_caps bits to LLVM attributes.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_misc.cpp
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 /**
30 * The purpose of this module is to expose LLVM functionality not available
31 * through the C bindings.
32 */
33
34
35 #ifndef __STDC_LIMIT_MACROS
36 #define __STDC_LIMIT_MACROS
37 #endif
38
39 #ifndef __STDC_CONSTANT_MACROS
40 #define __STDC_CONSTANT_MACROS
41 #endif
42
43 // Undef these vars just to silence warnings
44 #undef PACKAGE_BUGREPORT
45 #undef PACKAGE_NAME
46 #undef PACKAGE_STRING
47 #undef PACKAGE_TARNAME
48 #undef PACKAGE_VERSION
49
50
51 #include <stddef.h>
52
53 // Workaround http://llvm.org/PR23628
54 #if HAVE_LLVM >= 0x0307
55 # pragma push_macro("DEBUG")
56 # undef DEBUG
57 #endif
58
59 #include <llvm-c/Core.h>
60 #include <llvm-c/ExecutionEngine.h>
61 #include <llvm/Target/TargetOptions.h>
62 #include <llvm/ExecutionEngine/ExecutionEngine.h>
63 #include <llvm/ADT/Triple.h>
64 #if HAVE_LLVM < 0x0306
65 #include <llvm/ExecutionEngine/JITMemoryManager.h>
66 #else
67 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
68 #endif
69 #include <llvm/Support/CommandLine.h>
70 #include <llvm/Support/Host.h>
71 #include <llvm/Support/PrettyStackTrace.h>
72
73 #include <llvm/Support/TargetSelect.h>
74
75 #include <llvm/IR/IRBuilder.h>
76 #include <llvm/IR/Module.h>
77 #include <llvm/Support/CBindingWrapping.h>
78
79 // Workaround http://llvm.org/PR23628
80 #if HAVE_LLVM >= 0x0307
81 # pragma pop_macro("DEBUG")
82 #endif
83
84 #include "c11/threads.h"
85 #include "os/os_thread.h"
86 #include "pipe/p_config.h"
87 #include "util/u_debug.h"
88 #include "util/u_cpu_detect.h"
89
90 #include "lp_bld_misc.h"
91
92 namespace {
93
94 class LLVMEnsureMultithreaded {
95 public:
96 LLVMEnsureMultithreaded()
97 {
98 if (!LLVMIsMultithreaded()) {
99 LLVMStartMultithreaded();
100 }
101 }
102 };
103
104 static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;
105
106 }
107
108 static once_flag init_native_targets_once_flag;
109
110 static void init_native_targets()
111 {
112 // If we have a native target, initialize it to ensure it is linked in and
113 // usable by the JIT.
114 llvm::InitializeNativeTarget();
115
116 llvm::InitializeNativeTargetAsmPrinter();
117
118 llvm::InitializeNativeTargetDisassembler();
119 }
120
121 /**
122 * The llvm target registry is not thread-safe, so drivers and state-trackers
123 * that want to initialize targets should use the gallivm_init_llvm_targets()
124 * function to safely initialize targets.
125 *
126 * LLVM targets should be initialized before the driver or state-tracker tries
127 * to access the registry.
128 */
129 extern "C" void
130 gallivm_init_llvm_targets(void)
131 {
132 call_once(&init_native_targets_once_flag, init_native_targets);
133 }
134
135 extern "C" void
136 lp_set_target_options(void)
137 {
138 #if HAVE_LLVM < 0x0304
139 /*
140 * By default LLVM adds a signal handler to output a pretty stack trace.
141 * This signal handler is never removed, causing problems when unloading the
142 * shared object where the gallium driver resides.
143 */
144 llvm::DisablePrettyStackTrace = true;
145 #endif
146
147 gallivm_init_llvm_targets();
148 }
149
150
151 extern "C"
152 LLVMValueRef
153 lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
154 const char *Name)
155 {
156 return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name));
157 }
158
159
160 extern "C"
161 void
162 lp_set_load_alignment(LLVMValueRef Inst,
163 unsigned Align)
164 {
165 llvm::unwrap<llvm::LoadInst>(Inst)->setAlignment(Align);
166 }
167
168 extern "C"
169 void
170 lp_set_store_alignment(LLVMValueRef Inst,
171 unsigned Align)
172 {
173 llvm::unwrap<llvm::StoreInst>(Inst)->setAlignment(Align);
174 }
175
176
177 #if HAVE_LLVM < 0x0306
178 typedef llvm::JITMemoryManager BaseMemoryManager;
179 #else
180 typedef llvm::RTDyldMemoryManager BaseMemoryManager;
181 #endif
182
183
184 /*
185 * Delegating is tedious but the default manager class is hidden in an
186 * anonymous namespace in LLVM, so we cannot just derive from it to change
187 * its behavior.
188 */
189 class DelegatingJITMemoryManager : public BaseMemoryManager {
190
191 protected:
192 virtual BaseMemoryManager *mgr() const = 0;
193
194 public:
195 #if HAVE_LLVM < 0x0306
196 /*
197 * From JITMemoryManager
198 */
199 virtual void setMemoryWritable() {
200 mgr()->setMemoryWritable();
201 }
202 virtual void setMemoryExecutable() {
203 mgr()->setMemoryExecutable();
204 }
205 virtual void setPoisonMemory(bool poison) {
206 mgr()->setPoisonMemory(poison);
207 }
208 virtual void AllocateGOT() {
209 mgr()->AllocateGOT();
210 /*
211 * isManagingGOT() is not virtual in base class so we can't delegate.
212 * Instead we mirror the value of HasGOT in our instance.
213 */
214 HasGOT = mgr()->isManagingGOT();
215 }
216 virtual uint8_t *getGOTBase() const {
217 return mgr()->getGOTBase();
218 }
219 virtual uint8_t *startFunctionBody(const llvm::Function *F,
220 uintptr_t &ActualSize) {
221 return mgr()->startFunctionBody(F, ActualSize);
222 }
223 virtual uint8_t *allocateStub(const llvm::GlobalValue *F,
224 unsigned StubSize,
225 unsigned Alignment) {
226 return mgr()->allocateStub(F, StubSize, Alignment);
227 }
228 virtual void endFunctionBody(const llvm::Function *F,
229 uint8_t *FunctionStart,
230 uint8_t *FunctionEnd) {
231 mgr()->endFunctionBody(F, FunctionStart, FunctionEnd);
232 }
233 virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
234 return mgr()->allocateSpace(Size, Alignment);
235 }
236 virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
237 return mgr()->allocateGlobal(Size, Alignment);
238 }
239 virtual void deallocateFunctionBody(void *Body) {
240 mgr()->deallocateFunctionBody(Body);
241 }
242 #if HAVE_LLVM < 0x0304
243 virtual uint8_t *startExceptionTable(const llvm::Function *F,
244 uintptr_t &ActualSize) {
245 return mgr()->startExceptionTable(F, ActualSize);
246 }
247 virtual void endExceptionTable(const llvm::Function *F,
248 uint8_t *TableStart,
249 uint8_t *TableEnd,
250 uint8_t *FrameRegister) {
251 mgr()->endExceptionTable(F, TableStart, TableEnd,
252 FrameRegister);
253 }
254 virtual void deallocateExceptionTable(void *ET) {
255 mgr()->deallocateExceptionTable(ET);
256 }
257 #endif
258 virtual bool CheckInvariants(std::string &s) {
259 return mgr()->CheckInvariants(s);
260 }
261 virtual size_t GetDefaultCodeSlabSize() {
262 return mgr()->GetDefaultCodeSlabSize();
263 }
264 virtual size_t GetDefaultDataSlabSize() {
265 return mgr()->GetDefaultDataSlabSize();
266 }
267 virtual size_t GetDefaultStubSlabSize() {
268 return mgr()->GetDefaultStubSlabSize();
269 }
270 virtual unsigned GetNumCodeSlabs() {
271 return mgr()->GetNumCodeSlabs();
272 }
273 virtual unsigned GetNumDataSlabs() {
274 return mgr()->GetNumDataSlabs();
275 }
276 virtual unsigned GetNumStubSlabs() {
277 return mgr()->GetNumStubSlabs();
278 }
279 #endif
280
281 /*
282 * From RTDyldMemoryManager
283 */
284 #if HAVE_LLVM >= 0x0304
285 virtual uint8_t *allocateCodeSection(uintptr_t Size,
286 unsigned Alignment,
287 unsigned SectionID,
288 llvm::StringRef SectionName) {
289 return mgr()->allocateCodeSection(Size, Alignment, SectionID,
290 SectionName);
291 }
292 #else
293 virtual uint8_t *allocateCodeSection(uintptr_t Size,
294 unsigned Alignment,
295 unsigned SectionID) {
296 return mgr()->allocateCodeSection(Size, Alignment, SectionID);
297 }
298 #endif
299 virtual uint8_t *allocateDataSection(uintptr_t Size,
300 unsigned Alignment,
301 unsigned SectionID,
302 #if HAVE_LLVM >= 0x0304
303 llvm::StringRef SectionName,
304 #endif
305 bool IsReadOnly) {
306 return mgr()->allocateDataSection(Size, Alignment, SectionID,
307 #if HAVE_LLVM >= 0x0304
308 SectionName,
309 #endif
310 IsReadOnly);
311 }
312 #if HAVE_LLVM >= 0x0304
313 virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
314 mgr()->registerEHFrames(Addr, LoadAddr, Size);
315 }
316 virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
317 mgr()->deregisterEHFrames(Addr, LoadAddr, Size);
318 }
319 #else
320 virtual void registerEHFrames(llvm::StringRef SectionData) {
321 mgr()->registerEHFrames(SectionData);
322 }
323 #endif
324 virtual void *getPointerToNamedFunction(const std::string &Name,
325 bool AbortOnFailure=true) {
326 return mgr()->getPointerToNamedFunction(Name, AbortOnFailure);
327 }
328 #if HAVE_LLVM <= 0x0303
329 virtual bool applyPermissions(std::string *ErrMsg = 0) {
330 return mgr()->applyPermissions(ErrMsg);
331 }
332 #else
333 virtual bool finalizeMemory(std::string *ErrMsg = 0) {
334 return mgr()->finalizeMemory(ErrMsg);
335 }
336 #endif
337 };
338
339
340 /*
341 * Delegate memory management to one shared manager for more efficient use
342 * of memory than creating a separate pool for each LLVM engine.
343 * Keep generated code until freeGeneratedCode() is called, instead of when
344 * memory manager is destroyed, which happens during engine destruction.
345 * This allows additional memory savings as we don't have to keep the engine
346 * around in order to use the code.
347 * All methods are delegated to the shared manager except destruction and
348 * deallocating code. For the latter we just remember what needs to be
349 * deallocated later. The shared manager is deleted once it is empty.
350 */
351 class ShaderMemoryManager : public DelegatingJITMemoryManager {
352
353 BaseMemoryManager *TheMM;
354
355 struct GeneratedCode {
356 typedef std::vector<void *> Vec;
357 Vec FunctionBody, ExceptionTable;
358 BaseMemoryManager *TheMM;
359
360 GeneratedCode(BaseMemoryManager *MM) {
361 TheMM = MM;
362 }
363
364 ~GeneratedCode() {
365 /*
366 * Deallocate things as previously requested and
367 * free shared manager when no longer used.
368 */
369 #if HAVE_LLVM < 0x0306
370 Vec::iterator i;
371
372 assert(TheMM);
373 for ( i = FunctionBody.begin(); i != FunctionBody.end(); ++i )
374 TheMM->deallocateFunctionBody(*i);
375 #if HAVE_LLVM < 0x0304
376 for ( i = ExceptionTable.begin(); i != ExceptionTable.end(); ++i )
377 TheMM->deallocateExceptionTable(*i);
378 #endif /* HAVE_LLVM < 0x0304 */
379 #endif /* HAVE_LLVM < 0x0306 */
380 }
381 };
382
383 GeneratedCode *code;
384
385 BaseMemoryManager *mgr() const {
386 return TheMM;
387 }
388
389 public:
390
391 ShaderMemoryManager(BaseMemoryManager* MM) {
392 TheMM = MM;
393 code = new GeneratedCode(MM);
394 }
395
396 virtual ~ShaderMemoryManager() {
397 /*
398 * 'code' is purposely not deleted. It is the user's responsibility
399 * to call getGeneratedCode() and freeGeneratedCode().
400 */
401 }
402
403 struct lp_generated_code *getGeneratedCode() {
404 return (struct lp_generated_code *) code;
405 }
406
407 static void freeGeneratedCode(struct lp_generated_code *code) {
408 delete (GeneratedCode *) code;
409 }
410
411 #if HAVE_LLVM < 0x0304
412 virtual void deallocateExceptionTable(void *ET) {
413 // remember for later deallocation
414 code->ExceptionTable.push_back(ET);
415 }
416 #endif
417
418 virtual void deallocateFunctionBody(void *Body) {
419 // remember for later deallocation
420 code->FunctionBody.push_back(Body);
421 }
422 };
423
424
425 /**
426 * Same as LLVMCreateJITCompilerForModule, but:
427 * - allows using MCJIT and enabling AVX feature where available.
428 * - set target options
429 *
430 * See also:
431 * - llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
432 * - llvm/tools/lli/lli.cpp
433 * - http://markmail.org/message/ttkuhvgj4cxxy2on#query:+page:1+mid:aju2dggerju3ivd3+state:results
434 */
435 extern "C"
436 LLVMBool
437 lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
438 lp_generated_code **OutCode,
439 LLVMModuleRef M,
440 LLVMMCJITMemoryManagerRef CMM,
441 unsigned OptLevel,
442 int useMCJIT,
443 char **OutError)
444 {
445 using namespace llvm;
446
447 std::string Error;
448 #if HAVE_LLVM >= 0x0306
449 EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
450 #else
451 EngineBuilder builder(unwrap(M));
452 #endif
453
454 /**
455 * LLVM 3.1+ haven't more "extern unsigned llvm::StackAlignmentOverride" and
456 * friends for configuring code generation options, like stack alignment.
457 */
458 TargetOptions options;
459 #if defined(PIPE_ARCH_X86)
460 options.StackAlignmentOverride = 4;
461 #if HAVE_LLVM < 0x0304
462 options.RealignStack = true;
463 #endif
464 #endif
465
466 #if defined(DEBUG) && HAVE_LLVM < 0x0307
467 options.JITEmitDebugInfo = true;
468 #endif
469
470 /* XXX: Workaround http://llvm.org/PR21435 */
471 #if defined(DEBUG) || defined(PROFILE) || \
472 (HAVE_LLVM >= 0x0303 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)))
473 #if HAVE_LLVM < 0x0304
474 options.NoFramePointerElimNonLeaf = true;
475 #endif
476 #if HAVE_LLVM < 0x0307
477 options.NoFramePointerElim = true;
478 #endif
479 #endif
480
481 builder.setEngineKind(EngineKind::JIT)
482 .setErrorStr(&Error)
483 .setTargetOptions(options)
484 .setOptLevel((CodeGenOpt::Level)OptLevel);
485
486 if (useMCJIT) {
487 #if HAVE_LLVM < 0x0306
488 builder.setUseMCJIT(true);
489 #endif
490 #ifdef _WIN32
491 /*
492 * MCJIT works on Windows, but currently only through ELF object format.
493 */
494 std::string targetTriple = llvm::sys::getProcessTriple();
495 targetTriple.append("-elf");
496 unwrap(M)->setTargetTriple(targetTriple);
497 #endif
498 }
499
500 llvm::SmallVector<std::string, 16> MAttrs;
501 if (util_cpu_caps.has_sse) {
502 MAttrs.push_back("+sse");
503 }
504 if (util_cpu_caps.has_sse2) {
505 MAttrs.push_back("+sse2");
506 }
507 if (util_cpu_caps.has_sse3) {
508 MAttrs.push_back("+sse3");
509 }
510 if (util_cpu_caps.has_ssse3) {
511 MAttrs.push_back("+ssse3");
512 }
513 if (util_cpu_caps.has_sse4_1) {
514 #if HAVE_LLVM >= 0x0304
515 MAttrs.push_back("+sse4.1");
516 #else
517 MAttrs.push_back("+sse41");
518 #endif
519 }
520 if (util_cpu_caps.has_sse4_2) {
521 #if HAVE_LLVM >= 0x0304
522 MAttrs.push_back("+sse4.2");
523 #else
524 MAttrs.push_back("+sse42");
525 #endif
526 }
527 if (util_cpu_caps.has_avx) {
528 /*
529 * AVX feature is not automatically detected from CPUID by the X86 target
530 * yet, because the old (yet default) JIT engine is not capable of
531 * emitting the opcodes. On newer llvm versions it is and at least some
532 * versions (tested with 3.3) will emit avx opcodes without this anyway.
533 */
534 MAttrs.push_back("+avx");
535 if (util_cpu_caps.has_f16c) {
536 MAttrs.push_back("+f16c");
537 }
538 if (util_cpu_caps.has_avx2) {
539 MAttrs.push_back("+avx2");
540 }
541 }
542 if (util_cpu_caps.has_altivec) {
543 MAttrs.push_back("+altivec");
544 }
545 builder.setMAttrs(MAttrs);
546
547 #if HAVE_LLVM >= 0x0305
548 StringRef MCPU = llvm::sys::getHostCPUName();
549 /*
550 * The cpu bits are no longer set automatically, so need to set mcpu manually.
551 * Note that the MAttrs set above will be sort of ignored (since we should
552 * not set any which would not be set by specifying the cpu anyway).
553 * It ought to be safe though since getHostCPUName() should include bits
554 * not only from the cpu but environment as well (for instance if it's safe
555 * to use avx instructions which need OS support). According to
556 * http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this
557 * right it may be necessary to specify older cpu (or disable mattrs) though
558 * when not using MCJIT so no instructions are generated which the old JIT
559 * can't handle. Not entirely sure if we really need to do anything yet.
560 */
561 builder.setMCPU(MCPU);
562 #endif
563
564 ShaderMemoryManager *MM = NULL;
565 if (useMCJIT) {
566 #if HAVE_LLVM > 0x0303
567 BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
568 MM = new ShaderMemoryManager(JMM);
569 *OutCode = MM->getGeneratedCode();
570
571 #if HAVE_LLVM >= 0x0306
572 builder.setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>(MM));
573 MM = NULL; // ownership taken by std::unique_ptr
574 #else
575 builder.setMCJITMemoryManager(MM);
576 #endif
577 #endif
578 } else {
579 #if HAVE_LLVM < 0x0306
580 BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
581 MM = new ShaderMemoryManager(JMM);
582 *OutCode = MM->getGeneratedCode();
583
584 builder.setJITMemoryManager(MM);
585 #else
586 assert(0);
587 #endif
588 }
589
590 ExecutionEngine *JIT;
591
592 JIT = builder.create();
593 if (JIT) {
594 *OutJIT = wrap(JIT);
595 return 0;
596 }
597 lp_free_generated_code(*OutCode);
598 *OutCode = 0;
599 delete MM;
600 *OutError = strdup(Error.c_str());
601 return 1;
602 }
603
604
605 extern "C"
606 void
607 lp_free_generated_code(struct lp_generated_code *code)
608 {
609 ShaderMemoryManager::freeGeneratedCode(code);
610 }
611
612 extern "C"
613 LLVMMCJITMemoryManagerRef
614 lp_get_default_memory_manager()
615 {
616 BaseMemoryManager *mm;
617 #if HAVE_LLVM < 0x0306
618 mm = llvm::JITMemoryManager::CreateDefaultMemManager();
619 #else
620 mm = new llvm::SectionMemoryManager();
621 #endif
622 return reinterpret_cast<LLVMMCJITMemoryManagerRef>(mm);
623 }
624
625 extern "C"
626 void
627 lp_free_memory_manager(LLVMMCJITMemoryManagerRef memorymgr)
628 {
629 delete reinterpret_cast<BaseMemoryManager*>(memorymgr);
630 }