/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"

#include "util/macros.h"

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <llvm-c/Core.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Transforms/IPO.h>

#include <llvm/IR/LegacyPassManager.h>

42 void ac_add_attr_dereferenceable(LLVMValueRef val
, uint64_t bytes
)
44 llvm::Argument
*A
= llvm::unwrap
<llvm::Argument
>(val
);
45 A
->addAttr(llvm::Attribute::getWithDereferenceableBytes(A
->getContext(), bytes
));
48 bool ac_is_sgpr_param(LLVMValueRef arg
)
50 llvm::Argument
*A
= llvm::unwrap
<llvm::Argument
>(arg
);
51 llvm::AttributeList AS
= A
->getParent()->getAttributes();
52 unsigned ArgNo
= A
->getArgNo();
53 return AS
.hasAttribute(ArgNo
+ 1, llvm::Attribute::InReg
);
56 LLVMValueRef
ac_llvm_get_called_value(LLVMValueRef call
)
58 return LLVMGetCalledValue(call
);
61 bool ac_llvm_is_function(LLVMValueRef v
)
63 return LLVMGetValueKind(v
) == LLVMFunctionValueKind
;
66 LLVMModuleRef
ac_create_module(LLVMTargetMachineRef tm
, LLVMContextRef ctx
)
68 llvm::TargetMachine
*TM
= reinterpret_cast<llvm::TargetMachine
*>(tm
);
69 LLVMModuleRef module
= LLVMModuleCreateWithNameInContext("mesa-shader", ctx
);
71 llvm::unwrap(module
)->setTargetTriple(TM
->getTargetTriple().getTriple());
72 llvm::unwrap(module
)->setDataLayout(TM
->createDataLayout());
76 LLVMBuilderRef
ac_create_builder(LLVMContextRef ctx
,
77 enum ac_float_mode float_mode
)
79 LLVMBuilderRef builder
= LLVMCreateBuilderInContext(ctx
);
81 llvm::FastMathFlags flags
;
84 case AC_FLOAT_MODE_DEFAULT
:
85 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO
:
88 case AC_FLOAT_MODE_DEFAULT_OPENGL
:
89 /* Allow optimizations to treat the sign of a zero argument or
90 * result as insignificant.
92 flags
.setNoSignedZeros(); /* nsz */
94 /* Allow optimizations to use the reciprocal of an argument
95 * rather than perform division.
97 flags
.setAllowReciprocal(); /* arcp */
99 /* Allow floating-point contraction (e.g. fusing a multiply
100 * followed by an addition into a fused multiply-and-add).
102 flags
.setAllowContract(); /* contract */
104 llvm::unwrap(builder
)->setFastMathFlags(flags
);
111 /* Return the original state of inexact math. */
112 bool ac_disable_inexact_math(LLVMBuilderRef builder
)
114 auto *b
= llvm::unwrap(builder
);
115 llvm::FastMathFlags flags
= b
->getFastMathFlags();
117 if (!flags
.allowContract())
120 flags
.setAllowContract(false);
121 b
->setFastMathFlags(flags
);
125 void ac_restore_inexact_math(LLVMBuilderRef builder
, bool value
)
127 auto *b
= llvm::unwrap(builder
);
128 llvm::FastMathFlags flags
= b
->getFastMathFlags();
130 if (flags
.allowContract() == value
)
133 flags
.setAllowContract(value
);
134 b
->setFastMathFlags(flags
);
137 LLVMTargetLibraryInfoRef
138 ac_create_target_library_info(const char *triple
)
140 return reinterpret_cast<LLVMTargetLibraryInfoRef
>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple
)));
144 ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info
)
146 delete reinterpret_cast<llvm::TargetLibraryInfoImpl
*>(library_info
);
149 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
150 * better compatibility with C code. */
151 struct raw_memory_ostream
: public llvm::raw_pwrite_stream
{
164 ~raw_memory_ostream()
174 void take(char *&out_buffer
, size_t &out_size
)
183 void flush() = delete;
185 void write_impl(const char *ptr
, size_t size
) override
187 if (unlikely(written
+ size
< written
))
189 if (written
+ size
> bufsize
) {
190 bufsize
= MAX3(1024, written
+ size
, bufsize
/ 3 * 4);
191 buffer
= (char *)realloc(buffer
, bufsize
);
193 fprintf(stderr
, "amd: out of memory allocating ELF buffer\n");
197 memcpy(buffer
+ written
, ptr
, size
);
201 void pwrite_impl(const char *ptr
, size_t size
, uint64_t offset
) override
203 assert(offset
== (size_t)offset
&&
204 offset
+ size
>= offset
&& offset
+ size
<= written
);
205 memcpy(buffer
+ offset
, ptr
, size
);
208 uint64_t current_pos() const override
214 /* The LLVM compiler is represented as a pass manager containing passes for
215 * optimizations, instruction selection, and code generation.
217 struct ac_compiler_passes
{
218 raw_memory_ostream ostream
; /* ELF shader binary stream */
219 llvm::legacy::PassManager passmgr
; /* list of passes */
222 struct ac_compiler_passes
*ac_create_llvm_passes(LLVMTargetMachineRef tm
)
224 struct ac_compiler_passes
*p
= new ac_compiler_passes();
228 llvm::TargetMachine
*TM
= reinterpret_cast<llvm::TargetMachine
*>(tm
);
230 if (TM
->addPassesToEmitFile(p
->passmgr
, p
->ostream
,
232 #if LLVM_VERSION_MAJOR >= 10
233 llvm::CGFT_ObjectFile
)) {
235 llvm::TargetMachine::CGFT_ObjectFile
)) {
237 fprintf(stderr
, "amd: TargetMachine can't emit a file of this type!\n");
/* Destroy an object created by ac_create_llvm_passes(). */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
	delete p;
}

249 /* This returns false on failure. */
250 bool ac_compile_module_to_elf(struct ac_compiler_passes
*p
, LLVMModuleRef module
,
251 char **pelf_buffer
, size_t *pelf_size
)
253 p
->passmgr
.run(*llvm::unwrap(module
));
254 p
->ostream
.take(*pelf_buffer
, *pelf_size
);
258 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr
)
260 llvm::unwrap(passmgr
)->add(llvm::createBarrierNoopPass());
263 void ac_enable_global_isel(LLVMTargetMachineRef tm
)
265 reinterpret_cast<llvm::TargetMachine
*>(tm
)->setGlobalISel(true);
268 LLVMValueRef
ac_build_atomic_rmw(struct ac_llvm_context
*ctx
, LLVMAtomicRMWBinOp op
,
269 LLVMValueRef ptr
, LLVMValueRef val
,
270 const char *sync_scope
) {
271 llvm::AtomicRMWInst::BinOp binop
;
273 case LLVMAtomicRMWBinOpXchg
:
274 binop
= llvm::AtomicRMWInst::Xchg
;
276 case LLVMAtomicRMWBinOpAdd
:
277 binop
= llvm::AtomicRMWInst::Add
;
279 case LLVMAtomicRMWBinOpSub
:
280 binop
= llvm::AtomicRMWInst::Sub
;
282 case LLVMAtomicRMWBinOpAnd
:
283 binop
= llvm::AtomicRMWInst::And
;
285 case LLVMAtomicRMWBinOpNand
:
286 binop
= llvm::AtomicRMWInst::Nand
;
288 case LLVMAtomicRMWBinOpOr
:
289 binop
= llvm::AtomicRMWInst::Or
;
291 case LLVMAtomicRMWBinOpXor
:
292 binop
= llvm::AtomicRMWInst::Xor
;
294 case LLVMAtomicRMWBinOpMax
:
295 binop
= llvm::AtomicRMWInst::Max
;
297 case LLVMAtomicRMWBinOpMin
:
298 binop
= llvm::AtomicRMWInst::Min
;
300 case LLVMAtomicRMWBinOpUMax
:
301 binop
= llvm::AtomicRMWInst::UMax
;
303 case LLVMAtomicRMWBinOpUMin
:
304 binop
= llvm::AtomicRMWInst::UMin
;
307 unreachable(!"invalid LLVMAtomicRMWBinOp");
310 unsigned SSID
= llvm::unwrap(ctx
->context
)->getOrInsertSyncScopeID(sync_scope
);
311 return llvm::wrap(llvm::unwrap(ctx
->builder
)->CreateAtomicRMW(
312 binop
, llvm::unwrap(ptr
), llvm::unwrap(val
),
313 llvm::AtomicOrdering::SequentiallyConsistent
, SSID
));
316 LLVMValueRef
ac_build_atomic_cmp_xchg(struct ac_llvm_context
*ctx
, LLVMValueRef ptr
,
317 LLVMValueRef cmp
, LLVMValueRef val
,
318 const char *sync_scope
) {
319 unsigned SSID
= llvm::unwrap(ctx
->context
)->getOrInsertSyncScopeID(sync_scope
);
320 return llvm::wrap(llvm::unwrap(ctx
->builder
)->CreateAtomicCmpXchg(
321 llvm::unwrap(ptr
), llvm::unwrap(cmp
), llvm::unwrap(val
),
322 llvm::AtomicOrdering::SequentiallyConsistent
,
323 llvm::AtomicOrdering::SequentiallyConsistent
, SSID
));