aco: Fixup markdown formatting of the README-ISA.
[mesa.git] / src / amd / llvm / ac_llvm_helper.cpp
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26 #include <cstring>
27
28 #include <llvm-c/Core.h>
29 #include <llvm/Target/TargetMachine.h>
30 #include <llvm/IR/IRBuilder.h>
31 #include <llvm/Analysis/TargetLibraryInfo.h>
32 #include <llvm/Transforms/IPO.h>
33
34 #include <llvm/IR/LegacyPassManager.h>
35
36 /* DO NOT REORDER THE HEADERS
37 * The LLVM headers need to all be included before any Mesa header,
38 * as they use the `restrict` keyword in ways that are incompatible
39 * with our #define in include/c99_compat.h
40 */
41
42 #include "ac_binary.h"
43 #include "ac_llvm_util.h"
44 #include "ac_llvm_build.h"
45
46 #include "util/macros.h"
47
48 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
49 {
50 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
51 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
52 }
53
54 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
55 {
56 #if LLVM_VERSION_MAJOR >= 10
57 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
58 A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
59 #else
60 /* Avoid unused parameter warnings. */
61 (void)val;
62 (void)bytes;
63 #endif
64 }
65
66 bool ac_is_sgpr_param(LLVMValueRef arg)
67 {
68 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
69 llvm::AttributeList AS = A->getParent()->getAttributes();
70 unsigned ArgNo = A->getArgNo();
71 return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
72 }
73
74 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
75 {
76 return LLVMGetCalledValue(call);
77 }
78
79 bool ac_llvm_is_function(LLVMValueRef v)
80 {
81 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
82 }
83
84 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
85 {
86 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
87 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
88
89 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
90 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
91 return module;
92 }
93
94 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
95 enum ac_float_mode float_mode)
96 {
97 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
98
99 llvm::FastMathFlags flags;
100
101 switch (float_mode) {
102 case AC_FLOAT_MODE_DEFAULT:
103 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
104 break;
105
106 case AC_FLOAT_MODE_DEFAULT_OPENGL:
107 /* Allow optimizations to treat the sign of a zero argument or
108 * result as insignificant.
109 */
110 flags.setNoSignedZeros(); /* nsz */
111
112 /* Allow optimizations to use the reciprocal of an argument
113 * rather than perform division.
114 */
115 flags.setAllowReciprocal(); /* arcp */
116
117 /* Allow floating-point contraction (e.g. fusing a multiply
118 * followed by an addition into a fused multiply-and-add).
119 */
120 flags.setAllowContract(); /* contract */
121
122 llvm::unwrap(builder)->setFastMathFlags(flags);
123 break;
124 }
125
126 return builder;
127 }
128
129 /* Return the original state of inexact math. */
130 bool ac_disable_inexact_math(LLVMBuilderRef builder)
131 {
132 auto *b = llvm::unwrap(builder);
133 llvm::FastMathFlags flags = b->getFastMathFlags();
134
135 if (!flags.allowContract())
136 return false;
137
138 flags.setAllowContract(false);
139 b->setFastMathFlags(flags);
140 return true;
141 }
142
143 void ac_restore_inexact_math(LLVMBuilderRef builder, bool value)
144 {
145 auto *b = llvm::unwrap(builder);
146 llvm::FastMathFlags flags = b->getFastMathFlags();
147
148 if (flags.allowContract() == value)
149 return;
150
151 flags.setAllowContract(value);
152 b->setFastMathFlags(flags);
153 }
154
155 LLVMTargetLibraryInfoRef
156 ac_create_target_library_info(const char *triple)
157 {
158 return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
159 }
160
161 void
162 ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
163 {
164 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
165 }
166
167 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
168 * better compatibility with C code. */
169 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
170 char *buffer;
171 size_t written;
172 size_t bufsize;
173
174 raw_memory_ostream()
175 {
176 buffer = NULL;
177 written = 0;
178 bufsize = 0;
179 SetUnbuffered();
180 }
181
182 ~raw_memory_ostream()
183 {
184 free(buffer);
185 }
186
187 void clear()
188 {
189 written = 0;
190 }
191
192 void take(char *&out_buffer, size_t &out_size)
193 {
194 out_buffer = buffer;
195 out_size = written;
196 buffer = NULL;
197 written = 0;
198 bufsize = 0;
199 }
200
201 void flush() = delete;
202
203 void write_impl(const char *ptr, size_t size) override
204 {
205 if (unlikely(written + size < written))
206 abort();
207 if (written + size > bufsize) {
208 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
209 buffer = (char *)realloc(buffer, bufsize);
210 if (!buffer) {
211 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
212 abort();
213 }
214 }
215 memcpy(buffer + written, ptr, size);
216 written += size;
217 }
218
219 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
220 {
221 assert(offset == (size_t)offset &&
222 offset + size >= offset && offset + size <= written);
223 memcpy(buffer + offset, ptr, size);
224 }
225
226 uint64_t current_pos() const override
227 {
228 return written;
229 }
230 };
231
232 /* The LLVM compiler is represented as a pass manager containing passes for
233 * optimizations, instruction selection, and code generation.
234 */
235 struct ac_compiler_passes {
236 raw_memory_ostream ostream; /* ELF shader binary stream */
237 llvm::legacy::PassManager passmgr; /* list of passes */
238 };
239
240 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
241 {
242 struct ac_compiler_passes *p = new ac_compiler_passes();
243 if (!p)
244 return NULL;
245
246 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
247
248 if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
249 nullptr,
250 #if LLVM_VERSION_MAJOR >= 10
251 llvm::CGFT_ObjectFile)) {
252 #else
253 llvm::TargetMachine::CGFT_ObjectFile)) {
254 #endif
255 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
256 delete p;
257 return NULL;
258 }
259 return p;
260 }
261
/* Delete a structure created by ac_create_llvm_passes(), which destroys
 * its pass manager and its output stream.
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
266
267 /* This returns false on failure. */
268 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
269 char **pelf_buffer, size_t *pelf_size)
270 {
271 p->passmgr.run(*llvm::unwrap(module));
272 p->ostream.take(*pelf_buffer, *pelf_size);
273 return true;
274 }
275
276 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
277 {
278 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
279 }
280
281 void ac_enable_global_isel(LLVMTargetMachineRef tm)
282 {
283 reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
284 }
285
286 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
287 LLVMValueRef ptr, LLVMValueRef val,
288 const char *sync_scope) {
289 llvm::AtomicRMWInst::BinOp binop;
290 switch (op) {
291 case LLVMAtomicRMWBinOpXchg:
292 binop = llvm::AtomicRMWInst::Xchg;
293 break;
294 case LLVMAtomicRMWBinOpAdd:
295 binop = llvm::AtomicRMWInst::Add;
296 break;
297 case LLVMAtomicRMWBinOpSub:
298 binop = llvm::AtomicRMWInst::Sub;
299 break;
300 case LLVMAtomicRMWBinOpAnd:
301 binop = llvm::AtomicRMWInst::And;
302 break;
303 case LLVMAtomicRMWBinOpNand:
304 binop = llvm::AtomicRMWInst::Nand;
305 break;
306 case LLVMAtomicRMWBinOpOr:
307 binop = llvm::AtomicRMWInst::Or;
308 break;
309 case LLVMAtomicRMWBinOpXor:
310 binop = llvm::AtomicRMWInst::Xor;
311 break;
312 case LLVMAtomicRMWBinOpMax:
313 binop = llvm::AtomicRMWInst::Max;
314 break;
315 case LLVMAtomicRMWBinOpMin:
316 binop = llvm::AtomicRMWInst::Min;
317 break;
318 case LLVMAtomicRMWBinOpUMax:
319 binop = llvm::AtomicRMWInst::UMax;
320 break;
321 case LLVMAtomicRMWBinOpUMin:
322 binop = llvm::AtomicRMWInst::UMin;
323 break;
324 default:
325 unreachable(!"invalid LLVMAtomicRMWBinOp");
326 break;
327 }
328 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
329 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
330 binop, llvm::unwrap(ptr), llvm::unwrap(val),
331 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
332 }
333
334 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
335 LLVMValueRef cmp, LLVMValueRef val,
336 const char *sync_scope) {
337 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
338 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
339 llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
340 llvm::AtomicOrdering::SequentiallyConsistent,
341 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
342 }