Revert "ac: reassociate FP expressions for inexact instructions for radeonsi"
[mesa.git] / src / amd / llvm / ac_llvm_helper.cpp
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26 #include <cstring>
27
28 #include "ac_binary.h"
29 #include "ac_llvm_util.h"
30 #include "ac_llvm_build.h"
31
32 #include "util/macros.h"
33
34 #include <llvm-c/Core.h>
35 #include <llvm/Target/TargetMachine.h>
36 #include <llvm/IR/IRBuilder.h>
37 #include <llvm/Analysis/TargetLibraryInfo.h>
38 #include <llvm/Transforms/IPO.h>
39
40 #include <llvm/IR/LegacyPassManager.h>
41
42 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
43 {
44 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
45 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
46 }
47
48 bool ac_is_sgpr_param(LLVMValueRef arg)
49 {
50 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
51 llvm::AttributeList AS = A->getParent()->getAttributes();
52 unsigned ArgNo = A->getArgNo();
53 return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
54 }
55
56 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
57 {
58 return LLVMGetCalledValue(call);
59 }
60
61 bool ac_llvm_is_function(LLVMValueRef v)
62 {
63 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
64 }
65
66 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
67 {
68 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
69 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
70
71 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
72 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
73 return module;
74 }
75
76 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
77 enum ac_float_mode float_mode)
78 {
79 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
80
81 llvm::FastMathFlags flags;
82
83 switch (float_mode) {
84 case AC_FLOAT_MODE_DEFAULT:
85 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
86 break;
87
88 case AC_FLOAT_MODE_DEFAULT_OPENGL:
89 /* Allow optimizations to treat the sign of a zero argument or
90 * result as insignificant.
91 */
92 flags.setNoSignedZeros(); /* nsz */
93
94 /* Allow optimizations to use the reciprocal of an argument
95 * rather than perform division.
96 */
97 flags.setAllowReciprocal(); /* arcp */
98
99 /* Allow floating-point contraction (e.g. fusing a multiply
100 * followed by an addition into a fused multiply-and-add).
101 */
102 flags.setAllowContract(); /* contract */
103
104 llvm::unwrap(builder)->setFastMathFlags(flags);
105 break;
106 }
107
108 return builder;
109 }
110
111 /* Return the original state of inexact math. */
112 bool ac_disable_inexact_math(LLVMBuilderRef builder)
113 {
114 auto *b = llvm::unwrap(builder);
115 llvm::FastMathFlags flags = b->getFastMathFlags();
116
117 if (!flags.allowContract())
118 return false;
119
120 flags.setAllowContract(false);
121 b->setFastMathFlags(flags);
122 return true;
123 }
124
125 void ac_restore_inexact_math(LLVMBuilderRef builder, bool value)
126 {
127 auto *b = llvm::unwrap(builder);
128 llvm::FastMathFlags flags = b->getFastMathFlags();
129
130 if (flags.allowContract() == value)
131 return;
132
133 flags.setAllowContract(value);
134 b->setFastMathFlags(flags);
135 }
136
137 LLVMTargetLibraryInfoRef
138 ac_create_target_library_info(const char *triple)
139 {
140 return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
141 }
142
143 void
144 ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
145 {
146 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
147 }
148
149 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
150 * better compatibility with C code. */
151 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
152 char *buffer;
153 size_t written;
154 size_t bufsize;
155
156 raw_memory_ostream()
157 {
158 buffer = NULL;
159 written = 0;
160 bufsize = 0;
161 SetUnbuffered();
162 }
163
164 ~raw_memory_ostream()
165 {
166 free(buffer);
167 }
168
169 void clear()
170 {
171 written = 0;
172 }
173
174 void take(char *&out_buffer, size_t &out_size)
175 {
176 out_buffer = buffer;
177 out_size = written;
178 buffer = NULL;
179 written = 0;
180 bufsize = 0;
181 }
182
183 void flush() = delete;
184
185 void write_impl(const char *ptr, size_t size) override
186 {
187 if (unlikely(written + size < written))
188 abort();
189 if (written + size > bufsize) {
190 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
191 buffer = (char *)realloc(buffer, bufsize);
192 if (!buffer) {
193 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
194 abort();
195 }
196 }
197 memcpy(buffer + written, ptr, size);
198 written += size;
199 }
200
201 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
202 {
203 assert(offset == (size_t)offset &&
204 offset + size >= offset && offset + size <= written);
205 memcpy(buffer + offset, ptr, size);
206 }
207
208 uint64_t current_pos() const override
209 {
210 return written;
211 }
212 };
213
214 /* The LLVM compiler is represented as a pass manager containing passes for
215 * optimizations, instruction selection, and code generation.
216 */
217 struct ac_compiler_passes {
218 raw_memory_ostream ostream; /* ELF shader binary stream */
219 llvm::legacy::PassManager passmgr; /* list of passes */
220 };
221
222 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
223 {
224 struct ac_compiler_passes *p = new ac_compiler_passes();
225 if (!p)
226 return NULL;
227
228 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
229
230 if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
231 nullptr,
232 #if LLVM_VERSION_MAJOR >= 10
233 llvm::CGFT_ObjectFile)) {
234 #else
235 llvm::TargetMachine::CGFT_ObjectFile)) {
236 #endif
237 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
238 delete p;
239 return NULL;
240 }
241 return p;
242 }
243
/* Delete a compiler-passes object created by ac_create_llvm_passes().
 * Passing NULL is harmless because deleting a null pointer is a no-op.
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
248
249 /* This returns false on failure. */
250 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
251 char **pelf_buffer, size_t *pelf_size)
252 {
253 p->passmgr.run(*llvm::unwrap(module));
254 p->ostream.take(*pelf_buffer, *pelf_size);
255 return true;
256 }
257
258 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
259 {
260 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
261 }
262
263 void ac_enable_global_isel(LLVMTargetMachineRef tm)
264 {
265 reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
266 }
267
268 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
269 LLVMValueRef ptr, LLVMValueRef val,
270 const char *sync_scope) {
271 llvm::AtomicRMWInst::BinOp binop;
272 switch (op) {
273 case LLVMAtomicRMWBinOpXchg:
274 binop = llvm::AtomicRMWInst::Xchg;
275 break;
276 case LLVMAtomicRMWBinOpAdd:
277 binop = llvm::AtomicRMWInst::Add;
278 break;
279 case LLVMAtomicRMWBinOpSub:
280 binop = llvm::AtomicRMWInst::Sub;
281 break;
282 case LLVMAtomicRMWBinOpAnd:
283 binop = llvm::AtomicRMWInst::And;
284 break;
285 case LLVMAtomicRMWBinOpNand:
286 binop = llvm::AtomicRMWInst::Nand;
287 break;
288 case LLVMAtomicRMWBinOpOr:
289 binop = llvm::AtomicRMWInst::Or;
290 break;
291 case LLVMAtomicRMWBinOpXor:
292 binop = llvm::AtomicRMWInst::Xor;
293 break;
294 case LLVMAtomicRMWBinOpMax:
295 binop = llvm::AtomicRMWInst::Max;
296 break;
297 case LLVMAtomicRMWBinOpMin:
298 binop = llvm::AtomicRMWInst::Min;
299 break;
300 case LLVMAtomicRMWBinOpUMax:
301 binop = llvm::AtomicRMWInst::UMax;
302 break;
303 case LLVMAtomicRMWBinOpUMin:
304 binop = llvm::AtomicRMWInst::UMin;
305 break;
306 default:
307 unreachable(!"invalid LLVMAtomicRMWBinOp");
308 break;
309 }
310 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
311 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
312 binop, llvm::unwrap(ptr), llvm::unwrap(val),
313 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
314 }
315
316 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
317 LLVMValueRef cmp, LLVMValueRef val,
318 const char *sync_scope) {
319 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
320 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
321 llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
322 llvm::AtomicOrdering::SequentiallyConsistent,
323 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
324 }