Revert "ac: generate FMA for inexact instructions for radeonsi"
[mesa.git] / src / amd / llvm / ac_llvm_helper.cpp
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26 #include <cstring>
27
28 #include <llvm-c/Core.h>
29 #include <llvm/Target/TargetMachine.h>
30 #include <llvm/IR/IRBuilder.h>
31 #include <llvm/Analysis/TargetLibraryInfo.h>
32 #include <llvm/Transforms/IPO.h>
33
34 #include <llvm/IR/LegacyPassManager.h>
35
36 /* DO NOT REORDER THE HEADERS
37 * The LLVM headers need to all be included before any Mesa header,
38 * as they use the `restrict` keyword in ways that are incompatible
39 * with our #define in include/c99_compat.h
40 */
41
42 #include "ac_binary.h"
43 #include "ac_llvm_util.h"
44 #include "ac_llvm_build.h"
45
46 #include "util/macros.h"
47
48 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
49 {
50 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
51 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
52 }
53
54 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
55 {
56 #if LLVM_VERSION_MAJOR >= 10
57 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
58 A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
59 #else
60 /* Avoid unused parameter warnings. */
61 (void)val;
62 (void)bytes;
63 #endif
64 }
65
66 bool ac_is_sgpr_param(LLVMValueRef arg)
67 {
68 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
69 llvm::AttributeList AS = A->getParent()->getAttributes();
70 unsigned ArgNo = A->getArgNo();
71 return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
72 }
73
74 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
75 {
76 return LLVMGetCalledValue(call);
77 }
78
79 bool ac_llvm_is_function(LLVMValueRef v)
80 {
81 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
82 }
83
84 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
85 {
86 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
87 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
88
89 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
90 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
91 return module;
92 }
93
94 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
95 enum ac_float_mode float_mode)
96 {
97 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
98
99 llvm::FastMathFlags flags;
100
101 switch (float_mode) {
102 case AC_FLOAT_MODE_DEFAULT:
103 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
104 break;
105
106 case AC_FLOAT_MODE_DEFAULT_OPENGL:
107 /* Allow optimizations to treat the sign of a zero argument or
108 * result as insignificant.
109 */
110 flags.setNoSignedZeros(); /* nsz */
111
112 /* Allow optimizations to use the reciprocal of an argument
113 * rather than perform division.
114 */
115 flags.setAllowReciprocal(); /* arcp */
116
117 llvm::unwrap(builder)->setFastMathFlags(flags);
118 break;
119 }
120
121 return builder;
122 }
123
124 LLVMTargetLibraryInfoRef
125 ac_create_target_library_info(const char *triple)
126 {
127 return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
128 }
129
130 void
131 ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
132 {
133 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
134 }
135
136 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
137 * better compatibility with C code. */
138 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
139 char *buffer;
140 size_t written;
141 size_t bufsize;
142
143 raw_memory_ostream()
144 {
145 buffer = NULL;
146 written = 0;
147 bufsize = 0;
148 SetUnbuffered();
149 }
150
151 ~raw_memory_ostream()
152 {
153 free(buffer);
154 }
155
156 void clear()
157 {
158 written = 0;
159 }
160
161 void take(char *&out_buffer, size_t &out_size)
162 {
163 out_buffer = buffer;
164 out_size = written;
165 buffer = NULL;
166 written = 0;
167 bufsize = 0;
168 }
169
170 void flush() = delete;
171
172 void write_impl(const char *ptr, size_t size) override
173 {
174 if (unlikely(written + size < written))
175 abort();
176 if (written + size > bufsize) {
177 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
178 buffer = (char *)realloc(buffer, bufsize);
179 if (!buffer) {
180 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
181 abort();
182 }
183 }
184 memcpy(buffer + written, ptr, size);
185 written += size;
186 }
187
188 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
189 {
190 assert(offset == (size_t)offset &&
191 offset + size >= offset && offset + size <= written);
192 memcpy(buffer + offset, ptr, size);
193 }
194
195 uint64_t current_pos() const override
196 {
197 return written;
198 }
199 };
200
201 /* The LLVM compiler is represented as a pass manager containing passes for
202 * optimizations, instruction selection, and code generation.
203 */
204 struct ac_compiler_passes {
205 raw_memory_ostream ostream; /* ELF shader binary stream */
206 llvm::legacy::PassManager passmgr; /* list of passes */
207 };
208
209 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
210 {
211 struct ac_compiler_passes *p = new ac_compiler_passes();
212 if (!p)
213 return NULL;
214
215 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
216
217 if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
218 nullptr,
219 #if LLVM_VERSION_MAJOR >= 10
220 llvm::CGFT_ObjectFile)) {
221 #else
222 llvm::TargetMachine::CGFT_ObjectFile)) {
223 #endif
224 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
225 delete p;
226 return NULL;
227 }
228 return p;
229 }
230
/* Destroy a pass pipeline created by ac_create_llvm_passes(), including its
 * output stream and any buffer the stream still owns.
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
235
236 /* This returns false on failure. */
237 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
238 char **pelf_buffer, size_t *pelf_size)
239 {
240 p->passmgr.run(*llvm::unwrap(module));
241 p->ostream.take(*pelf_buffer, *pelf_size);
242 return true;
243 }
244
245 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
246 {
247 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
248 }
249
250 void ac_enable_global_isel(LLVMTargetMachineRef tm)
251 {
252 reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
253 }
254
255 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
256 LLVMValueRef ptr, LLVMValueRef val,
257 const char *sync_scope) {
258 llvm::AtomicRMWInst::BinOp binop;
259 switch (op) {
260 case LLVMAtomicRMWBinOpXchg:
261 binop = llvm::AtomicRMWInst::Xchg;
262 break;
263 case LLVMAtomicRMWBinOpAdd:
264 binop = llvm::AtomicRMWInst::Add;
265 break;
266 case LLVMAtomicRMWBinOpSub:
267 binop = llvm::AtomicRMWInst::Sub;
268 break;
269 case LLVMAtomicRMWBinOpAnd:
270 binop = llvm::AtomicRMWInst::And;
271 break;
272 case LLVMAtomicRMWBinOpNand:
273 binop = llvm::AtomicRMWInst::Nand;
274 break;
275 case LLVMAtomicRMWBinOpOr:
276 binop = llvm::AtomicRMWInst::Or;
277 break;
278 case LLVMAtomicRMWBinOpXor:
279 binop = llvm::AtomicRMWInst::Xor;
280 break;
281 case LLVMAtomicRMWBinOpMax:
282 binop = llvm::AtomicRMWInst::Max;
283 break;
284 case LLVMAtomicRMWBinOpMin:
285 binop = llvm::AtomicRMWInst::Min;
286 break;
287 case LLVMAtomicRMWBinOpUMax:
288 binop = llvm::AtomicRMWInst::UMax;
289 break;
290 case LLVMAtomicRMWBinOpUMin:
291 binop = llvm::AtomicRMWInst::UMin;
292 break;
293 default:
294 unreachable(!"invalid LLVMAtomicRMWBinOp");
295 break;
296 }
297 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
298 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
299 binop, llvm::unwrap(ptr), llvm::unwrap(val),
300 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
301 }
302
303 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
304 LLVMValueRef cmp, LLVMValueRef val,
305 const char *sync_scope) {
306 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
307 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
308 llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
309 llvm::AtomicOrdering::SequentiallyConsistent,
310 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
311 }