radeonsi: move HTILE allocation outside of radeonsi
[mesa.git] / src / amd / common / ac_llvm_helper.cpp
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26 /* based on Marek's patch to lp_bld_misc.cpp */
27
28 // Workaround http://llvm.org/PR23628
29 #pragma push_macro("DEBUG")
30 #undef DEBUG
31
#include <cstring>
#include <new>

#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"

#include "util/macros.h"

#include <llvm-c/Core.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Transforms/IPO.h>

#include <llvm/IR/LegacyPassManager.h>
47
48 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
49 {
50 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
51 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
52 }
53
54 bool ac_is_sgpr_param(LLVMValueRef arg)
55 {
56 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
57 llvm::AttributeList AS = A->getParent()->getAttributes();
58 unsigned ArgNo = A->getArgNo();
59 return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
60 }
61
62 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
63 {
64 return LLVMGetCalledValue(call);
65 }
66
67 bool ac_llvm_is_function(LLVMValueRef v)
68 {
69 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
70 }
71
72 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
73 {
74 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
75 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
76
77 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
78 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
79 return module;
80 }
81
82 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
83 enum ac_float_mode float_mode)
84 {
85 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
86
87 llvm::FastMathFlags flags;
88
89 switch (float_mode) {
90 case AC_FLOAT_MODE_DEFAULT:
91 break;
92 case AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH:
93 flags.setNoSignedZeros();
94 llvm::unwrap(builder)->setFastMathFlags(flags);
95 break;
96 case AC_FLOAT_MODE_UNSAFE_FP_MATH:
97 flags.setFast();
98 llvm::unwrap(builder)->setFastMathFlags(flags);
99 break;
100 }
101
102 return builder;
103 }
104
105 LLVMTargetLibraryInfoRef
106 ac_create_target_library_info(const char *triple)
107 {
108 return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
109 }
110
111 void
112 ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
113 {
114 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
115 }
116
117 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
118 * better compatibility with C code. */
119 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
120 char *buffer;
121 size_t written;
122 size_t bufsize;
123
124 raw_memory_ostream()
125 {
126 buffer = NULL;
127 written = 0;
128 bufsize = 0;
129 SetUnbuffered();
130 }
131
132 ~raw_memory_ostream()
133 {
134 free(buffer);
135 }
136
137 void clear()
138 {
139 written = 0;
140 }
141
142 void take(char *&out_buffer, size_t &out_size)
143 {
144 out_buffer = buffer;
145 out_size = written;
146 buffer = NULL;
147 written = 0;
148 bufsize = 0;
149 }
150
151 void flush() = delete;
152
153 void write_impl(const char *ptr, size_t size) override
154 {
155 if (unlikely(written + size < written))
156 abort();
157 if (written + size > bufsize) {
158 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
159 buffer = (char *)realloc(buffer, bufsize);
160 if (!buffer) {
161 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
162 abort();
163 }
164 }
165 memcpy(buffer + written, ptr, size);
166 written += size;
167 }
168
169 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
170 {
171 assert(offset == (size_t)offset &&
172 offset + size >= offset && offset + size <= written);
173 memcpy(buffer + offset, ptr, size);
174 }
175
176 uint64_t current_pos() const override
177 {
178 return written;
179 }
180 };
181
182 /* The LLVM compiler is represented as a pass manager containing passes for
183 * optimizations, instruction selection, and code generation.
184 */
185 struct ac_compiler_passes {
186 raw_memory_ostream ostream; /* ELF shader binary stream */
187 llvm::legacy::PassManager passmgr; /* list of passes */
188 };
189
190 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
191 {
192 struct ac_compiler_passes *p = new ac_compiler_passes();
193 if (!p)
194 return NULL;
195
196 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
197
198 if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
199 nullptr,
200 llvm::TargetMachine::CGFT_ObjectFile)) {
201 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
202 delete p;
203 return NULL;
204 }
205 return p;
206 }
207
/* Destroy an object created by ac_create_llvm_passes(). */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   /* `delete` on a null pointer is a no-op, so NULL is accepted here. */
   delete p;
}
212
213 /* This returns false on failure. */
214 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
215 char **pelf_buffer, size_t *pelf_size)
216 {
217 p->passmgr.run(*llvm::unwrap(module));
218 p->ostream.take(*pelf_buffer, *pelf_size);
219 return true;
220 }
221
222 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
223 {
224 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
225 }
226
227 void ac_enable_global_isel(LLVMTargetMachineRef tm)
228 {
229 reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
230 }
231
232 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
233 LLVMValueRef ptr, LLVMValueRef val,
234 const char *sync_scope) {
235 llvm::AtomicRMWInst::BinOp binop;
236 switch (op) {
237 case LLVMAtomicRMWBinOpXchg:
238 binop = llvm::AtomicRMWInst::Xchg;
239 break;
240 case LLVMAtomicRMWBinOpAdd:
241 binop = llvm::AtomicRMWInst::Add;
242 break;
243 case LLVMAtomicRMWBinOpSub:
244 binop = llvm::AtomicRMWInst::Sub;
245 break;
246 case LLVMAtomicRMWBinOpAnd:
247 binop = llvm::AtomicRMWInst::And;
248 break;
249 case LLVMAtomicRMWBinOpNand:
250 binop = llvm::AtomicRMWInst::Nand;
251 break;
252 case LLVMAtomicRMWBinOpOr:
253 binop = llvm::AtomicRMWInst::Or;
254 break;
255 case LLVMAtomicRMWBinOpXor:
256 binop = llvm::AtomicRMWInst::Xor;
257 break;
258 case LLVMAtomicRMWBinOpMax:
259 binop = llvm::AtomicRMWInst::Max;
260 break;
261 case LLVMAtomicRMWBinOpMin:
262 binop = llvm::AtomicRMWInst::Min;
263 break;
264 case LLVMAtomicRMWBinOpUMax:
265 binop = llvm::AtomicRMWInst::UMax;
266 break;
267 case LLVMAtomicRMWBinOpUMin:
268 binop = llvm::AtomicRMWInst::UMin;
269 break;
270 default:
271 unreachable(!"invalid LLVMAtomicRMWBinOp");
272 break;
273 }
274 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
275 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
276 binop, llvm::unwrap(ptr), llvm::unwrap(val),
277 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
278 }
279
280 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
281 LLVMValueRef cmp, LLVMValueRef val,
282 const char *sync_scope) {
283 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
284 return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
285 llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
286 llvm::AtomicOrdering::SequentiallyConsistent,
287 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
288 }