radeonsi: move si_shader_llvm_build.c content into si_shader_llvm.c
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_llvm.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "si_pipe.h"
27 #include "ac_rtld.h"
28 #include "sid.h"
29
30 #include "tgsi/tgsi_from_mesa.h"
31 #include "util/u_memory.h"
32
33 struct si_llvm_diagnostics {
34 struct pipe_debug_callback *debug;
35 unsigned retval;
36 };
37
38 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
39 {
40 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
41 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
42 const char *severity_str = NULL;
43
44 switch (severity) {
45 case LLVMDSError:
46 severity_str = "error";
47 break;
48 case LLVMDSWarning:
49 severity_str = "warning";
50 break;
51 case LLVMDSRemark:
52 case LLVMDSNote:
53 default:
54 return;
55 }
56
57 char *description = LLVMGetDiagInfoDescription(di);
58
59 pipe_debug_message(diag->debug, SHADER_INFO,
60 "LLVM diagnostic (%s): %s", severity_str, description);
61
62 if (severity == LLVMDSError) {
63 diag->retval = 1;
64 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
65 }
66
67 LLVMDisposeMessage(description);
68 }
69
70 int si_compile_llvm(struct si_screen *sscreen,
71 struct si_shader_binary *binary,
72 struct ac_shader_config *conf,
73 struct ac_llvm_compiler *compiler,
74 struct ac_llvm_context *ac,
75 struct pipe_debug_callback *debug,
76 enum pipe_shader_type shader_type,
77 const char *name,
78 bool less_optimized)
79 {
80 unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
81
82 if (si_can_dump_shader(sscreen, shader_type)) {
83 fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
84
85 if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
86 fprintf(stderr, "%s LLVM IR:\n\n", name);
87 ac_dump_module(ac->module);
88 fprintf(stderr, "\n");
89 }
90 }
91
92 if (sscreen->record_llvm_ir) {
93 char *ir = LLVMPrintModuleToString(ac->module);
94 binary->llvm_ir_string = strdup(ir);
95 LLVMDisposeMessage(ir);
96 }
97
98 if (!si_replace_shader(count, binary)) {
99 struct ac_compiler_passes *passes = compiler->passes;
100
101 if (ac->wave_size == 32)
102 passes = compiler->passes_wave32;
103 else if (less_optimized && compiler->low_opt_passes)
104 passes = compiler->low_opt_passes;
105
106 struct si_llvm_diagnostics diag = {debug};
107 LLVMContextSetDiagnosticHandler(ac->context, si_diagnostic_handler, &diag);
108
109 if (!ac_compile_module_to_elf(passes, ac->module,
110 (char **)&binary->elf_buffer,
111 &binary->elf_size))
112 diag.retval = 1;
113
114 if (diag.retval != 0) {
115 pipe_debug_message(debug, SHADER_INFO, "LLVM compilation failed");
116 return diag.retval;
117 }
118 }
119
120 struct ac_rtld_binary rtld;
121 if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
122 .info = &sscreen->info,
123 .shader_type = tgsi_processor_to_shader_stage(shader_type),
124 .wave_size = ac->wave_size,
125 .num_parts = 1,
126 .elf_ptrs = &binary->elf_buffer,
127 .elf_sizes = &binary->elf_size }))
128 return -1;
129
130 bool ok = ac_rtld_read_config(&rtld, conf);
131 ac_rtld_close(&rtld);
132 if (!ok)
133 return -1;
134
135 /* Enable 64-bit and 16-bit denormals, because there is no performance
136 * cost.
137 *
138 * If denormals are enabled, all floating-point output modifiers are
139 * ignored.
140 *
141 * Don't enable denormals for 32-bit floats, because:
142 * - Floating-point output modifiers would be ignored by the hw.
143 * - Some opcodes don't support denormals, such as v_mad_f32. We would
144 * have to stop using those.
145 * - GFX6 & GFX7 would be very slow.
146 */
147 conf->float_mode |= V_00B028_FP_64_DENORMS;
148
149 return 0;
150 }
151
152 void si_llvm_context_init(struct si_shader_context *ctx,
153 struct si_screen *sscreen,
154 struct ac_llvm_compiler *compiler,
155 unsigned wave_size)
156 {
157 memset(ctx, 0, sizeof(*ctx));
158 ctx->screen = sscreen;
159 ctx->compiler = compiler;
160
161 ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
162 sscreen->info.family,
163 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
164 wave_size, 64);
165 }
166
167 void si_llvm_create_func(struct si_shader_context *ctx, const char *name,
168 LLVMTypeRef *return_types, unsigned num_return_elems,
169 unsigned max_workgroup_size)
170 {
171 LLVMTypeRef ret_type;
172 enum ac_llvm_calling_convention call_conv;
173 enum pipe_shader_type real_shader_type;
174
175 if (num_return_elems)
176 ret_type = LLVMStructTypeInContext(ctx->ac.context,
177 return_types,
178 num_return_elems, true);
179 else
180 ret_type = ctx->ac.voidt;
181
182 real_shader_type = ctx->type;
183
184 /* LS is merged into HS (TCS), and ES is merged into GS. */
185 if (ctx->screen->info.chip_class >= GFX9) {
186 if (ctx->shader->key.as_ls)
187 real_shader_type = PIPE_SHADER_TESS_CTRL;
188 else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
189 real_shader_type = PIPE_SHADER_GEOMETRY;
190 }
191
192 switch (real_shader_type) {
193 case PIPE_SHADER_VERTEX:
194 case PIPE_SHADER_TESS_EVAL:
195 call_conv = AC_LLVM_AMDGPU_VS;
196 break;
197 case PIPE_SHADER_TESS_CTRL:
198 call_conv = AC_LLVM_AMDGPU_HS;
199 break;
200 case PIPE_SHADER_GEOMETRY:
201 call_conv = AC_LLVM_AMDGPU_GS;
202 break;
203 case PIPE_SHADER_FRAGMENT:
204 call_conv = AC_LLVM_AMDGPU_PS;
205 break;
206 case PIPE_SHADER_COMPUTE:
207 call_conv = AC_LLVM_AMDGPU_CS;
208 break;
209 default:
210 unreachable("Unhandle shader type");
211 }
212
213 /* Setup the function */
214 ctx->return_type = ret_type;
215 ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
216 ret_type, ctx->ac.module);
217 ctx->return_value = LLVMGetUndef(ctx->return_type);
218
219 if (ctx->screen->info.address32_hi) {
220 ac_llvm_add_target_dep_function_attr(ctx->main_fn,
221 "amdgpu-32bit-address-high-bits",
222 ctx->screen->info.address32_hi);
223 }
224
225 LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
226 "no-signed-zeros-fp-math",
227 "true");
228
229 ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
230 }
231
232 void si_llvm_optimize_module(struct si_shader_context *ctx)
233 {
234 /* Dump LLVM IR before any optimization passes */
235 if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
236 si_can_dump_shader(ctx->screen, ctx->type))
237 LLVMDumpModule(ctx->ac.module);
238
239 /* Run the pass */
240 LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
241 LLVMDisposeBuilder(ctx->ac.builder);
242 }
243
244 void si_llvm_dispose(struct si_shader_context *ctx)
245 {
246 LLVMDisposeModule(ctx->ac.module);
247 LLVMContextDispose(ctx->ac.context);
248 ac_llvm_context_dispose(&ctx->ac);
249 }
250
251 /**
252 * Load a dword from a constant buffer.
253 */
254 LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx,
255 LLVMValueRef resource, LLVMValueRef offset)
256 {
257 return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
258 0, 0, true, true);
259 }
260
261 void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
262 {
263 if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
264 LLVMBuildRetVoid(ctx->ac.builder);
265 else
266 LLVMBuildRet(ctx->ac.builder, ret);
267 }
268
269 LLVMValueRef si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
270 struct ac_arg param, unsigned return_index)
271 {
272 return LLVMBuildInsertValue(ctx->ac.builder, ret,
273 ac_get_arg(&ctx->ac, param),
274 return_index, "");
275 }
276
277 LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
278 struct ac_arg param, unsigned return_index)
279 {
280 LLVMBuilderRef builder = ctx->ac.builder;
281 LLVMValueRef p = ac_get_arg(&ctx->ac, param);
282
283 return LLVMBuildInsertValue(builder, ret,
284 ac_to_float(&ctx->ac, p),
285 return_index, "");
286 }
287
288 LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
289 struct ac_arg param, unsigned return_index)
290 {
291 LLVMBuilderRef builder = ctx->ac.builder;
292 LLVMValueRef ptr = ac_get_arg(&ctx->ac, param);
293 ptr = LLVMBuildPtrToInt(builder, ptr, ctx->ac.i32, "");
294 return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
295 }
296
297 LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
298 {
299 LLVMValueRef ptr[2], list;
300 bool merged_shader = si_is_merged_shader(ctx);
301
302 ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
303 list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
304 ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
305 return list;
306 }
307
308 LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
309 LLVMTypeRef type, LLVMValueRef val1,
310 LLVMValueRef val2)
311 {
312 LLVMValueRef values[2] = {
313 ac_to_integer(&ctx->ac, val1),
314 ac_to_integer(&ctx->ac, val2),
315 };
316 LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
317 return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
318 }
319
320 void si_llvm_emit_barrier(struct si_shader_context *ctx)
321 {
322 /* GFX6 only (thanks to a hw bug workaround):
323 * The real barrier instruction isn’t needed, because an entire patch
324 * always fits into a single wave.
325 */
326 if (ctx->screen->info.chip_class == GFX6 &&
327 ctx->type == PIPE_SHADER_TESS_CTRL) {
328 ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
329 return;
330 }
331
332 ac_build_s_barrier(&ctx->ac);
333 }
334
335 /* Ensure that the esgs ring is declared.
336 *
337 * We declare it with 64KB alignment as a hint that the
338 * pointer value will always be 0.
339 */
340 void si_llvm_declare_esgs_ring(struct si_shader_context *ctx)
341 {
342 if (ctx->esgs_ring)
343 return;
344
345 assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
346
347 ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
348 ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
349 "esgs_ring",
350 AC_ADDR_SPACE_LDS);
351 LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
352 LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
353 }
354
355 void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param,
356 unsigned bitoffset)
357 {
358 LLVMValueRef args[] = {
359 ac_get_arg(&ctx->ac, param),
360 LLVMConstInt(ctx->ac.i32, bitoffset, 0),
361 };
362 ac_build_intrinsic(&ctx->ac,
363 "llvm.amdgcn.init.exec.from.input",
364 ctx->ac.voidt, args, 2, AC_FUNC_ATTR_CONVERGENT);
365 }