radeonsi: move si_nir_build_llvm into si_shader_llvm.c
[mesa.git] / src/gallium/drivers/radeonsi/si_shader_llvm.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_shader_internal.h"
#include "si_pipe.h"
#include "ac_rtld.h"
#include "ac_nir_to_llvm.h"
#include "sid.h"

#include "tgsi/tgsi_from_mesa.h"
#include "util/u_memory.h"

struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	unsigned retval;
};

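/* Forward LLVM diagnostics to the pipe debug callback and remember whether
 * an error was reported, so that si_compile_llvm can fail the compilation.
 */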
static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
{
	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
	const char *severity_str = NULL;

	switch (severity) {
	case LLVMDSError:
		severity_str = "error";
		break;
	case LLVMDSWarning:
		severity_str = "warning";
		break;
	case LLVMDSRemark:
	case LLVMDSNote:
	default:
		return;
	}

	char *description = LLVMGetDiagInfoDescription(di);

	pipe_debug_message(diag->debug, SHADER_INFO,
			   "LLVM diagnostic (%s): %s", severity_str, description);

	if (severity == LLVMDSError) {
		diag->retval = 1;
		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
	}

	LLVMDisposeMessage(description);
}

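/* Run the LLVM passes on the module in "ac", write the resulting code as an
 * ELF image into "binary", and read the shader config registers back out of
 * it. Returns 0 on success and a non-zero value on failure.
 */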
int si_compile_llvm(struct si_screen *sscreen,
		    struct si_shader_binary *binary,
		    struct ac_shader_config *conf,
		    struct ac_llvm_compiler *compiler,
		    struct ac_llvm_context *ac,
		    struct pipe_debug_callback *debug,
		    enum pipe_shader_type shader_type,
		    const char *name,
		    bool less_optimized)
{
	unsigned count = p_atomic_inc_return(&sscreen->num_compilations);

	if (si_can_dump_shader(sscreen, shader_type)) {
		fprintf(stderr, "radeonsi: Compiling shader %d\n", count);

		if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
			fprintf(stderr, "%s LLVM IR:\n\n", name);
			ac_dump_module(ac->module);
			fprintf(stderr, "\n");
		}
	}

	if (sscreen->record_llvm_ir) {
		char *ir = LLVMPrintModuleToString(ac->module);
		binary->llvm_ir_string = strdup(ir);
		LLVMDisposeMessage(ir);
	}

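	/* si_replace_shader is a debugging aid that can substitute the
	 * compiled binary with one loaded from a file; only run the LLVM
	 * compilation when no replacement was provided.
	 */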
	if (!si_replace_shader(count, binary)) {
		struct ac_compiler_passes *passes = compiler->passes;

		if (ac->wave_size == 32)
			passes = compiler->passes_wave32;
		else if (less_optimized && compiler->low_opt_passes)
			passes = compiler->low_opt_passes;

		struct si_llvm_diagnostics diag = {debug};
		LLVMContextSetDiagnosticHandler(ac->context, si_diagnostic_handler, &diag);

		if (!ac_compile_module_to_elf(passes, ac->module,
					      (char **)&binary->elf_buffer,
					      &binary->elf_size))
			diag.retval = 1;

		if (diag.retval != 0) {
			pipe_debug_message(debug, SHADER_INFO, "LLVM compilation failed");
			return diag.retval;
		}
	}

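	/* Parse the ELF image with ac_rtld just to read the shader config
	 * (register setup, resource usage) out of it.
	 */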
	struct ac_rtld_binary rtld;
	if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
			.info = &sscreen->info,
			.shader_type = tgsi_processor_to_shader_stage(shader_type),
			.wave_size = ac->wave_size,
			.num_parts = 1,
			.elf_ptrs = &binary->elf_buffer,
			.elf_sizes = &binary->elf_size }))
		return -1;

	bool ok = ac_rtld_read_config(&rtld, conf);
	ac_rtld_close(&rtld);
	if (!ok)
		return -1;

	/* Enable 64-bit and 16-bit denormals, because there is no performance
	 * cost.
	 *
	 * If denormals are enabled, all floating-point output modifiers are
	 * ignored.
	 *
	 * Don't enable denormals for 32-bit floats, because:
	 * - Floating-point output modifiers would be ignored by the hw.
	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
	 *   have to stop using those.
	 * - GFX6 & GFX7 would be very slow.
	 */
	conf->float_mode |= V_00B028_FP_64_DENORMS;

	return 0;
}

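/* Initialize the shader context and the underlying ac_llvm_context (LLVM
 * context, module, builder and common types) for the given chip and wave size.
 */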
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  struct ac_llvm_compiler *compiler,
			  unsigned wave_size)
{
	memset(ctx, 0, sizeof(*ctx));
	ctx->screen = sscreen;
	ctx->compiler = compiler;

	ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
			     sscreen->info.family,
			     AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
			     wave_size, 64);
}

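/* Create the main LLVM function for the shader. The return type is a struct
 * of "return_types" (or void), and the calling convention is derived from the
 * (possibly merged) shader stage.
 */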
void si_llvm_create_func(struct si_shader_context *ctx, const char *name,
			 LLVMTypeRef *return_types, unsigned num_return_elems,
			 unsigned max_workgroup_size)
{
	LLVMTypeRef ret_type;
	enum ac_llvm_calling_convention call_conv;
	enum pipe_shader_type real_shader_type;

	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx->ac.context,
						   return_types,
						   num_return_elems, true);
	else
		ret_type = ctx->ac.voidt;

	real_shader_type = ctx->type;

	/* LS is merged into HS (TCS), and ES is merged into GS. */
	if (ctx->screen->info.chip_class >= GFX9) {
		if (ctx->shader->key.as_ls)
			real_shader_type = PIPE_SHADER_TESS_CTRL;
		else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
			real_shader_type = PIPE_SHADER_GEOMETRY;
	}

	switch (real_shader_type) {
	case PIPE_SHADER_VERTEX:
	case PIPE_SHADER_TESS_EVAL:
		call_conv = AC_LLVM_AMDGPU_VS;
		break;
	case PIPE_SHADER_TESS_CTRL:
		call_conv = AC_LLVM_AMDGPU_HS;
		break;
	case PIPE_SHADER_GEOMETRY:
		call_conv = AC_LLVM_AMDGPU_GS;
		break;
	case PIPE_SHADER_FRAGMENT:
		call_conv = AC_LLVM_AMDGPU_PS;
		break;
	case PIPE_SHADER_COMPUTE:
		call_conv = AC_LLVM_AMDGPU_CS;
		break;
	default:
		unreachable("Unhandled shader type");
	}

	/* Set up the function. */
	ctx->return_type = ret_type;
	ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
				     ret_type, ctx->ac.module);
	ctx->return_value = LLVMGetUndef(ctx->return_type);

	if (ctx->screen->info.address32_hi) {
		ac_llvm_add_target_dep_function_attr(ctx->main_fn,
						     "amdgpu-32bit-address-high-bits",
						     ctx->screen->info.address32_hi);
	}

	LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
					   "no-signed-zeros-fp-math",
					   "true");

	ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
}

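/* Run the LLVM optimization passes over the module. The IR builder is no
 * longer needed once the module is final, so it is disposed here as well.
 */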
void si_llvm_optimize_module(struct si_shader_context *ctx)
{
	/* Dump LLVM IR before any optimization passes */
	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
	    si_can_dump_shader(ctx->screen, ctx->type))
		LLVMDumpModule(ctx->ac.module);

	/* Run the pass */
	LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
	LLVMDisposeBuilder(ctx->ac.builder);
}

void si_llvm_dispose(struct si_shader_context *ctx)
{
	LLVMDisposeModule(ctx->ac.module);
	LLVMContextDispose(ctx->ac.context);
	ac_llvm_context_dispose(&ctx->ac);
}

/**
 * Load a dword from a constant buffer.
 */
LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx,
				  LLVMValueRef resource, LLVMValueRef offset)
{
	return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
				    0, 0, true, true);
}

void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
{
	if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
		LLVMBuildRetVoid(ctx->ac.builder);
	else
		LLVMBuildRet(ctx->ac.builder, ret);
}

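/* The si_insert_input_* helpers copy a shader argument into the return value
 * at the given index, so that it can be passed on to the next shader part.
 */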
LLVMValueRef si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
				 struct ac_arg param, unsigned return_index)
{
	return LLVMBuildInsertValue(ctx->ac.builder, ret,
				    ac_get_arg(&ctx->ac, param),
				    return_index, "");
}

LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
				       struct ac_arg param, unsigned return_index)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef p = ac_get_arg(&ctx->ac, param);

	return LLVMBuildInsertValue(builder, ret,
				    ac_to_float(&ctx->ac, p),
				    return_index, "");
}

LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
				 struct ac_arg param, unsigned return_index)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr = ac_get_arg(&ctx->ac, param);
	ptr = LLVMBuildPtrToInt(builder, ptr, ctx->ac.i32, "");
	return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
}

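/* Get the pointer to the RW buffer descriptor list from within a shader
 * prolog. In merged shaders (GFX9+) the user SGPRs start after the first
 * 8 system SGPRs, hence the extra offset.
 */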
LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
{
	LLVMValueRef ptr[2], list;
	bool merged_shader = si_is_merged_shader(ctx);

	ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
	list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
				 ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
	return list;
}

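/* Combine two 32-bit values into a single 64-bit value of the given type. */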
LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
				   LLVMTypeRef type, LLVMValueRef val1,
				   LLVMValueRef val2)
{
	LLVMValueRef values[2] = {
		ac_to_integer(&ctx->ac, val1),
		ac_to_integer(&ctx->ac, val2),
	};
	LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
}

void si_llvm_emit_barrier(struct si_shader_context *ctx)
{
	/* GFX6 only (thanks to a hw bug workaround):
	 * The real barrier instruction isn't needed, because an entire patch
	 * always fits into a single wave.
	 */
	if (ctx->screen->info.chip_class == GFX6 &&
	    ctx->type == PIPE_SHADER_TESS_CTRL) {
		ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
		return;
	}

	ac_build_s_barrier(&ctx->ac);
}

/* Ensure that the esgs ring is declared.
 *
 * We declare it with 64KB alignment as a hint that the
 * pointer value will always be 0.
 */
void si_llvm_declare_esgs_ring(struct si_shader_context *ctx)
{
	if (ctx->esgs_ring)
		return;

	assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));

	ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
		ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
		"esgs_ring",
		AC_ADDR_SPACE_LDS);
	LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
	LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
}

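/* Initialize the EXEC mask from a thread count stored in a shader argument
 * at the given bit offset (llvm.amdgcn.init.exec.from.input).
 */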
void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param,
			     unsigned bitoffset)
{
	LLVMValueRef args[] = {
		ac_get_arg(&ctx->ac, param),
		LLVMConstInt(ctx->ac.i32, bitoffset, 0),
	};
	ac_build_intrinsic(&ctx->ac,
			   "llvm.amdgcn.init.exec.from.input",
			   ctx->ac.voidt, args, 2, AC_FUNC_ATTR_CONVERGENT);
}

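/* Set up the stage-specific parts of the NIR->LLVM ABI (VS inputs, PS color
 * inputs, CS user data) and then translate the NIR shader into the current
 * LLVM module.
 */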
bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
{
	if (nir->info.stage == MESA_SHADER_VERTEX) {
		si_llvm_load_vs_inputs(ctx, nir);
	} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
		unsigned colors_read =
			ctx->shader->selector->info.colors_read;
		LLVMValueRef main_fn = ctx->main_fn;

		LLVMValueRef undef = LLVMGetUndef(ctx->ac.f32);

		unsigned offset = SI_PARAM_POS_FIXED_PT + 1;

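		/* The PS prolog passes the color inputs as extra VGPRs right
		 * after POS_FIXED_PT; gather them into vec4s for the
		 * NIR->LLVM ABI.
		 */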
		if (colors_read & 0x0f) {
			unsigned mask = colors_read & 0x0f;
			LLVMValueRef values[4];
			values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
			values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
			values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
			values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
			ctx->abi.color0 =
				ac_to_integer(&ctx->ac,
					      ac_build_gather_values(&ctx->ac, values, 4));
		}
		if (colors_read & 0xf0) {
			unsigned mask = (colors_read & 0xf0) >> 4;
			LLVMValueRef values[4];
			values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
			values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
			values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
			values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
			ctx->abi.color1 =
				ac_to_integer(&ctx->ac,
					      ac_build_gather_values(&ctx->ac, values, 4));
		}

		ctx->abi.interp_at_sample_force_center =
			ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center;
	} else if (nir->info.stage == MESA_SHADER_COMPUTE) {
		if (nir->info.cs.user_data_components_amd) {
			ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data);
			ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data,
								     nir->info.cs.user_data_components_amd);
		}
	}

	ctx->abi.inputs = &ctx->inputs[0];
	ctx->abi.clamp_shadow_reference = true;
	ctx->abi.robust_buffer_access = true;

	if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) {
		assert(gl_shader_stage_is_compute(nir->info.stage));
		si_declare_compute_memory(ctx);
	}
	ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args, nir);

	return true;
}