2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
28 #include "ac_nir_to_llvm.h"
31 #include "tgsi/tgsi_from_mesa.h"
32 #include "util/u_memory.h"
/* Context handed to si_diagnostic_handler via the LLVM diagnostic callback.
 *
 * Fix: the struct as pasted here lost its "retval" member, which
 * si_compile_llvm demonstrably reads (diag.retval) to detect compile errors.
 */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	unsigned retval; /* set non-zero when an LLVMDSError diagnostic arrives */
};
39 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
41 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
42 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
43 const char *severity_str
= NULL
;
47 severity_str
= "error";
50 severity_str
= "warning";
58 char *description
= LLVMGetDiagInfoDescription(di
);
60 pipe_debug_message(diag
->debug
, SHADER_INFO
,
61 "LLVM diagnostic (%s): %s", severity_str
, description
);
63 if (severity
== LLVMDSError
) {
65 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
68 LLVMDisposeMessage(description
);
71 int si_compile_llvm(struct si_screen
*sscreen
,
72 struct si_shader_binary
*binary
,
73 struct ac_shader_config
*conf
,
74 struct ac_llvm_compiler
*compiler
,
75 struct ac_llvm_context
*ac
,
76 struct pipe_debug_callback
*debug
,
77 enum pipe_shader_type shader_type
,
81 unsigned count
= p_atomic_inc_return(&sscreen
->num_compilations
);
83 if (si_can_dump_shader(sscreen
, shader_type
)) {
84 fprintf(stderr
, "radeonsi: Compiling shader %d\n", count
);
86 if (!(sscreen
->debug_flags
& (DBG(NO_IR
) | DBG(PREOPT_IR
)))) {
87 fprintf(stderr
, "%s LLVM IR:\n\n", name
);
88 ac_dump_module(ac
->module
);
89 fprintf(stderr
, "\n");
93 if (sscreen
->record_llvm_ir
) {
94 char *ir
= LLVMPrintModuleToString(ac
->module
);
95 binary
->llvm_ir_string
= strdup(ir
);
96 LLVMDisposeMessage(ir
);
99 if (!si_replace_shader(count
, binary
)) {
100 struct ac_compiler_passes
*passes
= compiler
->passes
;
102 if (ac
->wave_size
== 32)
103 passes
= compiler
->passes_wave32
;
104 else if (less_optimized
&& compiler
->low_opt_passes
)
105 passes
= compiler
->low_opt_passes
;
107 struct si_llvm_diagnostics diag
= {debug
};
108 LLVMContextSetDiagnosticHandler(ac
->context
, si_diagnostic_handler
, &diag
);
110 if (!ac_compile_module_to_elf(passes
, ac
->module
,
111 (char **)&binary
->elf_buffer
,
115 if (diag
.retval
!= 0) {
116 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compilation failed");
121 struct ac_rtld_binary rtld
;
122 if (!ac_rtld_open(&rtld
, (struct ac_rtld_open_info
){
123 .info
= &sscreen
->info
,
124 .shader_type
= tgsi_processor_to_shader_stage(shader_type
),
125 .wave_size
= ac
->wave_size
,
127 .elf_ptrs
= &binary
->elf_buffer
,
128 .elf_sizes
= &binary
->elf_size
}))
131 bool ok
= ac_rtld_read_config(&rtld
, conf
);
132 ac_rtld_close(&rtld
);
136 /* Enable 64-bit and 16-bit denormals, because there is no performance
139 * If denormals are enabled, all floating-point output modifiers are
142 * Don't enable denormals for 32-bit floats, because:
143 * - Floating-point output modifiers would be ignored by the hw.
144 * - Some opcodes don't support denormals, such as v_mad_f32. We would
145 * have to stop using those.
146 * - GFX6 & GFX7 would be very slow.
148 conf
->float_mode
|= V_00B028_FP_64_DENORMS
;
153 void si_llvm_context_init(struct si_shader_context
*ctx
,
154 struct si_screen
*sscreen
,
155 struct ac_llvm_compiler
*compiler
,
158 memset(ctx
, 0, sizeof(*ctx
));
159 ctx
->screen
= sscreen
;
160 ctx
->compiler
= compiler
;
162 ac_llvm_context_init(&ctx
->ac
, compiler
, sscreen
->info
.chip_class
,
163 sscreen
->info
.family
,
164 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH
,
168 void si_llvm_create_func(struct si_shader_context
*ctx
, const char *name
,
169 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
170 unsigned max_workgroup_size
)
172 LLVMTypeRef ret_type
;
173 enum ac_llvm_calling_convention call_conv
;
174 enum pipe_shader_type real_shader_type
;
176 if (num_return_elems
)
177 ret_type
= LLVMStructTypeInContext(ctx
->ac
.context
,
179 num_return_elems
, true);
181 ret_type
= ctx
->ac
.voidt
;
183 real_shader_type
= ctx
->type
;
185 /* LS is merged into HS (TCS), and ES is merged into GS. */
186 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
187 if (ctx
->shader
->key
.as_ls
)
188 real_shader_type
= PIPE_SHADER_TESS_CTRL
;
189 else if (ctx
->shader
->key
.as_es
|| ctx
->shader
->key
.as_ngg
)
190 real_shader_type
= PIPE_SHADER_GEOMETRY
;
193 switch (real_shader_type
) {
194 case PIPE_SHADER_VERTEX
:
195 case PIPE_SHADER_TESS_EVAL
:
196 call_conv
= AC_LLVM_AMDGPU_VS
;
198 case PIPE_SHADER_TESS_CTRL
:
199 call_conv
= AC_LLVM_AMDGPU_HS
;
201 case PIPE_SHADER_GEOMETRY
:
202 call_conv
= AC_LLVM_AMDGPU_GS
;
204 case PIPE_SHADER_FRAGMENT
:
205 call_conv
= AC_LLVM_AMDGPU_PS
;
207 case PIPE_SHADER_COMPUTE
:
208 call_conv
= AC_LLVM_AMDGPU_CS
;
211 unreachable("Unhandle shader type");
214 /* Setup the function */
215 ctx
->return_type
= ret_type
;
216 ctx
->main_fn
= ac_build_main(&ctx
->args
, &ctx
->ac
, call_conv
, name
,
217 ret_type
, ctx
->ac
.module
);
218 ctx
->return_value
= LLVMGetUndef(ctx
->return_type
);
220 if (ctx
->screen
->info
.address32_hi
) {
221 ac_llvm_add_target_dep_function_attr(ctx
->main_fn
,
222 "amdgpu-32bit-address-high-bits",
223 ctx
->screen
->info
.address32_hi
);
226 LLVMAddTargetDependentFunctionAttr(ctx
->main_fn
,
227 "no-signed-zeros-fp-math",
230 ac_llvm_set_workgroup_size(ctx
->main_fn
, max_workgroup_size
);
233 void si_llvm_optimize_module(struct si_shader_context
*ctx
)
235 /* Dump LLVM IR before any optimization passes */
236 if (ctx
->screen
->debug_flags
& DBG(PREOPT_IR
) &&
237 si_can_dump_shader(ctx
->screen
, ctx
->type
))
238 LLVMDumpModule(ctx
->ac
.module
);
241 LLVMRunPassManager(ctx
->compiler
->passmgr
, ctx
->ac
.module
);
242 LLVMDisposeBuilder(ctx
->ac
.builder
);
245 void si_llvm_dispose(struct si_shader_context
*ctx
)
247 LLVMDisposeModule(ctx
->ac
.module
);
248 LLVMContextDispose(ctx
->ac
.context
);
249 ac_llvm_context_dispose(&ctx
->ac
);
253 * Load a dword from a constant buffer.
255 LLVMValueRef
si_buffer_load_const(struct si_shader_context
*ctx
,
256 LLVMValueRef resource
, LLVMValueRef offset
)
258 return ac_build_buffer_load(&ctx
->ac
, resource
, 1, NULL
, offset
, NULL
,
262 void si_llvm_build_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
)
264 if (LLVMGetTypeKind(LLVMTypeOf(ret
)) == LLVMVoidTypeKind
)
265 LLVMBuildRetVoid(ctx
->ac
.builder
);
267 LLVMBuildRet(ctx
->ac
.builder
, ret
);
270 LLVMValueRef
si_insert_input_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
,
271 struct ac_arg param
, unsigned return_index
)
273 return LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
274 ac_get_arg(&ctx
->ac
, param
),
278 LLVMValueRef
si_insert_input_ret_float(struct si_shader_context
*ctx
, LLVMValueRef ret
,
279 struct ac_arg param
, unsigned return_index
)
281 LLVMBuilderRef builder
= ctx
->ac
.builder
;
282 LLVMValueRef p
= ac_get_arg(&ctx
->ac
, param
);
284 return LLVMBuildInsertValue(builder
, ret
,
285 ac_to_float(&ctx
->ac
, p
),
289 LLVMValueRef
si_insert_input_ptr(struct si_shader_context
*ctx
, LLVMValueRef ret
,
290 struct ac_arg param
, unsigned return_index
)
292 LLVMBuilderRef builder
= ctx
->ac
.builder
;
293 LLVMValueRef ptr
= ac_get_arg(&ctx
->ac
, param
);
294 ptr
= LLVMBuildPtrToInt(builder
, ptr
, ctx
->ac
.i32
, "");
295 return LLVMBuildInsertValue(builder
, ret
, ptr
, return_index
, "");
298 LLVMValueRef
si_prolog_get_rw_buffers(struct si_shader_context
*ctx
)
300 LLVMValueRef ptr
[2], list
;
301 bool merged_shader
= si_is_merged_shader(ctx
);
303 ptr
[0] = LLVMGetParam(ctx
->main_fn
, (merged_shader
? 8 : 0) + SI_SGPR_RW_BUFFERS
);
304 list
= LLVMBuildIntToPtr(ctx
->ac
.builder
, ptr
[0],
305 ac_array_in_const32_addr_space(ctx
->ac
.v4i32
), "");
309 LLVMValueRef
si_build_gather_64bit(struct si_shader_context
*ctx
,
310 LLVMTypeRef type
, LLVMValueRef val1
,
313 LLVMValueRef values
[2] = {
314 ac_to_integer(&ctx
->ac
, val1
),
315 ac_to_integer(&ctx
->ac
, val2
),
317 LLVMValueRef result
= ac_build_gather_values(&ctx
->ac
, values
, 2);
318 return LLVMBuildBitCast(ctx
->ac
.builder
, result
, type
, "");
321 void si_llvm_emit_barrier(struct si_shader_context
*ctx
)
323 /* GFX6 only (thanks to a hw bug workaround):
324 * The real barrier instruction isn’t needed, because an entire patch
325 * always fits into a single wave.
327 if (ctx
->screen
->info
.chip_class
== GFX6
&&
328 ctx
->type
== PIPE_SHADER_TESS_CTRL
) {
329 ac_build_waitcnt(&ctx
->ac
, AC_WAIT_LGKM
| AC_WAIT_VLOAD
| AC_WAIT_VSTORE
);
333 ac_build_s_barrier(&ctx
->ac
);
336 /* Ensure that the esgs ring is declared.
338 * We declare it with 64KB alignment as a hint that the
339 * pointer value will always be 0.
341 void si_llvm_declare_esgs_ring(struct si_shader_context
*ctx
)
346 assert(!LLVMGetNamedGlobal(ctx
->ac
.module
, "esgs_ring"));
348 ctx
->esgs_ring
= LLVMAddGlobalInAddressSpace(
349 ctx
->ac
.module
, LLVMArrayType(ctx
->ac
.i32
, 0),
352 LLVMSetLinkage(ctx
->esgs_ring
, LLVMExternalLinkage
);
353 LLVMSetAlignment(ctx
->esgs_ring
, 64 * 1024);
356 void si_init_exec_from_input(struct si_shader_context
*ctx
, struct ac_arg param
,
359 LLVMValueRef args
[] = {
360 ac_get_arg(&ctx
->ac
, param
),
361 LLVMConstInt(ctx
->ac
.i32
, bitoffset
, 0),
363 ac_build_intrinsic(&ctx
->ac
,
364 "llvm.amdgcn.init.exec.from.input",
365 ctx
->ac
.voidt
, args
, 2, AC_FUNC_ATTR_CONVERGENT
);
368 bool si_nir_build_llvm(struct si_shader_context
*ctx
, struct nir_shader
*nir
)
370 if (nir
->info
.stage
== MESA_SHADER_VERTEX
) {
371 si_llvm_load_vs_inputs(ctx
, nir
);
372 } else if (nir
->info
.stage
== MESA_SHADER_FRAGMENT
) {
373 unsigned colors_read
=
374 ctx
->shader
->selector
->info
.colors_read
;
375 LLVMValueRef main_fn
= ctx
->main_fn
;
377 LLVMValueRef undef
= LLVMGetUndef(ctx
->ac
.f32
);
379 unsigned offset
= SI_PARAM_POS_FIXED_PT
+ 1;
381 if (colors_read
& 0x0f) {
382 unsigned mask
= colors_read
& 0x0f;
383 LLVMValueRef values
[4];
384 values
[0] = mask
& 0x1 ? LLVMGetParam(main_fn
, offset
++) : undef
;
385 values
[1] = mask
& 0x2 ? LLVMGetParam(main_fn
, offset
++) : undef
;
386 values
[2] = mask
& 0x4 ? LLVMGetParam(main_fn
, offset
++) : undef
;
387 values
[3] = mask
& 0x8 ? LLVMGetParam(main_fn
, offset
++) : undef
;
389 ac_to_integer(&ctx
->ac
,
390 ac_build_gather_values(&ctx
->ac
, values
, 4));
392 if (colors_read
& 0xf0) {
393 unsigned mask
= (colors_read
& 0xf0) >> 4;
394 LLVMValueRef values
[4];
395 values
[0] = mask
& 0x1 ? LLVMGetParam(main_fn
, offset
++) : undef
;
396 values
[1] = mask
& 0x2 ? LLVMGetParam(main_fn
, offset
++) : undef
;
397 values
[2] = mask
& 0x4 ? LLVMGetParam(main_fn
, offset
++) : undef
;
398 values
[3] = mask
& 0x8 ? LLVMGetParam(main_fn
, offset
++) : undef
;
400 ac_to_integer(&ctx
->ac
,
401 ac_build_gather_values(&ctx
->ac
, values
, 4));
404 ctx
->abi
.interp_at_sample_force_center
=
405 ctx
->shader
->key
.mono
.u
.ps
.interpolate_at_sample_force_center
;
406 } else if (nir
->info
.stage
== MESA_SHADER_COMPUTE
) {
407 if (nir
->info
.cs
.user_data_components_amd
) {
408 ctx
->abi
.user_data
= ac_get_arg(&ctx
->ac
, ctx
->cs_user_data
);
409 ctx
->abi
.user_data
= ac_build_expand_to_vec4(&ctx
->ac
, ctx
->abi
.user_data
,
410 nir
->info
.cs
.user_data_components_amd
);
414 ctx
->abi
.inputs
= &ctx
->inputs
[0];
415 ctx
->abi
.clamp_shadow_reference
= true;
416 ctx
->abi
.robust_buffer_access
= true;
418 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_CS_LOCAL_SIZE
]) {
419 assert(gl_shader_stage_is_compute(nir
->info
.stage
));
420 si_declare_compute_memory(ctx
);
422 ac_nir_translate(&ctx
->ac
, &ctx
->abi
, &ctx
->args
, nir
);