2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
30 #include "tgsi/tgsi_from_mesa.h"
31 #include "util/u_memory.h"
33 struct si_llvm_diagnostics
{
34 struct pipe_debug_callback
*debug
;
38 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
40 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
41 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
42 const char *severity_str
= NULL
;
46 severity_str
= "error";
49 severity_str
= "warning";
57 char *description
= LLVMGetDiagInfoDescription(di
);
59 pipe_debug_message(diag
->debug
, SHADER_INFO
,
60 "LLVM diagnostic (%s): %s", severity_str
, description
);
62 if (severity
== LLVMDSError
) {
64 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
67 LLVMDisposeMessage(description
);
70 int si_compile_llvm(struct si_screen
*sscreen
,
71 struct si_shader_binary
*binary
,
72 struct ac_shader_config
*conf
,
73 struct ac_llvm_compiler
*compiler
,
74 struct ac_llvm_context
*ac
,
75 struct pipe_debug_callback
*debug
,
76 enum pipe_shader_type shader_type
,
80 unsigned count
= p_atomic_inc_return(&sscreen
->num_compilations
);
82 if (si_can_dump_shader(sscreen
, shader_type
)) {
83 fprintf(stderr
, "radeonsi: Compiling shader %d\n", count
);
85 if (!(sscreen
->debug_flags
& (DBG(NO_IR
) | DBG(PREOPT_IR
)))) {
86 fprintf(stderr
, "%s LLVM IR:\n\n", name
);
87 ac_dump_module(ac
->module
);
88 fprintf(stderr
, "\n");
92 if (sscreen
->record_llvm_ir
) {
93 char *ir
= LLVMPrintModuleToString(ac
->module
);
94 binary
->llvm_ir_string
= strdup(ir
);
95 LLVMDisposeMessage(ir
);
98 if (!si_replace_shader(count
, binary
)) {
99 struct ac_compiler_passes
*passes
= compiler
->passes
;
101 if (ac
->wave_size
== 32)
102 passes
= compiler
->passes_wave32
;
103 else if (less_optimized
&& compiler
->low_opt_passes
)
104 passes
= compiler
->low_opt_passes
;
106 struct si_llvm_diagnostics diag
= {debug
};
107 LLVMContextSetDiagnosticHandler(ac
->context
, si_diagnostic_handler
, &diag
);
109 if (!ac_compile_module_to_elf(passes
, ac
->module
,
110 (char **)&binary
->elf_buffer
,
114 if (diag
.retval
!= 0) {
115 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compilation failed");
120 struct ac_rtld_binary rtld
;
121 if (!ac_rtld_open(&rtld
, (struct ac_rtld_open_info
){
122 .info
= &sscreen
->info
,
123 .shader_type
= tgsi_processor_to_shader_stage(shader_type
),
124 .wave_size
= ac
->wave_size
,
126 .elf_ptrs
= &binary
->elf_buffer
,
127 .elf_sizes
= &binary
->elf_size
}))
130 bool ok
= ac_rtld_read_config(&rtld
, conf
);
131 ac_rtld_close(&rtld
);
135 /* Enable 64-bit and 16-bit denormals, because there is no performance
138 * If denormals are enabled, all floating-point output modifiers are
141 * Don't enable denormals for 32-bit floats, because:
142 * - Floating-point output modifiers would be ignored by the hw.
143 * - Some opcodes don't support denormals, such as v_mad_f32. We would
144 * have to stop using those.
145 * - GFX6 & GFX7 would be very slow.
147 conf
->float_mode
|= V_00B028_FP_64_DENORMS
;
152 void si_llvm_context_init(struct si_shader_context
*ctx
,
153 struct si_screen
*sscreen
,
154 struct ac_llvm_compiler
*compiler
,
157 memset(ctx
, 0, sizeof(*ctx
));
158 ctx
->screen
= sscreen
;
159 ctx
->compiler
= compiler
;
161 ac_llvm_context_init(&ctx
->ac
, compiler
, sscreen
->info
.chip_class
,
162 sscreen
->info
.family
,
163 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH
,
167 void si_llvm_create_func(struct si_shader_context
*ctx
, const char *name
,
168 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
169 unsigned max_workgroup_size
)
171 LLVMTypeRef ret_type
;
172 enum ac_llvm_calling_convention call_conv
;
173 enum pipe_shader_type real_shader_type
;
175 if (num_return_elems
)
176 ret_type
= LLVMStructTypeInContext(ctx
->ac
.context
,
178 num_return_elems
, true);
180 ret_type
= ctx
->ac
.voidt
;
182 real_shader_type
= ctx
->type
;
184 /* LS is merged into HS (TCS), and ES is merged into GS. */
185 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
186 if (ctx
->shader
->key
.as_ls
)
187 real_shader_type
= PIPE_SHADER_TESS_CTRL
;
188 else if (ctx
->shader
->key
.as_es
|| ctx
->shader
->key
.as_ngg
)
189 real_shader_type
= PIPE_SHADER_GEOMETRY
;
192 switch (real_shader_type
) {
193 case PIPE_SHADER_VERTEX
:
194 case PIPE_SHADER_TESS_EVAL
:
195 call_conv
= AC_LLVM_AMDGPU_VS
;
197 case PIPE_SHADER_TESS_CTRL
:
198 call_conv
= AC_LLVM_AMDGPU_HS
;
200 case PIPE_SHADER_GEOMETRY
:
201 call_conv
= AC_LLVM_AMDGPU_GS
;
203 case PIPE_SHADER_FRAGMENT
:
204 call_conv
= AC_LLVM_AMDGPU_PS
;
206 case PIPE_SHADER_COMPUTE
:
207 call_conv
= AC_LLVM_AMDGPU_CS
;
210 unreachable("Unhandle shader type");
213 /* Setup the function */
214 ctx
->return_type
= ret_type
;
215 ctx
->main_fn
= ac_build_main(&ctx
->args
, &ctx
->ac
, call_conv
, name
,
216 ret_type
, ctx
->ac
.module
);
217 ctx
->return_value
= LLVMGetUndef(ctx
->return_type
);
219 if (ctx
->screen
->info
.address32_hi
) {
220 ac_llvm_add_target_dep_function_attr(ctx
->main_fn
,
221 "amdgpu-32bit-address-high-bits",
222 ctx
->screen
->info
.address32_hi
);
225 LLVMAddTargetDependentFunctionAttr(ctx
->main_fn
,
226 "no-signed-zeros-fp-math",
229 ac_llvm_set_workgroup_size(ctx
->main_fn
, max_workgroup_size
);
232 void si_llvm_optimize_module(struct si_shader_context
*ctx
)
234 /* Dump LLVM IR before any optimization passes */
235 if (ctx
->screen
->debug_flags
& DBG(PREOPT_IR
) &&
236 si_can_dump_shader(ctx
->screen
, ctx
->type
))
237 LLVMDumpModule(ctx
->ac
.module
);
240 LLVMRunPassManager(ctx
->compiler
->passmgr
, ctx
->ac
.module
);
241 LLVMDisposeBuilder(ctx
->ac
.builder
);
244 void si_llvm_dispose(struct si_shader_context
*ctx
)
246 LLVMDisposeModule(ctx
->ac
.module
);
247 LLVMContextDispose(ctx
->ac
.context
);
248 ac_llvm_context_dispose(&ctx
->ac
);
252 * Load a dword from a constant buffer.
254 LLVMValueRef
si_buffer_load_const(struct si_shader_context
*ctx
,
255 LLVMValueRef resource
, LLVMValueRef offset
)
257 return ac_build_buffer_load(&ctx
->ac
, resource
, 1, NULL
, offset
, NULL
,
261 void si_llvm_build_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
)
263 if (LLVMGetTypeKind(LLVMTypeOf(ret
)) == LLVMVoidTypeKind
)
264 LLVMBuildRetVoid(ctx
->ac
.builder
);
266 LLVMBuildRet(ctx
->ac
.builder
, ret
);
269 LLVMValueRef
si_insert_input_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
,
270 struct ac_arg param
, unsigned return_index
)
272 return LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
273 ac_get_arg(&ctx
->ac
, param
),
277 LLVMValueRef
si_insert_input_ret_float(struct si_shader_context
*ctx
, LLVMValueRef ret
,
278 struct ac_arg param
, unsigned return_index
)
280 LLVMBuilderRef builder
= ctx
->ac
.builder
;
281 LLVMValueRef p
= ac_get_arg(&ctx
->ac
, param
);
283 return LLVMBuildInsertValue(builder
, ret
,
284 ac_to_float(&ctx
->ac
, p
),
288 LLVMValueRef
si_insert_input_ptr(struct si_shader_context
*ctx
, LLVMValueRef ret
,
289 struct ac_arg param
, unsigned return_index
)
291 LLVMBuilderRef builder
= ctx
->ac
.builder
;
292 LLVMValueRef ptr
= ac_get_arg(&ctx
->ac
, param
);
293 ptr
= LLVMBuildPtrToInt(builder
, ptr
, ctx
->ac
.i32
, "");
294 return LLVMBuildInsertValue(builder
, ret
, ptr
, return_index
, "");
297 LLVMValueRef
si_prolog_get_rw_buffers(struct si_shader_context
*ctx
)
299 LLVMValueRef ptr
[2], list
;
300 bool merged_shader
= si_is_merged_shader(ctx
);
302 ptr
[0] = LLVMGetParam(ctx
->main_fn
, (merged_shader
? 8 : 0) + SI_SGPR_RW_BUFFERS
);
303 list
= LLVMBuildIntToPtr(ctx
->ac
.builder
, ptr
[0],
304 ac_array_in_const32_addr_space(ctx
->ac
.v4i32
), "");
308 LLVMValueRef
si_build_gather_64bit(struct si_shader_context
*ctx
,
309 LLVMTypeRef type
, LLVMValueRef val1
,
312 LLVMValueRef values
[2] = {
313 ac_to_integer(&ctx
->ac
, val1
),
314 ac_to_integer(&ctx
->ac
, val2
),
316 LLVMValueRef result
= ac_build_gather_values(&ctx
->ac
, values
, 2);
317 return LLVMBuildBitCast(ctx
->ac
.builder
, result
, type
, "");
320 void si_llvm_emit_barrier(struct si_shader_context
*ctx
)
322 /* GFX6 only (thanks to a hw bug workaround):
323 * The real barrier instruction isn’t needed, because an entire patch
324 * always fits into a single wave.
326 if (ctx
->screen
->info
.chip_class
== GFX6
&&
327 ctx
->type
== PIPE_SHADER_TESS_CTRL
) {
328 ac_build_waitcnt(&ctx
->ac
, AC_WAIT_LGKM
| AC_WAIT_VLOAD
| AC_WAIT_VSTORE
);
332 ac_build_s_barrier(&ctx
->ac
);
335 /* Ensure that the esgs ring is declared.
337 * We declare it with 64KB alignment as a hint that the
338 * pointer value will always be 0.
340 void si_llvm_declare_esgs_ring(struct si_shader_context
*ctx
)
345 assert(!LLVMGetNamedGlobal(ctx
->ac
.module
, "esgs_ring"));
347 ctx
->esgs_ring
= LLVMAddGlobalInAddressSpace(
348 ctx
->ac
.module
, LLVMArrayType(ctx
->ac
.i32
, 0),
351 LLVMSetLinkage(ctx
->esgs_ring
, LLVMExternalLinkage
);
352 LLVMSetAlignment(ctx
->esgs_ring
, 64 * 1024);
355 void si_init_exec_from_input(struct si_shader_context
*ctx
, struct ac_arg param
,
358 LLVMValueRef args
[] = {
359 ac_get_arg(&ctx
->ac
, param
),
360 LLVMConstInt(ctx
->ac
.i32
, bitoffset
, 0),
362 ac_build_intrinsic(&ctx
->ac
,
363 "llvm.amdgcn.init.exec.from.input",
364 ctx
->ac
.voidt
, args
, 2, AC_FUNC_ATTR_CONVERGENT
);