2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "ac_nir_to_llvm.h"
28 #include "si_shader_internal.h"
30 #include "tgsi/tgsi_from_mesa.h"
31 #include "util/u_memory.h"
33 struct si_llvm_diagnostics
{
34 struct pipe_debug_callback
*debug
;
38 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
40 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
41 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
42 const char *severity_str
= NULL
;
46 severity_str
= "error";
49 severity_str
= "warning";
57 char *description
= LLVMGetDiagInfoDescription(di
);
59 pipe_debug_message(diag
->debug
, SHADER_INFO
, "LLVM diagnostic (%s): %s", severity_str
,
62 if (severity
== LLVMDSError
) {
64 fprintf(stderr
, "LLVM triggered Diagnostic Handler: %s\n", description
);
67 LLVMDisposeMessage(description
);
70 bool si_compile_llvm(struct si_screen
*sscreen
, struct si_shader_binary
*binary
,
71 struct ac_shader_config
*conf
, struct ac_llvm_compiler
*compiler
,
72 struct ac_llvm_context
*ac
, struct pipe_debug_callback
*debug
,
73 gl_shader_stage stage
, const char *name
, bool less_optimized
)
75 unsigned count
= p_atomic_inc_return(&sscreen
->num_compilations
);
77 if (si_can_dump_shader(sscreen
, stage
)) {
78 fprintf(stderr
, "radeonsi: Compiling shader %d\n", count
);
80 if (!(sscreen
->debug_flags
& (DBG(NO_IR
) | DBG(PREOPT_IR
)))) {
81 fprintf(stderr
, "%s LLVM IR:\n\n", name
);
82 ac_dump_module(ac
->module
);
83 fprintf(stderr
, "\n");
87 if (sscreen
->record_llvm_ir
) {
88 char *ir
= LLVMPrintModuleToString(ac
->module
);
89 binary
->llvm_ir_string
= strdup(ir
);
90 LLVMDisposeMessage(ir
);
93 if (!si_replace_shader(count
, binary
)) {
94 struct ac_compiler_passes
*passes
= compiler
->passes
;
96 if (ac
->wave_size
== 32)
97 passes
= compiler
->passes_wave32
;
98 else if (less_optimized
&& compiler
->low_opt_passes
)
99 passes
= compiler
->low_opt_passes
;
101 struct si_llvm_diagnostics diag
= {debug
};
102 LLVMContextSetDiagnosticHandler(ac
->context
, si_diagnostic_handler
, &diag
);
104 if (!ac_compile_module_to_elf(passes
, ac
->module
, (char **)&binary
->elf_buffer
,
108 if (diag
.retval
!= 0) {
109 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compilation failed");
114 struct ac_rtld_binary rtld
;
115 if (!ac_rtld_open(&rtld
, (struct ac_rtld_open_info
){
116 .info
= &sscreen
->info
,
117 .shader_type
= stage
,
118 .wave_size
= ac
->wave_size
,
120 .elf_ptrs
= &binary
->elf_buffer
,
121 .elf_sizes
= &binary
->elf_size
}))
124 bool ok
= ac_rtld_read_config(&sscreen
->info
, &rtld
, conf
);
125 ac_rtld_close(&rtld
);
129 void si_llvm_context_init(struct si_shader_context
*ctx
, struct si_screen
*sscreen
,
130 struct ac_llvm_compiler
*compiler
, unsigned wave_size
)
132 memset(ctx
, 0, sizeof(*ctx
));
133 ctx
->screen
= sscreen
;
134 ctx
->compiler
= compiler
;
136 ac_llvm_context_init(&ctx
->ac
, compiler
, sscreen
->info
.chip_class
, sscreen
->info
.family
,
137 AC_FLOAT_MODE_DEFAULT_OPENGL
, wave_size
, 64);
140 void si_llvm_create_func(struct si_shader_context
*ctx
, const char *name
, LLVMTypeRef
*return_types
,
141 unsigned num_return_elems
, unsigned max_workgroup_size
)
143 LLVMTypeRef ret_type
;
144 enum ac_llvm_calling_convention call_conv
;
146 if (num_return_elems
)
147 ret_type
= LLVMStructTypeInContext(ctx
->ac
.context
, return_types
, num_return_elems
, true);
149 ret_type
= ctx
->ac
.voidt
;
151 gl_shader_stage real_stage
= ctx
->stage
;
153 /* LS is merged into HS (TCS), and ES is merged into GS. */
154 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
155 if (ctx
->shader
->key
.as_ls
)
156 real_stage
= MESA_SHADER_TESS_CTRL
;
157 else if (ctx
->shader
->key
.as_es
|| ctx
->shader
->key
.as_ngg
)
158 real_stage
= MESA_SHADER_GEOMETRY
;
161 switch (real_stage
) {
162 case MESA_SHADER_VERTEX
:
163 case MESA_SHADER_TESS_EVAL
:
164 call_conv
= AC_LLVM_AMDGPU_VS
;
166 case MESA_SHADER_TESS_CTRL
:
167 call_conv
= AC_LLVM_AMDGPU_HS
;
169 case MESA_SHADER_GEOMETRY
:
170 call_conv
= AC_LLVM_AMDGPU_GS
;
172 case MESA_SHADER_FRAGMENT
:
173 call_conv
= AC_LLVM_AMDGPU_PS
;
175 case MESA_SHADER_COMPUTE
:
176 call_conv
= AC_LLVM_AMDGPU_CS
;
179 unreachable("Unhandle shader type");
182 /* Setup the function */
183 ctx
->return_type
= ret_type
;
184 ctx
->main_fn
= ac_build_main(&ctx
->args
, &ctx
->ac
, call_conv
, name
, ret_type
, ctx
->ac
.module
);
185 ctx
->return_value
= LLVMGetUndef(ctx
->return_type
);
187 if (ctx
->screen
->info
.address32_hi
) {
188 ac_llvm_add_target_dep_function_attr(ctx
->main_fn
, "amdgpu-32bit-address-high-bits",
189 ctx
->screen
->info
.address32_hi
);
192 LLVMAddTargetDependentFunctionAttr(ctx
->main_fn
, "no-signed-zeros-fp-math", "true");
194 ac_llvm_set_workgroup_size(ctx
->main_fn
, max_workgroup_size
);
197 void si_llvm_optimize_module(struct si_shader_context
*ctx
)
199 /* Dump LLVM IR before any optimization passes */
200 if (ctx
->screen
->debug_flags
& DBG(PREOPT_IR
) && si_can_dump_shader(ctx
->screen
, ctx
->stage
))
201 LLVMDumpModule(ctx
->ac
.module
);
204 LLVMRunPassManager(ctx
->compiler
->passmgr
, ctx
->ac
.module
);
205 LLVMDisposeBuilder(ctx
->ac
.builder
);
208 void si_llvm_dispose(struct si_shader_context
*ctx
)
210 LLVMDisposeModule(ctx
->ac
.module
);
211 LLVMContextDispose(ctx
->ac
.context
);
212 ac_llvm_context_dispose(&ctx
->ac
);
216 * Load a dword from a constant buffer.
218 LLVMValueRef
si_buffer_load_const(struct si_shader_context
*ctx
, LLVMValueRef resource
,
221 return ac_build_buffer_load(&ctx
->ac
, resource
, 1, NULL
, offset
, NULL
, 0, 0, true, true);
224 void si_llvm_build_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
)
226 if (LLVMGetTypeKind(LLVMTypeOf(ret
)) == LLVMVoidTypeKind
)
227 LLVMBuildRetVoid(ctx
->ac
.builder
);
229 LLVMBuildRet(ctx
->ac
.builder
, ret
);
232 LLVMValueRef
si_insert_input_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
,
233 struct ac_arg param
, unsigned return_index
)
235 return LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, ac_get_arg(&ctx
->ac
, param
), return_index
, "");
238 LLVMValueRef
si_insert_input_ret_float(struct si_shader_context
*ctx
, LLVMValueRef ret
,
239 struct ac_arg param
, unsigned return_index
)
241 LLVMBuilderRef builder
= ctx
->ac
.builder
;
242 LLVMValueRef p
= ac_get_arg(&ctx
->ac
, param
);
244 return LLVMBuildInsertValue(builder
, ret
, ac_to_float(&ctx
->ac
, p
), return_index
, "");
247 LLVMValueRef
si_insert_input_ptr(struct si_shader_context
*ctx
, LLVMValueRef ret
,
248 struct ac_arg param
, unsigned return_index
)
250 LLVMBuilderRef builder
= ctx
->ac
.builder
;
251 LLVMValueRef ptr
= ac_get_arg(&ctx
->ac
, param
);
252 ptr
= LLVMBuildPtrToInt(builder
, ptr
, ctx
->ac
.i32
, "");
253 return LLVMBuildInsertValue(builder
, ret
, ptr
, return_index
, "");
256 LLVMValueRef
si_prolog_get_rw_buffers(struct si_shader_context
*ctx
)
258 LLVMValueRef ptr
[2], list
;
259 bool merged_shader
= si_is_merged_shader(ctx
->shader
);
261 ptr
[0] = LLVMGetParam(ctx
->main_fn
, (merged_shader
? 8 : 0) + SI_SGPR_RW_BUFFERS
);
263 LLVMBuildIntToPtr(ctx
->ac
.builder
, ptr
[0], ac_array_in_const32_addr_space(ctx
->ac
.v4i32
), "");
267 LLVMValueRef
si_build_gather_64bit(struct si_shader_context
*ctx
, LLVMTypeRef type
,
268 LLVMValueRef val1
, LLVMValueRef val2
)
270 LLVMValueRef values
[2] = {
271 ac_to_integer(&ctx
->ac
, val1
),
272 ac_to_integer(&ctx
->ac
, val2
),
274 LLVMValueRef result
= ac_build_gather_values(&ctx
->ac
, values
, 2);
275 return LLVMBuildBitCast(ctx
->ac
.builder
, result
, type
, "");
278 void si_llvm_emit_barrier(struct si_shader_context
*ctx
)
280 /* GFX6 only (thanks to a hw bug workaround):
281 * The real barrier instruction isn’t needed, because an entire patch
282 * always fits into a single wave.
284 if (ctx
->screen
->info
.chip_class
== GFX6
&& ctx
->stage
== MESA_SHADER_TESS_CTRL
) {
285 ac_build_waitcnt(&ctx
->ac
, AC_WAIT_LGKM
| AC_WAIT_VLOAD
| AC_WAIT_VSTORE
);
289 ac_build_s_barrier(&ctx
->ac
);
292 /* Ensure that the esgs ring is declared.
294 * We declare it with 64KB alignment as a hint that the
295 * pointer value will always be 0.
297 void si_llvm_declare_esgs_ring(struct si_shader_context
*ctx
)
302 assert(!LLVMGetNamedGlobal(ctx
->ac
.module
, "esgs_ring"));
304 ctx
->esgs_ring
= LLVMAddGlobalInAddressSpace(ctx
->ac
.module
, LLVMArrayType(ctx
->ac
.i32
, 0),
305 "esgs_ring", AC_ADDR_SPACE_LDS
);
306 LLVMSetLinkage(ctx
->esgs_ring
, LLVMExternalLinkage
);
307 LLVMSetAlignment(ctx
->esgs_ring
, 64 * 1024);
310 void si_init_exec_from_input(struct si_shader_context
*ctx
, struct ac_arg param
, unsigned bitoffset
)
312 LLVMValueRef args
[] = {
313 ac_get_arg(&ctx
->ac
, param
),
314 LLVMConstInt(ctx
->ac
.i32
, bitoffset
, 0),
316 ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.init.exec.from.input", ctx
->ac
.voidt
, args
, 2,
317 AC_FUNC_ATTR_CONVERGENT
);
321 * Get the value of a shader input parameter and extract a bitfield.
323 static LLVMValueRef
unpack_llvm_param(struct si_shader_context
*ctx
, LLVMValueRef value
,
324 unsigned rshift
, unsigned bitwidth
)
326 if (LLVMGetTypeKind(LLVMTypeOf(value
)) == LLVMFloatTypeKind
)
327 value
= ac_to_integer(&ctx
->ac
, value
);
330 value
= LLVMBuildLShr(ctx
->ac
.builder
, value
, LLVMConstInt(ctx
->ac
.i32
, rshift
, 0), "");
332 if (rshift
+ bitwidth
< 32) {
333 unsigned mask
= (1 << bitwidth
) - 1;
334 value
= LLVMBuildAnd(ctx
->ac
.builder
, value
, LLVMConstInt(ctx
->ac
.i32
, mask
, 0), "");
340 LLVMValueRef
si_unpack_param(struct si_shader_context
*ctx
, struct ac_arg param
, unsigned rshift
,
343 LLVMValueRef value
= ac_get_arg(&ctx
->ac
, param
);
345 return unpack_llvm_param(ctx
, value
, rshift
, bitwidth
);
348 LLVMValueRef
si_get_primitive_id(struct si_shader_context
*ctx
, unsigned swizzle
)
351 return ctx
->ac
.i32_0
;
353 switch (ctx
->stage
) {
354 case MESA_SHADER_VERTEX
:
355 return ac_get_arg(&ctx
->ac
, ctx
->vs_prim_id
);
356 case MESA_SHADER_TESS_CTRL
:
357 return ac_get_arg(&ctx
->ac
, ctx
->args
.tcs_patch_id
);
358 case MESA_SHADER_TESS_EVAL
:
359 return ac_get_arg(&ctx
->ac
, ctx
->args
.tes_patch_id
);
360 case MESA_SHADER_GEOMETRY
:
361 return ac_get_arg(&ctx
->ac
, ctx
->args
.gs_prim_id
);
364 return ctx
->ac
.i32_0
;
368 LLVMValueRef
si_llvm_get_block_size(struct ac_shader_abi
*abi
)
370 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
372 LLVMValueRef values
[3];
375 unsigned *properties
= ctx
->shader
->selector
->info
.properties
;
377 if (properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH
] != 0) {
378 unsigned sizes
[3] = {properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH
],
379 properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT
],
380 properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH
]};
382 for (i
= 0; i
< 3; ++i
)
383 values
[i
] = LLVMConstInt(ctx
->ac
.i32
, sizes
[i
], 0);
385 result
= ac_build_gather_values(&ctx
->ac
, values
, 3);
387 result
= ac_get_arg(&ctx
->ac
, ctx
->block_size
);
393 void si_llvm_declare_compute_memory(struct si_shader_context
*ctx
)
395 struct si_shader_selector
*sel
= ctx
->shader
->selector
;
396 unsigned lds_size
= sel
->info
.properties
[TGSI_PROPERTY_CS_LOCAL_SIZE
];
398 LLVMTypeRef i8p
= LLVMPointerType(ctx
->ac
.i8
, AC_ADDR_SPACE_LDS
);
401 assert(!ctx
->ac
.lds
);
403 var
= LLVMAddGlobalInAddressSpace(ctx
->ac
.module
, LLVMArrayType(ctx
->ac
.i8
, lds_size
),
404 "compute_lds", AC_ADDR_SPACE_LDS
);
405 LLVMSetAlignment(var
, 64 * 1024);
407 ctx
->ac
.lds
= LLVMBuildBitCast(ctx
->ac
.builder
, var
, i8p
, "");
410 bool si_nir_build_llvm(struct si_shader_context
*ctx
, struct nir_shader
*nir
)
412 if (nir
->info
.stage
== MESA_SHADER_VERTEX
) {
413 si_llvm_load_vs_inputs(ctx
, nir
);
414 } else if (nir
->info
.stage
== MESA_SHADER_FRAGMENT
) {
415 unsigned colors_read
= ctx
->shader
->selector
->info
.colors_read
;
416 LLVMValueRef main_fn
= ctx
->main_fn
;
418 LLVMValueRef undef
= LLVMGetUndef(ctx
->ac
.f32
);
420 unsigned offset
= SI_PARAM_POS_FIXED_PT
+ 1;
422 if (colors_read
& 0x0f) {
423 unsigned mask
= colors_read
& 0x0f;
424 LLVMValueRef values
[4];
425 values
[0] = mask
& 0x1 ? LLVMGetParam(main_fn
, offset
++) : undef
;
426 values
[1] = mask
& 0x2 ? LLVMGetParam(main_fn
, offset
++) : undef
;
427 values
[2] = mask
& 0x4 ? LLVMGetParam(main_fn
, offset
++) : undef
;
428 values
[3] = mask
& 0x8 ? LLVMGetParam(main_fn
, offset
++) : undef
;
429 ctx
->abi
.color0
= ac_to_integer(&ctx
->ac
, ac_build_gather_values(&ctx
->ac
, values
, 4));
431 if (colors_read
& 0xf0) {
432 unsigned mask
= (colors_read
& 0xf0) >> 4;
433 LLVMValueRef values
[4];
434 values
[0] = mask
& 0x1 ? LLVMGetParam(main_fn
, offset
++) : undef
;
435 values
[1] = mask
& 0x2 ? LLVMGetParam(main_fn
, offset
++) : undef
;
436 values
[2] = mask
& 0x4 ? LLVMGetParam(main_fn
, offset
++) : undef
;
437 values
[3] = mask
& 0x8 ? LLVMGetParam(main_fn
, offset
++) : undef
;
438 ctx
->abi
.color1
= ac_to_integer(&ctx
->ac
, ac_build_gather_values(&ctx
->ac
, values
, 4));
441 ctx
->abi
.interp_at_sample_force_center
=
442 ctx
->shader
->key
.mono
.u
.ps
.interpolate_at_sample_force_center
;
444 ctx
->abi
.kill_ps_if_inf_interp
=
445 (ctx
->screen
->debug_flags
& DBG(KILL_PS_INF_INTERP
)) &&
446 (ctx
->shader
->selector
->info
.uses_persp_center
||
447 ctx
->shader
->selector
->info
.uses_persp_centroid
||
448 ctx
->shader
->selector
->info
.uses_persp_sample
);
450 } else if (nir
->info
.stage
== MESA_SHADER_COMPUTE
) {
451 if (nir
->info
.cs
.user_data_components_amd
) {
452 ctx
->abi
.user_data
= ac_get_arg(&ctx
->ac
, ctx
->cs_user_data
);
453 ctx
->abi
.user_data
= ac_build_expand_to_vec4(&ctx
->ac
, ctx
->abi
.user_data
,
454 nir
->info
.cs
.user_data_components_amd
);
458 ctx
->abi
.inputs
= &ctx
->inputs
[0];
459 ctx
->abi
.clamp_shadow_reference
= true;
460 ctx
->abi
.robust_buffer_access
= true;
461 ctx
->abi
.convert_undef_to_zero
= true;
462 ctx
->abi
.clamp_div_by_zero
= ctx
->screen
->options
.clamp_div_by_zero
;
464 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_CS_LOCAL_SIZE
]) {
465 assert(gl_shader_stage_is_compute(nir
->info
.stage
));
466 si_llvm_declare_compute_memory(ctx
);
469 const struct si_shader_info
*info
= &ctx
->shader
->selector
->info
;
470 for (unsigned i
= 0; i
< info
->num_outputs
; i
++) {
471 for (unsigned j
= 0; j
< 4; j
++)
472 ctx
->abi
.outputs
[i
* 4 + j
] = ac_build_alloca_undef(&ctx
->ac
, ctx
->ac
.f32
, "");
475 ac_nir_translate(&ctx
->ac
, &ctx
->abi
, &ctx
->args
, nir
);
481 * Given a list of shader part functions, build a wrapper function that
482 * runs them in sequence to form a monolithic shader.
484 void si_build_wrapper_function(struct si_shader_context
*ctx
, LLVMValueRef
*parts
,
485 unsigned num_parts
, unsigned main_part
,
486 unsigned next_shader_first_part
)
488 LLVMBuilderRef builder
= ctx
->ac
.builder
;
489 /* PS epilog has one arg per color component; gfx9 merged shader
490 * prologs need to forward 40 SGPRs.
492 LLVMValueRef initial
[AC_MAX_ARGS
], out
[AC_MAX_ARGS
];
493 LLVMTypeRef function_type
;
494 unsigned num_first_params
;
495 unsigned num_out
, initial_num_out
;
496 ASSERTED
unsigned num_out_sgpr
; /* used in debug checks */
497 ASSERTED
unsigned initial_num_out_sgpr
; /* used in debug checks */
498 unsigned num_sgprs
, num_vgprs
;
501 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
503 for (unsigned i
= 0; i
< num_parts
; ++i
) {
504 ac_add_function_attr(ctx
->ac
.context
, parts
[i
], -1, AC_FUNC_ATTR_ALWAYSINLINE
);
505 LLVMSetLinkage(parts
[i
], LLVMPrivateLinkage
);
508 /* The parameters of the wrapper function correspond to those of the
509 * first part in terms of SGPRs and VGPRs, but we use the types of the
510 * main part to get the right types. This is relevant for the
511 * dereferenceable attribute on descriptor table pointers.
516 function_type
= LLVMGetElementType(LLVMTypeOf(parts
[0]));
517 num_first_params
= LLVMCountParamTypes(function_type
);
519 for (unsigned i
= 0; i
< num_first_params
; ++i
) {
520 LLVMValueRef param
= LLVMGetParam(parts
[0], i
);
522 if (ac_is_sgpr_param(param
)) {
523 assert(num_vgprs
== 0);
524 num_sgprs
+= ac_get_type_size(LLVMTypeOf(param
)) / 4;
526 num_vgprs
+= ac_get_type_size(LLVMTypeOf(param
)) / 4;
531 while (gprs
< num_sgprs
+ num_vgprs
) {
532 LLVMValueRef param
= LLVMGetParam(parts
[main_part
], ctx
->args
.arg_count
);
533 LLVMTypeRef type
= LLVMTypeOf(param
);
534 unsigned size
= ac_get_type_size(type
) / 4;
536 /* This is going to get casted anyways, so we don't have to
537 * have the exact same type. But we do have to preserve the
538 * pointer-ness so that LLVM knows about it.
540 enum ac_arg_type arg_type
= AC_ARG_INT
;
541 if (LLVMGetTypeKind(type
) == LLVMPointerTypeKind
) {
542 type
= LLVMGetElementType(type
);
544 if (LLVMGetTypeKind(type
) == LLVMVectorTypeKind
) {
545 if (LLVMGetVectorSize(type
) == 4)
546 arg_type
= AC_ARG_CONST_DESC_PTR
;
547 else if (LLVMGetVectorSize(type
) == 8)
548 arg_type
= AC_ARG_CONST_IMAGE_PTR
;
551 } else if (type
== ctx
->ac
.f32
) {
552 arg_type
= AC_ARG_CONST_FLOAT_PTR
;
558 ac_add_arg(&ctx
->args
, gprs
< num_sgprs
? AC_ARG_SGPR
: AC_ARG_VGPR
, size
, arg_type
, NULL
);
560 assert(ac_is_sgpr_param(param
) == (gprs
< num_sgprs
));
561 assert(gprs
+ size
<= num_sgprs
+ num_vgprs
&&
562 (gprs
>= num_sgprs
|| gprs
+ size
<= num_sgprs
));
567 /* Prepare the return type. */
568 unsigned num_returns
= 0;
569 LLVMTypeRef returns
[AC_MAX_ARGS
], last_func_type
, return_type
;
571 last_func_type
= LLVMGetElementType(LLVMTypeOf(parts
[num_parts
- 1]));
572 return_type
= LLVMGetReturnType(last_func_type
);
574 switch (LLVMGetTypeKind(return_type
)) {
575 case LLVMStructTypeKind
:
576 num_returns
= LLVMCountStructElementTypes(return_type
);
577 assert(num_returns
<= ARRAY_SIZE(returns
));
578 LLVMGetStructElementTypes(return_type
, returns
);
580 case LLVMVoidTypeKind
:
583 unreachable("unexpected type");
586 si_llvm_create_func(ctx
, "wrapper", returns
, num_returns
,
587 si_get_max_workgroup_size(ctx
->shader
));
589 if (si_is_merged_shader(ctx
->shader
))
590 ac_init_exec_full_mask(&ctx
->ac
);
592 /* Record the arguments of the function as if they were an output of
598 for (unsigned i
= 0; i
< ctx
->args
.arg_count
; ++i
) {
599 LLVMValueRef param
= LLVMGetParam(ctx
->main_fn
, i
);
600 LLVMTypeRef param_type
= LLVMTypeOf(param
);
601 LLVMTypeRef out_type
= ctx
->args
.args
[i
].file
== AC_ARG_SGPR
? ctx
->ac
.i32
: ctx
->ac
.f32
;
602 unsigned size
= ac_get_type_size(param_type
) / 4;
605 if (LLVMGetTypeKind(param_type
) == LLVMPointerTypeKind
) {
606 param
= LLVMBuildPtrToInt(builder
, param
, ctx
->ac
.i32
, "");
607 param_type
= ctx
->ac
.i32
;
610 if (param_type
!= out_type
)
611 param
= LLVMBuildBitCast(builder
, param
, out_type
, "");
612 out
[num_out
++] = param
;
614 LLVMTypeRef vector_type
= LLVMVectorType(out_type
, size
);
616 if (LLVMGetTypeKind(param_type
) == LLVMPointerTypeKind
) {
617 param
= LLVMBuildPtrToInt(builder
, param
, ctx
->ac
.i64
, "");
618 param_type
= ctx
->ac
.i64
;
621 if (param_type
!= vector_type
)
622 param
= LLVMBuildBitCast(builder
, param
, vector_type
, "");
624 for (unsigned j
= 0; j
< size
; ++j
)
626 LLVMBuildExtractElement(builder
, param
, LLVMConstInt(ctx
->ac
.i32
, j
, 0), "");
629 if (ctx
->args
.args
[i
].file
== AC_ARG_SGPR
)
630 num_out_sgpr
= num_out
;
633 memcpy(initial
, out
, sizeof(out
));
634 initial_num_out
= num_out
;
635 initial_num_out_sgpr
= num_out_sgpr
;
637 /* Now chain the parts. */
638 LLVMValueRef ret
= NULL
;
639 for (unsigned part
= 0; part
< num_parts
; ++part
) {
640 LLVMValueRef in
[AC_MAX_ARGS
];
641 LLVMTypeRef ret_type
;
642 unsigned out_idx
= 0;
643 unsigned num_params
= LLVMCountParams(parts
[part
]);
645 /* Merged shaders are executed conditionally depending
646 * on the number of enabled threads passed in the input SGPRs. */
647 if (si_is_multi_part_shader(ctx
->shader
) && part
== 0) {
648 LLVMValueRef ena
, count
= initial
[3];
650 count
= LLVMBuildAnd(builder
, count
, LLVMConstInt(ctx
->ac
.i32
, 0x7f, 0), "");
651 ena
= LLVMBuildICmp(builder
, LLVMIntULT
, ac_get_thread_id(&ctx
->ac
), count
, "");
652 ac_build_ifcc(&ctx
->ac
, ena
, 6506);
655 /* Derive arguments for the next part from outputs of the
658 for (unsigned param_idx
= 0; param_idx
< num_params
; ++param_idx
) {
660 LLVMTypeRef param_type
;
663 LLVMValueRef arg
= NULL
;
665 param
= LLVMGetParam(parts
[part
], param_idx
);
666 param_type
= LLVMTypeOf(param
);
667 param_size
= ac_get_type_size(param_type
) / 4;
668 is_sgpr
= ac_is_sgpr_param(param
);
671 ac_add_function_attr(ctx
->ac
.context
, parts
[part
], param_idx
+ 1, AC_FUNC_ATTR_INREG
);
672 } else if (out_idx
< num_out_sgpr
) {
673 /* Skip returned SGPRs the current part doesn't
674 * declare on the input. */
675 out_idx
= num_out_sgpr
;
678 assert(out_idx
+ param_size
<= (is_sgpr
? num_out_sgpr
: num_out
));
683 arg
= ac_build_gather_values(&ctx
->ac
, &out
[out_idx
], param_size
);
685 if (LLVMTypeOf(arg
) != param_type
) {
686 if (LLVMGetTypeKind(param_type
) == LLVMPointerTypeKind
) {
687 if (LLVMGetPointerAddressSpace(param_type
) == AC_ADDR_SPACE_CONST_32BIT
) {
688 arg
= LLVMBuildBitCast(builder
, arg
, ctx
->ac
.i32
, "");
689 arg
= LLVMBuildIntToPtr(builder
, arg
, param_type
, "");
691 arg
= LLVMBuildBitCast(builder
, arg
, ctx
->ac
.i64
, "");
692 arg
= LLVMBuildIntToPtr(builder
, arg
, param_type
, "");
695 arg
= LLVMBuildBitCast(builder
, arg
, param_type
, "");
700 out_idx
+= param_size
;
703 ret
= ac_build_call(&ctx
->ac
, parts
[part
], in
, num_params
);
705 if (si_is_multi_part_shader(ctx
->shader
) && part
+ 1 == next_shader_first_part
) {
706 ac_build_endif(&ctx
->ac
, 6506);
708 /* The second half of the merged shader should use
709 * the inputs from the toplevel (wrapper) function,
710 * not the return value from the last call.
712 * That's because the last call was executed condi-
713 * tionally, so we can't consume it in the main
716 memcpy(out
, initial
, sizeof(initial
));
717 num_out
= initial_num_out
;
718 num_out_sgpr
= initial_num_out_sgpr
;
722 /* Extract the returned GPRs. */
723 ret_type
= LLVMTypeOf(ret
);
727 if (LLVMGetTypeKind(ret_type
) != LLVMVoidTypeKind
) {
728 assert(LLVMGetTypeKind(ret_type
) == LLVMStructTypeKind
);
730 unsigned ret_size
= LLVMCountStructElementTypes(ret_type
);
732 for (unsigned i
= 0; i
< ret_size
; ++i
) {
733 LLVMValueRef val
= LLVMBuildExtractValue(builder
, ret
, i
, "");
735 assert(num_out
< ARRAY_SIZE(out
));
736 out
[num_out
++] = val
;
738 if (LLVMTypeOf(val
) == ctx
->ac
.i32
) {
739 assert(num_out_sgpr
+ 1 == num_out
);
740 num_out_sgpr
= num_out
;
746 /* Return the value from the last part. */
747 if (LLVMGetTypeKind(LLVMTypeOf(ret
)) == LLVMVoidTypeKind
)
748 LLVMBuildRetVoid(builder
);
750 LLVMBuildRet(builder
, ret
);