2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
27 #include "ac_llvm_util.h"
28 #include "util/u_memory.h"
/* Calling-convention IDs understood by the LLVM AMDGPU backend,
 * one per hardware shader stage. The numeric values are fixed by LLVM. */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
/* Context handed to si_diagnostic_handler via LLVM's diagnostic callback.
 *
 * The visible extraction of this struct lost the retval member, but
 * si_llvm_compile reads diag.retval after compilation, so it must exist:
 * retval is 0 on success and set to 1 when an LLVM error diagnostic fires. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	unsigned retval;
};
43 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
45 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
46 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
47 char *description
= LLVMGetDiagInfoDescription(di
);
48 const char *severity_str
= NULL
;
52 severity_str
= "error";
55 severity_str
= "warning";
58 severity_str
= "remark";
61 severity_str
= "note";
64 severity_str
= "unknown";
67 pipe_debug_message(diag
->debug
, SHADER_INFO
,
68 "LLVM diagnostic (%s): %s", severity_str
, description
);
70 if (severity
== LLVMDSError
) {
72 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
75 LLVMDisposeMessage(description
);
79 * Compile an LLVM module to machine code.
81 * @returns 0 for success, 1 for failure
83 unsigned si_llvm_compile(LLVMModuleRef M
, struct si_shader_binary
*binary
,
84 struct ac_llvm_compiler
*compiler
,
85 struct pipe_debug_callback
*debug
,
88 struct ac_compiler_passes
*passes
=
89 less_optimized
&& compiler
->low_opt_passes
?
90 compiler
->low_opt_passes
: compiler
->passes
;
91 struct si_llvm_diagnostics diag
;
92 LLVMContextRef llvm_ctx
;
97 /* Setup Diagnostic Handler*/
98 llvm_ctx
= LLVMGetModuleContext(M
);
100 LLVMContextSetDiagnosticHandler(llvm_ctx
, si_diagnostic_handler
, &diag
);
103 if (!ac_compile_module_to_elf(passes
, M
, (char **)&binary
->elf_buffer
,
107 if (diag
.retval
!= 0)
108 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compile failed");
112 void si_shader_binary_clean(struct si_shader_binary
*binary
)
114 free((void *)binary
->elf_buffer
);
115 binary
->elf_buffer
= NULL
;
117 free(binary
->llvm_ir_string
);
118 binary
->llvm_ir_string
= NULL
;
121 LLVMTypeRef
tgsi2llvmtype(struct lp_build_tgsi_context
*bld_base
,
122 enum tgsi_opcode_type type
)
124 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
127 case TGSI_TYPE_UNSIGNED
:
128 case TGSI_TYPE_SIGNED
:
130 case TGSI_TYPE_UNSIGNED64
:
131 case TGSI_TYPE_SIGNED64
:
133 case TGSI_TYPE_DOUBLE
:
135 case TGSI_TYPE_UNTYPED
:
136 case TGSI_TYPE_FLOAT
:
143 LLVMValueRef
bitcast(struct lp_build_tgsi_context
*bld_base
,
144 enum tgsi_opcode_type type
, LLVMValueRef value
)
146 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
147 LLVMTypeRef dst_type
= tgsi2llvmtype(bld_base
, type
);
150 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, dst_type
, "");
156 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
157 * or an undefined value in the same interval otherwise.
159 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
163 LLVMBuilderRef builder
= ctx
->ac
.builder
;
164 LLVMValueRef c_max
= LLVMConstInt(ctx
->i32
, num
- 1, 0);
167 if (util_is_power_of_two_or_zero(num
)) {
168 index
= LLVMBuildAnd(builder
, index
, c_max
, "");
170 /* In theory, this MAX pattern should result in code that is
171 * as good as the bit-wise AND above.
173 * In practice, LLVM generates worse code (at the time of
174 * writing), because its value tracking is not strong enough.
176 cc
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, c_max
, "");
177 index
= LLVMBuildSelect(builder
, cc
, index
, c_max
, "");
183 static LLVMValueRef
emit_swizzle(struct lp_build_tgsi_context
*bld_base
,
190 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
191 LLVMValueRef swizzles
[4];
193 swizzles
[0] = LLVMConstInt(ctx
->i32
, swizzle_x
, 0);
194 swizzles
[1] = LLVMConstInt(ctx
->i32
, swizzle_y
, 0);
195 swizzles
[2] = LLVMConstInt(ctx
->i32
, swizzle_z
, 0);
196 swizzles
[3] = LLVMConstInt(ctx
->i32
, swizzle_w
, 0);
198 return LLVMBuildShuffleVector(ctx
->ac
.builder
,
200 LLVMGetUndef(LLVMTypeOf(value
)),
201 LLVMConstVector(swizzles
, 4), "");
205 * Return the description of the array covering the given temporary register
209 get_temp_array_id(struct lp_build_tgsi_context
*bld_base
,
211 const struct tgsi_ind_register
*reg
)
213 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
214 unsigned num_arrays
= ctx
->bld_base
.info
->array_max
[TGSI_FILE_TEMPORARY
];
217 if (reg
&& reg
->ArrayID
> 0 && reg
->ArrayID
<= num_arrays
)
220 for (i
= 0; i
< num_arrays
; i
++) {
221 const struct tgsi_array_info
*array
= &ctx
->temp_arrays
[i
];
223 if (reg_index
>= array
->range
.First
&& reg_index
<= array
->range
.Last
)
230 static struct tgsi_declaration_range
231 get_array_range(struct lp_build_tgsi_context
*bld_base
,
232 unsigned File
, unsigned reg_index
,
233 const struct tgsi_ind_register
*reg
)
235 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
236 struct tgsi_declaration_range range
;
238 if (File
== TGSI_FILE_TEMPORARY
) {
239 unsigned array_id
= get_temp_array_id(bld_base
, reg_index
, reg
);
241 return ctx
->temp_arrays
[array_id
- 1].range
;
245 range
.Last
= bld_base
->info
->file_max
[File
];
250 * For indirect registers, construct a pointer directly to the requested
251 * element using getelementptr if possible.
253 * Returns NULL if the insertelement/extractelement fallback for array access
257 get_pointer_into_array(struct si_shader_context
*ctx
,
261 const struct tgsi_ind_register
*reg_indirect
)
264 struct tgsi_array_info
*array
;
265 LLVMValueRef idxs
[2];
269 if (file
!= TGSI_FILE_TEMPORARY
)
272 array_id
= get_temp_array_id(&ctx
->bld_base
, reg_index
, reg_indirect
);
276 alloca
= ctx
->temp_array_allocas
[array_id
- 1];
280 array
= &ctx
->temp_arrays
[array_id
- 1];
282 if (!(array
->writemask
& (1 << swizzle
)))
283 return ctx
->undef_alloca
;
285 index
= si_get_indirect_index(ctx
, reg_indirect
, 1,
286 reg_index
- ctx
->temp_arrays
[array_id
- 1].range
.First
);
288 /* Ensure that the index is within a valid range, to guard against
289 * VM faults and overwriting critical data (e.g. spilled resource
292 * TODO It should be possible to avoid the additional instructions
293 * if LLVM is changed so that it guarantuees:
294 * 1. the scratch space descriptor isolates the current wave (this
295 * could even save the scratch offset SGPR at the cost of an
296 * additional SALU instruction)
297 * 2. the memory for allocas must be allocated at the _end_ of the
298 * scratch space (after spilled registers)
300 index
= si_llvm_bound_index(ctx
, index
, array
->range
.Last
- array
->range
.First
+ 1);
302 index
= ac_build_imad(&ctx
->ac
, index
,
303 LLVMConstInt(ctx
->i32
, util_bitcount(array
->writemask
), 0),
304 LLVMConstInt(ctx
->i32
,
305 util_bitcount(array
->writemask
& ((1 << swizzle
) - 1)), 0));
306 idxs
[0] = ctx
->i32_0
;
308 return LLVMBuildGEP(ctx
->ac
.builder
, alloca
, idxs
, 2, "");
312 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context
*bld_base
,
317 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
318 LLVMValueRef values
[2] = {
319 ac_to_integer(&ctx
->ac
, ptr
),
320 ac_to_integer(&ctx
->ac
, ptr2
),
322 LLVMValueRef result
= ac_build_gather_values(&ctx
->ac
, values
, 2);
323 return LLVMBuildBitCast(ctx
->ac
.builder
, result
, type
, "");
327 emit_array_fetch(struct lp_build_tgsi_context
*bld_base
,
328 unsigned File
, enum tgsi_opcode_type type
,
329 struct tgsi_declaration_range range
,
332 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
333 unsigned i
, size
= range
.Last
- range
.First
+ 1;
334 LLVMTypeRef vec
= LLVMVectorType(tgsi2llvmtype(bld_base
, type
), size
);
335 LLVMValueRef result
= LLVMGetUndef(vec
);
336 unsigned swizzle
= swizzle_in
;
337 struct tgsi_full_src_register tmp_reg
= {};
338 tmp_reg
.Register
.File
= File
;
339 if (tgsi_type_is_64bit(type
))
340 swizzle
|= (swizzle_in
+ 1) << 16;
342 for (i
= 0; i
< size
; ++i
) {
343 tmp_reg
.Register
.Index
= i
+ range
.First
;
345 LLVMValueRef temp
= si_llvm_emit_fetch(bld_base
, &tmp_reg
, type
, swizzle
);
346 result
= LLVMBuildInsertElement(ctx
->ac
.builder
, result
, temp
,
347 LLVMConstInt(ctx
->i32
, i
, 0), "array_vector");
353 load_value_from_array(struct lp_build_tgsi_context
*bld_base
,
355 enum tgsi_opcode_type type
,
358 const struct tgsi_ind_register
*reg_indirect
)
360 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
361 LLVMBuilderRef builder
= ctx
->ac
.builder
;
364 ptr
= get_pointer_into_array(ctx
, file
, swizzle
, reg_index
, reg_indirect
);
366 LLVMValueRef val
= LLVMBuildLoad(builder
, ptr
, "");
367 if (tgsi_type_is_64bit(type
)) {
368 LLVMValueRef ptr_hi
, val_hi
;
369 ptr_hi
= LLVMBuildGEP(builder
, ptr
, &ctx
->i32_1
, 1, "");
370 val_hi
= LLVMBuildLoad(builder
, ptr_hi
, "");
371 val
= si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
377 struct tgsi_declaration_range range
=
378 get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
380 si_get_indirect_index(ctx
, reg_indirect
, 1, reg_index
- range
.First
);
382 emit_array_fetch(bld_base
, file
, type
, range
, swizzle
);
383 return LLVMBuildExtractElement(builder
, array
, index
, "");
388 store_value_to_array(struct lp_build_tgsi_context
*bld_base
,
393 const struct tgsi_ind_register
*reg_indirect
)
395 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
396 LLVMBuilderRef builder
= ctx
->ac
.builder
;
399 ptr
= get_pointer_into_array(ctx
, file
, chan_index
, reg_index
, reg_indirect
);
401 LLVMBuildStore(builder
, value
, ptr
);
404 struct tgsi_declaration_range range
= get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
405 LLVMValueRef index
= si_get_indirect_index(ctx
, reg_indirect
, 1, reg_index
- range
.First
);
407 emit_array_fetch(bld_base
, file
, TGSI_TYPE_FLOAT
, range
, chan_index
);
408 LLVMValueRef temp_ptr
;
410 array
= LLVMBuildInsertElement(builder
, array
, value
, index
, "");
412 size
= range
.Last
- range
.First
+ 1;
413 for (i
= 0; i
< size
; ++i
) {
415 case TGSI_FILE_OUTPUT
:
416 temp_ptr
= ctx
->outputs
[i
+ range
.First
][chan_index
];
419 case TGSI_FILE_TEMPORARY
:
420 if (range
.First
+ i
>= ctx
->temps_count
)
422 temp_ptr
= ctx
->temps
[(i
+ range
.First
) * TGSI_NUM_CHANNELS
+ chan_index
];
428 value
= LLVMBuildExtractElement(builder
, array
,
429 LLVMConstInt(ctx
->i32
, i
, 0), "");
430 LLVMBuildStore(builder
, value
, temp_ptr
);
435 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
436 * reload them at each use. This must be true if the shader is using
437 * derivatives and KILL, because KILL can leave the WQM and then a lazy
438 * input load isn't in the WQM anymore.
440 static bool si_preload_fs_inputs(struct si_shader_context
*ctx
)
442 struct si_shader_selector
*sel
= ctx
->shader
->selector
;
444 return sel
->info
.uses_derivatives
&&
449 get_output_ptr(struct lp_build_tgsi_context
*bld_base
, unsigned index
,
452 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
454 assert(index
<= ctx
->bld_base
.info
->file_max
[TGSI_FILE_OUTPUT
]);
455 return ctx
->outputs
[index
][chan
];
458 LLVMValueRef
si_llvm_emit_fetch(struct lp_build_tgsi_context
*bld_base
,
459 const struct tgsi_full_src_register
*reg
,
460 enum tgsi_opcode_type type
,
463 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
464 LLVMBuilderRef builder
= ctx
->ac
.builder
;
465 LLVMValueRef result
= NULL
, ptr
, ptr2
;
466 unsigned swizzle
= swizzle_in
& 0xffff;
468 if (swizzle_in
== ~0) {
469 LLVMValueRef values
[TGSI_NUM_CHANNELS
];
471 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
472 values
[chan
] = si_llvm_emit_fetch(bld_base
, reg
, type
, chan
);
474 return ac_build_gather_values(&ctx
->ac
, values
,
478 if (reg
->Register
.Indirect
) {
479 LLVMValueRef load
= load_value_from_array(bld_base
, reg
->Register
.File
, type
,
480 swizzle
, reg
->Register
.Index
, ®
->Indirect
);
481 return bitcast(bld_base
, type
, load
);
484 switch(reg
->Register
.File
) {
485 case TGSI_FILE_IMMEDIATE
: {
486 LLVMTypeRef ctype
= tgsi2llvmtype(bld_base
, type
);
487 if (tgsi_type_is_64bit(type
)) {
488 result
= LLVMGetUndef(LLVMVectorType(ctx
->i32
, 2));
489 result
= LLVMConstInsertElement(result
,
490 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
],
492 result
= LLVMConstInsertElement(result
,
493 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ (swizzle_in
>> 16)],
495 return LLVMConstBitCast(result
, ctype
);
497 return LLVMConstBitCast(ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
], ctype
);
501 case TGSI_FILE_INPUT
: {
502 unsigned index
= reg
->Register
.Index
;
503 LLVMValueRef input
[4];
505 /* I don't think doing this for vertex shaders is beneficial.
506 * For those, we want to make sure the VMEM loads are executed
507 * only once. Fragment shaders don't care much, because
508 * v_interp instructions are much cheaper than VMEM loads.
510 if (!si_preload_fs_inputs(ctx
) &&
511 ctx
->bld_base
.info
->processor
== PIPE_SHADER_FRAGMENT
)
512 ctx
->load_input(ctx
, index
, &ctx
->input_decls
[index
], input
);
514 memcpy(input
, &ctx
->inputs
[index
* 4], sizeof(input
));
516 result
= input
[swizzle
];
518 if (tgsi_type_is_64bit(type
)) {
520 ptr2
= input
[swizzle_in
>> 16];
521 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
527 case TGSI_FILE_TEMPORARY
:
528 if (reg
->Register
.Index
>= ctx
->temps_count
)
529 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
530 ptr
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
];
531 if (tgsi_type_is_64bit(type
)) {
532 ptr2
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ (swizzle_in
>> 16)];
533 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
534 LLVMBuildLoad(builder
, ptr
, ""),
535 LLVMBuildLoad(builder
, ptr2
, ""));
537 result
= LLVMBuildLoad(builder
, ptr
, "");
540 case TGSI_FILE_OUTPUT
:
541 ptr
= get_output_ptr(bld_base
, reg
->Register
.Index
, swizzle
);
542 if (tgsi_type_is_64bit(type
)) {
543 ptr2
= get_output_ptr(bld_base
, reg
->Register
.Index
, (swizzle_in
>> 16));
544 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
545 LLVMBuildLoad(builder
, ptr
, ""),
546 LLVMBuildLoad(builder
, ptr2
, ""));
548 result
= LLVMBuildLoad(builder
, ptr
, "");
552 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
555 return bitcast(bld_base
, type
, result
);
558 static LLVMValueRef
fetch_system_value(struct lp_build_tgsi_context
*bld_base
,
559 const struct tgsi_full_src_register
*reg
,
560 enum tgsi_opcode_type type
,
563 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
564 LLVMBuilderRef builder
= ctx
->ac
.builder
;
565 LLVMValueRef cval
= ctx
->system_values
[reg
->Register
.Index
];
566 unsigned swizzle
= swizzle_in
& 0xffff;
568 if (tgsi_type_is_64bit(type
)) {
571 assert(swizzle
== 0 || swizzle
== 2);
573 lo
= LLVMBuildExtractElement(
574 builder
, cval
, LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
575 hi
= LLVMBuildExtractElement(
576 builder
, cval
, LLVMConstInt(ctx
->i32
, (swizzle_in
>> 16), 0), "");
578 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
582 if (LLVMGetTypeKind(LLVMTypeOf(cval
)) == LLVMVectorTypeKind
) {
583 cval
= LLVMBuildExtractElement(
584 builder
, cval
, LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
586 assert(swizzle
== 0);
589 return bitcast(bld_base
, type
, cval
);
592 static void emit_declaration(struct lp_build_tgsi_context
*bld_base
,
593 const struct tgsi_full_declaration
*decl
)
595 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
596 LLVMBuilderRef builder
= ctx
->ac
.builder
;
597 unsigned first
, last
, i
;
598 switch(decl
->Declaration
.File
) {
599 case TGSI_FILE_ADDRESS
:
602 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
604 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
605 ctx
->addrs
[idx
][chan
] = ac_build_alloca_undef(
606 &ctx
->ac
, ctx
->i32
, "");
612 case TGSI_FILE_TEMPORARY
:
615 LLVMValueRef array_alloca
= NULL
;
617 unsigned writemask
= decl
->Declaration
.UsageMask
;
618 first
= decl
->Range
.First
;
619 last
= decl
->Range
.Last
;
620 decl_size
= 4 * ((last
- first
) + 1);
622 if (decl
->Declaration
.Array
) {
623 unsigned id
= decl
->Array
.ArrayID
- 1;
626 writemask
&= ctx
->temp_arrays
[id
].writemask
;
627 ctx
->temp_arrays
[id
].writemask
= writemask
;
628 array_size
= ((last
- first
) + 1) * util_bitcount(writemask
);
630 /* If the array has more than 16 elements, store it
631 * in memory using an alloca that spans the entire
634 * Otherwise, store each array element individually.
635 * We will then generate vectors (per-channel, up to
636 * <16 x float> if the usagemask is a single bit) for
637 * indirect addressing.
639 * Note that 16 is the number of vector elements that
640 * LLVM will store in a register, so theoretically an
641 * array with up to 4 * 16 = 64 elements could be
642 * handled this way, but whether that's a good idea
643 * depends on VGPR register pressure elsewhere.
645 * FIXME: We shouldn't need to have the non-alloca
646 * code path for arrays. LLVM should be smart enough to
647 * promote allocas into registers when profitable.
649 if (array_size
> 16 ||
650 !ctx
->screen
->llvm_has_working_vgpr_indexing
) {
651 array_alloca
= ac_build_alloca_undef(&ctx
->ac
,
652 LLVMArrayType(ctx
->f32
,
653 array_size
), "array");
654 ctx
->temp_array_allocas
[id
] = array_alloca
;
658 if (!ctx
->temps_count
) {
659 ctx
->temps_count
= bld_base
->info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
660 ctx
->temps
= MALLOC(TGSI_NUM_CHANNELS
* ctx
->temps_count
* sizeof(LLVMValueRef
));
663 for (i
= 0; i
< decl_size
; ++i
) {
665 snprintf(name
, sizeof(name
), "TEMP%d.%c",
666 first
+ i
/ 4, "xyzw"[i
% 4]);
668 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] =
669 ac_build_alloca_undef(&ctx
->ac
,
674 LLVMValueRef idxs
[2] = {
680 if (writemask
!= TGSI_WRITEMASK_XYZW
&&
681 !ctx
->undef_alloca
) {
682 /* Create a dummy alloca. We use it so that we
683 * have a pointer that is safe to load from if
684 * a shader ever reads from a channel that
685 * it never writes to.
687 ctx
->undef_alloca
= ac_build_alloca_undef(
688 &ctx
->ac
, ctx
->f32
, "undef");
691 for (i
= 0; i
< decl_size
; ++i
) {
693 if (writemask
& (1 << (i
% 4))) {
695 snprintf(name
, sizeof(name
), "TEMP%d.%c",
696 first
+ i
/ 4, "xyzw"[i
% 4]);
698 idxs
[1] = LLVMConstInt(ctx
->i32
, j
, 0);
699 ptr
= LLVMBuildGEP(builder
, array_alloca
, idxs
, 2, name
);
702 ptr
= ctx
->undef_alloca
;
704 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] = ptr
;
709 case TGSI_FILE_INPUT
:
712 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
713 if (ctx
->load_input
&&
714 ctx
->input_decls
[idx
].Declaration
.File
!= TGSI_FILE_INPUT
) {
715 ctx
->input_decls
[idx
] = *decl
;
716 ctx
->input_decls
[idx
].Range
.First
= idx
;
717 ctx
->input_decls
[idx
].Range
.Last
= idx
;
718 ctx
->input_decls
[idx
].Semantic
.Index
+= idx
- decl
->Range
.First
;
720 if (si_preload_fs_inputs(ctx
) ||
721 bld_base
->info
->processor
!= PIPE_SHADER_FRAGMENT
)
722 ctx
->load_input(ctx
, idx
, &ctx
->input_decls
[idx
],
723 &ctx
->inputs
[idx
* 4]);
729 case TGSI_FILE_SYSTEM_VALUE
:
732 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
733 si_load_system_value(ctx
, idx
, decl
);
738 case TGSI_FILE_OUTPUT
:
742 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
744 assert(idx
< RADEON_LLVM_MAX_OUTPUTS
);
745 if (ctx
->outputs
[idx
][0])
747 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
749 snprintf(name
, sizeof(name
), "OUT%d.%c",
750 idx
, "xyzw"[chan
% 4]);
752 ctx
->outputs
[idx
][chan
] = ac_build_alloca_undef(
753 &ctx
->ac
, ctx
->f32
, name
);
759 case TGSI_FILE_MEMORY
:
760 si_tgsi_declare_compute_memory(ctx
, decl
);
768 void si_llvm_emit_store(struct lp_build_tgsi_context
*bld_base
,
769 const struct tgsi_full_instruction
*inst
,
770 const struct tgsi_opcode_info
*info
,
774 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
775 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[index
];
776 LLVMBuilderRef builder
= ctx
->ac
.builder
;
777 LLVMValueRef temp_ptr
, temp_ptr2
= NULL
;
778 bool is_vec_store
= false;
779 enum tgsi_opcode_type dtype
= tgsi_opcode_infer_dst_type(inst
->Instruction
.Opcode
, index
);
782 LLVMTypeKind k
= LLVMGetTypeKind(LLVMTypeOf(dst
[0]));
783 is_vec_store
= (k
== LLVMVectorTypeKind
);
787 LLVMValueRef values
[4] = {};
788 uint32_t writemask
= reg
->Register
.WriteMask
;
790 unsigned chan
= u_bit_scan(&writemask
);
791 LLVMValueRef index
= LLVMConstInt(ctx
->i32
, chan
, 0);
792 values
[chan
] = LLVMBuildExtractElement(ctx
->ac
.builder
,
795 bld_base
->emit_store(bld_base
, inst
, info
, index
, values
);
799 uint32_t writemask
= reg
->Register
.WriteMask
;
801 unsigned chan_index
= u_bit_scan(&writemask
);
802 LLVMValueRef value
= dst
[chan_index
];
804 if (tgsi_type_is_64bit(dtype
) && (chan_index
== 1 || chan_index
== 3))
806 if (inst
->Instruction
.Saturate
)
807 value
= ac_build_clamp(&ctx
->ac
, value
);
809 if (reg
->Register
.File
== TGSI_FILE_ADDRESS
) {
810 temp_ptr
= ctx
->addrs
[reg
->Register
.Index
][chan_index
];
811 LLVMBuildStore(builder
, value
, temp_ptr
);
815 if (!tgsi_type_is_64bit(dtype
))
816 value
= ac_to_float(&ctx
->ac
, value
);
818 if (reg
->Register
.Indirect
) {
819 unsigned file
= reg
->Register
.File
;
820 unsigned reg_index
= reg
->Register
.Index
;
821 store_value_to_array(bld_base
, value
, file
, chan_index
,
822 reg_index
, ®
->Indirect
);
824 switch(reg
->Register
.File
) {
825 case TGSI_FILE_OUTPUT
:
826 temp_ptr
= ctx
->outputs
[reg
->Register
.Index
][chan_index
];
827 if (tgsi_type_is_64bit(dtype
))
828 temp_ptr2
= ctx
->outputs
[reg
->Register
.Index
][chan_index
+ 1];
831 case TGSI_FILE_TEMPORARY
:
833 if (reg
->Register
.Index
>= ctx
->temps_count
)
836 temp_ptr
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
];
837 if (tgsi_type_is_64bit(dtype
))
838 temp_ptr2
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
+ 1];
845 if (!tgsi_type_is_64bit(dtype
))
846 LLVMBuildStore(builder
, value
, temp_ptr
);
848 LLVMValueRef ptr
= LLVMBuildBitCast(builder
, value
,
849 LLVMVectorType(ctx
->i32
, 2), "");
851 value
= LLVMBuildExtractElement(builder
, ptr
,
853 val2
= LLVMBuildExtractElement(builder
, ptr
,
856 LLVMBuildStore(builder
, ac_to_float(&ctx
->ac
, value
), temp_ptr
);
857 LLVMBuildStore(builder
, ac_to_float(&ctx
->ac
, val2
), temp_ptr2
);
/* Convert a TGSI program counter to a display line number. */
static int get_line(int pc)
{
	/* Subtract 1 so that the number shown is that of the corresponding
	 * opcode in the TGSI dump, e.g. an if block has the same suffix as
	 * the instruction number of the corresponding TGSI IF.
	 */
	return pc - 1;
}
872 static void bgnloop_emit(const struct lp_build_tgsi_action
*action
,
873 struct lp_build_tgsi_context
*bld_base
,
874 struct lp_build_emit_data
*emit_data
)
876 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
877 ac_build_bgnloop(&ctx
->ac
, get_line(bld_base
->pc
));
880 static void brk_emit(const struct lp_build_tgsi_action
*action
,
881 struct lp_build_tgsi_context
*bld_base
,
882 struct lp_build_emit_data
*emit_data
)
884 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
885 ac_build_break(&ctx
->ac
);
888 static void cont_emit(const struct lp_build_tgsi_action
*action
,
889 struct lp_build_tgsi_context
*bld_base
,
890 struct lp_build_emit_data
*emit_data
)
892 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
893 ac_build_continue(&ctx
->ac
);
896 static void else_emit(const struct lp_build_tgsi_action
*action
,
897 struct lp_build_tgsi_context
*bld_base
,
898 struct lp_build_emit_data
*emit_data
)
900 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
901 ac_build_else(&ctx
->ac
, get_line(bld_base
->pc
));
904 static void endif_emit(const struct lp_build_tgsi_action
*action
,
905 struct lp_build_tgsi_context
*bld_base
,
906 struct lp_build_emit_data
*emit_data
)
908 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
909 ac_build_endif(&ctx
->ac
, get_line(bld_base
->pc
));
912 static void endloop_emit(const struct lp_build_tgsi_action
*action
,
913 struct lp_build_tgsi_context
*bld_base
,
914 struct lp_build_emit_data
*emit_data
)
916 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
917 ac_build_endloop(&ctx
->ac
, get_line(bld_base
->pc
));
920 static void if_emit(const struct lp_build_tgsi_action
*action
,
921 struct lp_build_tgsi_context
*bld_base
,
922 struct lp_build_emit_data
*emit_data
)
924 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
925 ac_build_if(&ctx
->ac
, emit_data
->args
[0], get_line(bld_base
->pc
));
928 static void uif_emit(const struct lp_build_tgsi_action
*action
,
929 struct lp_build_tgsi_context
*bld_base
,
930 struct lp_build_emit_data
*emit_data
)
932 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
933 ac_build_uif(&ctx
->ac
, emit_data
->args
[0], get_line(bld_base
->pc
));
936 static void emit_immediate(struct lp_build_tgsi_context
*bld_base
,
937 const struct tgsi_full_immediate
*imm
)
940 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
942 for (i
= 0; i
< 4; ++i
) {
943 ctx
->imms
[ctx
->imms_num
* TGSI_NUM_CHANNELS
+ i
] =
944 LLVMConstInt(ctx
->i32
, imm
->u
[i
].Uint
, false );
950 void si_llvm_context_init(struct si_shader_context
*ctx
,
951 struct si_screen
*sscreen
,
952 struct ac_llvm_compiler
*compiler
)
956 /* Initialize the gallivm object:
957 * We are only using the module, context, and builder fields of this struct.
958 * This should be enough for us to be able to pass our gallivm struct to the
959 * helper functions in the gallivm module.
961 memset(ctx
, 0, sizeof(*ctx
));
962 ctx
->screen
= sscreen
;
963 ctx
->compiler
= compiler
;
965 ac_llvm_context_init(&ctx
->ac
, sscreen
->info
.chip_class
, sscreen
->info
.family
);
966 ctx
->ac
.module
= ac_create_module(compiler
->tm
, ctx
->ac
.context
);
968 enum ac_float_mode float_mode
=
969 sscreen
->debug_flags
& DBG(UNSAFE_MATH
) ?
970 AC_FLOAT_MODE_UNSAFE_FP_MATH
:
971 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH
;
972 ctx
->ac
.builder
= ac_create_builder(ctx
->ac
.context
, float_mode
);
974 ctx
->gallivm
.context
= ctx
->ac
.context
;
975 ctx
->gallivm
.module
= ctx
->ac
.module
;
976 ctx
->gallivm
.builder
= ctx
->ac
.builder
;
978 struct lp_build_tgsi_context
*bld_base
= &ctx
->bld_base
;
980 type
.floating
= true;
987 lp_build_context_init(&bld_base
->base
, &ctx
->gallivm
, type
);
988 lp_build_context_init(&ctx
->bld_base
.uint_bld
, &ctx
->gallivm
, lp_uint_type(type
));
989 lp_build_context_init(&ctx
->bld_base
.int_bld
, &ctx
->gallivm
, lp_int_type(type
));
991 lp_build_context_init(&ctx
->bld_base
.dbl_bld
, &ctx
->gallivm
, type
);
992 lp_build_context_init(&ctx
->bld_base
.uint64_bld
, &ctx
->gallivm
, lp_uint_type(type
));
993 lp_build_context_init(&ctx
->bld_base
.int64_bld
, &ctx
->gallivm
, lp_int_type(type
));
996 bld_base
->emit_swizzle
= emit_swizzle
;
997 bld_base
->emit_declaration
= emit_declaration
;
998 bld_base
->emit_immediate
= emit_immediate
;
1000 bld_base
->op_actions
[TGSI_OPCODE_BGNLOOP
].emit
= bgnloop_emit
;
1001 bld_base
->op_actions
[TGSI_OPCODE_BRK
].emit
= brk_emit
;
1002 bld_base
->op_actions
[TGSI_OPCODE_CONT
].emit
= cont_emit
;
1003 bld_base
->op_actions
[TGSI_OPCODE_IF
].emit
= if_emit
;
1004 bld_base
->op_actions
[TGSI_OPCODE_UIF
].emit
= uif_emit
;
1005 bld_base
->op_actions
[TGSI_OPCODE_ELSE
].emit
= else_emit
;
1006 bld_base
->op_actions
[TGSI_OPCODE_ENDIF
].emit
= endif_emit
;
1007 bld_base
->op_actions
[TGSI_OPCODE_ENDLOOP
].emit
= endloop_emit
;
1009 si_shader_context_init_alu(&ctx
->bld_base
);
1010 si_shader_context_init_mem(ctx
);
1012 ctx
->voidt
= LLVMVoidTypeInContext(ctx
->ac
.context
);
1013 ctx
->i1
= LLVMInt1TypeInContext(ctx
->ac
.context
);
1014 ctx
->i8
= LLVMInt8TypeInContext(ctx
->ac
.context
);
1015 ctx
->i32
= LLVMInt32TypeInContext(ctx
->ac
.context
);
1016 ctx
->i64
= LLVMInt64TypeInContext(ctx
->ac
.context
);
1017 ctx
->i128
= LLVMIntTypeInContext(ctx
->ac
.context
, 128);
1018 ctx
->f32
= LLVMFloatTypeInContext(ctx
->ac
.context
);
1019 ctx
->v2i32
= LLVMVectorType(ctx
->i32
, 2);
1020 ctx
->v4i32
= LLVMVectorType(ctx
->i32
, 4);
1021 ctx
->v4f32
= LLVMVectorType(ctx
->f32
, 4);
1022 ctx
->v8i32
= LLVMVectorType(ctx
->i32
, 8);
1024 ctx
->i32_0
= LLVMConstInt(ctx
->i32
, 0, 0);
1025 ctx
->i32_1
= LLVMConstInt(ctx
->i32
, 1, 0);
1026 ctx
->i1false
= LLVMConstInt(ctx
->i1
, 0, 0);
1027 ctx
->i1true
= LLVMConstInt(ctx
->i1
, 1, 0);
1030 /* Set the context to a certain TGSI shader. Can be called repeatedly
1031 * to change the shader. */
1032 void si_llvm_context_set_tgsi(struct si_shader_context
*ctx
,
1033 struct si_shader
*shader
)
1035 const struct tgsi_shader_info
*info
= NULL
;
1036 const struct tgsi_token
*tokens
= NULL
;
1038 if (shader
&& shader
->selector
) {
1039 info
= &shader
->selector
->info
;
1040 tokens
= shader
->selector
->tokens
;
1043 ctx
->shader
= shader
;
1044 ctx
->type
= info
? info
->processor
: -1;
1045 ctx
->bld_base
.info
= info
;
1047 /* Clean up the old contents. */
1048 FREE(ctx
->temp_arrays
);
1049 ctx
->temp_arrays
= NULL
;
1050 FREE(ctx
->temp_array_allocas
);
1051 ctx
->temp_array_allocas
= NULL
;
1059 ctx
->temps_count
= 0;
1064 ctx
->num_const_buffers
= util_last_bit(info
->const_buffers_declared
);
1065 ctx
->num_shader_buffers
= util_last_bit(info
->shader_buffers_declared
);
1067 ctx
->num_samplers
= util_last_bit(info
->samplers_declared
);
1068 ctx
->num_images
= util_last_bit(info
->images_declared
);
1073 if (info
->array_max
[TGSI_FILE_TEMPORARY
] > 0) {
1074 int size
= info
->array_max
[TGSI_FILE_TEMPORARY
];
1076 ctx
->temp_arrays
= CALLOC(size
, sizeof(ctx
->temp_arrays
[0]));
1077 ctx
->temp_array_allocas
= CALLOC(size
, sizeof(ctx
->temp_array_allocas
[0]));
1079 tgsi_scan_arrays(tokens
, TGSI_FILE_TEMPORARY
, size
,
1082 if (info
->file_max
[TGSI_FILE_IMMEDIATE
] >= 0) {
1083 int size
= info
->file_max
[TGSI_FILE_IMMEDIATE
] + 1;
1084 ctx
->imms
= MALLOC(size
* TGSI_NUM_CHANNELS
* sizeof(LLVMValueRef
));
1087 /* Re-set these to start with a clean slate. */
1088 ctx
->bld_base
.num_instructions
= 0;
1089 ctx
->bld_base
.pc
= 0;
1090 memset(ctx
->outputs
, 0, sizeof(ctx
->outputs
));
1092 ctx
->bld_base
.emit_store
= si_llvm_emit_store
;
1093 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_IMMEDIATE
] = si_llvm_emit_fetch
;
1094 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_INPUT
] = si_llvm_emit_fetch
;
1095 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_TEMPORARY
] = si_llvm_emit_fetch
;
1096 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_OUTPUT
] = si_llvm_emit_fetch
;
1097 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_SYSTEM_VALUE
] = fetch_system_value
;
1100 void si_llvm_create_func(struct si_shader_context
*ctx
,
1102 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
1103 LLVMTypeRef
*ParamTypes
, unsigned ParamCount
)
1105 LLVMTypeRef main_fn_type
, ret_type
;
1106 LLVMBasicBlockRef main_fn_body
;
1107 enum si_llvm_calling_convention call_conv
;
1108 unsigned real_shader_type
;
1110 if (num_return_elems
)
1111 ret_type
= LLVMStructTypeInContext(ctx
->ac
.context
,
1113 num_return_elems
, true);
1115 ret_type
= ctx
->voidt
;
1117 /* Setup the function */
1118 ctx
->return_type
= ret_type
;
1119 main_fn_type
= LLVMFunctionType(ret_type
, ParamTypes
, ParamCount
, 0);
1120 ctx
->main_fn
= LLVMAddFunction(ctx
->gallivm
.module
, name
, main_fn_type
);
1121 main_fn_body
= LLVMAppendBasicBlockInContext(ctx
->ac
.context
,
1122 ctx
->main_fn
, "main_body");
1123 LLVMPositionBuilderAtEnd(ctx
->ac
.builder
, main_fn_body
);
1125 real_shader_type
= ctx
->type
;
1127 /* LS is merged into HS (TCS), and ES is merged into GS. */
1128 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1129 if (ctx
->shader
->key
.as_ls
)
1130 real_shader_type
= PIPE_SHADER_TESS_CTRL
;
1131 else if (ctx
->shader
->key
.as_es
|| ctx
->shader
->key
.as_ngg
)
1132 real_shader_type
= PIPE_SHADER_GEOMETRY
;
1135 switch (real_shader_type
) {
1136 case PIPE_SHADER_VERTEX
:
1137 case PIPE_SHADER_TESS_EVAL
:
1138 call_conv
= RADEON_LLVM_AMDGPU_VS
;
1140 case PIPE_SHADER_TESS_CTRL
:
1141 call_conv
= RADEON_LLVM_AMDGPU_HS
;
1143 case PIPE_SHADER_GEOMETRY
:
1144 call_conv
= RADEON_LLVM_AMDGPU_GS
;
1146 case PIPE_SHADER_FRAGMENT
:
1147 call_conv
= RADEON_LLVM_AMDGPU_PS
;
1149 case PIPE_SHADER_COMPUTE
:
1150 call_conv
= RADEON_LLVM_AMDGPU_CS
;
1153 unreachable("Unhandle shader type");
1156 LLVMSetFunctionCallConv(ctx
->main_fn
, call_conv
);
1159 void si_llvm_optimize_module(struct si_shader_context
*ctx
)
1161 /* Dump LLVM IR before any optimization passes */
1162 if (ctx
->screen
->debug_flags
& DBG(PREOPT_IR
) &&
1163 si_can_dump_shader(ctx
->screen
, ctx
->type
))
1164 LLVMDumpModule(ctx
->gallivm
.module
);
1167 LLVMRunPassManager(ctx
->compiler
->passmgr
, ctx
->gallivm
.module
);
1168 LLVMDisposeBuilder(ctx
->ac
.builder
);
1171 void si_llvm_dispose(struct si_shader_context
*ctx
)
1173 LLVMDisposeModule(ctx
->gallivm
.module
);
1174 LLVMContextDispose(ctx
->gallivm
.context
);
1175 FREE(ctx
->temp_arrays
);
1176 ctx
->temp_arrays
= NULL
;
1177 FREE(ctx
->temp_array_allocas
);
1178 ctx
->temp_array_allocas
= NULL
;
1181 ctx
->temps_count
= 0;
1185 ac_llvm_context_dispose(&ctx
->ac
);