2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
27 #include "ac_llvm_util.h"
28 #include "util/u_memory.h"
/* AMDGPU calling-convention IDs understood by the LLVM backend; the numeric
 * values must match LLVM's CallingConv definitions for amdgpu_* conventions. */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
/* Context passed to the LLVM diagnostic handler callback.
 *
 * retval was missing from the visible declaration, but si_llvm_compile
 * reads diag.retval after compilation, so the member is required. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	/* Set to non-zero by the handler when an LLVMDSError is reported. */
	unsigned retval;
};
43 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
45 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
46 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
47 char *description
= LLVMGetDiagInfoDescription(di
);
48 const char *severity_str
= NULL
;
52 severity_str
= "error";
55 severity_str
= "warning";
58 severity_str
= "remark";
61 severity_str
= "note";
64 severity_str
= "unknown";
67 pipe_debug_message(diag
->debug
, SHADER_INFO
,
68 "LLVM diagnostic (%s): %s", severity_str
, description
);
70 if (severity
== LLVMDSError
) {
72 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
75 LLVMDisposeMessage(description
);
79 * Compile an LLVM module to machine code.
81 * @returns 0 for success, 1 for failure
83 unsigned si_llvm_compile(LLVMModuleRef M
, struct ac_shader_binary
*binary
,
84 struct ac_llvm_compiler
*compiler
,
85 struct pipe_debug_callback
*debug
,
88 struct ac_compiler_passes
*passes
=
89 less_optimized
&& compiler
->low_opt_passes
?
90 compiler
->low_opt_passes
: compiler
->passes
;
91 struct si_llvm_diagnostics diag
;
92 LLVMContextRef llvm_ctx
;
97 /* Setup Diagnostic Handler*/
98 llvm_ctx
= LLVMGetModuleContext(M
);
100 LLVMContextSetDiagnosticHandler(llvm_ctx
, si_diagnostic_handler
, &diag
);
103 if (!ac_compile_module_to_binary(passes
, M
, binary
))
106 if (diag
.retval
!= 0)
107 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compile failed");
111 LLVMTypeRef
tgsi2llvmtype(struct lp_build_tgsi_context
*bld_base
,
112 enum tgsi_opcode_type type
)
114 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
117 case TGSI_TYPE_UNSIGNED
:
118 case TGSI_TYPE_SIGNED
:
120 case TGSI_TYPE_UNSIGNED64
:
121 case TGSI_TYPE_SIGNED64
:
123 case TGSI_TYPE_DOUBLE
:
125 case TGSI_TYPE_UNTYPED
:
126 case TGSI_TYPE_FLOAT
:
133 LLVMValueRef
bitcast(struct lp_build_tgsi_context
*bld_base
,
134 enum tgsi_opcode_type type
, LLVMValueRef value
)
136 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
137 LLVMTypeRef dst_type
= tgsi2llvmtype(bld_base
, type
);
140 return LLVMBuildBitCast(ctx
->ac
.builder
, value
, dst_type
, "");
146 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
147 * or an undefined value in the same interval otherwise.
149 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
153 LLVMBuilderRef builder
= ctx
->ac
.builder
;
154 LLVMValueRef c_max
= LLVMConstInt(ctx
->i32
, num
- 1, 0);
157 if (util_is_power_of_two_or_zero(num
)) {
158 index
= LLVMBuildAnd(builder
, index
, c_max
, "");
160 /* In theory, this MAX pattern should result in code that is
161 * as good as the bit-wise AND above.
163 * In practice, LLVM generates worse code (at the time of
164 * writing), because its value tracking is not strong enough.
166 cc
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, c_max
, "");
167 index
= LLVMBuildSelect(builder
, cc
, index
, c_max
, "");
173 static LLVMValueRef
emit_swizzle(struct lp_build_tgsi_context
*bld_base
,
180 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
181 LLVMValueRef swizzles
[4];
183 swizzles
[0] = LLVMConstInt(ctx
->i32
, swizzle_x
, 0);
184 swizzles
[1] = LLVMConstInt(ctx
->i32
, swizzle_y
, 0);
185 swizzles
[2] = LLVMConstInt(ctx
->i32
, swizzle_z
, 0);
186 swizzles
[3] = LLVMConstInt(ctx
->i32
, swizzle_w
, 0);
188 return LLVMBuildShuffleVector(ctx
->ac
.builder
,
190 LLVMGetUndef(LLVMTypeOf(value
)),
191 LLVMConstVector(swizzles
, 4), "");
195 * Return the description of the array covering the given temporary register
199 get_temp_array_id(struct lp_build_tgsi_context
*bld_base
,
201 const struct tgsi_ind_register
*reg
)
203 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
204 unsigned num_arrays
= ctx
->bld_base
.info
->array_max
[TGSI_FILE_TEMPORARY
];
207 if (reg
&& reg
->ArrayID
> 0 && reg
->ArrayID
<= num_arrays
)
210 for (i
= 0; i
< num_arrays
; i
++) {
211 const struct tgsi_array_info
*array
= &ctx
->temp_arrays
[i
];
213 if (reg_index
>= array
->range
.First
&& reg_index
<= array
->range
.Last
)
220 static struct tgsi_declaration_range
221 get_array_range(struct lp_build_tgsi_context
*bld_base
,
222 unsigned File
, unsigned reg_index
,
223 const struct tgsi_ind_register
*reg
)
225 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
226 struct tgsi_declaration_range range
;
228 if (File
== TGSI_FILE_TEMPORARY
) {
229 unsigned array_id
= get_temp_array_id(bld_base
, reg_index
, reg
);
231 return ctx
->temp_arrays
[array_id
- 1].range
;
235 range
.Last
= bld_base
->info
->file_max
[File
];
240 * For indirect registers, construct a pointer directly to the requested
241 * element using getelementptr if possible.
243 * Returns NULL if the insertelement/extractelement fallback for array access
247 get_pointer_into_array(struct si_shader_context
*ctx
,
251 const struct tgsi_ind_register
*reg_indirect
)
254 struct tgsi_array_info
*array
;
255 LLVMValueRef idxs
[2];
259 if (file
!= TGSI_FILE_TEMPORARY
)
262 array_id
= get_temp_array_id(&ctx
->bld_base
, reg_index
, reg_indirect
);
266 alloca
= ctx
->temp_array_allocas
[array_id
- 1];
270 array
= &ctx
->temp_arrays
[array_id
- 1];
272 if (!(array
->writemask
& (1 << swizzle
)))
273 return ctx
->undef_alloca
;
275 index
= si_get_indirect_index(ctx
, reg_indirect
, 1,
276 reg_index
- ctx
->temp_arrays
[array_id
- 1].range
.First
);
278 /* Ensure that the index is within a valid range, to guard against
279 * VM faults and overwriting critical data (e.g. spilled resource
282 * TODO It should be possible to avoid the additional instructions
283 * if LLVM is changed so that it guarantuees:
284 * 1. the scratch space descriptor isolates the current wave (this
285 * could even save the scratch offset SGPR at the cost of an
286 * additional SALU instruction)
287 * 2. the memory for allocas must be allocated at the _end_ of the
288 * scratch space (after spilled registers)
290 index
= si_llvm_bound_index(ctx
, index
, array
->range
.Last
- array
->range
.First
+ 1);
292 index
= ac_build_imad(&ctx
->ac
, index
,
293 LLVMConstInt(ctx
->i32
, util_bitcount(array
->writemask
), 0),
294 LLVMConstInt(ctx
->i32
,
295 util_bitcount(array
->writemask
& ((1 << swizzle
) - 1)), 0));
296 idxs
[0] = ctx
->i32_0
;
298 return LLVMBuildGEP(ctx
->ac
.builder
, alloca
, idxs
, 2, "");
302 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context
*bld_base
,
307 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
308 LLVMValueRef values
[2] = {
309 ac_to_integer(&ctx
->ac
, ptr
),
310 ac_to_integer(&ctx
->ac
, ptr2
),
312 LLVMValueRef result
= ac_build_gather_values(&ctx
->ac
, values
, 2);
313 return LLVMBuildBitCast(ctx
->ac
.builder
, result
, type
, "");
317 emit_array_fetch(struct lp_build_tgsi_context
*bld_base
,
318 unsigned File
, enum tgsi_opcode_type type
,
319 struct tgsi_declaration_range range
,
322 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
323 unsigned i
, size
= range
.Last
- range
.First
+ 1;
324 LLVMTypeRef vec
= LLVMVectorType(tgsi2llvmtype(bld_base
, type
), size
);
325 LLVMValueRef result
= LLVMGetUndef(vec
);
327 struct tgsi_full_src_register tmp_reg
= {};
328 tmp_reg
.Register
.File
= File
;
330 for (i
= 0; i
< size
; ++i
) {
331 tmp_reg
.Register
.Index
= i
+ range
.First
;
332 LLVMValueRef temp
= si_llvm_emit_fetch(bld_base
, &tmp_reg
, type
, swizzle
);
333 result
= LLVMBuildInsertElement(ctx
->ac
.builder
, result
, temp
,
334 LLVMConstInt(ctx
->i32
, i
, 0), "array_vector");
340 load_value_from_array(struct lp_build_tgsi_context
*bld_base
,
342 enum tgsi_opcode_type type
,
345 const struct tgsi_ind_register
*reg_indirect
)
347 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
348 LLVMBuilderRef builder
= ctx
->ac
.builder
;
351 ptr
= get_pointer_into_array(ctx
, file
, swizzle
, reg_index
, reg_indirect
);
353 LLVMValueRef val
= LLVMBuildLoad(builder
, ptr
, "");
354 if (tgsi_type_is_64bit(type
)) {
355 LLVMValueRef ptr_hi
, val_hi
;
356 ptr_hi
= LLVMBuildGEP(builder
, ptr
, &ctx
->i32_1
, 1, "");
357 val_hi
= LLVMBuildLoad(builder
, ptr_hi
, "");
358 val
= si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
364 struct tgsi_declaration_range range
=
365 get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
367 si_get_indirect_index(ctx
, reg_indirect
, 1, reg_index
- range
.First
);
369 emit_array_fetch(bld_base
, file
, type
, range
, swizzle
);
370 return LLVMBuildExtractElement(builder
, array
, index
, "");
375 store_value_to_array(struct lp_build_tgsi_context
*bld_base
,
380 const struct tgsi_ind_register
*reg_indirect
)
382 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
383 LLVMBuilderRef builder
= ctx
->ac
.builder
;
386 ptr
= get_pointer_into_array(ctx
, file
, chan_index
, reg_index
, reg_indirect
);
388 LLVMBuildStore(builder
, value
, ptr
);
391 struct tgsi_declaration_range range
= get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
392 LLVMValueRef index
= si_get_indirect_index(ctx
, reg_indirect
, 1, reg_index
- range
.First
);
394 emit_array_fetch(bld_base
, file
, TGSI_TYPE_FLOAT
, range
, chan_index
);
395 LLVMValueRef temp_ptr
;
397 array
= LLVMBuildInsertElement(builder
, array
, value
, index
, "");
399 size
= range
.Last
- range
.First
+ 1;
400 for (i
= 0; i
< size
; ++i
) {
402 case TGSI_FILE_OUTPUT
:
403 temp_ptr
= ctx
->outputs
[i
+ range
.First
][chan_index
];
406 case TGSI_FILE_TEMPORARY
:
407 if (range
.First
+ i
>= ctx
->temps_count
)
409 temp_ptr
= ctx
->temps
[(i
+ range
.First
) * TGSI_NUM_CHANNELS
+ chan_index
];
415 value
= LLVMBuildExtractElement(builder
, array
,
416 LLVMConstInt(ctx
->i32
, i
, 0), "");
417 LLVMBuildStore(builder
, value
, temp_ptr
);
422 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
423 * reload them at each use. This must be true if the shader is using
424 * derivatives and KILL, because KILL can leave the WQM and then a lazy
425 * input load isn't in the WQM anymore.
427 static bool si_preload_fs_inputs(struct si_shader_context
*ctx
)
429 struct si_shader_selector
*sel
= ctx
->shader
->selector
;
431 return sel
->info
.uses_derivatives
&&
436 get_output_ptr(struct lp_build_tgsi_context
*bld_base
, unsigned index
,
439 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
441 assert(index
<= ctx
->bld_base
.info
->file_max
[TGSI_FILE_OUTPUT
]);
442 return ctx
->outputs
[index
][chan
];
445 LLVMValueRef
si_llvm_emit_fetch(struct lp_build_tgsi_context
*bld_base
,
446 const struct tgsi_full_src_register
*reg
,
447 enum tgsi_opcode_type type
,
450 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
451 LLVMBuilderRef builder
= ctx
->ac
.builder
;
452 LLVMValueRef result
= NULL
, ptr
, ptr2
;
453 unsigned swizzle
= swizzle_in
& 0xffff;
455 if (swizzle_in
== ~0) {
456 LLVMValueRef values
[TGSI_NUM_CHANNELS
];
458 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
459 values
[chan
] = si_llvm_emit_fetch(bld_base
, reg
, type
, chan
);
461 return ac_build_gather_values(&ctx
->ac
, values
,
465 if (reg
->Register
.Indirect
) {
466 LLVMValueRef load
= load_value_from_array(bld_base
, reg
->Register
.File
, type
,
467 swizzle
, reg
->Register
.Index
, ®
->Indirect
);
468 return bitcast(bld_base
, type
, load
);
471 switch(reg
->Register
.File
) {
472 case TGSI_FILE_IMMEDIATE
: {
473 LLVMTypeRef ctype
= tgsi2llvmtype(bld_base
, type
);
474 if (tgsi_type_is_64bit(type
)) {
475 result
= LLVMGetUndef(LLVMVectorType(ctx
->i32
, 2));
476 result
= LLVMConstInsertElement(result
,
477 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
],
479 result
= LLVMConstInsertElement(result
,
480 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ (swizzle_in
>> 16)],
482 return LLVMConstBitCast(result
, ctype
);
484 return LLVMConstBitCast(ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
], ctype
);
488 case TGSI_FILE_INPUT
: {
489 unsigned index
= reg
->Register
.Index
;
490 LLVMValueRef input
[4];
492 /* I don't think doing this for vertex shaders is beneficial.
493 * For those, we want to make sure the VMEM loads are executed
494 * only once. Fragment shaders don't care much, because
495 * v_interp instructions are much cheaper than VMEM loads.
497 if (!si_preload_fs_inputs(ctx
) &&
498 ctx
->bld_base
.info
->processor
== PIPE_SHADER_FRAGMENT
)
499 ctx
->load_input(ctx
, index
, &ctx
->input_decls
[index
], input
);
501 memcpy(input
, &ctx
->inputs
[index
* 4], sizeof(input
));
503 result
= input
[swizzle
];
505 if (tgsi_type_is_64bit(type
)) {
507 ptr2
= input
[swizzle_in
>> 16];
508 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
514 case TGSI_FILE_TEMPORARY
:
515 if (reg
->Register
.Index
>= ctx
->temps_count
)
516 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
517 ptr
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
];
518 if (tgsi_type_is_64bit(type
)) {
519 ptr2
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ (swizzle_in
>> 16)];
520 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
521 LLVMBuildLoad(builder
, ptr
, ""),
522 LLVMBuildLoad(builder
, ptr2
, ""));
524 result
= LLVMBuildLoad(builder
, ptr
, "");
527 case TGSI_FILE_OUTPUT
:
528 ptr
= get_output_ptr(bld_base
, reg
->Register
.Index
, swizzle
);
529 if (tgsi_type_is_64bit(type
)) {
530 ptr2
= get_output_ptr(bld_base
, reg
->Register
.Index
, (swizzle_in
>> 16));
531 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
532 LLVMBuildLoad(builder
, ptr
, ""),
533 LLVMBuildLoad(builder
, ptr2
, ""));
535 result
= LLVMBuildLoad(builder
, ptr
, "");
539 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
542 return bitcast(bld_base
, type
, result
);
545 static LLVMValueRef
fetch_system_value(struct lp_build_tgsi_context
*bld_base
,
546 const struct tgsi_full_src_register
*reg
,
547 enum tgsi_opcode_type type
,
550 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
551 LLVMBuilderRef builder
= ctx
->ac
.builder
;
552 LLVMValueRef cval
= ctx
->system_values
[reg
->Register
.Index
];
553 unsigned swizzle
= swizzle_in
& 0xffff;
555 if (tgsi_type_is_64bit(type
)) {
558 assert(swizzle
== 0 || swizzle
== 2);
560 lo
= LLVMBuildExtractElement(
561 builder
, cval
, LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
562 hi
= LLVMBuildExtractElement(
563 builder
, cval
, LLVMConstInt(ctx
->i32
, (swizzle_in
>> 16), 0), "");
565 return si_llvm_emit_fetch_64bit(bld_base
, tgsi2llvmtype(bld_base
, type
),
569 if (LLVMGetTypeKind(LLVMTypeOf(cval
)) == LLVMVectorTypeKind
) {
570 cval
= LLVMBuildExtractElement(
571 builder
, cval
, LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
573 assert(swizzle
== 0);
576 return bitcast(bld_base
, type
, cval
);
579 static void emit_declaration(struct lp_build_tgsi_context
*bld_base
,
580 const struct tgsi_full_declaration
*decl
)
582 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
583 LLVMBuilderRef builder
= ctx
->ac
.builder
;
584 unsigned first
, last
, i
;
585 switch(decl
->Declaration
.File
) {
586 case TGSI_FILE_ADDRESS
:
589 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
591 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
592 ctx
->addrs
[idx
][chan
] = ac_build_alloca_undef(
593 &ctx
->ac
, ctx
->i32
, "");
599 case TGSI_FILE_TEMPORARY
:
602 LLVMValueRef array_alloca
= NULL
;
604 unsigned writemask
= decl
->Declaration
.UsageMask
;
605 first
= decl
->Range
.First
;
606 last
= decl
->Range
.Last
;
607 decl_size
= 4 * ((last
- first
) + 1);
609 if (decl
->Declaration
.Array
) {
610 unsigned id
= decl
->Array
.ArrayID
- 1;
613 writemask
&= ctx
->temp_arrays
[id
].writemask
;
614 ctx
->temp_arrays
[id
].writemask
= writemask
;
615 array_size
= ((last
- first
) + 1) * util_bitcount(writemask
);
617 /* If the array has more than 16 elements, store it
618 * in memory using an alloca that spans the entire
621 * Otherwise, store each array element individually.
622 * We will then generate vectors (per-channel, up to
623 * <16 x float> if the usagemask is a single bit) for
624 * indirect addressing.
626 * Note that 16 is the number of vector elements that
627 * LLVM will store in a register, so theoretically an
628 * array with up to 4 * 16 = 64 elements could be
629 * handled this way, but whether that's a good idea
630 * depends on VGPR register pressure elsewhere.
632 * FIXME: We shouldn't need to have the non-alloca
633 * code path for arrays. LLVM should be smart enough to
634 * promote allocas into registers when profitable.
636 if (array_size
> 16 ||
637 !ctx
->screen
->llvm_has_working_vgpr_indexing
) {
638 array_alloca
= ac_build_alloca_undef(&ctx
->ac
,
639 LLVMArrayType(ctx
->f32
,
640 array_size
), "array");
641 ctx
->temp_array_allocas
[id
] = array_alloca
;
645 if (!ctx
->temps_count
) {
646 ctx
->temps_count
= bld_base
->info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
647 ctx
->temps
= MALLOC(TGSI_NUM_CHANNELS
* ctx
->temps_count
* sizeof(LLVMValueRef
));
650 for (i
= 0; i
< decl_size
; ++i
) {
652 snprintf(name
, sizeof(name
), "TEMP%d.%c",
653 first
+ i
/ 4, "xyzw"[i
% 4]);
655 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] =
656 ac_build_alloca_undef(&ctx
->ac
,
661 LLVMValueRef idxs
[2] = {
667 if (writemask
!= TGSI_WRITEMASK_XYZW
&&
668 !ctx
->undef_alloca
) {
669 /* Create a dummy alloca. We use it so that we
670 * have a pointer that is safe to load from if
671 * a shader ever reads from a channel that
672 * it never writes to.
674 ctx
->undef_alloca
= ac_build_alloca_undef(
675 &ctx
->ac
, ctx
->f32
, "undef");
678 for (i
= 0; i
< decl_size
; ++i
) {
680 if (writemask
& (1 << (i
% 4))) {
682 snprintf(name
, sizeof(name
), "TEMP%d.%c",
683 first
+ i
/ 4, "xyzw"[i
% 4]);
685 idxs
[1] = LLVMConstInt(ctx
->i32
, j
, 0);
686 ptr
= LLVMBuildGEP(builder
, array_alloca
, idxs
, 2, name
);
689 ptr
= ctx
->undef_alloca
;
691 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] = ptr
;
696 case TGSI_FILE_INPUT
:
699 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
700 if (ctx
->load_input
&&
701 ctx
->input_decls
[idx
].Declaration
.File
!= TGSI_FILE_INPUT
) {
702 ctx
->input_decls
[idx
] = *decl
;
703 ctx
->input_decls
[idx
].Range
.First
= idx
;
704 ctx
->input_decls
[idx
].Range
.Last
= idx
;
705 ctx
->input_decls
[idx
].Semantic
.Index
+= idx
- decl
->Range
.First
;
707 if (si_preload_fs_inputs(ctx
) ||
708 bld_base
->info
->processor
!= PIPE_SHADER_FRAGMENT
)
709 ctx
->load_input(ctx
, idx
, &ctx
->input_decls
[idx
],
710 &ctx
->inputs
[idx
* 4]);
716 case TGSI_FILE_SYSTEM_VALUE
:
719 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
720 si_load_system_value(ctx
, idx
, decl
);
725 case TGSI_FILE_OUTPUT
:
729 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
731 assert(idx
< RADEON_LLVM_MAX_OUTPUTS
);
732 if (ctx
->outputs
[idx
][0])
734 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
736 snprintf(name
, sizeof(name
), "OUT%d.%c",
737 idx
, "xyzw"[chan
% 4]);
739 ctx
->outputs
[idx
][chan
] = ac_build_alloca_undef(
740 &ctx
->ac
, ctx
->f32
, name
);
746 case TGSI_FILE_MEMORY
:
747 si_tgsi_declare_compute_memory(ctx
, decl
);
755 void si_llvm_emit_store(struct lp_build_tgsi_context
*bld_base
,
756 const struct tgsi_full_instruction
*inst
,
757 const struct tgsi_opcode_info
*info
,
761 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
762 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[index
];
763 LLVMBuilderRef builder
= ctx
->ac
.builder
;
764 LLVMValueRef temp_ptr
, temp_ptr2
= NULL
;
765 bool is_vec_store
= false;
766 enum tgsi_opcode_type dtype
= tgsi_opcode_infer_dst_type(inst
->Instruction
.Opcode
, index
);
769 LLVMTypeKind k
= LLVMGetTypeKind(LLVMTypeOf(dst
[0]));
770 is_vec_store
= (k
== LLVMVectorTypeKind
);
774 LLVMValueRef values
[4] = {};
775 uint32_t writemask
= reg
->Register
.WriteMask
;
777 unsigned chan
= u_bit_scan(&writemask
);
778 LLVMValueRef index
= LLVMConstInt(ctx
->i32
, chan
, 0);
779 values
[chan
] = LLVMBuildExtractElement(ctx
->ac
.builder
,
782 bld_base
->emit_store(bld_base
, inst
, info
, index
, values
);
786 uint32_t writemask
= reg
->Register
.WriteMask
;
788 unsigned chan_index
= u_bit_scan(&writemask
);
789 LLVMValueRef value
= dst
[chan_index
];
791 if (tgsi_type_is_64bit(dtype
) && (chan_index
== 1 || chan_index
== 3))
793 if (inst
->Instruction
.Saturate
)
794 value
= ac_build_clamp(&ctx
->ac
, value
);
796 if (reg
->Register
.File
== TGSI_FILE_ADDRESS
) {
797 temp_ptr
= ctx
->addrs
[reg
->Register
.Index
][chan_index
];
798 LLVMBuildStore(builder
, value
, temp_ptr
);
802 if (!tgsi_type_is_64bit(dtype
))
803 value
= ac_to_float(&ctx
->ac
, value
);
805 if (reg
->Register
.Indirect
) {
806 unsigned file
= reg
->Register
.File
;
807 unsigned reg_index
= reg
->Register
.Index
;
808 store_value_to_array(bld_base
, value
, file
, chan_index
,
809 reg_index
, ®
->Indirect
);
811 switch(reg
->Register
.File
) {
812 case TGSI_FILE_OUTPUT
:
813 temp_ptr
= ctx
->outputs
[reg
->Register
.Index
][chan_index
];
814 if (tgsi_type_is_64bit(dtype
))
815 temp_ptr2
= ctx
->outputs
[reg
->Register
.Index
][chan_index
+ 1];
818 case TGSI_FILE_TEMPORARY
:
820 if (reg
->Register
.Index
>= ctx
->temps_count
)
823 temp_ptr
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
];
824 if (tgsi_type_is_64bit(dtype
))
825 temp_ptr2
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
+ 1];
832 if (!tgsi_type_is_64bit(dtype
))
833 LLVMBuildStore(builder
, value
, temp_ptr
);
835 LLVMValueRef ptr
= LLVMBuildBitCast(builder
, value
,
836 LLVMVectorType(ctx
->i32
, 2), "");
838 value
= LLVMBuildExtractElement(builder
, ptr
,
840 val2
= LLVMBuildExtractElement(builder
, ptr
,
843 LLVMBuildStore(builder
, ac_to_float(&ctx
->ac
, value
), temp_ptr
);
844 LLVMBuildStore(builder
, ac_to_float(&ctx
->ac
, val2
), temp_ptr2
);
static int get_line(int pc)
{
	/* Subtract 1 so that the number shown is that of the corresponding
	 * opcode in the TGSI dump, e.g. an if block has the same suffix as
	 * the instruction number of the corresponding TGSI IF. */
	return pc - 1;
}
859 static void bgnloop_emit(const struct lp_build_tgsi_action
*action
,
860 struct lp_build_tgsi_context
*bld_base
,
861 struct lp_build_emit_data
*emit_data
)
863 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
864 ac_build_bgnloop(&ctx
->ac
, get_line(bld_base
->pc
));
867 static void brk_emit(const struct lp_build_tgsi_action
*action
,
868 struct lp_build_tgsi_context
*bld_base
,
869 struct lp_build_emit_data
*emit_data
)
871 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
872 ac_build_break(&ctx
->ac
);
875 static void cont_emit(const struct lp_build_tgsi_action
*action
,
876 struct lp_build_tgsi_context
*bld_base
,
877 struct lp_build_emit_data
*emit_data
)
879 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
880 ac_build_continue(&ctx
->ac
);
883 static void else_emit(const struct lp_build_tgsi_action
*action
,
884 struct lp_build_tgsi_context
*bld_base
,
885 struct lp_build_emit_data
*emit_data
)
887 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
888 ac_build_else(&ctx
->ac
, get_line(bld_base
->pc
));
891 static void endif_emit(const struct lp_build_tgsi_action
*action
,
892 struct lp_build_tgsi_context
*bld_base
,
893 struct lp_build_emit_data
*emit_data
)
895 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
896 ac_build_endif(&ctx
->ac
, get_line(bld_base
->pc
));
899 static void endloop_emit(const struct lp_build_tgsi_action
*action
,
900 struct lp_build_tgsi_context
*bld_base
,
901 struct lp_build_emit_data
*emit_data
)
903 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
904 ac_build_endloop(&ctx
->ac
, get_line(bld_base
->pc
));
907 static void if_emit(const struct lp_build_tgsi_action
*action
,
908 struct lp_build_tgsi_context
*bld_base
,
909 struct lp_build_emit_data
*emit_data
)
911 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
912 ac_build_if(&ctx
->ac
, emit_data
->args
[0], get_line(bld_base
->pc
));
915 static void uif_emit(const struct lp_build_tgsi_action
*action
,
916 struct lp_build_tgsi_context
*bld_base
,
917 struct lp_build_emit_data
*emit_data
)
919 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
920 ac_build_uif(&ctx
->ac
, emit_data
->args
[0], get_line(bld_base
->pc
));
923 static void emit_immediate(struct lp_build_tgsi_context
*bld_base
,
924 const struct tgsi_full_immediate
*imm
)
927 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
929 for (i
= 0; i
< 4; ++i
) {
930 ctx
->imms
[ctx
->imms_num
* TGSI_NUM_CHANNELS
+ i
] =
931 LLVMConstInt(ctx
->i32
, imm
->u
[i
].Uint
, false );
937 void si_llvm_context_init(struct si_shader_context
*ctx
,
938 struct si_screen
*sscreen
,
939 struct ac_llvm_compiler
*compiler
)
943 /* Initialize the gallivm object:
944 * We are only using the module, context, and builder fields of this struct.
945 * This should be enough for us to be able to pass our gallivm struct to the
946 * helper functions in the gallivm module.
948 memset(ctx
, 0, sizeof(*ctx
));
949 ctx
->screen
= sscreen
;
950 ctx
->compiler
= compiler
;
952 ac_llvm_context_init(&ctx
->ac
, sscreen
->info
.chip_class
, sscreen
->info
.family
);
953 ctx
->ac
.module
= ac_create_module(compiler
->tm
, ctx
->ac
.context
);
955 enum ac_float_mode float_mode
=
956 sscreen
->debug_flags
& DBG(UNSAFE_MATH
) ?
957 AC_FLOAT_MODE_UNSAFE_FP_MATH
:
958 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH
;
959 ctx
->ac
.builder
= ac_create_builder(ctx
->ac
.context
, float_mode
);
961 ctx
->gallivm
.context
= ctx
->ac
.context
;
962 ctx
->gallivm
.module
= ctx
->ac
.module
;
963 ctx
->gallivm
.builder
= ctx
->ac
.builder
;
965 struct lp_build_tgsi_context
*bld_base
= &ctx
->bld_base
;
967 type
.floating
= true;
974 lp_build_context_init(&bld_base
->base
, &ctx
->gallivm
, type
);
975 lp_build_context_init(&ctx
->bld_base
.uint_bld
, &ctx
->gallivm
, lp_uint_type(type
));
976 lp_build_context_init(&ctx
->bld_base
.int_bld
, &ctx
->gallivm
, lp_int_type(type
));
978 lp_build_context_init(&ctx
->bld_base
.dbl_bld
, &ctx
->gallivm
, type
);
979 lp_build_context_init(&ctx
->bld_base
.uint64_bld
, &ctx
->gallivm
, lp_uint_type(type
));
980 lp_build_context_init(&ctx
->bld_base
.int64_bld
, &ctx
->gallivm
, lp_int_type(type
));
983 bld_base
->emit_swizzle
= emit_swizzle
;
984 bld_base
->emit_declaration
= emit_declaration
;
985 bld_base
->emit_immediate
= emit_immediate
;
987 bld_base
->op_actions
[TGSI_OPCODE_BGNLOOP
].emit
= bgnloop_emit
;
988 bld_base
->op_actions
[TGSI_OPCODE_BRK
].emit
= brk_emit
;
989 bld_base
->op_actions
[TGSI_OPCODE_CONT
].emit
= cont_emit
;
990 bld_base
->op_actions
[TGSI_OPCODE_IF
].emit
= if_emit
;
991 bld_base
->op_actions
[TGSI_OPCODE_UIF
].emit
= uif_emit
;
992 bld_base
->op_actions
[TGSI_OPCODE_ELSE
].emit
= else_emit
;
993 bld_base
->op_actions
[TGSI_OPCODE_ENDIF
].emit
= endif_emit
;
994 bld_base
->op_actions
[TGSI_OPCODE_ENDLOOP
].emit
= endloop_emit
;
996 si_shader_context_init_alu(&ctx
->bld_base
);
997 si_shader_context_init_mem(ctx
);
999 ctx
->voidt
= LLVMVoidTypeInContext(ctx
->ac
.context
);
1000 ctx
->i1
= LLVMInt1TypeInContext(ctx
->ac
.context
);
1001 ctx
->i8
= LLVMInt8TypeInContext(ctx
->ac
.context
);
1002 ctx
->i32
= LLVMInt32TypeInContext(ctx
->ac
.context
);
1003 ctx
->i64
= LLVMInt64TypeInContext(ctx
->ac
.context
);
1004 ctx
->i128
= LLVMIntTypeInContext(ctx
->ac
.context
, 128);
1005 ctx
->f32
= LLVMFloatTypeInContext(ctx
->ac
.context
);
1006 ctx
->v2i32
= LLVMVectorType(ctx
->i32
, 2);
1007 ctx
->v4i32
= LLVMVectorType(ctx
->i32
, 4);
1008 ctx
->v4f32
= LLVMVectorType(ctx
->f32
, 4);
1009 ctx
->v8i32
= LLVMVectorType(ctx
->i32
, 8);
1011 ctx
->i32_0
= LLVMConstInt(ctx
->i32
, 0, 0);
1012 ctx
->i32_1
= LLVMConstInt(ctx
->i32
, 1, 0);
1013 ctx
->i1false
= LLVMConstInt(ctx
->i1
, 0, 0);
1014 ctx
->i1true
= LLVMConstInt(ctx
->i1
, 1, 0);
1017 /* Set the context to a certain TGSI shader. Can be called repeatedly
1018 * to change the shader. */
1019 void si_llvm_context_set_tgsi(struct si_shader_context
*ctx
,
1020 struct si_shader
*shader
)
1022 const struct tgsi_shader_info
*info
= NULL
;
1023 const struct tgsi_token
*tokens
= NULL
;
1025 if (shader
&& shader
->selector
) {
1026 info
= &shader
->selector
->info
;
1027 tokens
= shader
->selector
->tokens
;
1030 ctx
->shader
= shader
;
1031 ctx
->type
= info
? info
->processor
: -1;
1032 ctx
->bld_base
.info
= info
;
1034 /* Clean up the old contents. */
1035 FREE(ctx
->temp_arrays
);
1036 ctx
->temp_arrays
= NULL
;
1037 FREE(ctx
->temp_array_allocas
);
1038 ctx
->temp_array_allocas
= NULL
;
1046 ctx
->temps_count
= 0;
1051 ctx
->num_const_buffers
= util_last_bit(info
->const_buffers_declared
);
1052 ctx
->num_shader_buffers
= util_last_bit(info
->shader_buffers_declared
);
1054 ctx
->num_samplers
= util_last_bit(info
->samplers_declared
);
1055 ctx
->num_images
= util_last_bit(info
->images_declared
);
1060 if (info
->array_max
[TGSI_FILE_TEMPORARY
] > 0) {
1061 int size
= info
->array_max
[TGSI_FILE_TEMPORARY
];
1063 ctx
->temp_arrays
= CALLOC(size
, sizeof(ctx
->temp_arrays
[0]));
1064 ctx
->temp_array_allocas
= CALLOC(size
, sizeof(ctx
->temp_array_allocas
[0]));
1066 tgsi_scan_arrays(tokens
, TGSI_FILE_TEMPORARY
, size
,
1069 if (info
->file_max
[TGSI_FILE_IMMEDIATE
] >= 0) {
1070 int size
= info
->file_max
[TGSI_FILE_IMMEDIATE
] + 1;
1071 ctx
->imms
= MALLOC(size
* TGSI_NUM_CHANNELS
* sizeof(LLVMValueRef
));
1074 /* Re-set these to start with a clean slate. */
1075 ctx
->bld_base
.num_instructions
= 0;
1076 ctx
->bld_base
.pc
= 0;
1077 memset(ctx
->outputs
, 0, sizeof(ctx
->outputs
));
1079 ctx
->bld_base
.emit_store
= si_llvm_emit_store
;
1080 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_IMMEDIATE
] = si_llvm_emit_fetch
;
1081 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_INPUT
] = si_llvm_emit_fetch
;
1082 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_TEMPORARY
] = si_llvm_emit_fetch
;
1083 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_OUTPUT
] = si_llvm_emit_fetch
;
1084 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_SYSTEM_VALUE
] = fetch_system_value
;
1087 void si_llvm_create_func(struct si_shader_context
*ctx
,
1089 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
1090 LLVMTypeRef
*ParamTypes
, unsigned ParamCount
)
1092 LLVMTypeRef main_fn_type
, ret_type
;
1093 LLVMBasicBlockRef main_fn_body
;
1094 enum si_llvm_calling_convention call_conv
;
1095 unsigned real_shader_type
;
1097 if (num_return_elems
)
1098 ret_type
= LLVMStructTypeInContext(ctx
->ac
.context
,
1100 num_return_elems
, true);
1102 ret_type
= ctx
->voidt
;
1104 /* Setup the function */
1105 ctx
->return_type
= ret_type
;
1106 main_fn_type
= LLVMFunctionType(ret_type
, ParamTypes
, ParamCount
, 0);
1107 ctx
->main_fn
= LLVMAddFunction(ctx
->gallivm
.module
, name
, main_fn_type
);
1108 main_fn_body
= LLVMAppendBasicBlockInContext(ctx
->ac
.context
,
1109 ctx
->main_fn
, "main_body");
1110 LLVMPositionBuilderAtEnd(ctx
->ac
.builder
, main_fn_body
);
1112 real_shader_type
= ctx
->type
;
1114 /* LS is merged into HS (TCS), and ES is merged into GS. */
1115 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
1116 if (ctx
->shader
->key
.as_ls
)
1117 real_shader_type
= PIPE_SHADER_TESS_CTRL
;
1118 else if (ctx
->shader
->key
.as_es
)
1119 real_shader_type
= PIPE_SHADER_GEOMETRY
;
1122 switch (real_shader_type
) {
1123 case PIPE_SHADER_VERTEX
:
1124 case PIPE_SHADER_TESS_EVAL
:
1125 call_conv
= RADEON_LLVM_AMDGPU_VS
;
1127 case PIPE_SHADER_TESS_CTRL
:
1128 call_conv
= RADEON_LLVM_AMDGPU_HS
;
1130 case PIPE_SHADER_GEOMETRY
:
1131 call_conv
= RADEON_LLVM_AMDGPU_GS
;
1133 case PIPE_SHADER_FRAGMENT
:
1134 call_conv
= RADEON_LLVM_AMDGPU_PS
;
1136 case PIPE_SHADER_COMPUTE
:
1137 call_conv
= RADEON_LLVM_AMDGPU_CS
;
1140 unreachable("Unhandle shader type");
1143 LLVMSetFunctionCallConv(ctx
->main_fn
, call_conv
);
1146 void si_llvm_optimize_module(struct si_shader_context
*ctx
)
1148 /* Dump LLVM IR before any optimization passes */
1149 if (ctx
->screen
->debug_flags
& DBG(PREOPT_IR
) &&
1150 si_can_dump_shader(ctx
->screen
, ctx
->type
))
1151 LLVMDumpModule(ctx
->gallivm
.module
);
1154 LLVMRunPassManager(ctx
->compiler
->passmgr
, ctx
->gallivm
.module
);
1155 LLVMDisposeBuilder(ctx
->ac
.builder
);
1158 void si_llvm_dispose(struct si_shader_context
*ctx
)
1160 LLVMDisposeModule(ctx
->gallivm
.module
);
1161 LLVMContextDispose(ctx
->gallivm
.context
);
1162 FREE(ctx
->temp_arrays
);
1163 ctx
->temp_arrays
= NULL
;
1164 FREE(ctx
->temp_array_allocas
);
1165 ctx
->temp_array_allocas
= NULL
;
1168 ctx
->temps_count
= 0;
1172 ac_llvm_context_dispose(&ctx
->ac
);