2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include "si_shader_internal.h"
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
44 /* Data for if/else/endif and bgnloop/endloop control flow structures.
47 /* Loop exit or next part of if/else/endif. */
48 LLVMBasicBlockRef next_block
;
49 LLVMBasicBlockRef loop_entry_block
;
/* AMDGPU calling-convention IDs understood by the LLVM backend
 * (numeric values must match LLVM's CallingConv definitions). */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
60 void si_llvm_add_attribute(LLVMValueRef F
, const char *name
, int value
)
64 snprintf(str
, sizeof(str
), "%i", value
);
65 LLVMAddTargetDependentFunctionAttr(F
, name
, str
);
/* Context passed to the LLVM diagnostic handler: where to report messages
 * and the resulting compile status. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	/* Set to non-zero when an LLVMDSError diagnostic is seen;
	 * read back by si_llvm_compile as its return value. */
	unsigned retval;
};
73 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
75 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
76 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
77 char *description
= LLVMGetDiagInfoDescription(di
);
78 const char *severity_str
= NULL
;
82 severity_str
= "error";
85 severity_str
= "warning";
88 severity_str
= "remark";
91 severity_str
= "note";
94 severity_str
= "unknown";
97 pipe_debug_message(diag
->debug
, SHADER_INFO
,
98 "LLVM diagnostic (%s): %s", severity_str
, description
);
100 if (severity
== LLVMDSError
) {
102 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
105 LLVMDisposeMessage(description
);
109 * Compile an LLVM module to machine code.
111 * @returns 0 for success, 1 for failure
113 unsigned si_llvm_compile(LLVMModuleRef M
, struct ac_shader_binary
*binary
,
114 LLVMTargetMachineRef tm
,
115 struct pipe_debug_callback
*debug
)
117 struct si_llvm_diagnostics diag
;
119 LLVMContextRef llvm_ctx
;
120 LLVMMemoryBufferRef out_buffer
;
121 unsigned buffer_size
;
122 const char *buffer_data
;
128 /* Setup Diagnostic Handler*/
129 llvm_ctx
= LLVMGetModuleContext(M
);
131 LLVMContextSetDiagnosticHandler(llvm_ctx
, si_diagnostic_handler
, &diag
);
134 mem_err
= LLVMTargetMachineEmitToMemoryBuffer(tm
, M
, LLVMObjectFile
, &err
,
137 /* Process Errors/Warnings */
139 fprintf(stderr
, "%s: %s", __FUNCTION__
, err
);
140 pipe_debug_message(debug
, SHADER_INFO
,
141 "LLVM emit error: %s", err
);
147 /* Extract Shader Code*/
148 buffer_size
= LLVMGetBufferSize(out_buffer
);
149 buffer_data
= LLVMGetBufferStart(out_buffer
);
151 if (!ac_elf_read(buffer_data
, buffer_size
, binary
)) {
152 fprintf(stderr
, "radeonsi: cannot read an ELF shader binary\n");
157 LLVMDisposeMemoryBuffer(out_buffer
);
160 if (diag
.retval
!= 0)
161 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compile failed");
165 LLVMTypeRef
tgsi2llvmtype(struct lp_build_tgsi_context
*bld_base
,
166 enum tgsi_opcode_type type
)
168 LLVMContextRef ctx
= bld_base
->base
.gallivm
->context
;
171 case TGSI_TYPE_UNSIGNED
:
172 case TGSI_TYPE_SIGNED
:
173 return LLVMInt32TypeInContext(ctx
);
174 case TGSI_TYPE_UNSIGNED64
:
175 case TGSI_TYPE_SIGNED64
:
176 return LLVMInt64TypeInContext(ctx
);
177 case TGSI_TYPE_DOUBLE
:
178 return LLVMDoubleTypeInContext(ctx
);
179 case TGSI_TYPE_UNTYPED
:
180 case TGSI_TYPE_FLOAT
:
181 return LLVMFloatTypeInContext(ctx
);
187 LLVMValueRef
bitcast(struct lp_build_tgsi_context
*bld_base
,
188 enum tgsi_opcode_type type
, LLVMValueRef value
)
190 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
191 LLVMTypeRef dst_type
= tgsi2llvmtype(bld_base
, type
);
194 return LLVMBuildBitCast(builder
, value
, dst_type
, "");
200 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
201 * or an undefined value in the same interval otherwise.
203 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
207 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
208 LLVMBuilderRef builder
= gallivm
->builder
;
209 LLVMValueRef c_max
= LLVMConstInt(ctx
->i32
, num
- 1, 0);
212 if (util_is_power_of_two(num
)) {
213 index
= LLVMBuildAnd(builder
, index
, c_max
, "");
215 /* In theory, this MAX pattern should result in code that is
216 * as good as the bit-wise AND above.
218 * In practice, LLVM generates worse code (at the time of
219 * writing), because its value tracking is not strong enough.
221 cc
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, c_max
, "");
222 index
= LLVMBuildSelect(builder
, cc
, index
, c_max
, "");
228 static struct si_llvm_flow
*
229 get_current_flow(struct si_shader_context
*ctx
)
231 if (ctx
->flow_depth
> 0)
232 return &ctx
->flow
[ctx
->flow_depth
- 1];
236 static struct si_llvm_flow
*
237 get_innermost_loop(struct si_shader_context
*ctx
)
239 for (unsigned i
= ctx
->flow_depth
; i
> 0; --i
) {
240 if (ctx
->flow
[i
- 1].loop_entry_block
)
241 return &ctx
->flow
[i
- 1];
246 static struct si_llvm_flow
*
247 push_flow(struct si_shader_context
*ctx
)
249 struct si_llvm_flow
*flow
;
251 if (ctx
->flow_depth
>= ctx
->flow_depth_max
) {
252 unsigned new_max
= MAX2(ctx
->flow_depth
<< 1, RADEON_LLVM_INITIAL_CF_DEPTH
);
253 ctx
->flow
= REALLOC(ctx
->flow
,
254 ctx
->flow_depth_max
* sizeof(*ctx
->flow
),
255 new_max
* sizeof(*ctx
->flow
));
256 ctx
->flow_depth_max
= new_max
;
259 flow
= &ctx
->flow
[ctx
->flow_depth
];
262 flow
->next_block
= NULL
;
263 flow
->loop_entry_block
= NULL
;
267 static LLVMValueRef
emit_swizzle(struct lp_build_tgsi_context
*bld_base
,
274 LLVMValueRef swizzles
[4];
276 LLVMInt32TypeInContext(bld_base
->base
.gallivm
->context
);
278 swizzles
[0] = LLVMConstInt(i32t
, swizzle_x
, 0);
279 swizzles
[1] = LLVMConstInt(i32t
, swizzle_y
, 0);
280 swizzles
[2] = LLVMConstInt(i32t
, swizzle_z
, 0);
281 swizzles
[3] = LLVMConstInt(i32t
, swizzle_w
, 0);
283 return LLVMBuildShuffleVector(bld_base
->base
.gallivm
->builder
,
285 LLVMGetUndef(LLVMTypeOf(value
)),
286 LLVMConstVector(swizzles
, 4), "");
290 * Return the description of the array covering the given temporary register
294 get_temp_array_id(struct lp_build_tgsi_context
*bld_base
,
296 const struct tgsi_ind_register
*reg
)
298 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
299 unsigned num_arrays
= ctx
->bld_base
.info
->array_max
[TGSI_FILE_TEMPORARY
];
302 if (reg
&& reg
->ArrayID
> 0 && reg
->ArrayID
<= num_arrays
)
305 for (i
= 0; i
< num_arrays
; i
++) {
306 const struct tgsi_array_info
*array
= &ctx
->temp_arrays
[i
];
308 if (reg_index
>= array
->range
.First
&& reg_index
<= array
->range
.Last
)
315 static struct tgsi_declaration_range
316 get_array_range(struct lp_build_tgsi_context
*bld_base
,
317 unsigned File
, unsigned reg_index
,
318 const struct tgsi_ind_register
*reg
)
320 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
321 struct tgsi_declaration_range range
;
323 if (File
== TGSI_FILE_TEMPORARY
) {
324 unsigned array_id
= get_temp_array_id(bld_base
, reg_index
, reg
);
326 return ctx
->temp_arrays
[array_id
- 1].range
;
330 range
.Last
= bld_base
->info
->file_max
[File
];
335 * For indirect registers, construct a pointer directly to the requested
336 * element using getelementptr if possible.
338 * Returns NULL if the insertelement/extractelement fallback for array access
342 get_pointer_into_array(struct si_shader_context
*ctx
,
346 const struct tgsi_ind_register
*reg_indirect
)
349 struct tgsi_array_info
*array
;
350 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
351 LLVMBuilderRef builder
= gallivm
->builder
;
352 LLVMValueRef idxs
[2];
356 if (file
!= TGSI_FILE_TEMPORARY
)
359 array_id
= get_temp_array_id(&ctx
->bld_base
, reg_index
, reg_indirect
);
363 alloca
= ctx
->temp_array_allocas
[array_id
- 1];
367 array
= &ctx
->temp_arrays
[array_id
- 1];
369 if (!(array
->writemask
& (1 << swizzle
)))
370 return ctx
->undef_alloca
;
372 index
= si_get_indirect_index(ctx
, reg_indirect
, 1,
373 reg_index
- ctx
->temp_arrays
[array_id
- 1].range
.First
);
375 /* Ensure that the index is within a valid range, to guard against
376 * VM faults and overwriting critical data (e.g. spilled resource
379 * TODO It should be possible to avoid the additional instructions
380 * if LLVM is changed so that it guarantuees:
381 * 1. the scratch space descriptor isolates the current wave (this
382 * could even save the scratch offset SGPR at the cost of an
383 * additional SALU instruction)
384 * 2. the memory for allocas must be allocated at the _end_ of the
385 * scratch space (after spilled registers)
387 index
= si_llvm_bound_index(ctx
, index
, array
->range
.Last
- array
->range
.First
+ 1);
389 index
= LLVMBuildMul(
391 LLVMConstInt(ctx
->i32
, util_bitcount(array
->writemask
), 0),
393 index
= LLVMBuildAdd(
395 LLVMConstInt(ctx
->i32
,
396 util_bitcount(array
->writemask
& ((1 << swizzle
) - 1)), 0),
398 idxs
[0] = ctx
->i32_0
;
400 return LLVMBuildGEP(builder
, alloca
, idxs
, 2, "");
404 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context
*bld_base
,
405 enum tgsi_opcode_type type
,
409 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
410 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
413 result
= LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base
->base
.gallivm
->context
, 32), bld_base
->base
.type
.length
* 2));
415 result
= LLVMBuildInsertElement(builder
,
417 ac_to_integer(&ctx
->ac
, ptr
),
418 bld_base
->int_bld
.zero
, "");
419 result
= LLVMBuildInsertElement(builder
,
421 ac_to_integer(&ctx
->ac
, ptr2
),
422 bld_base
->int_bld
.one
, "");
423 return bitcast(bld_base
, type
, result
);
427 emit_array_fetch(struct lp_build_tgsi_context
*bld_base
,
428 unsigned File
, enum tgsi_opcode_type type
,
429 struct tgsi_declaration_range range
,
432 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
434 LLVMBuilderRef builder
= ctx
->gallivm
.builder
;
436 unsigned i
, size
= range
.Last
- range
.First
+ 1;
437 LLVMTypeRef vec
= LLVMVectorType(tgsi2llvmtype(bld_base
, type
), size
);
438 LLVMValueRef result
= LLVMGetUndef(vec
);
440 struct tgsi_full_src_register tmp_reg
= {};
441 tmp_reg
.Register
.File
= File
;
443 for (i
= 0; i
< size
; ++i
) {
444 tmp_reg
.Register
.Index
= i
+ range
.First
;
445 LLVMValueRef temp
= si_llvm_emit_fetch(bld_base
, &tmp_reg
, type
, swizzle
);
446 result
= LLVMBuildInsertElement(builder
, result
, temp
,
447 LLVMConstInt(ctx
->i32
, i
, 0), "array_vector");
453 load_value_from_array(struct lp_build_tgsi_context
*bld_base
,
455 enum tgsi_opcode_type type
,
458 const struct tgsi_ind_register
*reg_indirect
)
460 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
461 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
462 LLVMBuilderRef builder
= gallivm
->builder
;
465 ptr
= get_pointer_into_array(ctx
, file
, swizzle
, reg_index
, reg_indirect
);
467 LLVMValueRef val
= LLVMBuildLoad(builder
, ptr
, "");
468 if (tgsi_type_is_64bit(type
)) {
469 LLVMValueRef ptr_hi
, val_hi
;
470 ptr_hi
= LLVMBuildGEP(builder
, ptr
, &ctx
->i32_1
, 1, "");
471 val_hi
= LLVMBuildLoad(builder
, ptr_hi
, "");
472 val
= si_llvm_emit_fetch_64bit(bld_base
, type
, val
, val_hi
);
477 struct tgsi_declaration_range range
=
478 get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
480 si_get_indirect_index(ctx
, reg_indirect
, 1, reg_index
- range
.First
);
482 emit_array_fetch(bld_base
, file
, type
, range
, swizzle
);
483 return LLVMBuildExtractElement(builder
, array
, index
, "");
488 store_value_to_array(struct lp_build_tgsi_context
*bld_base
,
493 const struct tgsi_ind_register
*reg_indirect
)
495 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
496 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
497 LLVMBuilderRef builder
= gallivm
->builder
;
500 ptr
= get_pointer_into_array(ctx
, file
, chan_index
, reg_index
, reg_indirect
);
502 LLVMBuildStore(builder
, value
, ptr
);
505 struct tgsi_declaration_range range
= get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
506 LLVMValueRef index
= si_get_indirect_index(ctx
, reg_indirect
, 1, reg_index
- range
.First
);
508 emit_array_fetch(bld_base
, file
, TGSI_TYPE_FLOAT
, range
, chan_index
);
509 LLVMValueRef temp_ptr
;
511 array
= LLVMBuildInsertElement(builder
, array
, value
, index
, "");
513 size
= range
.Last
- range
.First
+ 1;
514 for (i
= 0; i
< size
; ++i
) {
516 case TGSI_FILE_OUTPUT
:
517 temp_ptr
= ctx
->outputs
[i
+ range
.First
][chan_index
];
520 case TGSI_FILE_TEMPORARY
:
521 if (range
.First
+ i
>= ctx
->temps_count
)
523 temp_ptr
= ctx
->temps
[(i
+ range
.First
) * TGSI_NUM_CHANNELS
+ chan_index
];
529 value
= LLVMBuildExtractElement(builder
, array
,
530 LLVMConstInt(ctx
->i32
, i
, 0), "");
531 LLVMBuildStore(builder
, value
, temp_ptr
);
536 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
537 * reload them at each use. This must be true if the shader is using
538 * derivatives and KILL, because KILL can leave the WQM and then a lazy
539 * input load isn't in the WQM anymore.
541 static bool si_preload_fs_inputs(struct si_shader_context
*ctx
)
543 struct si_shader_selector
*sel
= ctx
->shader
->selector
;
545 return sel
->info
.uses_derivatives
&&
550 get_output_ptr(struct lp_build_tgsi_context
*bld_base
, unsigned index
,
553 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
555 assert(index
<= ctx
->bld_base
.info
->file_max
[TGSI_FILE_OUTPUT
]);
556 return ctx
->outputs
[index
][chan
];
559 LLVMValueRef
si_llvm_emit_fetch(struct lp_build_tgsi_context
*bld_base
,
560 const struct tgsi_full_src_register
*reg
,
561 enum tgsi_opcode_type type
,
564 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
565 LLVMBuilderRef builder
= ctx
->gallivm
.builder
;
566 LLVMValueRef result
= NULL
, ptr
, ptr2
;
569 LLVMValueRef values
[TGSI_NUM_CHANNELS
];
571 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
572 values
[chan
] = si_llvm_emit_fetch(bld_base
, reg
, type
, chan
);
574 return lp_build_gather_values(&ctx
->gallivm
, values
,
578 if (reg
->Register
.Indirect
) {
579 LLVMValueRef load
= load_value_from_array(bld_base
, reg
->Register
.File
, type
,
580 swizzle
, reg
->Register
.Index
, ®
->Indirect
);
581 return bitcast(bld_base
, type
, load
);
584 switch(reg
->Register
.File
) {
585 case TGSI_FILE_IMMEDIATE
: {
586 LLVMTypeRef ctype
= tgsi2llvmtype(bld_base
, type
);
587 if (tgsi_type_is_64bit(type
)) {
588 result
= LLVMGetUndef(LLVMVectorType(ctx
->i32
, bld_base
->base
.type
.length
* 2));
589 result
= LLVMConstInsertElement(result
,
590 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
],
592 result
= LLVMConstInsertElement(result
,
593 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
+ 1],
595 return LLVMConstBitCast(result
, ctype
);
597 return LLVMConstBitCast(ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
], ctype
);
601 case TGSI_FILE_INPUT
: {
602 unsigned index
= reg
->Register
.Index
;
603 LLVMValueRef input
[4];
605 /* I don't think doing this for vertex shaders is beneficial.
606 * For those, we want to make sure the VMEM loads are executed
607 * only once. Fragment shaders don't care much, because
608 * v_interp instructions are much cheaper than VMEM loads.
610 if (!si_preload_fs_inputs(ctx
) &&
611 ctx
->bld_base
.info
->processor
== PIPE_SHADER_FRAGMENT
)
612 ctx
->load_input(ctx
, index
, &ctx
->input_decls
[index
], input
);
614 memcpy(input
, &ctx
->inputs
[index
* 4], sizeof(input
));
616 result
= input
[swizzle
];
618 if (tgsi_type_is_64bit(type
)) {
620 ptr2
= input
[swizzle
+ 1];
621 return si_llvm_emit_fetch_64bit(bld_base
, type
, ptr
, ptr2
);
626 case TGSI_FILE_TEMPORARY
:
627 if (reg
->Register
.Index
>= ctx
->temps_count
)
628 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
629 ptr
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
];
630 if (tgsi_type_is_64bit(type
)) {
631 ptr2
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
+ 1];
632 return si_llvm_emit_fetch_64bit(bld_base
, type
,
633 LLVMBuildLoad(builder
, ptr
, ""),
634 LLVMBuildLoad(builder
, ptr2
, ""));
636 result
= LLVMBuildLoad(builder
, ptr
, "");
639 case TGSI_FILE_OUTPUT
:
640 ptr
= get_output_ptr(bld_base
, reg
->Register
.Index
, swizzle
);
641 if (tgsi_type_is_64bit(type
)) {
642 ptr2
= get_output_ptr(bld_base
, reg
->Register
.Index
, swizzle
+ 1);
643 return si_llvm_emit_fetch_64bit(bld_base
, type
,
644 LLVMBuildLoad(builder
, ptr
, ""),
645 LLVMBuildLoad(builder
, ptr2
, ""));
647 result
= LLVMBuildLoad(builder
, ptr
, "");
651 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
654 return bitcast(bld_base
, type
, result
);
657 static LLVMValueRef
fetch_system_value(struct lp_build_tgsi_context
*bld_base
,
658 const struct tgsi_full_src_register
*reg
,
659 enum tgsi_opcode_type type
,
662 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
663 LLVMBuilderRef builder
= ctx
->gallivm
.builder
;
664 LLVMValueRef cval
= ctx
->system_values
[reg
->Register
.Index
];
666 if (tgsi_type_is_64bit(type
)) {
669 assert(swizzle
== 0 || swizzle
== 2);
671 lo
= LLVMBuildExtractElement(
672 builder
, cval
, LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
673 hi
= LLVMBuildExtractElement(
674 builder
, cval
, LLVMConstInt(ctx
->i32
, swizzle
+ 1, 0), "");
676 return si_llvm_emit_fetch_64bit(bld_base
, type
, lo
, hi
);
679 if (LLVMGetTypeKind(LLVMTypeOf(cval
)) == LLVMVectorTypeKind
) {
680 cval
= LLVMBuildExtractElement(
681 builder
, cval
, LLVMConstInt(ctx
->i32
, swizzle
, 0), "");
683 assert(swizzle
== 0);
686 return bitcast(bld_base
, type
, cval
);
689 static void emit_declaration(struct lp_build_tgsi_context
*bld_base
,
690 const struct tgsi_full_declaration
*decl
)
692 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
693 LLVMBuilderRef builder
= ctx
->gallivm
.builder
;
694 unsigned first
, last
, i
;
695 switch(decl
->Declaration
.File
) {
696 case TGSI_FILE_ADDRESS
:
699 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
701 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
702 ctx
->addrs
[idx
][chan
] = lp_build_alloca_undef(
710 case TGSI_FILE_TEMPORARY
:
713 LLVMValueRef array_alloca
= NULL
;
715 unsigned writemask
= decl
->Declaration
.UsageMask
;
716 first
= decl
->Range
.First
;
717 last
= decl
->Range
.Last
;
718 decl_size
= 4 * ((last
- first
) + 1);
720 if (decl
->Declaration
.Array
) {
721 unsigned id
= decl
->Array
.ArrayID
- 1;
724 writemask
&= ctx
->temp_arrays
[id
].writemask
;
725 ctx
->temp_arrays
[id
].writemask
= writemask
;
726 array_size
= ((last
- first
) + 1) * util_bitcount(writemask
);
728 /* If the array has more than 16 elements, store it
729 * in memory using an alloca that spans the entire
732 * Otherwise, store each array element individually.
733 * We will then generate vectors (per-channel, up to
734 * <16 x float> if the usagemask is a single bit) for
735 * indirect addressing.
737 * Note that 16 is the number of vector elements that
738 * LLVM will store in a register, so theoretically an
739 * array with up to 4 * 16 = 64 elements could be
740 * handled this way, but whether that's a good idea
741 * depends on VGPR register pressure elsewhere.
743 * FIXME: We shouldn't need to have the non-alloca
744 * code path for arrays. LLVM should be smart enough to
745 * promote allocas into registers when profitable.
747 if (array_size
> 16 ||
748 !ctx
->screen
->llvm_has_working_vgpr_indexing
) {
749 array_alloca
= lp_build_alloca_undef(&ctx
->gallivm
,
750 LLVMArrayType(ctx
->f32
,
751 array_size
), "array");
752 ctx
->temp_array_allocas
[id
] = array_alloca
;
756 if (!ctx
->temps_count
) {
757 ctx
->temps_count
= bld_base
->info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
758 ctx
->temps
= MALLOC(TGSI_NUM_CHANNELS
* ctx
->temps_count
* sizeof(LLVMValueRef
));
761 for (i
= 0; i
< decl_size
; ++i
) {
763 snprintf(name
, sizeof(name
), "TEMP%d.%c",
764 first
+ i
/ 4, "xyzw"[i
% 4]);
766 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] =
767 lp_build_alloca_undef(&ctx
->gallivm
,
772 LLVMValueRef idxs
[2] = {
778 if (writemask
!= TGSI_WRITEMASK_XYZW
&&
779 !ctx
->undef_alloca
) {
780 /* Create a dummy alloca. We use it so that we
781 * have a pointer that is safe to load from if
782 * a shader ever reads from a channel that
783 * it never writes to.
785 ctx
->undef_alloca
= lp_build_alloca_undef(
790 for (i
= 0; i
< decl_size
; ++i
) {
792 if (writemask
& (1 << (i
% 4))) {
794 snprintf(name
, sizeof(name
), "TEMP%d.%c",
795 first
+ i
/ 4, "xyzw"[i
% 4]);
797 idxs
[1] = LLVMConstInt(ctx
->i32
, j
, 0);
798 ptr
= LLVMBuildGEP(builder
, array_alloca
, idxs
, 2, name
);
801 ptr
= ctx
->undef_alloca
;
803 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] = ptr
;
808 case TGSI_FILE_INPUT
:
811 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
812 if (ctx
->load_input
&&
813 ctx
->input_decls
[idx
].Declaration
.File
!= TGSI_FILE_INPUT
) {
814 ctx
->input_decls
[idx
] = *decl
;
815 ctx
->input_decls
[idx
].Range
.First
= idx
;
816 ctx
->input_decls
[idx
].Range
.Last
= idx
;
817 ctx
->input_decls
[idx
].Semantic
.Index
+= idx
- decl
->Range
.First
;
819 if (si_preload_fs_inputs(ctx
) ||
820 bld_base
->info
->processor
!= PIPE_SHADER_FRAGMENT
)
821 ctx
->load_input(ctx
, idx
, &ctx
->input_decls
[idx
],
822 &ctx
->inputs
[idx
* 4]);
828 case TGSI_FILE_SYSTEM_VALUE
:
831 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
832 si_load_system_value(ctx
, idx
, decl
);
837 case TGSI_FILE_OUTPUT
:
841 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
843 assert(idx
< RADEON_LLVM_MAX_OUTPUTS
);
844 if (ctx
->outputs
[idx
][0])
846 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
848 snprintf(name
, sizeof(name
), "OUT%d.%c",
849 idx
, "xyzw"[chan
% 4]);
851 ctx
->outputs
[idx
][chan
] = lp_build_alloca_undef(
859 case TGSI_FILE_MEMORY
:
860 si_declare_compute_memory(ctx
, decl
);
868 void si_llvm_emit_store(struct lp_build_tgsi_context
*bld_base
,
869 const struct tgsi_full_instruction
*inst
,
870 const struct tgsi_opcode_info
*info
,
874 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
875 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
876 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[index
];
877 LLVMBuilderRef builder
= ctx
->gallivm
.builder
;
878 LLVMValueRef temp_ptr
, temp_ptr2
= NULL
;
879 bool is_vec_store
= false;
880 enum tgsi_opcode_type dtype
= tgsi_opcode_infer_dst_type(inst
->Instruction
.Opcode
, index
);
883 LLVMTypeKind k
= LLVMGetTypeKind(LLVMTypeOf(dst
[0]));
884 is_vec_store
= (k
== LLVMVectorTypeKind
);
888 LLVMValueRef values
[4] = {};
889 uint32_t writemask
= reg
->Register
.WriteMask
;
891 unsigned chan
= u_bit_scan(&writemask
);
892 LLVMValueRef index
= LLVMConstInt(ctx
->i32
, chan
, 0);
893 values
[chan
] = LLVMBuildExtractElement(gallivm
->builder
,
896 bld_base
->emit_store(bld_base
, inst
, info
, index
, values
);
900 uint32_t writemask
= reg
->Register
.WriteMask
;
902 unsigned chan_index
= u_bit_scan(&writemask
);
903 LLVMValueRef value
= dst
[chan_index
];
905 if (tgsi_type_is_64bit(dtype
) && (chan_index
== 1 || chan_index
== 3))
907 if (inst
->Instruction
.Saturate
)
908 value
= ac_build_clamp(&ctx
->ac
, value
);
910 if (reg
->Register
.File
== TGSI_FILE_ADDRESS
) {
911 temp_ptr
= ctx
->addrs
[reg
->Register
.Index
][chan_index
];
912 LLVMBuildStore(builder
, value
, temp_ptr
);
916 if (!tgsi_type_is_64bit(dtype
))
917 value
= ac_to_float(&ctx
->ac
, value
);
919 if (reg
->Register
.Indirect
) {
920 unsigned file
= reg
->Register
.File
;
921 unsigned reg_index
= reg
->Register
.Index
;
922 store_value_to_array(bld_base
, value
, file
, chan_index
,
923 reg_index
, ®
->Indirect
);
925 switch(reg
->Register
.File
) {
926 case TGSI_FILE_OUTPUT
:
927 temp_ptr
= ctx
->outputs
[reg
->Register
.Index
][chan_index
];
928 if (tgsi_type_is_64bit(dtype
))
929 temp_ptr2
= ctx
->outputs
[reg
->Register
.Index
][chan_index
+ 1];
932 case TGSI_FILE_TEMPORARY
:
934 if (reg
->Register
.Index
>= ctx
->temps_count
)
937 temp_ptr
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
];
938 if (tgsi_type_is_64bit(dtype
))
939 temp_ptr2
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
+ 1];
946 if (!tgsi_type_is_64bit(dtype
))
947 LLVMBuildStore(builder
, value
, temp_ptr
);
949 LLVMValueRef ptr
= LLVMBuildBitCast(builder
, value
,
950 LLVMVectorType(ctx
->i32
, 2), "");
952 value
= LLVMBuildExtractElement(builder
, ptr
,
954 val2
= LLVMBuildExtractElement(builder
, ptr
,
957 LLVMBuildStore(builder
, ac_to_float(&ctx
->ac
, value
), temp_ptr
);
958 LLVMBuildStore(builder
, ac_to_float(&ctx
->ac
, val2
), temp_ptr2
);
964 static void set_basicblock_name(LLVMBasicBlockRef bb
, const char *base
, int pc
)
967 /* Subtract 1 so that the number shown is that of the corresponding
968 * opcode in the TGSI dump, e.g. an if block has the same suffix as
969 * the instruction number of the corresponding TGSI IF.
971 snprintf(buf
, sizeof(buf
), "%s%d", base
, pc
- 1);
972 LLVMSetValueName(LLVMBasicBlockAsValue(bb
), buf
);
975 /* Append a basic block at the level of the parent flow.
977 static LLVMBasicBlockRef
append_basic_block(struct si_shader_context
*ctx
,
980 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
982 assert(ctx
->flow_depth
>= 1);
984 if (ctx
->flow_depth
>= 2) {
985 struct si_llvm_flow
*flow
= &ctx
->flow
[ctx
->flow_depth
- 2];
987 return LLVMInsertBasicBlockInContext(gallivm
->context
,
988 flow
->next_block
, name
);
991 return LLVMAppendBasicBlockInContext(gallivm
->context
, ctx
->main_fn
, name
);
994 /* Emit a branch to the given default target for the current block if
995 * applicable -- that is, if the current block does not already contain a
996 * branch from a break or continue.
998 static void emit_default_branch(LLVMBuilderRef builder
, LLVMBasicBlockRef target
)
1000 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder
)))
1001 LLVMBuildBr(builder
, target
);
1004 static void bgnloop_emit(const struct lp_build_tgsi_action
*action
,
1005 struct lp_build_tgsi_context
*bld_base
,
1006 struct lp_build_emit_data
*emit_data
)
1008 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1009 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1010 struct si_llvm_flow
*flow
= push_flow(ctx
);
1011 flow
->loop_entry_block
= append_basic_block(ctx
, "LOOP");
1012 flow
->next_block
= append_basic_block(ctx
, "ENDLOOP");
1013 set_basicblock_name(flow
->loop_entry_block
, "loop", bld_base
->pc
);
1014 LLVMBuildBr(gallivm
->builder
, flow
->loop_entry_block
);
1015 LLVMPositionBuilderAtEnd(gallivm
->builder
, flow
->loop_entry_block
);
1018 static void brk_emit(const struct lp_build_tgsi_action
*action
,
1019 struct lp_build_tgsi_context
*bld_base
,
1020 struct lp_build_emit_data
*emit_data
)
1022 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1023 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1024 struct si_llvm_flow
*flow
= get_innermost_loop(ctx
);
1026 LLVMBuildBr(gallivm
->builder
, flow
->next_block
);
1029 static void cont_emit(const struct lp_build_tgsi_action
*action
,
1030 struct lp_build_tgsi_context
*bld_base
,
1031 struct lp_build_emit_data
*emit_data
)
1033 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1034 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1035 struct si_llvm_flow
*flow
= get_innermost_loop(ctx
);
1037 LLVMBuildBr(gallivm
->builder
, flow
->loop_entry_block
);
1040 static void else_emit(const struct lp_build_tgsi_action
*action
,
1041 struct lp_build_tgsi_context
*bld_base
,
1042 struct lp_build_emit_data
*emit_data
)
1044 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1045 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1046 struct si_llvm_flow
*current_branch
= get_current_flow(ctx
);
1047 LLVMBasicBlockRef endif_block
;
1049 assert(!current_branch
->loop_entry_block
);
1051 endif_block
= append_basic_block(ctx
, "ENDIF");
1052 emit_default_branch(gallivm
->builder
, endif_block
);
1054 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_branch
->next_block
);
1055 set_basicblock_name(current_branch
->next_block
, "else", bld_base
->pc
);
1057 current_branch
->next_block
= endif_block
;
1060 static void endif_emit(const struct lp_build_tgsi_action
*action
,
1061 struct lp_build_tgsi_context
*bld_base
,
1062 struct lp_build_emit_data
*emit_data
)
1064 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1065 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1066 struct si_llvm_flow
*current_branch
= get_current_flow(ctx
);
1068 assert(!current_branch
->loop_entry_block
);
1070 emit_default_branch(gallivm
->builder
, current_branch
->next_block
);
1071 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_branch
->next_block
);
1072 set_basicblock_name(current_branch
->next_block
, "endif", bld_base
->pc
);
1077 static void endloop_emit(const struct lp_build_tgsi_action
*action
,
1078 struct lp_build_tgsi_context
*bld_base
,
1079 struct lp_build_emit_data
*emit_data
)
1081 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1082 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1083 struct si_llvm_flow
*current_loop
= get_current_flow(ctx
);
1085 assert(current_loop
->loop_entry_block
);
1087 emit_default_branch(gallivm
->builder
, current_loop
->loop_entry_block
);
1089 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_loop
->next_block
);
1090 set_basicblock_name(current_loop
->next_block
, "endloop", bld_base
->pc
);
1094 static void if_cond_emit(const struct lp_build_tgsi_action
*action
,
1095 struct lp_build_tgsi_context
*bld_base
,
1096 struct lp_build_emit_data
*emit_data
,
1099 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1100 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1101 struct si_llvm_flow
*flow
= push_flow(ctx
);
1102 LLVMBasicBlockRef if_block
;
1104 if_block
= append_basic_block(ctx
, "IF");
1105 flow
->next_block
= append_basic_block(ctx
, "ELSE");
1106 set_basicblock_name(if_block
, "if", bld_base
->pc
);
1107 LLVMBuildCondBr(gallivm
->builder
, cond
, if_block
, flow
->next_block
);
1108 LLVMPositionBuilderAtEnd(gallivm
->builder
, if_block
);
1111 static void if_emit(const struct lp_build_tgsi_action
*action
,
1112 struct lp_build_tgsi_context
*bld_base
,
1113 struct lp_build_emit_data
*emit_data
)
1115 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1118 cond
= LLVMBuildFCmp(gallivm
->builder
, LLVMRealUNE
,
1120 bld_base
->base
.zero
, "");
1122 if_cond_emit(action
, bld_base
, emit_data
, cond
);
1125 static void uif_emit(const struct lp_build_tgsi_action
*action
,
1126 struct lp_build_tgsi_context
*bld_base
,
1127 struct lp_build_emit_data
*emit_data
)
1129 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1130 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1133 cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
,
1134 ac_to_integer(&ctx
->ac
, emit_data
->args
[0]),
1135 bld_base
->int_bld
.zero
, "");
1137 if_cond_emit(action
, bld_base
, emit_data
, cond
);
1140 static void emit_immediate(struct lp_build_tgsi_context
*bld_base
,
1141 const struct tgsi_full_immediate
*imm
)
1144 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1146 for (i
= 0; i
< 4; ++i
) {
1147 ctx
->imms
[ctx
->imms_num
* TGSI_NUM_CHANNELS
+ i
] =
1148 LLVMConstInt(ctx
->i32
, imm
->u
[i
].Uint
, false );
1154 void si_llvm_context_init(struct si_shader_context
*ctx
,
1155 struct si_screen
*sscreen
,
1156 LLVMTargetMachineRef tm
)
1158 struct lp_type type
;
1160 /* Initialize the gallivm object:
1161 * We are only using the module, context, and builder fields of this struct.
1162 * This should be enough for us to be able to pass our gallivm struct to the
1163 * helper functions in the gallivm module.
1165 memset(ctx
, 0, sizeof(*ctx
));
1166 ctx
->screen
= sscreen
;
1169 ctx
->gallivm
.context
= LLVMContextCreate();
1170 ctx
->gallivm
.module
= LLVMModuleCreateWithNameInContext("tgsi",
1171 ctx
->gallivm
.context
);
1172 LLVMSetTarget(ctx
->gallivm
.module
, "amdgcn--");
1174 LLVMTargetDataRef data_layout
= LLVMCreateTargetDataLayout(tm
);
1175 char *data_layout_str
= LLVMCopyStringRepOfTargetData(data_layout
);
1176 LLVMSetDataLayout(ctx
->gallivm
.module
, data_layout_str
);
1177 LLVMDisposeTargetData(data_layout
);
1178 LLVMDisposeMessage(data_layout_str
);
1180 bool unsafe_fpmath
= (sscreen
->b
.debug_flags
& DBG_UNSAFE_MATH
) != 0;
1181 enum lp_float_mode float_mode
=
1182 unsafe_fpmath
? LP_FLOAT_MODE_UNSAFE_FP_MATH
:
1183 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH
;
1185 ctx
->gallivm
.builder
= lp_create_builder(ctx
->gallivm
.context
,
1188 ac_llvm_context_init(&ctx
->ac
, ctx
->gallivm
.context
, sscreen
->b
.chip_class
);
1189 ctx
->ac
.module
= ctx
->gallivm
.module
;
1190 ctx
->ac
.builder
= ctx
->gallivm
.builder
;
1192 struct lp_build_tgsi_context
*bld_base
= &ctx
->bld_base
;
1194 type
.floating
= true;
1201 lp_build_context_init(&bld_base
->base
, &ctx
->gallivm
, type
);
1202 lp_build_context_init(&ctx
->bld_base
.uint_bld
, &ctx
->gallivm
, lp_uint_type(type
));
1203 lp_build_context_init(&ctx
->bld_base
.int_bld
, &ctx
->gallivm
, lp_int_type(type
));
1205 lp_build_context_init(&ctx
->bld_base
.dbl_bld
, &ctx
->gallivm
, type
);
1206 lp_build_context_init(&ctx
->bld_base
.uint64_bld
, &ctx
->gallivm
, lp_uint_type(type
));
1207 lp_build_context_init(&ctx
->bld_base
.int64_bld
, &ctx
->gallivm
, lp_int_type(type
));
1210 bld_base
->emit_swizzle
= emit_swizzle
;
1211 bld_base
->emit_declaration
= emit_declaration
;
1212 bld_base
->emit_immediate
= emit_immediate
;
1214 /* metadata allowing 2.5 ULP */
1215 ctx
->fpmath_md_kind
= LLVMGetMDKindIDInContext(ctx
->gallivm
.context
,
1217 LLVMValueRef arg
= lp_build_const_float(&ctx
->gallivm
, 2.5);
1218 ctx
->fpmath_md_2p5_ulp
= LLVMMDNodeInContext(ctx
->gallivm
.context
,
1221 bld_base
->op_actions
[TGSI_OPCODE_BGNLOOP
].emit
= bgnloop_emit
;
1222 bld_base
->op_actions
[TGSI_OPCODE_BRK
].emit
= brk_emit
;
1223 bld_base
->op_actions
[TGSI_OPCODE_CONT
].emit
= cont_emit
;
1224 bld_base
->op_actions
[TGSI_OPCODE_IF
].emit
= if_emit
;
1225 bld_base
->op_actions
[TGSI_OPCODE_UIF
].emit
= uif_emit
;
1226 bld_base
->op_actions
[TGSI_OPCODE_ELSE
].emit
= else_emit
;
1227 bld_base
->op_actions
[TGSI_OPCODE_ENDIF
].emit
= endif_emit
;
1228 bld_base
->op_actions
[TGSI_OPCODE_ENDLOOP
].emit
= endloop_emit
;
1230 si_shader_context_init_alu(&ctx
->bld_base
);
1231 si_shader_context_init_mem(ctx
);
1233 ctx
->voidt
= LLVMVoidTypeInContext(ctx
->gallivm
.context
);
1234 ctx
->i1
= LLVMInt1TypeInContext(ctx
->gallivm
.context
);
1235 ctx
->i8
= LLVMInt8TypeInContext(ctx
->gallivm
.context
);
1236 ctx
->i32
= LLVMInt32TypeInContext(ctx
->gallivm
.context
);
1237 ctx
->i64
= LLVMInt64TypeInContext(ctx
->gallivm
.context
);
1238 ctx
->i128
= LLVMIntTypeInContext(ctx
->gallivm
.context
, 128);
1239 ctx
->f32
= LLVMFloatTypeInContext(ctx
->gallivm
.context
);
1240 ctx
->v2i32
= LLVMVectorType(ctx
->i32
, 2);
1241 ctx
->v4i32
= LLVMVectorType(ctx
->i32
, 4);
1242 ctx
->v4f32
= LLVMVectorType(ctx
->f32
, 4);
1243 ctx
->v8i32
= LLVMVectorType(ctx
->i32
, 8);
1245 ctx
->i32_0
= LLVMConstInt(ctx
->i32
, 0, 0);
1246 ctx
->i32_1
= LLVMConstInt(ctx
->i32
, 1, 0);
1249 /* Set the context to a certain TGSI shader. Can be called repeatedly
1250 * to change the shader. */
1251 void si_llvm_context_set_tgsi(struct si_shader_context
*ctx
,
1252 struct si_shader
*shader
)
1254 const struct tgsi_shader_info
*info
= NULL
;
1255 const struct tgsi_token
*tokens
= NULL
;
1257 if (shader
&& shader
->selector
) {
1258 info
= &shader
->selector
->info
;
1259 tokens
= shader
->selector
->tokens
;
1262 ctx
->shader
= shader
;
1263 ctx
->type
= info
? info
->processor
: -1;
1264 ctx
->bld_base
.info
= info
;
1266 /* Clean up the old contents. */
1267 FREE(ctx
->temp_arrays
);
1268 ctx
->temp_arrays
= NULL
;
1269 FREE(ctx
->temp_array_allocas
);
1270 ctx
->temp_array_allocas
= NULL
;
1278 ctx
->temps_count
= 0;
1280 if (!info
|| !tokens
)
1283 if (info
->array_max
[TGSI_FILE_TEMPORARY
] > 0) {
1284 int size
= info
->array_max
[TGSI_FILE_TEMPORARY
];
1286 ctx
->temp_arrays
= CALLOC(size
, sizeof(ctx
->temp_arrays
[0]));
1287 ctx
->temp_array_allocas
= CALLOC(size
, sizeof(ctx
->temp_array_allocas
[0]));
1289 tgsi_scan_arrays(tokens
, TGSI_FILE_TEMPORARY
, size
,
1292 if (info
->file_max
[TGSI_FILE_IMMEDIATE
] >= 0) {
1293 int size
= info
->file_max
[TGSI_FILE_IMMEDIATE
] + 1;
1294 ctx
->imms
= MALLOC(size
* TGSI_NUM_CHANNELS
* sizeof(LLVMValueRef
));
1297 /* Re-set these to start with a clean slate. */
1298 ctx
->bld_base
.num_instructions
= 0;
1299 ctx
->bld_base
.pc
= 0;
1300 memset(ctx
->outputs
, 0, sizeof(ctx
->outputs
));
1302 ctx
->bld_base
.emit_store
= si_llvm_emit_store
;
1303 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_IMMEDIATE
] = si_llvm_emit_fetch
;
1304 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_INPUT
] = si_llvm_emit_fetch
;
1305 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_TEMPORARY
] = si_llvm_emit_fetch
;
1306 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_OUTPUT
] = si_llvm_emit_fetch
;
1307 ctx
->bld_base
.emit_fetch_funcs
[TGSI_FILE_SYSTEM_VALUE
] = fetch_system_value
;
1309 ctx
->num_const_buffers
= util_last_bit(info
->const_buffers_declared
);
1310 ctx
->num_shader_buffers
= util_last_bit(info
->shader_buffers_declared
);
1311 ctx
->num_samplers
= util_last_bit(info
->samplers_declared
);
1312 ctx
->num_images
= util_last_bit(info
->images_declared
);
1315 void si_llvm_create_func(struct si_shader_context
*ctx
,
1317 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
1318 LLVMTypeRef
*ParamTypes
, unsigned ParamCount
)
1320 LLVMTypeRef main_fn_type
, ret_type
;
1321 LLVMBasicBlockRef main_fn_body
;
1322 enum si_llvm_calling_convention call_conv
;
1323 unsigned real_shader_type
;
1325 if (num_return_elems
)
1326 ret_type
= LLVMStructTypeInContext(ctx
->gallivm
.context
,
1328 num_return_elems
, true);
1330 ret_type
= LLVMVoidTypeInContext(ctx
->gallivm
.context
);
1332 /* Setup the function */
1333 ctx
->return_type
= ret_type
;
1334 main_fn_type
= LLVMFunctionType(ret_type
, ParamTypes
, ParamCount
, 0);
1335 ctx
->main_fn
= LLVMAddFunction(ctx
->gallivm
.module
, name
, main_fn_type
);
1336 main_fn_body
= LLVMAppendBasicBlockInContext(ctx
->gallivm
.context
,
1337 ctx
->main_fn
, "main_body");
1338 LLVMPositionBuilderAtEnd(ctx
->gallivm
.builder
, main_fn_body
);
1340 real_shader_type
= ctx
->type
;
1342 /* LS is merged into HS (TCS), and ES is merged into GS. */
1343 if (ctx
->screen
->b
.chip_class
>= GFX9
) {
1344 if (ctx
->shader
->key
.as_ls
)
1345 real_shader_type
= PIPE_SHADER_TESS_CTRL
;
1346 else if (ctx
->shader
->key
.as_es
)
1347 real_shader_type
= PIPE_SHADER_GEOMETRY
;
1350 switch (real_shader_type
) {
1351 case PIPE_SHADER_VERTEX
:
1352 case PIPE_SHADER_TESS_EVAL
:
1353 call_conv
= RADEON_LLVM_AMDGPU_VS
;
1355 case PIPE_SHADER_TESS_CTRL
:
1356 call_conv
= HAVE_LLVM
>= 0x0500 ? RADEON_LLVM_AMDGPU_HS
:
1357 RADEON_LLVM_AMDGPU_VS
;
1359 case PIPE_SHADER_GEOMETRY
:
1360 call_conv
= RADEON_LLVM_AMDGPU_GS
;
1362 case PIPE_SHADER_FRAGMENT
:
1363 call_conv
= RADEON_LLVM_AMDGPU_PS
;
1365 case PIPE_SHADER_COMPUTE
:
1366 call_conv
= RADEON_LLVM_AMDGPU_CS
;
1369 unreachable("Unhandle shader type");
1372 LLVMSetFunctionCallConv(ctx
->main_fn
, call_conv
);
1375 void si_llvm_optimize_module(struct si_shader_context
*ctx
)
1377 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1378 const char *triple
= LLVMGetTarget(gallivm
->module
);
1379 LLVMTargetLibraryInfoRef target_library_info
;
1381 /* Dump LLVM IR before any optimization passes */
1382 if (ctx
->screen
->b
.debug_flags
& DBG_PREOPT_IR
&&
1383 si_can_dump_shader(&ctx
->screen
->b
, ctx
->type
))
1384 LLVMDumpModule(ctx
->gallivm
.module
);
1386 /* Create the pass manager */
1387 gallivm
->passmgr
= LLVMCreatePassManager();
1389 target_library_info
= gallivm_create_target_library_info(triple
);
1390 LLVMAddTargetLibraryInfo(target_library_info
, gallivm
->passmgr
);
1392 if (si_extra_shader_checks(&ctx
->screen
->b
, ctx
->type
))
1393 LLVMAddVerifierPass(gallivm
->passmgr
);
1395 LLVMAddAlwaysInlinerPass(gallivm
->passmgr
);
1397 /* This pass should eliminate all the load and store instructions */
1398 LLVMAddPromoteMemoryToRegisterPass(gallivm
->passmgr
);
1400 /* Add some optimization passes */
1401 LLVMAddScalarReplAggregatesPass(gallivm
->passmgr
);
1402 LLVMAddLICMPass(gallivm
->passmgr
);
1403 LLVMAddAggressiveDCEPass(gallivm
->passmgr
);
1404 LLVMAddCFGSimplificationPass(gallivm
->passmgr
);
1405 #if HAVE_LLVM >= 0x0400
1406 /* This is recommended by the instruction combining pass. */
1407 LLVMAddEarlyCSEMemSSAPass(gallivm
->passmgr
);
1409 LLVMAddInstructionCombiningPass(gallivm
->passmgr
);
1412 LLVMRunPassManager(gallivm
->passmgr
, ctx
->gallivm
.module
);
1414 LLVMDisposeBuilder(gallivm
->builder
);
1415 LLVMDisposePassManager(gallivm
->passmgr
);
1416 gallivm_dispose_target_library_info(target_library_info
);
1419 void si_llvm_dispose(struct si_shader_context
*ctx
)
1421 LLVMDisposeModule(ctx
->gallivm
.module
);
1422 LLVMContextDispose(ctx
->gallivm
.context
);
1423 FREE(ctx
->temp_arrays
);
1424 ctx
->temp_arrays
= NULL
;
1425 FREE(ctx
->temp_array_allocas
);
1426 ctx
->temp_array_allocas
= NULL
;
1429 ctx
->temps_count
= 0;
1435 ctx
->flow_depth_max
= 0;