/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include "si_shader_internal.h"
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43 #include <llvm-c/Support.h>
45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
48 /* Loop exit or next part of if/else/endif. */
49 LLVMBasicBlockRef next_block
;
50 LLVMBasicBlockRef loop_entry_block
;
/* Fixed buffer lengths for target description strings. */
#define CPU_STRING_LEN 30
#define FS_STRING_LEN 30
#define TRIPLE_STRING_LEN 7
/**
 * Shader types for the LLVM backend.
 *
 * The numeric values are passed to LLVM through the "ShaderType" function
 * attribute, so they must not be renumbered.
 */
enum si_llvm_shader_type {
	RADEON_LLVM_SHADER_PS = 0,
	RADEON_LLVM_SHADER_VS = 1,
	RADEON_LLVM_SHADER_GS = 2,
	RADEON_LLVM_SHADER_CS = 3,
};
/**
 * Calling-convention IDs handed to LLVMSetFunctionCallConv(), one per
 * hardware shader stage.
 */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};
74 void si_llvm_add_attribute(LLVMValueRef F
, const char *name
, int value
)
78 snprintf(str
, sizeof(str
), "%i", value
);
79 LLVMAddTargetDependentFunctionAttr(F
, name
, str
);
83 * Set the shader type we want to compile
85 * @param type shader type to set
87 void si_llvm_shader_type(LLVMValueRef F
, unsigned type
)
89 enum si_llvm_shader_type llvm_type
;
90 enum si_llvm_calling_convention calling_conv
;
93 case PIPE_SHADER_VERTEX
:
94 case PIPE_SHADER_TESS_CTRL
:
95 case PIPE_SHADER_TESS_EVAL
:
96 llvm_type
= RADEON_LLVM_SHADER_VS
;
97 calling_conv
= RADEON_LLVM_AMDGPU_VS
;
99 case PIPE_SHADER_GEOMETRY
:
100 llvm_type
= RADEON_LLVM_SHADER_GS
;
101 calling_conv
= RADEON_LLVM_AMDGPU_GS
;
103 case PIPE_SHADER_FRAGMENT
:
104 llvm_type
= RADEON_LLVM_SHADER_PS
;
105 calling_conv
= RADEON_LLVM_AMDGPU_PS
;
107 case PIPE_SHADER_COMPUTE
:
108 llvm_type
= RADEON_LLVM_SHADER_CS
;
109 calling_conv
= RADEON_LLVM_AMDGPU_CS
;
112 unreachable("Unhandle shader type");
115 if (HAVE_LLVM
>= 0x309)
116 LLVMSetFunctionCallConv(F
, calling_conv
);
118 si_llvm_add_attribute(F
, "ShaderType", llvm_type
);
121 static void init_amdgpu_target()
123 gallivm_init_llvm_targets();
124 LLVMInitializeAMDGPUTargetInfo();
125 LLVMInitializeAMDGPUTarget();
126 LLVMInitializeAMDGPUTargetMC();
127 LLVMInitializeAMDGPUAsmPrinter();
129 if (HAVE_LLVM
>= 0x0400) {
131 * Workaround for bug in llvm 4.0 that causes image intrinsics
133 * https://reviews.llvm.org/D26348
135 const char *argv
[2] = {"mesa", "-simplifycfg-sink-common=false"};
136 LLVMParseCommandLineOptions(2, argv
, NULL
);
140 static once_flag init_amdgpu_target_once_flag
= ONCE_FLAG_INIT
;
142 LLVMTargetRef
si_llvm_get_amdgpu_target(const char *triple
)
144 LLVMTargetRef target
= NULL
;
145 char *err_message
= NULL
;
147 call_once(&init_amdgpu_target_once_flag
, init_amdgpu_target
);
149 if (LLVMGetTargetFromTriple(triple
, &target
, &err_message
)) {
150 fprintf(stderr
, "Cannot find target for triple %s ", triple
);
152 fprintf(stderr
, "%s\n", err_message
);
154 LLVMDisposeMessage(err_message
);
/* State shared between si_llvm_compile() and the LLVM diagnostic handler. */
struct si_llvm_diagnostics {
	/* Sink for diagnostic text forwarded via pipe_debug_message(). */
	struct pipe_debug_callback *debug;
	/* Non-zero once compilation is considered failed. */
	unsigned retval;
};
165 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
167 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
168 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
169 char *description
= LLVMGetDiagInfoDescription(di
);
170 const char *severity_str
= NULL
;
174 severity_str
= "error";
177 severity_str
= "warning";
180 severity_str
= "remark";
183 severity_str
= "note";
186 severity_str
= "unknown";
189 pipe_debug_message(diag
->debug
, SHADER_INFO
,
190 "LLVM diagnostic (%s): %s", severity_str
, description
);
192 if (severity
== LLVMDSError
) {
194 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
197 LLVMDisposeMessage(description
);
201 * Compile an LLVM module to machine code.
203 * @returns 0 for success, 1 for failure
205 unsigned si_llvm_compile(LLVMModuleRef M
, struct ac_shader_binary
*binary
,
206 LLVMTargetMachineRef tm
,
207 struct pipe_debug_callback
*debug
)
209 struct si_llvm_diagnostics diag
;
211 LLVMContextRef llvm_ctx
;
212 LLVMMemoryBufferRef out_buffer
;
213 unsigned buffer_size
;
214 const char *buffer_data
;
220 /* Setup Diagnostic Handler*/
221 llvm_ctx
= LLVMGetModuleContext(M
);
223 LLVMContextSetDiagnosticHandler(llvm_ctx
, si_diagnostic_handler
, &diag
);
226 mem_err
= LLVMTargetMachineEmitToMemoryBuffer(tm
, M
, LLVMObjectFile
, &err
,
229 /* Process Errors/Warnings */
231 fprintf(stderr
, "%s: %s", __FUNCTION__
, err
);
232 pipe_debug_message(debug
, SHADER_INFO
,
233 "LLVM emit error: %s", err
);
239 /* Extract Shader Code*/
240 buffer_size
= LLVMGetBufferSize(out_buffer
);
241 buffer_data
= LLVMGetBufferStart(out_buffer
);
243 ac_elf_read(buffer_data
, buffer_size
, binary
);
246 LLVMDisposeMemoryBuffer(out_buffer
);
249 if (diag
.retval
!= 0)
250 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compile failed");
254 LLVMTypeRef
tgsi2llvmtype(struct lp_build_tgsi_context
*bld_base
,
255 enum tgsi_opcode_type type
)
257 LLVMContextRef ctx
= bld_base
->base
.gallivm
->context
;
260 case TGSI_TYPE_UNSIGNED
:
261 case TGSI_TYPE_SIGNED
:
262 return LLVMInt32TypeInContext(ctx
);
263 case TGSI_TYPE_UNSIGNED64
:
264 case TGSI_TYPE_SIGNED64
:
265 return LLVMInt64TypeInContext(ctx
);
266 case TGSI_TYPE_DOUBLE
:
267 return LLVMDoubleTypeInContext(ctx
);
268 case TGSI_TYPE_UNTYPED
:
269 case TGSI_TYPE_FLOAT
:
270 return LLVMFloatTypeInContext(ctx
);
276 LLVMValueRef
bitcast(struct lp_build_tgsi_context
*bld_base
,
277 enum tgsi_opcode_type type
, LLVMValueRef value
)
279 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
280 LLVMTypeRef dst_type
= tgsi2llvmtype(bld_base
, type
);
283 return LLVMBuildBitCast(builder
, value
, dst_type
, "");
289 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
290 * or an undefined value in the same interval otherwise.
292 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
296 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
297 LLVMBuilderRef builder
= gallivm
->builder
;
298 LLVMValueRef c_max
= lp_build_const_int32(gallivm
, num
- 1);
301 if (util_is_power_of_two(num
)) {
302 index
= LLVMBuildAnd(builder
, index
, c_max
, "");
304 /* In theory, this MAX pattern should result in code that is
305 * as good as the bit-wise AND above.
307 * In practice, LLVM generates worse code (at the time of
308 * writing), because its value tracking is not strong enough.
310 cc
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, c_max
, "");
311 index
= LLVMBuildSelect(builder
, cc
, index
, c_max
, "");
317 static struct si_llvm_flow
*
318 get_current_flow(struct si_shader_context
*ctx
)
320 if (ctx
->flow_depth
> 0)
321 return &ctx
->flow
[ctx
->flow_depth
- 1];
325 static struct si_llvm_flow
*
326 get_innermost_loop(struct si_shader_context
*ctx
)
328 for (unsigned i
= ctx
->flow_depth
; i
> 0; --i
) {
329 if (ctx
->flow
[i
- 1].loop_entry_block
)
330 return &ctx
->flow
[i
- 1];
335 static struct si_llvm_flow
*
336 push_flow(struct si_shader_context
*ctx
)
338 struct si_llvm_flow
*flow
;
340 if (ctx
->flow_depth
>= ctx
->flow_depth_max
) {
341 unsigned new_max
= MAX2(ctx
->flow_depth
<< 1, RADEON_LLVM_INITIAL_CF_DEPTH
);
342 ctx
->flow
= REALLOC(ctx
->flow
,
343 ctx
->flow_depth_max
* sizeof(*ctx
->flow
),
344 new_max
* sizeof(*ctx
->flow
));
345 ctx
->flow_depth_max
= new_max
;
348 flow
= &ctx
->flow
[ctx
->flow_depth
];
351 flow
->next_block
= NULL
;
352 flow
->loop_entry_block
= NULL
;
356 static LLVMValueRef
emit_swizzle(struct lp_build_tgsi_context
*bld_base
,
363 LLVMValueRef swizzles
[4];
365 LLVMInt32TypeInContext(bld_base
->base
.gallivm
->context
);
367 swizzles
[0] = LLVMConstInt(i32t
, swizzle_x
, 0);
368 swizzles
[1] = LLVMConstInt(i32t
, swizzle_y
, 0);
369 swizzles
[2] = LLVMConstInt(i32t
, swizzle_z
, 0);
370 swizzles
[3] = LLVMConstInt(i32t
, swizzle_w
, 0);
372 return LLVMBuildShuffleVector(bld_base
->base
.gallivm
->builder
,
374 LLVMGetUndef(LLVMTypeOf(value
)),
375 LLVMConstVector(swizzles
, 4), "");
379 * Return the description of the array covering the given temporary register
383 get_temp_array_id(struct lp_build_tgsi_context
*bld_base
,
385 const struct tgsi_ind_register
*reg
)
387 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
388 unsigned num_arrays
= ctx
->bld_base
.info
->array_max
[TGSI_FILE_TEMPORARY
];
391 if (reg
&& reg
->ArrayID
> 0 && reg
->ArrayID
<= num_arrays
)
394 for (i
= 0; i
< num_arrays
; i
++) {
395 const struct tgsi_array_info
*array
= &ctx
->temp_arrays
[i
];
397 if (reg_index
>= array
->range
.First
&& reg_index
<= array
->range
.Last
)
404 static struct tgsi_declaration_range
405 get_array_range(struct lp_build_tgsi_context
*bld_base
,
406 unsigned File
, unsigned reg_index
,
407 const struct tgsi_ind_register
*reg
)
409 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
410 struct tgsi_declaration_range range
;
412 if (File
== TGSI_FILE_TEMPORARY
) {
413 unsigned array_id
= get_temp_array_id(bld_base
, reg_index
, reg
);
415 return ctx
->temp_arrays
[array_id
- 1].range
;
419 range
.Last
= bld_base
->info
->file_max
[File
];
424 emit_array_index(struct si_shader_context
*ctx
,
425 const struct tgsi_ind_register
*reg
,
428 struct gallivm_state
*gallivm
= ctx
->bld_base
.base
.gallivm
;
431 return lp_build_const_int32(gallivm
, offset
);
433 LLVMValueRef addr
= LLVMBuildLoad(gallivm
->builder
, ctx
->addrs
[reg
->Index
][reg
->Swizzle
], "");
434 return LLVMBuildAdd(gallivm
->builder
, addr
, lp_build_const_int32(gallivm
, offset
), "");
438 * For indirect registers, construct a pointer directly to the requested
439 * element using getelementptr if possible.
441 * Returns NULL if the insertelement/extractelement fallback for array access
445 get_pointer_into_array(struct si_shader_context
*ctx
,
449 const struct tgsi_ind_register
*reg_indirect
)
452 struct tgsi_array_info
*array
;
453 struct gallivm_state
*gallivm
= ctx
->bld_base
.base
.gallivm
;
454 LLVMBuilderRef builder
= gallivm
->builder
;
455 LLVMValueRef idxs
[2];
459 if (file
!= TGSI_FILE_TEMPORARY
)
462 array_id
= get_temp_array_id(&ctx
->bld_base
, reg_index
, reg_indirect
);
466 alloca
= ctx
->temp_array_allocas
[array_id
- 1];
470 array
= &ctx
->temp_arrays
[array_id
- 1];
472 if (!(array
->writemask
& (1 << swizzle
)))
473 return ctx
->undef_alloca
;
475 index
= emit_array_index(ctx
, reg_indirect
,
476 reg_index
- ctx
->temp_arrays
[array_id
- 1].range
.First
);
478 /* Ensure that the index is within a valid range, to guard against
479 * VM faults and overwriting critical data (e.g. spilled resource
482 * TODO It should be possible to avoid the additional instructions
483 * if LLVM is changed so that it guarantuees:
484 * 1. the scratch space descriptor isolates the current wave (this
485 * could even save the scratch offset SGPR at the cost of an
486 * additional SALU instruction)
487 * 2. the memory for allocas must be allocated at the _end_ of the
488 * scratch space (after spilled registers)
490 index
= si_llvm_bound_index(ctx
, index
, array
->range
.Last
- array
->range
.First
+ 1);
492 index
= LLVMBuildMul(
494 lp_build_const_int32(gallivm
, util_bitcount(array
->writemask
)),
496 index
= LLVMBuildAdd(
498 lp_build_const_int32(
500 util_bitcount(array
->writemask
& ((1 << swizzle
) - 1))),
502 idxs
[0] = ctx
->bld_base
.uint_bld
.zero
;
504 return LLVMBuildGEP(builder
, alloca
, idxs
, 2, "");
508 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context
*bld_base
,
509 enum tgsi_opcode_type type
,
513 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
516 result
= LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base
->base
.gallivm
->context
, 32), bld_base
->base
.type
.length
* 2));
518 result
= LLVMBuildInsertElement(builder
,
520 bitcast(bld_base
, TGSI_TYPE_UNSIGNED
, ptr
),
521 bld_base
->int_bld
.zero
, "");
522 result
= LLVMBuildInsertElement(builder
,
524 bitcast(bld_base
, TGSI_TYPE_UNSIGNED
, ptr2
),
525 bld_base
->int_bld
.one
, "");
526 return bitcast(bld_base
, type
, result
);
530 emit_array_fetch(struct lp_build_tgsi_context
*bld_base
,
531 unsigned File
, enum tgsi_opcode_type type
,
532 struct tgsi_declaration_range range
,
535 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
536 struct gallivm_state
*gallivm
= ctx
->bld_base
.base
.gallivm
;
538 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
540 unsigned i
, size
= range
.Last
- range
.First
+ 1;
541 LLVMTypeRef vec
= LLVMVectorType(tgsi2llvmtype(bld_base
, type
), size
);
542 LLVMValueRef result
= LLVMGetUndef(vec
);
544 struct tgsi_full_src_register tmp_reg
= {};
545 tmp_reg
.Register
.File
= File
;
547 for (i
= 0; i
< size
; ++i
) {
548 tmp_reg
.Register
.Index
= i
+ range
.First
;
549 LLVMValueRef temp
= si_llvm_emit_fetch(bld_base
, &tmp_reg
, type
, swizzle
);
550 result
= LLVMBuildInsertElement(builder
, result
, temp
,
551 lp_build_const_int32(gallivm
, i
), "array_vector");
557 load_value_from_array(struct lp_build_tgsi_context
*bld_base
,
559 enum tgsi_opcode_type type
,
562 const struct tgsi_ind_register
*reg_indirect
)
564 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
565 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
566 LLVMBuilderRef builder
= gallivm
->builder
;
569 ptr
= get_pointer_into_array(ctx
, file
, swizzle
, reg_index
, reg_indirect
);
571 LLVMValueRef val
= LLVMBuildLoad(builder
, ptr
, "");
572 if (tgsi_type_is_64bit(type
)) {
573 LLVMValueRef ptr_hi
, val_hi
;
574 ptr_hi
= LLVMBuildGEP(builder
, ptr
, &bld_base
->uint_bld
.one
, 1, "");
575 val_hi
= LLVMBuildLoad(builder
, ptr_hi
, "");
576 val
= si_llvm_emit_fetch_64bit(bld_base
, type
, val
, val_hi
);
581 struct tgsi_declaration_range range
=
582 get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
584 emit_array_index(ctx
, reg_indirect
, reg_index
- range
.First
);
586 emit_array_fetch(bld_base
, file
, type
, range
, swizzle
);
587 return LLVMBuildExtractElement(builder
, array
, index
, "");
592 store_value_to_array(struct lp_build_tgsi_context
*bld_base
,
597 const struct tgsi_ind_register
*reg_indirect
)
599 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
600 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
601 LLVMBuilderRef builder
= gallivm
->builder
;
604 ptr
= get_pointer_into_array(ctx
, file
, chan_index
, reg_index
, reg_indirect
);
606 LLVMBuildStore(builder
, value
, ptr
);
609 struct tgsi_declaration_range range
= get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
610 LLVMValueRef index
= emit_array_index(ctx
, reg_indirect
, reg_index
- range
.First
);
612 emit_array_fetch(bld_base
, file
, TGSI_TYPE_FLOAT
, range
, chan_index
);
613 LLVMValueRef temp_ptr
;
615 array
= LLVMBuildInsertElement(builder
, array
, value
, index
, "");
617 size
= range
.Last
- range
.First
+ 1;
618 for (i
= 0; i
< size
; ++i
) {
620 case TGSI_FILE_OUTPUT
:
621 temp_ptr
= ctx
->outputs
[i
+ range
.First
][chan_index
];
624 case TGSI_FILE_TEMPORARY
:
625 if (range
.First
+ i
>= ctx
->temps_count
)
627 temp_ptr
= ctx
->temps
[(i
+ range
.First
) * TGSI_NUM_CHANNELS
+ chan_index
];
633 value
= LLVMBuildExtractElement(builder
, array
,
634 lp_build_const_int32(gallivm
, i
), "");
635 LLVMBuildStore(builder
, value
, temp_ptr
);
640 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
641 * reload them at each use. This must be true if the shader is using
642 * derivatives and KILL, because KILL can leave the WQM and then a lazy
643 * input load isn't in the WQM anymore.
645 static bool si_preload_fs_inputs(struct si_shader_context
*ctx
)
647 struct si_shader_selector
*sel
= ctx
->shader
->selector
;
649 return sel
->info
.uses_derivatives
&&
654 get_output_ptr(struct lp_build_tgsi_context
*bld_base
, unsigned index
,
657 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
659 assert(index
<= ctx
->bld_base
.info
->file_max
[TGSI_FILE_OUTPUT
]);
660 return ctx
->outputs
[index
][chan
];
663 LLVMValueRef
si_llvm_emit_fetch(struct lp_build_tgsi_context
*bld_base
,
664 const struct tgsi_full_src_register
*reg
,
665 enum tgsi_opcode_type type
,
668 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
669 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
670 LLVMValueRef result
= NULL
, ptr
, ptr2
;
673 LLVMValueRef values
[TGSI_NUM_CHANNELS
];
675 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
676 values
[chan
] = si_llvm_emit_fetch(bld_base
, reg
, type
, chan
);
678 return lp_build_gather_values(bld_base
->base
.gallivm
, values
,
682 if (reg
->Register
.Indirect
) {
683 LLVMValueRef load
= load_value_from_array(bld_base
, reg
->Register
.File
, type
,
684 swizzle
, reg
->Register
.Index
, ®
->Indirect
);
685 return bitcast(bld_base
, type
, load
);
688 switch(reg
->Register
.File
) {
689 case TGSI_FILE_IMMEDIATE
: {
690 LLVMTypeRef ctype
= tgsi2llvmtype(bld_base
, type
);
691 if (tgsi_type_is_64bit(type
)) {
692 result
= LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base
->base
.gallivm
->context
, 32), bld_base
->base
.type
.length
* 2));
693 result
= LLVMConstInsertElement(result
,
694 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
],
695 bld_base
->int_bld
.zero
);
696 result
= LLVMConstInsertElement(result
,
697 ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
+ 1],
698 bld_base
->int_bld
.one
);
699 return LLVMConstBitCast(result
, ctype
);
701 return LLVMConstBitCast(ctx
->imms
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
], ctype
);
705 case TGSI_FILE_INPUT
: {
706 unsigned index
= reg
->Register
.Index
;
707 LLVMValueRef input
[4];
709 /* I don't think doing this for vertex shaders is beneficial.
710 * For those, we want to make sure the VMEM loads are executed
711 * only once. Fragment shaders don't care much, because
712 * v_interp instructions are much cheaper than VMEM loads.
714 if (!si_preload_fs_inputs(ctx
) &&
715 ctx
->bld_base
.info
->processor
== PIPE_SHADER_FRAGMENT
)
716 ctx
->load_input(ctx
, index
, &ctx
->input_decls
[index
], input
);
718 memcpy(input
, &ctx
->inputs
[index
* 4], sizeof(input
));
720 result
= input
[swizzle
];
722 if (tgsi_type_is_64bit(type
)) {
724 ptr2
= input
[swizzle
+ 1];
725 return si_llvm_emit_fetch_64bit(bld_base
, type
, ptr
, ptr2
);
730 case TGSI_FILE_TEMPORARY
:
731 if (reg
->Register
.Index
>= ctx
->temps_count
)
732 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
733 ptr
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
];
734 if (tgsi_type_is_64bit(type
)) {
735 ptr2
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
+ 1];
736 return si_llvm_emit_fetch_64bit(bld_base
, type
,
737 LLVMBuildLoad(builder
, ptr
, ""),
738 LLVMBuildLoad(builder
, ptr2
, ""));
740 result
= LLVMBuildLoad(builder
, ptr
, "");
743 case TGSI_FILE_OUTPUT
:
744 ptr
= get_output_ptr(bld_base
, reg
->Register
.Index
, swizzle
);
745 if (tgsi_type_is_64bit(type
)) {
746 ptr2
= get_output_ptr(bld_base
, reg
->Register
.Index
, swizzle
+ 1);
747 return si_llvm_emit_fetch_64bit(bld_base
, type
,
748 LLVMBuildLoad(builder
, ptr
, ""),
749 LLVMBuildLoad(builder
, ptr2
, ""));
751 result
= LLVMBuildLoad(builder
, ptr
, "");
755 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
758 return bitcast(bld_base
, type
, result
);
761 static LLVMValueRef
fetch_system_value(struct lp_build_tgsi_context
*bld_base
,
762 const struct tgsi_full_src_register
*reg
,
763 enum tgsi_opcode_type type
,
766 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
767 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
769 LLVMValueRef cval
= ctx
->system_values
[reg
->Register
.Index
];
770 if (LLVMGetTypeKind(LLVMTypeOf(cval
)) == LLVMVectorTypeKind
) {
771 cval
= LLVMBuildExtractElement(gallivm
->builder
, cval
,
772 lp_build_const_int32(gallivm
, swizzle
), "");
774 return bitcast(bld_base
, type
, cval
);
777 static void emit_declaration(struct lp_build_tgsi_context
*bld_base
,
778 const struct tgsi_full_declaration
*decl
)
780 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
781 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
782 unsigned first
, last
, i
;
783 switch(decl
->Declaration
.File
) {
784 case TGSI_FILE_ADDRESS
:
787 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
789 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
790 ctx
->addrs
[idx
][chan
] = lp_build_alloca_undef(
792 ctx
->bld_base
.uint_bld
.elem_type
, "");
798 case TGSI_FILE_TEMPORARY
:
801 LLVMValueRef array_alloca
= NULL
;
803 unsigned writemask
= decl
->Declaration
.UsageMask
;
804 first
= decl
->Range
.First
;
805 last
= decl
->Range
.Last
;
806 decl_size
= 4 * ((last
- first
) + 1);
808 if (decl
->Declaration
.Array
) {
809 unsigned id
= decl
->Array
.ArrayID
- 1;
812 writemask
&= ctx
->temp_arrays
[id
].writemask
;
813 ctx
->temp_arrays
[id
].writemask
= writemask
;
814 array_size
= ((last
- first
) + 1) * util_bitcount(writemask
);
816 /* If the array has more than 16 elements, store it
817 * in memory using an alloca that spans the entire
820 * Otherwise, store each array element individually.
821 * We will then generate vectors (per-channel, up to
822 * <16 x float> if the usagemask is a single bit) for
823 * indirect addressing.
825 * Note that 16 is the number of vector elements that
826 * LLVM will store in a register, so theoretically an
827 * array with up to 4 * 16 = 64 elements could be
828 * handled this way, but whether that's a good idea
829 * depends on VGPR register pressure elsewhere.
831 * FIXME: We shouldn't need to have the non-alloca
832 * code path for arrays. LLVM should be smart enough to
833 * promote allocas into registers when profitable.
835 * LLVM 3.8 crashes with this.
837 if ((HAVE_LLVM
>= 0x0309 && array_size
> 16) ||
838 /* TODO: VGPR indexing is buggy on GFX9. */
839 ctx
->screen
->b
.chip_class
== GFX9
) {
840 array_alloca
= LLVMBuildAlloca(builder
,
841 LLVMArrayType(bld_base
->base
.vec_type
,
842 array_size
), "array");
843 ctx
->temp_array_allocas
[id
] = array_alloca
;
847 if (!ctx
->temps_count
) {
848 ctx
->temps_count
= bld_base
->info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
849 ctx
->temps
= MALLOC(TGSI_NUM_CHANNELS
* ctx
->temps_count
* sizeof(LLVMValueRef
));
852 for (i
= 0; i
< decl_size
; ++i
) {
854 snprintf(name
, sizeof(name
), "TEMP%d.%c",
855 first
+ i
/ 4, "xyzw"[i
% 4]);
857 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] =
858 lp_build_alloca_undef(bld_base
->base
.gallivm
,
859 bld_base
->base
.vec_type
,
863 LLVMValueRef idxs
[2] = {
864 bld_base
->uint_bld
.zero
,
869 if (writemask
!= TGSI_WRITEMASK_XYZW
&&
870 !ctx
->undef_alloca
) {
871 /* Create a dummy alloca. We use it so that we
872 * have a pointer that is safe to load from if
873 * a shader ever reads from a channel that
874 * it never writes to.
876 ctx
->undef_alloca
= lp_build_alloca_undef(
877 bld_base
->base
.gallivm
,
878 bld_base
->base
.vec_type
, "undef");
881 for (i
= 0; i
< decl_size
; ++i
) {
883 if (writemask
& (1 << (i
% 4))) {
885 snprintf(name
, sizeof(name
), "TEMP%d.%c",
886 first
+ i
/ 4, "xyzw"[i
% 4]);
888 idxs
[1] = lp_build_const_int32(bld_base
->base
.gallivm
, j
);
889 ptr
= LLVMBuildGEP(builder
, array_alloca
, idxs
, 2, name
);
892 ptr
= ctx
->undef_alloca
;
894 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] = ptr
;
899 case TGSI_FILE_INPUT
:
902 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
903 if (ctx
->load_input
&&
904 ctx
->input_decls
[idx
].Declaration
.File
!= TGSI_FILE_INPUT
) {
905 ctx
->input_decls
[idx
] = *decl
;
906 ctx
->input_decls
[idx
].Range
.First
= idx
;
907 ctx
->input_decls
[idx
].Range
.Last
= idx
;
908 ctx
->input_decls
[idx
].Semantic
.Index
+= idx
- decl
->Range
.First
;
910 if (si_preload_fs_inputs(ctx
) ||
911 bld_base
->info
->processor
!= PIPE_SHADER_FRAGMENT
)
912 ctx
->load_input(ctx
, idx
, &ctx
->input_decls
[idx
],
913 &ctx
->inputs
[idx
* 4]);
919 case TGSI_FILE_SYSTEM_VALUE
:
922 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
923 ctx
->load_system_value(ctx
, idx
, decl
);
928 case TGSI_FILE_OUTPUT
:
932 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
934 assert(idx
< RADEON_LLVM_MAX_OUTPUTS
);
935 if (ctx
->outputs
[idx
][0])
937 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
939 snprintf(name
, sizeof(name
), "OUT%d.%c",
940 idx
, "xyzw"[chan
% 4]);
942 ctx
->outputs
[idx
][chan
] = lp_build_alloca_undef(
944 ctx
->bld_base
.base
.elem_type
, name
);
950 case TGSI_FILE_MEMORY
:
951 ctx
->declare_memory_region(ctx
, decl
);
959 void si_llvm_emit_store(struct lp_build_tgsi_context
*bld_base
,
960 const struct tgsi_full_instruction
*inst
,
961 const struct tgsi_opcode_info
*info
,
964 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
965 struct gallivm_state
*gallivm
= ctx
->bld_base
.base
.gallivm
;
966 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
967 LLVMBuilderRef builder
= ctx
->bld_base
.base
.gallivm
->builder
;
968 LLVMValueRef temp_ptr
, temp_ptr2
= NULL
;
969 unsigned chan
, chan_index
;
970 bool is_vec_store
= false;
971 enum tgsi_opcode_type dtype
= tgsi_opcode_infer_dst_type(inst
->Instruction
.Opcode
);
974 LLVMTypeKind k
= LLVMGetTypeKind(LLVMTypeOf(dst
[0]));
975 is_vec_store
= (k
== LLVMVectorTypeKind
);
979 LLVMValueRef values
[4] = {};
980 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst
, chan
) {
981 LLVMValueRef index
= lp_build_const_int32(gallivm
, chan
);
982 values
[chan
] = LLVMBuildExtractElement(gallivm
->builder
,
985 bld_base
->emit_store(bld_base
, inst
, info
, values
);
989 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
990 LLVMValueRef value
= dst
[chan_index
];
992 if (tgsi_type_is_64bit(dtype
) && (chan_index
== 1 || chan_index
== 3))
994 if (inst
->Instruction
.Saturate
)
995 value
= ac_build_clamp(&ctx
->ac
, value
);
997 if (reg
->Register
.File
== TGSI_FILE_ADDRESS
) {
998 temp_ptr
= ctx
->addrs
[reg
->Register
.Index
][chan_index
];
999 LLVMBuildStore(builder
, value
, temp_ptr
);
1003 if (!tgsi_type_is_64bit(dtype
))
1004 value
= bitcast(bld_base
, TGSI_TYPE_FLOAT
, value
);
1006 if (reg
->Register
.Indirect
) {
1007 unsigned file
= reg
->Register
.File
;
1008 unsigned reg_index
= reg
->Register
.Index
;
1009 store_value_to_array(bld_base
, value
, file
, chan_index
,
1010 reg_index
, ®
->Indirect
);
1012 switch(reg
->Register
.File
) {
1013 case TGSI_FILE_OUTPUT
:
1014 temp_ptr
= ctx
->outputs
[reg
->Register
.Index
][chan_index
];
1015 if (tgsi_type_is_64bit(dtype
))
1016 temp_ptr2
= ctx
->outputs
[reg
->Register
.Index
][chan_index
+ 1];
1019 case TGSI_FILE_TEMPORARY
:
1021 if (reg
->Register
.Index
>= ctx
->temps_count
)
1024 temp_ptr
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
];
1025 if (tgsi_type_is_64bit(dtype
))
1026 temp_ptr2
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
+ 1];
1033 if (!tgsi_type_is_64bit(dtype
))
1034 LLVMBuildStore(builder
, value
, temp_ptr
);
1036 LLVMValueRef ptr
= LLVMBuildBitCast(builder
, value
,
1037 LLVMVectorType(LLVMIntTypeInContext(bld_base
->base
.gallivm
->context
, 32), 2), "");
1039 value
= LLVMBuildExtractElement(builder
, ptr
,
1040 bld_base
->uint_bld
.zero
, "");
1041 val2
= LLVMBuildExtractElement(builder
, ptr
,
1042 bld_base
->uint_bld
.one
, "");
1044 LLVMBuildStore(builder
, bitcast(bld_base
, TGSI_TYPE_FLOAT
, value
), temp_ptr
);
1045 LLVMBuildStore(builder
, bitcast(bld_base
, TGSI_TYPE_FLOAT
, val2
), temp_ptr2
);
1051 static void set_basicblock_name(LLVMBasicBlockRef bb
, const char *base
, int pc
)
1054 /* Subtract 1 so that the number shown is that of the corresponding
1055 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1056 * the instruction number of the corresponding TGSI IF.
1058 snprintf(buf
, sizeof(buf
), "%s%d", base
, pc
- 1);
1059 LLVMSetValueName(LLVMBasicBlockAsValue(bb
), buf
);
1062 /* Append a basic block at the level of the parent flow.
1064 static LLVMBasicBlockRef
append_basic_block(struct si_shader_context
*ctx
,
1067 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1069 assert(ctx
->flow_depth
>= 1);
1071 if (ctx
->flow_depth
>= 2) {
1072 struct si_llvm_flow
*flow
= &ctx
->flow
[ctx
->flow_depth
- 2];
1074 return LLVMInsertBasicBlockInContext(gallivm
->context
,
1075 flow
->next_block
, name
);
1078 return LLVMAppendBasicBlockInContext(gallivm
->context
, ctx
->main_fn
, name
);
1081 /* Emit a branch to the given default target for the current block if
1082 * applicable -- that is, if the current block does not already contain a
1083 * branch from a break or continue.
1085 static void emit_default_branch(LLVMBuilderRef builder
, LLVMBasicBlockRef target
)
1087 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder
)))
1088 LLVMBuildBr(builder
, target
);
1091 static void bgnloop_emit(const struct lp_build_tgsi_action
*action
,
1092 struct lp_build_tgsi_context
*bld_base
,
1093 struct lp_build_emit_data
*emit_data
)
1095 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1096 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1097 struct si_llvm_flow
*flow
= push_flow(ctx
);
1098 flow
->loop_entry_block
= append_basic_block(ctx
, "LOOP");
1099 flow
->next_block
= append_basic_block(ctx
, "ENDLOOP");
1100 set_basicblock_name(flow
->loop_entry_block
, "loop", bld_base
->pc
);
1101 LLVMBuildBr(gallivm
->builder
, flow
->loop_entry_block
);
1102 LLVMPositionBuilderAtEnd(gallivm
->builder
, flow
->loop_entry_block
);
1105 static void brk_emit(const struct lp_build_tgsi_action
*action
,
1106 struct lp_build_tgsi_context
*bld_base
,
1107 struct lp_build_emit_data
*emit_data
)
1109 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1110 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1111 struct si_llvm_flow
*flow
= get_innermost_loop(ctx
);
1113 LLVMBuildBr(gallivm
->builder
, flow
->next_block
);
1116 static void cont_emit(const struct lp_build_tgsi_action
*action
,
1117 struct lp_build_tgsi_context
*bld_base
,
1118 struct lp_build_emit_data
*emit_data
)
1120 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1121 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1122 struct si_llvm_flow
*flow
= get_innermost_loop(ctx
);
1124 LLVMBuildBr(gallivm
->builder
, flow
->loop_entry_block
);
1127 static void else_emit(const struct lp_build_tgsi_action
*action
,
1128 struct lp_build_tgsi_context
*bld_base
,
1129 struct lp_build_emit_data
*emit_data
)
1131 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1132 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1133 struct si_llvm_flow
*current_branch
= get_current_flow(ctx
);
1134 LLVMBasicBlockRef endif_block
;
1136 assert(!current_branch
->loop_entry_block
);
1138 endif_block
= append_basic_block(ctx
, "ENDIF");
1139 emit_default_branch(gallivm
->builder
, endif_block
);
1141 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_branch
->next_block
);
1142 set_basicblock_name(current_branch
->next_block
, "else", bld_base
->pc
);
1144 current_branch
->next_block
= endif_block
;
1147 static void endif_emit(const struct lp_build_tgsi_action
*action
,
1148 struct lp_build_tgsi_context
*bld_base
,
1149 struct lp_build_emit_data
*emit_data
)
1151 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1152 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1153 struct si_llvm_flow
*current_branch
= get_current_flow(ctx
);
1155 assert(!current_branch
->loop_entry_block
);
1157 emit_default_branch(gallivm
->builder
, current_branch
->next_block
);
1158 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_branch
->next_block
);
1159 set_basicblock_name(current_branch
->next_block
, "endif", bld_base
->pc
);
1164 static void endloop_emit(const struct lp_build_tgsi_action
*action
,
1165 struct lp_build_tgsi_context
*bld_base
,
1166 struct lp_build_emit_data
*emit_data
)
1168 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1169 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1170 struct si_llvm_flow
*current_loop
= get_current_flow(ctx
);
1172 assert(current_loop
->loop_entry_block
);
1174 emit_default_branch(gallivm
->builder
, current_loop
->loop_entry_block
);
1176 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_loop
->next_block
);
1177 set_basicblock_name(current_loop
->next_block
, "endloop", bld_base
->pc
);
1181 static void if_cond_emit(const struct lp_build_tgsi_action
*action
,
1182 struct lp_build_tgsi_context
*bld_base
,
1183 struct lp_build_emit_data
*emit_data
,
1186 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1187 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1188 struct si_llvm_flow
*flow
= push_flow(ctx
);
1189 LLVMBasicBlockRef if_block
;
1191 if_block
= append_basic_block(ctx
, "IF");
1192 flow
->next_block
= append_basic_block(ctx
, "ELSE");
1193 set_basicblock_name(if_block
, "if", bld_base
->pc
);
1194 LLVMBuildCondBr(gallivm
->builder
, cond
, if_block
, flow
->next_block
);
1195 LLVMPositionBuilderAtEnd(gallivm
->builder
, if_block
);
1198 static void if_emit(const struct lp_build_tgsi_action
*action
,
1199 struct lp_build_tgsi_context
*bld_base
,
1200 struct lp_build_emit_data
*emit_data
)
1202 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1205 cond
= LLVMBuildFCmp(gallivm
->builder
, LLVMRealUNE
,
1207 bld_base
->base
.zero
, "");
1209 if_cond_emit(action
, bld_base
, emit_data
, cond
);
1212 static void uif_emit(const struct lp_build_tgsi_action
*action
,
1213 struct lp_build_tgsi_context
*bld_base
,
1214 struct lp_build_emit_data
*emit_data
)
1216 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1219 cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
,
1220 bitcast(bld_base
, TGSI_TYPE_UNSIGNED
, emit_data
->args
[0]),
1221 bld_base
->int_bld
.zero
, "");
1223 if_cond_emit(action
, bld_base
, emit_data
, cond
);
1226 static void emit_immediate(struct lp_build_tgsi_context
*bld_base
,
1227 const struct tgsi_full_immediate
*imm
)
1230 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1232 for (i
= 0; i
< 4; ++i
) {
1233 ctx
->imms
[ctx
->imms_num
* TGSI_NUM_CHANNELS
+ i
] =
1234 LLVMConstInt(bld_base
->uint_bld
.elem_type
, imm
->u
[i
].Uint
, false );
/*
 * Initialize a shader context for translating a TGSI shader to LLVM IR.
 *
 * Visible steps, in order:
 *  - zero *ctx, then record shader and sscreen; ctx->type is
 *    info->processor, or -1 when info is NULL;
 *  - create an LLVM context and a module named "tgsi" with the target
 *    string "amdgcn--"; under HAVE_LLVM >= 0x0309 the module data layout
 *    is taken from the target machine tm;
 *  - pick the float mode from the DBG_UNSAFE_MATH debug flag and create
 *    the builder with lp_create_builder;
 *  - initialize ctx->ac and let it share the module and builder;
 *  - when info reports temporary arrays, CALLOC temp_arrays and
 *    temp_array_allocas and call tgsi_scan_arrays;
 *  - when info reports immediates, MALLOC ctx->imms with
 *    TGSI_NUM_CHANNELS values per immediate;
 *  - initialize the float, uint, int, double, uint64 and int64 build
 *    contexts;
 *  - install the store/swizzle/declaration/immediate callbacks, the
 *    per-file fetch functions and the control-flow opcode handlers;
 *  - cache commonly used LLVM types and the i32 constants 0 and 1.
 *
 * info may be NULL; every dereference of it below is guarded.
 * The CALLOC/MALLOC results are not checked in the code shown here.
 *
 * NOTE(review): in this copy several statements look truncated (for
 * example the calls to lp_create_builder, tgsi_scan_arrays,
 * LLVMGetMDKindIDInContext and LLVMMDNodeInContext have no visible closing
 * arguments, only type.floating is set before type is used, and the first
 * block comment has no terminator) -- verify against the complete file.
 */
1240 void si_llvm_context_init(struct si_shader_context
*ctx
,
1241 struct si_screen
*sscreen
,
1242 struct si_shader
*shader
,
1243 LLVMTargetMachineRef tm
,
1244 const struct tgsi_shader_info
*info
,
1245 const struct tgsi_token
*tokens
)
1247 struct lp_type type
;
1249 /* Initialize the gallivm object:
1250 * We are only using the module, context, and builder fields of this struct.
1251 * This should be enough for us to be able to pass our gallivm struct to the
1252 * helper functions in the gallivm module.
1254 memset(ctx
, 0, sizeof(*ctx
));
1255 ctx
->shader
= shader
;
1256 ctx
->screen
= sscreen
;
1258 ctx
->type
= info
? info
->processor
: -1;
1260 ctx
->gallivm
.context
= LLVMContextCreate();
1261 ctx
->gallivm
.module
= LLVMModuleCreateWithNameInContext("tgsi",
1262 ctx
->gallivm
.context
);
1263 LLVMSetTarget(ctx
->gallivm
.module
, "amdgcn--");
1265 #if HAVE_LLVM >= 0x0309
1266 LLVMTargetDataRef data_layout
= LLVMCreateTargetDataLayout(tm
);
1267 char *data_layout_str
= LLVMCopyStringRepOfTargetData(data_layout
);
1268 LLVMSetDataLayout(ctx
->gallivm
.module
, data_layout_str
);
1269 LLVMDisposeTargetData(data_layout
);
1270 LLVMDisposeMessage(data_layout_str
);
1273 bool unsafe_fpmath
= (sscreen
->b
.debug_flags
& DBG_UNSAFE_MATH
) != 0;
1274 enum lp_float_mode float_mode
=
1275 unsafe_fpmath
? LP_FLOAT_MODE_UNSAFE_FP_MATH
:
1276 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH
;
1278 ctx
->gallivm
.builder
= lp_create_builder(ctx
->gallivm
.context
,
1281 ac_llvm_context_init(&ctx
->ac
, ctx
->gallivm
.context
);
1282 ctx
->ac
.module
= ctx
->gallivm
.module
;
1283 ctx
->ac
.builder
= ctx
->gallivm
.builder
;
1285 struct lp_build_tgsi_context
*bld_base
= &ctx
->bld_base
;
1287 bld_base
->info
= info
;
1289 if (info
&& info
->array_max
[TGSI_FILE_TEMPORARY
] > 0) {
1290 int size
= info
->array_max
[TGSI_FILE_TEMPORARY
];
1292 ctx
->temp_arrays
= CALLOC(size
, sizeof(ctx
->temp_arrays
[0]));
1293 ctx
->temp_array_allocas
= CALLOC(size
, sizeof(ctx
->temp_array_allocas
[0]));
1296 tgsi_scan_arrays(tokens
, TGSI_FILE_TEMPORARY
, size
,
1300 if (info
&& info
->file_max
[TGSI_FILE_IMMEDIATE
] >= 0) {
1301 int size
= info
->file_max
[TGSI_FILE_IMMEDIATE
] + 1;
1302 ctx
->imms
= MALLOC(size
* TGSI_NUM_CHANNELS
* sizeof(LLVMValueRef
));
1305 type
.floating
= true;
1312 lp_build_context_init(&bld_base
->base
, &ctx
->gallivm
, type
);
1313 lp_build_context_init(&ctx
->bld_base
.uint_bld
, &ctx
->gallivm
, lp_uint_type(type
));
1314 lp_build_context_init(&ctx
->bld_base
.int_bld
, &ctx
->gallivm
, lp_int_type(type
));
1316 lp_build_context_init(&ctx
->bld_base
.dbl_bld
, &ctx
->gallivm
, type
);
1317 lp_build_context_init(&ctx
->bld_base
.uint64_bld
, &ctx
->gallivm
, lp_uint_type(type
));
1318 lp_build_context_init(&ctx
->bld_base
.int64_bld
, &ctx
->gallivm
, lp_int_type(type
));
1321 bld_base
->emit_store
= si_llvm_emit_store
;
1322 bld_base
->emit_swizzle
= emit_swizzle
;
1323 bld_base
->emit_declaration
= emit_declaration
;
1324 bld_base
->emit_immediate
= emit_immediate
;
1326 bld_base
->emit_fetch_funcs
[TGSI_FILE_IMMEDIATE
] = si_llvm_emit_fetch
;
1327 bld_base
->emit_fetch_funcs
[TGSI_FILE_INPUT
] = si_llvm_emit_fetch
;
1328 bld_base
->emit_fetch_funcs
[TGSI_FILE_TEMPORARY
] = si_llvm_emit_fetch
;
1329 bld_base
->emit_fetch_funcs
[TGSI_FILE_OUTPUT
] = si_llvm_emit_fetch
;
1330 bld_base
->emit_fetch_funcs
[TGSI_FILE_SYSTEM_VALUE
] = fetch_system_value
;
1332 /* metadata allowing 2.5 ULP */
1333 ctx
->fpmath_md_kind
= LLVMGetMDKindIDInContext(ctx
->gallivm
.context
,
1335 LLVMValueRef arg
= lp_build_const_float(&ctx
->gallivm
, 2.5);
1336 ctx
->fpmath_md_2p5_ulp
= LLVMMDNodeInContext(ctx
->gallivm
.context
,
/* Control-flow opcodes are handled by the emit functions of this file. */
1339 bld_base
->op_actions
[TGSI_OPCODE_BGNLOOP
].emit
= bgnloop_emit
;
1340 bld_base
->op_actions
[TGSI_OPCODE_BRK
].emit
= brk_emit
;
1341 bld_base
->op_actions
[TGSI_OPCODE_CONT
].emit
= cont_emit
;
1342 bld_base
->op_actions
[TGSI_OPCODE_IF
].emit
= if_emit
;
1343 bld_base
->op_actions
[TGSI_OPCODE_UIF
].emit
= uif_emit
;
1344 bld_base
->op_actions
[TGSI_OPCODE_ELSE
].emit
= else_emit
;
1345 bld_base
->op_actions
[TGSI_OPCODE_ENDIF
].emit
= endif_emit
;
1346 bld_base
->op_actions
[TGSI_OPCODE_ENDLOOP
].emit
= endloop_emit
;
1348 si_shader_context_init_alu(&ctx
->bld_base
);
/* Cache frequently used LLVM types and i32 constants in the context. */
1350 ctx
->voidt
= LLVMVoidTypeInContext(ctx
->gallivm
.context
);
1351 ctx
->i1
= LLVMInt1TypeInContext(ctx
->gallivm
.context
);
1352 ctx
->i8
= LLVMInt8TypeInContext(ctx
->gallivm
.context
);
1353 ctx
->i32
= LLVMInt32TypeInContext(ctx
->gallivm
.context
);
1354 ctx
->i64
= LLVMInt64TypeInContext(ctx
->gallivm
.context
);
1355 ctx
->i128
= LLVMIntTypeInContext(ctx
->gallivm
.context
, 128);
1356 ctx
->f32
= LLVMFloatTypeInContext(ctx
->gallivm
.context
);
1357 ctx
->v16i8
= LLVMVectorType(ctx
->i8
, 16);
1358 ctx
->v2i32
= LLVMVectorType(ctx
->i32
, 2);
1359 ctx
->v4i32
= LLVMVectorType(ctx
->i32
, 4);
1360 ctx
->v4f32
= LLVMVectorType(ctx
->f32
, 4);
1361 ctx
->v8i32
= LLVMVectorType(ctx
->i32
, 8);
1363 ctx
->i32_0
= LLVMConstInt(ctx
->i32
, 0, 0);
1364 ctx
->i32_1
= LLVMConstInt(ctx
->i32
, 1, 0);
1367 void si_llvm_create_func(struct si_shader_context
*ctx
,
1369 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
1370 LLVMTypeRef
*ParamTypes
, unsigned ParamCount
)
1372 LLVMTypeRef main_fn_type
, ret_type
;
1373 LLVMBasicBlockRef main_fn_body
;
1375 if (num_return_elems
)
1376 ret_type
= LLVMStructTypeInContext(ctx
->gallivm
.context
,
1378 num_return_elems
, true);
1380 ret_type
= LLVMVoidTypeInContext(ctx
->gallivm
.context
);
1382 /* Setup the function */
1383 ctx
->return_type
= ret_type
;
1384 main_fn_type
= LLVMFunctionType(ret_type
, ParamTypes
, ParamCount
, 0);
1385 ctx
->main_fn
= LLVMAddFunction(ctx
->gallivm
.module
, name
, main_fn_type
);
1386 main_fn_body
= LLVMAppendBasicBlockInContext(ctx
->gallivm
.context
,
1387 ctx
->main_fn
, "main_body");
1388 LLVMPositionBuilderAtEnd(ctx
->gallivm
.builder
, main_fn_body
);
/*
 * Run the module-level LLVM passes on the shader module and release the
 * objects that are only needed while building IR.
 *
 * Visible steps: create a pass manager, attach target library info built
 * from the module's target triple, add the verifier, always-inliner,
 * mem2reg, scalar replacement of aggregates, LICM, aggressive DCE, CFG
 * simplification and instruction combining passes, run the pass manager
 * on ctx->gallivm.module, then dispose of the builder, the pass manager
 * and the target library info.
 *
 * The builder is disposed here, so no further IR can be emitted through
 * it after this call.
 *
 * NOTE(review): the parameter list is cut off after ctx in this copy, and
 * the verifier pass may be conditional on the missing parameter -- confirm
 * against the complete file.
 */
1391 void si_llvm_finalize_module(struct si_shader_context
*ctx
,
1394 struct gallivm_state
*gallivm
= ctx
->bld_base
.base
.gallivm
;
1395 const char *triple
= LLVMGetTarget(gallivm
->module
);
1396 LLVMTargetLibraryInfoRef target_library_info
;
1398 /* Create the pass manager */
1399 gallivm
->passmgr
= LLVMCreatePassManager();
1401 target_library_info
= gallivm_create_target_library_info(triple
);
1402 LLVMAddTargetLibraryInfo(target_library_info
, gallivm
->passmgr
);
1405 LLVMAddVerifierPass(gallivm
->passmgr
);
1407 LLVMAddAlwaysInlinerPass(gallivm
->passmgr
);
1409 /* This pass should eliminate all the load and store instructions */
1410 LLVMAddPromoteMemoryToRegisterPass(gallivm
->passmgr
);
1412 /* Add some optimization passes */
1413 LLVMAddScalarReplAggregatesPass(gallivm
->passmgr
);
1414 LLVMAddLICMPass(gallivm
->passmgr
);
1415 LLVMAddAggressiveDCEPass(gallivm
->passmgr
);
1416 LLVMAddCFGSimplificationPass(gallivm
->passmgr
);
1417 LLVMAddInstructionCombiningPass(gallivm
->passmgr
);
1420 LLVMRunPassManager(gallivm
->passmgr
, ctx
->gallivm
.module
);
1422 LLVMDisposeBuilder(gallivm
->builder
);
1423 LLVMDisposePassManager(gallivm
->passmgr
);
1424 gallivm_dispose_target_library_info(target_library_info
);
1427 void si_llvm_dispose(struct si_shader_context
*ctx
)
1429 LLVMDisposeModule(ctx
->bld_base
.base
.gallivm
->module
);
1430 LLVMContextDispose(ctx
->bld_base
.base
.gallivm
->context
);
1431 FREE(ctx
->temp_arrays
);
1432 ctx
->temp_arrays
= NULL
;
1433 FREE(ctx
->temp_array_allocas
);
1434 ctx
->temp_array_allocas
= NULL
;
1437 ctx
->temps_count
= 0;
1443 ctx
->flow_depth_max
= 0;