2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include "si_shader_internal.h"
25 #include "radeon/radeon_elf_util.h"
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
41 #include <llvm-c/Transforms/Scalar.h>
43 /* Data for if/else/endif and bgnloop/endloop control flow structures.
46 /* Loop exit or next part of if/else/endif. */
47 LLVMBasicBlockRef next_block
;
48 LLVMBasicBlockRef loop_entry_block
;
51 #define CPU_STRING_LEN 30
52 #define FS_STRING_LEN 30
53 #define TRIPLE_STRING_LEN 7
/**
 * Shader types for the LLVM backend.
 */
enum si_llvm_shader_type {
	RADEON_LLVM_SHADER_PS = 0,
	RADEON_LLVM_SHADER_VS = 1,
	RADEON_LLVM_SHADER_GS = 2,
	RADEON_LLVM_SHADER_CS = 3,
};
/* Numeric calling-convention IDs passed to LLVMSetFunctionCallConv(). */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};
72 void si_llvm_add_attribute(LLVMValueRef F
, const char *name
, int value
)
76 snprintf(str
, sizeof(str
), "%i", value
);
77 LLVMAddTargetDependentFunctionAttr(F
, name
, str
);
81 * Set the shader type we want to compile
83 * @param type shader type to set
85 void si_llvm_shader_type(LLVMValueRef F
, unsigned type
)
87 enum si_llvm_shader_type llvm_type
;
88 enum si_llvm_calling_convention calling_conv
;
91 case PIPE_SHADER_VERTEX
:
92 case PIPE_SHADER_TESS_CTRL
:
93 case PIPE_SHADER_TESS_EVAL
:
94 llvm_type
= RADEON_LLVM_SHADER_VS
;
95 calling_conv
= RADEON_LLVM_AMDGPU_VS
;
97 case PIPE_SHADER_GEOMETRY
:
98 llvm_type
= RADEON_LLVM_SHADER_GS
;
99 calling_conv
= RADEON_LLVM_AMDGPU_GS
;
101 case PIPE_SHADER_FRAGMENT
:
102 llvm_type
= RADEON_LLVM_SHADER_PS
;
103 calling_conv
= RADEON_LLVM_AMDGPU_PS
;
105 case PIPE_SHADER_COMPUTE
:
106 llvm_type
= RADEON_LLVM_SHADER_CS
;
107 calling_conv
= RADEON_LLVM_AMDGPU_CS
;
110 unreachable("Unhandle shader type");
113 if (HAVE_LLVM
>= 0x309)
114 LLVMSetFunctionCallConv(F
, calling_conv
);
116 si_llvm_add_attribute(F
, "ShaderType", llvm_type
);
/* One-time registration of the LLVM GPU backend.  The backend was named
 * R600 before LLVM 3.7 and AMDGPU afterwards.
 *
 * Declared with a (void) prototype so it matches the callback type
 * expected by call_once().
 */
static void init_amdgpu_target(void)
{
	gallivm_init_llvm_targets();
#if HAVE_LLVM < 0x0307
	LLVMInitializeR600TargetInfo();
	LLVMInitializeR600Target();
	LLVMInitializeR600TargetMC();
	LLVMInitializeR600AsmPrinter();
#else
	LLVMInitializeAMDGPUTargetInfo();
	LLVMInitializeAMDGPUTarget();
	LLVMInitializeAMDGPUTargetMC();
	LLVMInitializeAMDGPUAsmPrinter();
#endif
}
136 static once_flag init_amdgpu_target_once_flag
= ONCE_FLAG_INIT
;
138 LLVMTargetRef
si_llvm_get_amdgpu_target(const char *triple
)
140 LLVMTargetRef target
= NULL
;
141 char *err_message
= NULL
;
143 call_once(&init_amdgpu_target_once_flag
, init_amdgpu_target
);
145 if (LLVMGetTargetFromTriple(triple
, &target
, &err_message
)) {
146 fprintf(stderr
, "Cannot find target for triple %s ", triple
);
148 fprintf(stderr
, "%s\n", err_message
);
150 LLVMDisposeMessage(err_message
);
/* Context handed to the LLVM diagnostic handler during compilation. */
struct si_llvm_diagnostics {
	/* Sink for debug messages. */
	struct pipe_debug_callback *debug;
	/* Set to non-zero once an error diagnostic has been seen. */
	unsigned retval;
};
161 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
163 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
164 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
165 char *description
= LLVMGetDiagInfoDescription(di
);
166 const char *severity_str
= NULL
;
170 severity_str
= "error";
173 severity_str
= "warning";
176 severity_str
= "remark";
179 severity_str
= "note";
182 severity_str
= "unknown";
185 pipe_debug_message(diag
->debug
, SHADER_INFO
,
186 "LLVM diagnostic (%s): %s", severity_str
, description
);
188 if (severity
== LLVMDSError
) {
190 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
193 LLVMDisposeMessage(description
);
197 * Compile an LLVM module to machine code.
199 * @returns 0 for success, 1 for failure
201 unsigned si_llvm_compile(LLVMModuleRef M
, struct radeon_shader_binary
*binary
,
202 LLVMTargetMachineRef tm
,
203 struct pipe_debug_callback
*debug
)
205 struct si_llvm_diagnostics diag
;
207 LLVMContextRef llvm_ctx
;
208 LLVMMemoryBufferRef out_buffer
;
209 unsigned buffer_size
;
210 const char *buffer_data
;
216 /* Setup Diagnostic Handler*/
217 llvm_ctx
= LLVMGetModuleContext(M
);
219 LLVMContextSetDiagnosticHandler(llvm_ctx
, si_diagnostic_handler
, &diag
);
222 mem_err
= LLVMTargetMachineEmitToMemoryBuffer(tm
, M
, LLVMObjectFile
, &err
,
225 /* Process Errors/Warnings */
227 fprintf(stderr
, "%s: %s", __FUNCTION__
, err
);
228 pipe_debug_message(debug
, SHADER_INFO
,
229 "LLVM emit error: %s", err
);
235 /* Extract Shader Code*/
236 buffer_size
= LLVMGetBufferSize(out_buffer
);
237 buffer_data
= LLVMGetBufferStart(out_buffer
);
239 radeon_elf_read(buffer_data
, buffer_size
, binary
);
242 LLVMDisposeMemoryBuffer(out_buffer
);
245 if (diag
.retval
!= 0)
246 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compile failed");
250 LLVMTypeRef
tgsi2llvmtype(struct lp_build_tgsi_context
*bld_base
,
251 enum tgsi_opcode_type type
)
253 LLVMContextRef ctx
= bld_base
->base
.gallivm
->context
;
256 case TGSI_TYPE_UNSIGNED
:
257 case TGSI_TYPE_SIGNED
:
258 return LLVMInt32TypeInContext(ctx
);
259 case TGSI_TYPE_UNSIGNED64
:
260 case TGSI_TYPE_SIGNED64
:
261 return LLVMInt64TypeInContext(ctx
);
262 case TGSI_TYPE_DOUBLE
:
263 return LLVMDoubleTypeInContext(ctx
);
264 case TGSI_TYPE_UNTYPED
:
265 case TGSI_TYPE_FLOAT
:
266 return LLVMFloatTypeInContext(ctx
);
272 LLVMValueRef
bitcast(struct lp_build_tgsi_context
*bld_base
,
273 enum tgsi_opcode_type type
, LLVMValueRef value
)
275 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
276 LLVMTypeRef dst_type
= tgsi2llvmtype(bld_base
, type
);
279 return LLVMBuildBitCast(builder
, value
, dst_type
, "");
285 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
286 * or an undefined value in the same interval otherwise.
288 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
292 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
293 LLVMBuilderRef builder
= gallivm
->builder
;
294 LLVMValueRef c_max
= lp_build_const_int32(gallivm
, num
- 1);
297 if (util_is_power_of_two(num
)) {
298 index
= LLVMBuildAnd(builder
, index
, c_max
, "");
300 /* In theory, this MAX pattern should result in code that is
301 * as good as the bit-wise AND above.
303 * In practice, LLVM generates worse code (at the time of
304 * writing), because its value tracking is not strong enough.
306 cc
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, c_max
, "");
307 index
= LLVMBuildSelect(builder
, cc
, index
, c_max
, "");
313 static struct si_llvm_flow
*
314 get_current_flow(struct si_shader_context
*ctx
)
316 if (ctx
->flow_depth
> 0)
317 return &ctx
->flow
[ctx
->flow_depth
- 1];
321 static struct si_llvm_flow
*
322 get_innermost_loop(struct si_shader_context
*ctx
)
324 for (unsigned i
= ctx
->flow_depth
; i
> 0; --i
) {
325 if (ctx
->flow
[i
- 1].loop_entry_block
)
326 return &ctx
->flow
[i
- 1];
331 static struct si_llvm_flow
*
332 push_flow(struct si_shader_context
*ctx
)
334 struct si_llvm_flow
*flow
;
336 if (ctx
->flow_depth
>= ctx
->flow_depth_max
) {
337 unsigned new_max
= MAX2(ctx
->flow_depth
<< 1, RADEON_LLVM_INITIAL_CF_DEPTH
);
338 ctx
->flow
= REALLOC(ctx
->flow
,
339 ctx
->flow_depth_max
* sizeof(*ctx
->flow
),
340 new_max
* sizeof(*ctx
->flow
));
341 ctx
->flow_depth_max
= new_max
;
344 flow
= &ctx
->flow
[ctx
->flow_depth
];
347 flow
->next_block
= NULL
;
348 flow
->loop_entry_block
= NULL
;
352 static LLVMValueRef
emit_swizzle(struct lp_build_tgsi_context
*bld_base
,
359 LLVMValueRef swizzles
[4];
361 LLVMInt32TypeInContext(bld_base
->base
.gallivm
->context
);
363 swizzles
[0] = LLVMConstInt(i32t
, swizzle_x
, 0);
364 swizzles
[1] = LLVMConstInt(i32t
, swizzle_y
, 0);
365 swizzles
[2] = LLVMConstInt(i32t
, swizzle_z
, 0);
366 swizzles
[3] = LLVMConstInt(i32t
, swizzle_w
, 0);
368 return LLVMBuildShuffleVector(bld_base
->base
.gallivm
->builder
,
370 LLVMGetUndef(LLVMTypeOf(value
)),
371 LLVMConstVector(swizzles
, 4), "");
375 * Return the description of the array covering the given temporary register
379 get_temp_array_id(struct lp_build_tgsi_context
*bld_base
,
381 const struct tgsi_ind_register
*reg
)
383 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
384 unsigned num_arrays
= ctx
->soa
.bld_base
.info
->array_max
[TGSI_FILE_TEMPORARY
];
387 if (reg
&& reg
->ArrayID
> 0 && reg
->ArrayID
<= num_arrays
)
390 for (i
= 0; i
< num_arrays
; i
++) {
391 const struct tgsi_array_info
*array
= &ctx
->temp_arrays
[i
];
393 if (reg_index
>= array
->range
.First
&& reg_index
<= array
->range
.Last
)
400 static struct tgsi_declaration_range
401 get_array_range(struct lp_build_tgsi_context
*bld_base
,
402 unsigned File
, unsigned reg_index
,
403 const struct tgsi_ind_register
*reg
)
405 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
406 struct tgsi_declaration_range range
;
408 if (File
== TGSI_FILE_TEMPORARY
) {
409 unsigned array_id
= get_temp_array_id(bld_base
, reg_index
, reg
);
411 return ctx
->temp_arrays
[array_id
- 1].range
;
415 range
.Last
= bld_base
->info
->file_max
[File
];
420 emit_array_index(struct lp_build_tgsi_soa_context
*bld
,
421 const struct tgsi_ind_register
*reg
,
424 struct gallivm_state
*gallivm
= bld
->bld_base
.base
.gallivm
;
427 return lp_build_const_int32(gallivm
, offset
);
429 LLVMValueRef addr
= LLVMBuildLoad(gallivm
->builder
, bld
->addr
[reg
->Index
][reg
->Swizzle
], "");
430 return LLVMBuildAdd(gallivm
->builder
, addr
, lp_build_const_int32(gallivm
, offset
), "");
434 * For indirect registers, construct a pointer directly to the requested
435 * element using getelementptr if possible.
437 * Returns NULL if the insertelement/extractelement fallback for array access
441 get_pointer_into_array(struct si_shader_context
*ctx
,
445 const struct tgsi_ind_register
*reg_indirect
)
448 struct tgsi_array_info
*array
;
449 struct gallivm_state
*gallivm
= ctx
->soa
.bld_base
.base
.gallivm
;
450 LLVMBuilderRef builder
= gallivm
->builder
;
451 LLVMValueRef idxs
[2];
455 if (file
!= TGSI_FILE_TEMPORARY
)
458 array_id
= get_temp_array_id(&ctx
->soa
.bld_base
, reg_index
, reg_indirect
);
462 alloca
= ctx
->temp_array_allocas
[array_id
- 1];
466 array
= &ctx
->temp_arrays
[array_id
- 1];
468 if (!(array
->writemask
& (1 << swizzle
)))
469 return ctx
->undef_alloca
;
471 index
= emit_array_index(&ctx
->soa
, reg_indirect
,
472 reg_index
- ctx
->temp_arrays
[array_id
- 1].range
.First
);
474 /* Ensure that the index is within a valid range, to guard against
475 * VM faults and overwriting critical data (e.g. spilled resource
478 * TODO It should be possible to avoid the additional instructions
479 * if LLVM is changed so that it guarantuees:
480 * 1. the scratch space descriptor isolates the current wave (this
481 * could even save the scratch offset SGPR at the cost of an
482 * additional SALU instruction)
483 * 2. the memory for allocas must be allocated at the _end_ of the
484 * scratch space (after spilled registers)
486 index
= si_llvm_bound_index(ctx
, index
, array
->range
.Last
- array
->range
.First
+ 1);
488 index
= LLVMBuildMul(
490 lp_build_const_int32(gallivm
, util_bitcount(array
->writemask
)),
492 index
= LLVMBuildAdd(
494 lp_build_const_int32(
496 util_bitcount(array
->writemask
& ((1 << swizzle
) - 1))),
498 idxs
[0] = ctx
->soa
.bld_base
.uint_bld
.zero
;
500 return LLVMBuildGEP(builder
, alloca
, idxs
, 2, "");
504 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context
*bld_base
,
505 enum tgsi_opcode_type type
,
509 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
512 result
= LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base
->base
.gallivm
->context
, 32), bld_base
->base
.type
.length
* 2));
514 result
= LLVMBuildInsertElement(builder
,
516 bitcast(bld_base
, TGSI_TYPE_UNSIGNED
, ptr
),
517 bld_base
->int_bld
.zero
, "");
518 result
= LLVMBuildInsertElement(builder
,
520 bitcast(bld_base
, TGSI_TYPE_UNSIGNED
, ptr2
),
521 bld_base
->int_bld
.one
, "");
522 return bitcast(bld_base
, type
, result
);
526 emit_array_fetch(struct lp_build_tgsi_context
*bld_base
,
527 unsigned File
, enum tgsi_opcode_type type
,
528 struct tgsi_declaration_range range
,
531 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
532 struct gallivm_state
*gallivm
= bld
->bld_base
.base
.gallivm
;
533 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
535 unsigned i
, size
= range
.Last
- range
.First
+ 1;
536 LLVMTypeRef vec
= LLVMVectorType(tgsi2llvmtype(bld_base
, type
), size
);
537 LLVMValueRef result
= LLVMGetUndef(vec
);
539 struct tgsi_full_src_register tmp_reg
= {};
540 tmp_reg
.Register
.File
= File
;
542 for (i
= 0; i
< size
; ++i
) {
543 tmp_reg
.Register
.Index
= i
+ range
.First
;
544 LLVMValueRef temp
= si_llvm_emit_fetch(bld_base
, &tmp_reg
, type
, swizzle
);
545 result
= LLVMBuildInsertElement(builder
, result
, temp
,
546 lp_build_const_int32(gallivm
, i
), "array_vector");
552 load_value_from_array(struct lp_build_tgsi_context
*bld_base
,
554 enum tgsi_opcode_type type
,
557 const struct tgsi_ind_register
*reg_indirect
)
559 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
560 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
561 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
562 LLVMBuilderRef builder
= gallivm
->builder
;
565 ptr
= get_pointer_into_array(ctx
, file
, swizzle
, reg_index
, reg_indirect
);
567 LLVMValueRef val
= LLVMBuildLoad(builder
, ptr
, "");
568 if (tgsi_type_is_64bit(type
)) {
569 LLVMValueRef ptr_hi
, val_hi
;
570 ptr_hi
= LLVMBuildGEP(builder
, ptr
, &bld_base
->uint_bld
.one
, 1, "");
571 val_hi
= LLVMBuildLoad(builder
, ptr_hi
, "");
572 val
= si_llvm_emit_fetch_64bit(bld_base
, type
, val
, val_hi
);
577 struct tgsi_declaration_range range
=
578 get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
580 emit_array_index(bld
, reg_indirect
, reg_index
- range
.First
);
582 emit_array_fetch(bld_base
, file
, type
, range
, swizzle
);
583 return LLVMBuildExtractElement(builder
, array
, index
, "");
588 store_value_to_array(struct lp_build_tgsi_context
*bld_base
,
593 const struct tgsi_ind_register
*reg_indirect
)
595 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
596 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
597 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
598 LLVMBuilderRef builder
= gallivm
->builder
;
601 ptr
= get_pointer_into_array(ctx
, file
, chan_index
, reg_index
, reg_indirect
);
603 LLVMBuildStore(builder
, value
, ptr
);
606 struct tgsi_declaration_range range
= get_array_range(bld_base
, file
, reg_index
, reg_indirect
);
607 LLVMValueRef index
= emit_array_index(bld
, reg_indirect
, reg_index
- range
.First
);
609 emit_array_fetch(bld_base
, file
, TGSI_TYPE_FLOAT
, range
, chan_index
);
610 LLVMValueRef temp_ptr
;
612 array
= LLVMBuildInsertElement(builder
, array
, value
, index
, "");
614 size
= range
.Last
- range
.First
+ 1;
615 for (i
= 0; i
< size
; ++i
) {
617 case TGSI_FILE_OUTPUT
:
618 temp_ptr
= bld
->outputs
[i
+ range
.First
][chan_index
];
621 case TGSI_FILE_TEMPORARY
:
622 if (range
.First
+ i
>= ctx
->temps_count
)
624 temp_ptr
= ctx
->temps
[(i
+ range
.First
) * TGSI_NUM_CHANNELS
+ chan_index
];
630 value
= LLVMBuildExtractElement(builder
, array
,
631 lp_build_const_int32(gallivm
, i
), "");
632 LLVMBuildStore(builder
, value
, temp_ptr
);
637 LLVMValueRef
si_llvm_emit_fetch(struct lp_build_tgsi_context
*bld_base
,
638 const struct tgsi_full_src_register
*reg
,
639 enum tgsi_opcode_type type
,
642 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
643 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
644 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
645 LLVMValueRef result
= NULL
, ptr
, ptr2
;
648 LLVMValueRef values
[TGSI_NUM_CHANNELS
];
650 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
651 values
[chan
] = si_llvm_emit_fetch(bld_base
, reg
, type
, chan
);
653 return lp_build_gather_values(bld_base
->base
.gallivm
, values
,
657 if (reg
->Register
.Indirect
) {
658 LLVMValueRef load
= load_value_from_array(bld_base
, reg
->Register
.File
, type
,
659 swizzle
, reg
->Register
.Index
, ®
->Indirect
);
660 return bitcast(bld_base
, type
, load
);
663 switch(reg
->Register
.File
) {
664 case TGSI_FILE_IMMEDIATE
: {
665 LLVMTypeRef ctype
= tgsi2llvmtype(bld_base
, type
);
666 if (tgsi_type_is_64bit(type
)) {
667 result
= LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base
->base
.gallivm
->context
, 32), bld_base
->base
.type
.length
* 2));
668 result
= LLVMConstInsertElement(result
,
669 bld
->immediates
[reg
->Register
.Index
][swizzle
],
670 bld_base
->int_bld
.zero
);
671 result
= LLVMConstInsertElement(result
,
672 bld
->immediates
[reg
->Register
.Index
][swizzle
+ 1],
673 bld_base
->int_bld
.one
);
674 return LLVMConstBitCast(result
, ctype
);
676 return LLVMConstBitCast(bld
->immediates
[reg
->Register
.Index
][swizzle
], ctype
);
680 case TGSI_FILE_INPUT
: {
681 unsigned index
= reg
->Register
.Index
;
682 LLVMValueRef input
[4];
684 /* I don't think doing this for vertex shaders is beneficial.
685 * For those, we want to make sure the VMEM loads are executed
686 * only once. Fragment shaders don't care much, because
687 * v_interp instructions are much cheaper than VMEM loads.
689 if (ctx
->soa
.bld_base
.info
->processor
== PIPE_SHADER_FRAGMENT
)
690 ctx
->load_input(ctx
, index
, &ctx
->input_decls
[index
], input
);
692 memcpy(input
, &ctx
->inputs
[index
* 4], sizeof(input
));
694 result
= input
[swizzle
];
696 if (tgsi_type_is_64bit(type
)) {
698 ptr2
= input
[swizzle
+ 1];
699 return si_llvm_emit_fetch_64bit(bld_base
, type
, ptr
, ptr2
);
704 case TGSI_FILE_TEMPORARY
:
705 if (reg
->Register
.Index
>= ctx
->temps_count
)
706 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
707 ptr
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
];
708 if (tgsi_type_is_64bit(type
)) {
709 ptr2
= ctx
->temps
[reg
->Register
.Index
* TGSI_NUM_CHANNELS
+ swizzle
+ 1];
710 return si_llvm_emit_fetch_64bit(bld_base
, type
,
711 LLVMBuildLoad(builder
, ptr
, ""),
712 LLVMBuildLoad(builder
, ptr2
, ""));
714 result
= LLVMBuildLoad(builder
, ptr
, "");
717 case TGSI_FILE_OUTPUT
:
718 ptr
= lp_get_output_ptr(bld
, reg
->Register
.Index
, swizzle
);
719 if (tgsi_type_is_64bit(type
)) {
720 ptr2
= lp_get_output_ptr(bld
, reg
->Register
.Index
, swizzle
+ 1);
721 return si_llvm_emit_fetch_64bit(bld_base
, type
,
722 LLVMBuildLoad(builder
, ptr
, ""),
723 LLVMBuildLoad(builder
, ptr2
, ""));
725 result
= LLVMBuildLoad(builder
, ptr
, "");
729 return LLVMGetUndef(tgsi2llvmtype(bld_base
, type
));
732 return bitcast(bld_base
, type
, result
);
735 static LLVMValueRef
fetch_system_value(struct lp_build_tgsi_context
*bld_base
,
736 const struct tgsi_full_src_register
*reg
,
737 enum tgsi_opcode_type type
,
740 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
741 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
743 LLVMValueRef cval
= ctx
->system_values
[reg
->Register
.Index
];
744 if (LLVMGetTypeKind(LLVMTypeOf(cval
)) == LLVMVectorTypeKind
) {
745 cval
= LLVMBuildExtractElement(gallivm
->builder
, cval
,
746 lp_build_const_int32(gallivm
, swizzle
), "");
748 return bitcast(bld_base
, type
, cval
);
751 static void emit_declaration(struct lp_build_tgsi_context
*bld_base
,
752 const struct tgsi_full_declaration
*decl
)
754 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
755 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
756 unsigned first
, last
, i
;
757 switch(decl
->Declaration
.File
) {
758 case TGSI_FILE_ADDRESS
:
761 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
763 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
764 ctx
->soa
.addr
[idx
][chan
] = lp_build_alloca_undef(
766 ctx
->soa
.bld_base
.uint_bld
.elem_type
, "");
772 case TGSI_FILE_TEMPORARY
:
775 LLVMValueRef array_alloca
= NULL
;
777 unsigned writemask
= decl
->Declaration
.UsageMask
;
778 first
= decl
->Range
.First
;
779 last
= decl
->Range
.Last
;
780 decl_size
= 4 * ((last
- first
) + 1);
782 if (decl
->Declaration
.Array
) {
783 unsigned id
= decl
->Array
.ArrayID
- 1;
786 writemask
&= ctx
->temp_arrays
[id
].writemask
;
787 ctx
->temp_arrays
[id
].writemask
= writemask
;
788 array_size
= ((last
- first
) + 1) * util_bitcount(writemask
);
790 /* If the array has more than 16 elements, store it
791 * in memory using an alloca that spans the entire
794 * Otherwise, store each array element individually.
795 * We will then generate vectors (per-channel, up to
796 * <16 x float> if the usagemask is a single bit) for
797 * indirect addressing.
799 * Note that 16 is the number of vector elements that
800 * LLVM will store in a register, so theoretically an
801 * array with up to 4 * 16 = 64 elements could be
802 * handled this way, but whether that's a good idea
803 * depends on VGPR register pressure elsewhere.
805 * FIXME: We shouldn't need to have the non-alloca
806 * code path for arrays. LLVM should be smart enough to
807 * promote allocas into registers when profitable.
809 * LLVM 3.8 crashes with this.
811 if (HAVE_LLVM
>= 0x0309 && array_size
> 16) {
812 array_alloca
= LLVMBuildAlloca(builder
,
813 LLVMArrayType(bld_base
->base
.vec_type
,
814 array_size
), "array");
815 ctx
->temp_array_allocas
[id
] = array_alloca
;
819 if (!ctx
->temps_count
) {
820 ctx
->temps_count
= bld_base
->info
->file_max
[TGSI_FILE_TEMPORARY
] + 1;
821 ctx
->temps
= MALLOC(TGSI_NUM_CHANNELS
* ctx
->temps_count
* sizeof(LLVMValueRef
));
824 for (i
= 0; i
< decl_size
; ++i
) {
826 snprintf(name
, sizeof(name
), "TEMP%d.%c",
827 first
+ i
/ 4, "xyzw"[i
% 4]);
829 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] =
830 lp_build_alloca_undef(bld_base
->base
.gallivm
,
831 bld_base
->base
.vec_type
,
835 LLVMValueRef idxs
[2] = {
836 bld_base
->uint_bld
.zero
,
841 if (writemask
!= TGSI_WRITEMASK_XYZW
&&
842 !ctx
->undef_alloca
) {
843 /* Create a dummy alloca. We use it so that we
844 * have a pointer that is safe to load from if
845 * a shader ever reads from a channel that
846 * it never writes to.
848 ctx
->undef_alloca
= lp_build_alloca_undef(
849 bld_base
->base
.gallivm
,
850 bld_base
->base
.vec_type
, "undef");
853 for (i
= 0; i
< decl_size
; ++i
) {
855 if (writemask
& (1 << (i
% 4))) {
857 snprintf(name
, sizeof(name
), "TEMP%d.%c",
858 first
+ i
/ 4, "xyzw"[i
% 4]);
860 idxs
[1] = lp_build_const_int32(bld_base
->base
.gallivm
, j
);
861 ptr
= LLVMBuildGEP(builder
, array_alloca
, idxs
, 2, name
);
864 ptr
= ctx
->undef_alloca
;
866 ctx
->temps
[first
* TGSI_NUM_CHANNELS
+ i
] = ptr
;
871 case TGSI_FILE_INPUT
:
874 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
875 if (ctx
->load_input
&&
876 ctx
->input_decls
[idx
].Declaration
.File
!= TGSI_FILE_INPUT
) {
877 ctx
->input_decls
[idx
] = *decl
;
879 if (bld_base
->info
->processor
!= PIPE_SHADER_FRAGMENT
)
880 ctx
->load_input(ctx
, idx
, decl
,
881 &ctx
->inputs
[idx
* 4]);
887 case TGSI_FILE_SYSTEM_VALUE
:
890 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
891 ctx
->load_system_value(ctx
, idx
, decl
);
896 case TGSI_FILE_OUTPUT
:
900 for (idx
= decl
->Range
.First
; idx
<= decl
->Range
.Last
; idx
++) {
902 assert(idx
< RADEON_LLVM_MAX_OUTPUTS
);
903 if (ctx
->soa
.outputs
[idx
][0])
905 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
907 snprintf(name
, sizeof(name
), "OUT%d.%c",
908 idx
, "xyzw"[chan
% 4]);
910 ctx
->soa
.outputs
[idx
][chan
] = lp_build_alloca_undef(
912 ctx
->soa
.bld_base
.base
.elem_type
, name
);
918 case TGSI_FILE_MEMORY
:
919 ctx
->declare_memory_region(ctx
, decl
);
927 LLVMValueRef
si_llvm_saturate(struct lp_build_tgsi_context
*bld_base
,
930 struct lp_build_emit_data clamp_emit_data
;
932 memset(&clamp_emit_data
, 0, sizeof(clamp_emit_data
));
933 clamp_emit_data
.arg_count
= 3;
934 clamp_emit_data
.args
[0] = value
;
935 clamp_emit_data
.args
[2] = bld_base
->base
.one
;
936 clamp_emit_data
.args
[1] = bld_base
->base
.zero
;
938 return lp_build_emit_llvm(bld_base
, TGSI_OPCODE_CLAMP
,
942 void si_llvm_emit_store(struct lp_build_tgsi_context
*bld_base
,
943 const struct tgsi_full_instruction
*inst
,
944 const struct tgsi_opcode_info
*info
,
947 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
948 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
949 struct gallivm_state
*gallivm
= bld
->bld_base
.base
.gallivm
;
950 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[0];
951 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
952 LLVMValueRef temp_ptr
, temp_ptr2
= NULL
;
953 unsigned chan
, chan_index
;
954 bool is_vec_store
= false;
955 enum tgsi_opcode_type dtype
= tgsi_opcode_infer_dst_type(inst
->Instruction
.Opcode
);
958 LLVMTypeKind k
= LLVMGetTypeKind(LLVMTypeOf(dst
[0]));
959 is_vec_store
= (k
== LLVMVectorTypeKind
);
963 LLVMValueRef values
[4] = {};
964 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst
, chan
) {
965 LLVMValueRef index
= lp_build_const_int32(gallivm
, chan
);
966 values
[chan
] = LLVMBuildExtractElement(gallivm
->builder
,
969 bld_base
->emit_store(bld_base
, inst
, info
, values
);
973 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
974 LLVMValueRef value
= dst
[chan_index
];
976 if (tgsi_type_is_64bit(dtype
) && (chan_index
== 1 || chan_index
== 3))
978 if (inst
->Instruction
.Saturate
)
979 value
= si_llvm_saturate(bld_base
, value
);
981 if (reg
->Register
.File
== TGSI_FILE_ADDRESS
) {
982 temp_ptr
= bld
->addr
[reg
->Register
.Index
][chan_index
];
983 LLVMBuildStore(builder
, value
, temp_ptr
);
987 if (!tgsi_type_is_64bit(dtype
))
988 value
= bitcast(bld_base
, TGSI_TYPE_FLOAT
, value
);
990 if (reg
->Register
.Indirect
) {
991 unsigned file
= reg
->Register
.File
;
992 unsigned reg_index
= reg
->Register
.Index
;
993 store_value_to_array(bld_base
, value
, file
, chan_index
,
994 reg_index
, ®
->Indirect
);
996 switch(reg
->Register
.File
) {
997 case TGSI_FILE_OUTPUT
:
998 temp_ptr
= bld
->outputs
[reg
->Register
.Index
][chan_index
];
999 if (tgsi_type_is_64bit(dtype
))
1000 temp_ptr2
= bld
->outputs
[reg
->Register
.Index
][chan_index
+ 1];
1003 case TGSI_FILE_TEMPORARY
:
1005 if (reg
->Register
.Index
>= ctx
->temps_count
)
1008 temp_ptr
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
];
1009 if (tgsi_type_is_64bit(dtype
))
1010 temp_ptr2
= ctx
->temps
[ TGSI_NUM_CHANNELS
* reg
->Register
.Index
+ chan_index
+ 1];
1017 if (!tgsi_type_is_64bit(dtype
))
1018 LLVMBuildStore(builder
, value
, temp_ptr
);
1020 LLVMValueRef ptr
= LLVMBuildBitCast(builder
, value
,
1021 LLVMVectorType(LLVMIntTypeInContext(bld_base
->base
.gallivm
->context
, 32), 2), "");
1023 value
= LLVMBuildExtractElement(builder
, ptr
,
1024 bld_base
->uint_bld
.zero
, "");
1025 val2
= LLVMBuildExtractElement(builder
, ptr
,
1026 bld_base
->uint_bld
.one
, "");
1028 LLVMBuildStore(builder
, bitcast(bld_base
, TGSI_TYPE_FLOAT
, value
), temp_ptr
);
1029 LLVMBuildStore(builder
, bitcast(bld_base
, TGSI_TYPE_FLOAT
, val2
), temp_ptr2
);
1035 static void set_basicblock_name(LLVMBasicBlockRef bb
, const char *base
, int pc
)
1038 /* Subtract 1 so that the number shown is that of the corresponding
1039 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1040 * the instruction number of the corresponding TGSI IF.
1042 snprintf(buf
, sizeof(buf
), "%s%d", base
, pc
- 1);
1043 LLVMSetValueName(LLVMBasicBlockAsValue(bb
), buf
);
1046 /* Append a basic block at the level of the parent flow.
1048 static LLVMBasicBlockRef
append_basic_block(struct si_shader_context
*ctx
,
1051 struct gallivm_state
*gallivm
= &ctx
->gallivm
;
1053 assert(ctx
->flow_depth
>= 1);
1055 if (ctx
->flow_depth
>= 2) {
1056 struct si_llvm_flow
*flow
= &ctx
->flow
[ctx
->flow_depth
- 2];
1058 return LLVMInsertBasicBlockInContext(gallivm
->context
,
1059 flow
->next_block
, name
);
1062 return LLVMAppendBasicBlockInContext(gallivm
->context
, ctx
->main_fn
, name
);
1065 /* Emit a branch to the given default target for the current block if
1066 * applicable -- that is, if the current block does not already contain a
1067 * branch from a break or continue.
1069 static void emit_default_branch(LLVMBuilderRef builder
, LLVMBasicBlockRef target
)
1071 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder
)))
1072 LLVMBuildBr(builder
, target
);
1075 static void bgnloop_emit(const struct lp_build_tgsi_action
*action
,
1076 struct lp_build_tgsi_context
*bld_base
,
1077 struct lp_build_emit_data
*emit_data
)
1079 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1080 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1081 struct si_llvm_flow
*flow
= push_flow(ctx
);
1082 flow
->loop_entry_block
= append_basic_block(ctx
, "LOOP");
1083 flow
->next_block
= append_basic_block(ctx
, "ENDLOOP");
1084 set_basicblock_name(flow
->loop_entry_block
, "loop", bld_base
->pc
);
1085 LLVMBuildBr(gallivm
->builder
, flow
->loop_entry_block
);
1086 LLVMPositionBuilderAtEnd(gallivm
->builder
, flow
->loop_entry_block
);
1089 static void brk_emit(const struct lp_build_tgsi_action
*action
,
1090 struct lp_build_tgsi_context
*bld_base
,
1091 struct lp_build_emit_data
*emit_data
)
1093 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1094 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1095 struct si_llvm_flow
*flow
= get_innermost_loop(ctx
);
1097 LLVMBuildBr(gallivm
->builder
, flow
->next_block
);
1100 static void cont_emit(const struct lp_build_tgsi_action
*action
,
1101 struct lp_build_tgsi_context
*bld_base
,
1102 struct lp_build_emit_data
*emit_data
)
1104 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1105 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1106 struct si_llvm_flow
*flow
= get_innermost_loop(ctx
);
1108 LLVMBuildBr(gallivm
->builder
, flow
->loop_entry_block
);
1111 static void else_emit(const struct lp_build_tgsi_action
*action
,
1112 struct lp_build_tgsi_context
*bld_base
,
1113 struct lp_build_emit_data
*emit_data
)
1115 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1116 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1117 struct si_llvm_flow
*current_branch
= get_current_flow(ctx
);
1118 LLVMBasicBlockRef endif_block
;
1120 assert(!current_branch
->loop_entry_block
);
1122 endif_block
= append_basic_block(ctx
, "ENDIF");
1123 emit_default_branch(gallivm
->builder
, endif_block
);
1125 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_branch
->next_block
);
1126 set_basicblock_name(current_branch
->next_block
, "else", bld_base
->pc
);
1128 current_branch
->next_block
= endif_block
;
1131 static void endif_emit(const struct lp_build_tgsi_action
*action
,
1132 struct lp_build_tgsi_context
*bld_base
,
1133 struct lp_build_emit_data
*emit_data
)
1135 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1136 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1137 struct si_llvm_flow
*current_branch
= get_current_flow(ctx
);
1139 assert(!current_branch
->loop_entry_block
);
1141 emit_default_branch(gallivm
->builder
, current_branch
->next_block
);
1142 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_branch
->next_block
);
1143 set_basicblock_name(current_branch
->next_block
, "endif", bld_base
->pc
);
1148 static void endloop_emit(const struct lp_build_tgsi_action
*action
,
1149 struct lp_build_tgsi_context
*bld_base
,
1150 struct lp_build_emit_data
*emit_data
)
1152 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1153 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1154 struct si_llvm_flow
*current_loop
= get_current_flow(ctx
);
1156 assert(current_loop
->loop_entry_block
);
1158 emit_default_branch(gallivm
->builder
, current_loop
->loop_entry_block
);
1160 LLVMPositionBuilderAtEnd(gallivm
->builder
, current_loop
->next_block
);
1161 set_basicblock_name(current_loop
->next_block
, "endloop", bld_base
->pc
);
1165 static void if_cond_emit(const struct lp_build_tgsi_action
*action
,
1166 struct lp_build_tgsi_context
*bld_base
,
1167 struct lp_build_emit_data
*emit_data
,
1170 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1171 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1172 struct si_llvm_flow
*flow
= push_flow(ctx
);
1173 LLVMBasicBlockRef if_block
;
1175 if_block
= append_basic_block(ctx
, "IF");
1176 flow
->next_block
= append_basic_block(ctx
, "ELSE");
1177 set_basicblock_name(if_block
, "if", bld_base
->pc
);
1178 LLVMBuildCondBr(gallivm
->builder
, cond
, if_block
, flow
->next_block
);
1179 LLVMPositionBuilderAtEnd(gallivm
->builder
, if_block
);
1182 static void if_emit(const struct lp_build_tgsi_action
*action
,
1183 struct lp_build_tgsi_context
*bld_base
,
1184 struct lp_build_emit_data
*emit_data
)
1186 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1189 cond
= LLVMBuildFCmp(gallivm
->builder
, LLVMRealUNE
,
1191 bld_base
->base
.zero
, "");
1193 if_cond_emit(action
, bld_base
, emit_data
, cond
);
1196 static void uif_emit(const struct lp_build_tgsi_action
*action
,
1197 struct lp_build_tgsi_context
*bld_base
,
1198 struct lp_build_emit_data
*emit_data
)
1200 struct gallivm_state
*gallivm
= bld_base
->base
.gallivm
;
1203 cond
= LLVMBuildICmp(gallivm
->builder
, LLVMIntNE
,
1204 bitcast(bld_base
, TGSI_TYPE_UNSIGNED
, emit_data
->args
[0]),
1205 bld_base
->int_bld
.zero
, "");
1207 if_cond_emit(action
, bld_base
, emit_data
, cond
);
1210 static void emit_immediate(struct lp_build_tgsi_context
*bld_base
,
1211 const struct tgsi_full_immediate
*imm
)
1214 struct si_shader_context
*ctx
= si_shader_context(bld_base
);
1216 for (i
= 0; i
< 4; ++i
) {
1217 ctx
->soa
.immediates
[ctx
->soa
.num_immediates
][i
] =
1218 LLVMConstInt(bld_base
->uint_bld
.elem_type
, imm
->u
[i
].Uint
, false );
1221 ctx
->soa
.num_immediates
++;
/*
 * Set up a shader context for TGSI-to-LLVM translation.
 *
 * As far as the visible statements show, this function:
 *  - zeroes ctx->gallivm and ctx->soa, then creates the LLVM context, a
 *    module named "tgsi" (with the given target triple) and a builder;
 *  - stores `info` in the TGSI build context and, when `info` reports
 *    temporary arrays, allocates ctx->temp_arrays and
 *    ctx->temp_array_allocas with CALLOC and calls tgsi_scan_arrays();
 *  - initializes the base/uint/int/dbl/uint64/int64 build contexts;
 *  - installs the store, swizzle, declaration, immediate and fetch callbacks;
 *  - looks up the fpmath metadata kind and creates the 2.5 ULP metadata node;
 *  - points ctx->soa.outputs at ctx->outputs;
 *  - registers the control-flow opcode handlers.
 *
 * Parameters: ctx is the context to initialize, triple the LLVM target
 * triple, info the scanned shader info (checked for NULL before the array
 * setup), tokens the TGSI token stream handed to tgsi_scan_arrays().
 *
 * NOTE(review): this listing is visibly incomplete - the "Initialize the
 * gallivm object" comment has no terminator, several calls are cut off
 * mid-argument-list and only one field of `type` is assigned. Verify
 * against the upstream file before relying on this text.
 */
1224 void si_llvm_context_init(struct si_shader_context
*ctx
, const char *triple
,
1225 const struct tgsi_shader_info
*info
,
1226 const struct tgsi_token
*tokens
)
1228 struct lp_type type
;
1230 /* Initialize the gallivm object:
1231 * We are only using the module, context, and builder fields of this struct.
1232 * This should be enough for us to be able to pass our gallivm struct to the
1233 * helper functions in the gallivm module.
1235 memset(&ctx
->gallivm
, 0, sizeof (ctx
->gallivm
));
1236 memset(&ctx
->soa
, 0, sizeof(ctx
->soa
));
1237 ctx
->gallivm
.context
= LLVMContextCreate();
1238 ctx
->gallivm
.module
= LLVMModuleCreateWithNameInContext("tgsi",
1239 ctx
->gallivm
.context
);
1240 LLVMSetTarget(ctx
->gallivm
.module
, triple
);
1241 ctx
->gallivm
.builder
= LLVMCreateBuilderInContext(ctx
->gallivm
.context
);
1243 struct lp_build_tgsi_context
*bld_base
= &ctx
->soa
.bld_base
;
1245 bld_base
->info
= info
;
1247 if (info
&& info
->array_max
[TGSI_FILE_TEMPORARY
] > 0) {
1248 int size
= info
->array_max
[TGSI_FILE_TEMPORARY
];
1250 ctx
->temp_arrays
= CALLOC(size
, sizeof(ctx
->temp_arrays
[0]));
1251 ctx
->temp_array_allocas
= CALLOC(size
, sizeof(ctx
->temp_array_allocas
[0]));
1254 tgsi_scan_arrays(tokens
, TGSI_FILE_TEMPORARY
, size
,
1258 type
.floating
= true;
1265 lp_build_context_init(&bld_base
->base
, &ctx
->gallivm
, type
);
1266 lp_build_context_init(&ctx
->soa
.bld_base
.uint_bld
, &ctx
->gallivm
, lp_uint_type(type
));
1267 lp_build_context_init(&ctx
->soa
.bld_base
.int_bld
, &ctx
->gallivm
, lp_int_type(type
));
1269 lp_build_context_init(&ctx
->soa
.bld_base
.dbl_bld
, &ctx
->gallivm
, type
);
1270 lp_build_context_init(&ctx
->soa
.bld_base
.uint64_bld
, &ctx
->gallivm
, lp_uint_type(type
));
1271 lp_build_context_init(&ctx
->soa
.bld_base
.int64_bld
, &ctx
->gallivm
, lp_int_type(type
));
1274 bld_base
->emit_store
= si_llvm_emit_store
;
1275 bld_base
->emit_swizzle
= emit_swizzle
;
1276 bld_base
->emit_declaration
= emit_declaration
;
1277 bld_base
->emit_immediate
= emit_immediate
;
1279 bld_base
->emit_fetch_funcs
[TGSI_FILE_IMMEDIATE
] = si_llvm_emit_fetch
;
1280 bld_base
->emit_fetch_funcs
[TGSI_FILE_INPUT
] = si_llvm_emit_fetch
;
1281 bld_base
->emit_fetch_funcs
[TGSI_FILE_TEMPORARY
] = si_llvm_emit_fetch
;
1282 bld_base
->emit_fetch_funcs
[TGSI_FILE_OUTPUT
] = si_llvm_emit_fetch
;
1283 bld_base
->emit_fetch_funcs
[TGSI_FILE_SYSTEM_VALUE
] = fetch_system_value
;
1285 /* metadata allowing 2.5 ULP */
1286 ctx
->fpmath_md_kind
= LLVMGetMDKindIDInContext(ctx
->gallivm
.context
,
1288 LLVMValueRef arg
= lp_build_const_float(&ctx
->gallivm
, 2.5);
1289 ctx
->fpmath_md_2p5_ulp
= LLVMMDNodeInContext(ctx
->gallivm
.context
,
1292 /* Allocate outputs */
1293 ctx
->soa
.outputs
= ctx
->outputs
;
/* Register the handlers for the structured control-flow opcodes. */
1295 bld_base
->op_actions
[TGSI_OPCODE_BGNLOOP
].emit
= bgnloop_emit
;
1296 bld_base
->op_actions
[TGSI_OPCODE_BRK
].emit
= brk_emit
;
1297 bld_base
->op_actions
[TGSI_OPCODE_CONT
].emit
= cont_emit
;
1298 bld_base
->op_actions
[TGSI_OPCODE_IF
].emit
= if_emit
;
1299 bld_base
->op_actions
[TGSI_OPCODE_UIF
].emit
= uif_emit
;
1300 bld_base
->op_actions
[TGSI_OPCODE_ELSE
].emit
= else_emit
;
1301 bld_base
->op_actions
[TGSI_OPCODE_ENDIF
].emit
= endif_emit
;
1302 bld_base
->op_actions
[TGSI_OPCODE_ENDLOOP
].emit
= endloop_emit
;
1305 void si_llvm_create_func(struct si_shader_context
*ctx
,
1306 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
1307 LLVMTypeRef
*ParamTypes
, unsigned ParamCount
)
1309 LLVMTypeRef main_fn_type
, ret_type
;
1310 LLVMBasicBlockRef main_fn_body
;
1312 if (num_return_elems
)
1313 ret_type
= LLVMStructTypeInContext(ctx
->gallivm
.context
,
1315 num_return_elems
, true);
1317 ret_type
= LLVMVoidTypeInContext(ctx
->gallivm
.context
);
1319 /* Setup the function */
1320 ctx
->return_type
= ret_type
;
1321 main_fn_type
= LLVMFunctionType(ret_type
, ParamTypes
, ParamCount
, 0);
1322 ctx
->main_fn
= LLVMAddFunction(ctx
->gallivm
.module
, "main", main_fn_type
);
1323 main_fn_body
= LLVMAppendBasicBlockInContext(ctx
->gallivm
.context
,
1324 ctx
->main_fn
, "main_body");
1325 LLVMPositionBuilderAtEnd(ctx
->gallivm
.builder
, main_fn_body
);
/*
 * Run the function-level LLVM passes over the shader's main function and
 * release the objects that are only needed while building IR.
 *
 * Visible steps: create a function pass manager, attach target library info
 * derived from the module's target triple, add the verifier, mem2reg,
 * scalar-replacement, LICM, aggressive DCE, CFG simplification and
 * instruction combining passes, run them on ctx->main_fn, then dispose of
 * the builder, the pass manager and the target library info.
 *
 * NOTE(review): the parameter list is cut off after `ctx,` in this listing
 * and the pass-manager creation call has no visible argument; check the
 * upstream file for the remaining parameter(s) and for any condition
 * guarding the verifier pass.
 */
1328 void si_llvm_finalize_module(struct si_shader_context
*ctx
,
1331 struct gallivm_state
*gallivm
= ctx
->soa
.bld_base
.base
.gallivm
;
1332 const char *triple
= LLVMGetTarget(gallivm
->module
);
1333 LLVMTargetLibraryInfoRef target_library_info
;
1335 /* Create the pass manager */
1336 gallivm
->passmgr
= LLVMCreateFunctionPassManagerForModule(
1339 target_library_info
= gallivm_create_target_library_info(triple
);
1340 LLVMAddTargetLibraryInfo(target_library_info
, gallivm
->passmgr
);
1343 LLVMAddVerifierPass(gallivm
->passmgr
);
1345 /* This pass should eliminate all the load and store instructions */
1346 LLVMAddPromoteMemoryToRegisterPass(gallivm
->passmgr
);
1348 /* Add some optimization passes */
1349 LLVMAddScalarReplAggregatesPass(gallivm
->passmgr
);
1350 LLVMAddLICMPass(gallivm
->passmgr
);
1351 LLVMAddAggressiveDCEPass(gallivm
->passmgr
);
1352 LLVMAddCFGSimplificationPass(gallivm
->passmgr
);
1353 LLVMAddInstructionCombiningPass(gallivm
->passmgr
);
1356 LLVMInitializeFunctionPassManager(gallivm
->passmgr
);
1357 LLVMRunFunctionPassManager(gallivm
->passmgr
, ctx
->main_fn
);
1358 LLVMFinalizeFunctionPassManager(gallivm
->passmgr
);
/* The builder and the pass manager are not needed past this point. */
1360 LLVMDisposeBuilder(gallivm
->builder
);
1361 LLVMDisposePassManager(gallivm
->passmgr
);
1362 gallivm_dispose_target_library_info(target_library_info
);
/*
 * Release the LLVM objects and helper arrays held by a shader context.
 *
 * Visible steps: dispose of the LLVM module and then the LLVM context,
 * free ctx->temp_arrays and ctx->temp_array_allocas (resetting both
 * pointers to NULL), and reset temps_count and flow_depth_max to 0.
 *
 * NOTE(review): this listing skips lines between the visible statements
 * and ends at the edge of the excerpt; further cleanup may exist upstream.
 */
1365 void si_llvm_dispose(struct si_shader_context
*ctx
)
1367 LLVMDisposeModule(ctx
->soa
.bld_base
.base
.gallivm
->module
);
1368 LLVMContextDispose(ctx
->soa
.bld_base
.base
.gallivm
->context
);
1369 FREE(ctx
->temp_arrays
);
1370 ctx
->temp_arrays
= NULL
;
1371 FREE(ctx
->temp_array_allocas
);
1372 ctx
->temp_array_allocas
= NULL
;
1375 ctx
->temps_count
= 0;
1378 ctx
->flow_depth_max
= 0;