1 #include "libresoc_llvm.h"
2 #include "libresoc_llvm_build.h"
4 void InitLLVM(struct libresoc_llvm
*llvm_ref
)
6 // LLVMInitializeNativeTarget();
7 // LLVMInitializeNativeAsmPrinter();
8 LLVMInitializeAllAsmPrinters();
9 LLVMInitializeAllTargets();
10 LLVMInitializeAllTargetInfos();
11 LLVMInitializeAllTargetMCs();
12 LLVMInitializeAllDisassemblers();
14 char *def_triple
= LLVMGetDefaultTargetTriple(); // E.g. "x86_64-linux-gnu"
16 LLVMTargetRef target_ref
;
17 if (LLVMGetTargetFromTriple(def_triple
, &target_ref
, &error
)) {
21 if (!LLVMTargetHasJIT(target_ref
)) {
22 // Fatal error, cannot do JIT on this platform
25 LLVMTargetMachineRef tm_ref
=
26 LLVMCreateTargetMachine(target_ref
, def_triple
, "", "",
27 LLVMCodeGenLevelDefault
,
29 LLVMCodeModelJITDefault
);
31 LLVMDisposeErrorMessage(def_triple
);
32 llvm_ref
->orc_ref
= LLVMOrcCreateInstance(tm_ref
);
33 llvm_ref
->lc
.context
= LLVMContextCreate();
34 llvm_ref
->lc
.builder
= LLVMCreateBuilderInContext(llvm_ref
->lc
.context
);
35 llvm_ref
->lc
.voidt
= LLVMVoidTypeInContext(llvm_ref
->lc
.context
);
36 llvm_ref
->lc
.i1
= LLVMInt1TypeInContext(llvm_ref
->lc
.context
);
37 llvm_ref
->lc
.i8
= LLVMInt8TypeInContext(llvm_ref
->lc
.context
);
38 llvm_ref
->lc
.i16
= LLVMIntTypeInContext(llvm_ref
->lc
.context
, 16);
39 llvm_ref
->lc
.i32
= LLVMIntTypeInContext(llvm_ref
->lc
.context
, 32);
40 llvm_ref
->lc
.i64
= LLVMIntTypeInContext(llvm_ref
->lc
.context
, 64);
41 llvm_ref
->lc
.i128
= LLVMIntTypeInContext(llvm_ref
->lc
.context
, 128);
42 llvm_ref
->lc
.intptr
= llvm_ref
->lc
.i32
;
43 llvm_ref
->lc
.f16
= LLVMHalfTypeInContext(llvm_ref
->lc
.context
);
44 llvm_ref
->lc
.f32
= LLVMFloatTypeInContext(llvm_ref
->lc
.context
);
45 llvm_ref
->lc
.f64
= LLVMDoubleTypeInContext(llvm_ref
->lc
.context
);
46 llvm_ref
->lc
.v2i16
= LLVMVectorType(llvm_ref
->lc
.i16
, 2);
47 llvm_ref
->lc
.v4i16
= LLVMVectorType(llvm_ref
->lc
.i16
, 4);
48 llvm_ref
->lc
.v2f16
= LLVMVectorType(llvm_ref
->lc
.f16
, 2);
49 llvm_ref
->lc
.v4f16
= LLVMVectorType(llvm_ref
->lc
.f16
, 4);
50 llvm_ref
->lc
.v2i32
= LLVMVectorType(llvm_ref
->lc
.i32
, 2);
51 llvm_ref
->lc
.v3i32
= LLVMVectorType(llvm_ref
->lc
.i32
, 3);
52 llvm_ref
->lc
.v4i32
= LLVMVectorType(llvm_ref
->lc
.i32
, 4);
53 llvm_ref
->lc
.v2f32
= LLVMVectorType(llvm_ref
->lc
.f32
, 2);
54 llvm_ref
->lc
.v3f32
= LLVMVectorType(llvm_ref
->lc
.f32
, 3);
55 llvm_ref
->lc
.v4f32
= LLVMVectorType(llvm_ref
->lc
.f32
, 4);
56 llvm_ref
->lc
.v8i32
= LLVMVectorType(llvm_ref
->lc
.i32
, 8);
57 // llvm_ref->lc.iN_wavemask = LLVMIntTypeInContext(llvm_ref->lc.context, llvm_ref->lc.wave_size);
58 // llvm_ref->lc.iN_ballotmask = LLVMIntTypeInContext(llvm_ref->lc.context, ballot_mask_bits);
60 llvm_ref
->lc
.i8_0
= LLVMConstInt(llvm_ref
->lc
.i8
, 0, false);
61 llvm_ref
->lc
.i8_1
= LLVMConstInt(llvm_ref
->lc
.i8
, 1, false);
62 llvm_ref
->lc
.i16_0
= LLVMConstInt(llvm_ref
->lc
.i16
, 0, false);
63 llvm_ref
->lc
.i16_1
= LLVMConstInt(llvm_ref
->lc
.i16
, 1, false);
64 llvm_ref
->lc
.i32_0
= LLVMConstInt(llvm_ref
->lc
.i32
, 0, false);
65 llvm_ref
->lc
.i32_1
= LLVMConstInt(llvm_ref
->lc
.i32
, 1, false);
66 llvm_ref
->lc
.i64_0
= LLVMConstInt(llvm_ref
->lc
.i64
, 0, false);
67 llvm_ref
->lc
.i64_1
= LLVMConstInt(llvm_ref
->lc
.i64
, 1, false);
68 llvm_ref
->lc
.i128_0
= LLVMConstInt(llvm_ref
->lc
.i128
, 0, false);
69 llvm_ref
->lc
.i128_1
= LLVMConstInt(llvm_ref
->lc
.i128
, 1, false);
70 llvm_ref
->lc
.f16_0
= LLVMConstReal(llvm_ref
->lc
.f16
, 0.0);
71 llvm_ref
->lc
.f16_1
= LLVMConstReal(llvm_ref
->lc
.f16
, 1.0);
72 llvm_ref
->lc
.f32_0
= LLVMConstReal(llvm_ref
->lc
.f32
, 0.0);
73 llvm_ref
->lc
.f32_1
= LLVMConstReal(llvm_ref
->lc
.f32
, 1.0);
74 llvm_ref
->lc
.f64_0
= LLVMConstReal(llvm_ref
->lc
.f64
, 0.0);
75 llvm_ref
->lc
.f64_1
= LLVMConstReal(llvm_ref
->lc
.f64
, 1.0);
77 llvm_ref
->lc
.i1false
= LLVMConstInt(llvm_ref
->lc
.i1
, 0, false);
78 llvm_ref
->lc
.i1true
= LLVMConstInt(llvm_ref
->lc
.i1
, 1, false);
79 llvm_ref
->lc
.float_mode
= 0; //TODO: default value, when required take this value as parameter
82 void DestroyLLVM(struct libresoc_llvm
*llvm_ref
)
84 LLVMErrorRef error_ref
= LLVMOrcDisposeInstance(llvm_ref
->orc_ref
);
87 static uint64_t orc_sym_resolver(const char *name
, void *ctx
)
89 LLVMOrcJITStackRef orc_ref
= (LLVMOrcJITStackRef
) (ctx
);
90 LLVMOrcTargetAddress address
;
91 LLVMOrcGetSymbolAddress(orc_ref
, &address
, name
);
92 return (uint64_t)address
;
95 void handle_shader_output_decl(struct libresoc_nir_tran_ctx
*ctx
,
96 struct nir_shader
*nir
, struct nir_variable
*variable
,
97 gl_shader_stage stage
)
99 unsigned output_loc
= variable
->data
.driver_location
/ 4;
100 unsigned attrib_count
= glsl_count_attribute_slots(variable
->type
, false);
102 /* tess ctrl has it's own load/store paths for outputs */
103 if (stage
== MESA_SHADER_TESS_CTRL
)
106 if (stage
== MESA_SHADER_VERTEX
|| stage
== MESA_SHADER_TESS_EVAL
||
107 stage
== MESA_SHADER_GEOMETRY
) {
108 int idx
= variable
->data
.location
+ variable
->data
.index
;
109 if (idx
== VARYING_SLOT_CLIP_DIST0
) {
110 int length
= nir
->info
.clip_distance_array_size
+ nir
->info
.cull_distance_array_size
;
119 bool is_16bit
= glsl_type_is_16bit(glsl_without_array(variable
->type
));
120 LLVMTypeRef type
= is_16bit
? ctx
->lc
.f16
: ctx
->lc
.f32
;
121 for (unsigned i
= 0; i
< attrib_count
; ++i
) {
122 for (unsigned chan
= 0; chan
< 4; chan
++) {
123 ctx
->outputs
[llvm_reg_index_soa(output_loc
+ i
, chan
)] =
124 build_alloca_undef(&ctx
->lc
, type
, "");
129 LLVMValueRef
extract_components(struct libresoc_llvm_context
*ctx
, LLVMValueRef value
, unsigned start
,
132 LLVMValueRef chan
[channels
];
134 for (unsigned i
= 0; i
< channels
; i
++)
135 chan
[i
] = llvm_extract_elem(ctx
, value
, i
+ start
);
137 return build_gather_values(ctx
, chan
, channels
);
140 static void build_store_values_extended(struct libresoc_llvm_context
*lc
, LLVMValueRef
*values
,
141 unsigned value_count
, unsigned value_stride
,
144 LLVMBuilderRef builder
= lc
->builder
;
147 for (i
= 0; i
< value_count
; i
++) {
148 LLVMValueRef ptr
= values
[i
* value_stride
];
149 LLVMValueRef index
= LLVMConstInt(lc
->i32
, i
, false);
150 LLVMValueRef value
= LLVMBuildExtractElement(builder
, vec
, index
, "");
151 LLVMBuildStore(builder
, value
, ptr
);
155 static LLVMTypeRef
arg_llvm_type(enum arg_type type
, unsigned size
, struct libresoc_llvm_context
*ctx
)
157 if (type
== ARG_FLOAT
) {
158 return size
== 1 ? ctx
->f32
: LLVMVectorType(ctx
->f32
, size
);
159 } else if (type
== ARG_INT
) {
160 return size
== 1 ? ctx
->i32
: LLVMVectorType(ctx
->i32
, size
);
162 LLVMTypeRef ptr_type
;
167 case ARG_CONST_FLOAT_PTR
:
170 case ARG_CONST_PTR_PTR
:
171 ptr_type
= LLVMPointerType(ctx
->i8
, 0);
173 case ARG_CONST_DESC_PTR
:
174 ptr_type
= ctx
->v4i32
;
176 case ARG_CONST_IMAGE_PTR
:
177 ptr_type
= ctx
->v8i32
;
180 unreachable("unknown arg type");
183 //return ac_array_in_const32_addr_space(ptr_type);
184 return LLVMPointerType(ptr_type
, 0); //address space may be wrong
187 return LLVMPointerType(ptr_type
, 0);
191 static LLVMValueRef
get_src(struct libresoc_nir_tran_ctx
*ctx
, nir_src src
)
194 // printf("index %d\n", src.ssa->index);
195 return ctx
->ssa_defs
[src
.ssa
->index
];
198 static LLVMTypeRef
get_def_type(struct libresoc_nir_tran_ctx
*ctx
, const nir_ssa_def
*def
)
200 LLVMTypeRef type
= LLVMIntTypeInContext(ctx
->lc
.context
, def
->bit_size
);
201 if (def
->num_components
> 1) {
202 type
= LLVMVectorType(type
, def
->num_components
);
207 static LLVMValueRef
get_memory_ptr(struct libresoc_nir_tran_ctx
*ctx
, nir_src src
, unsigned bit_size
)
209 LLVMValueRef ptr
= get_src(ctx
, src
);
210 ptr
= LLVMBuildGEP(ctx
->lc
.builder
, ctx
->lc
.lds
, &ptr
, 1, "");
211 int addr_space
= LLVMGetPointerAddressSpace(LLVMTypeOf(ptr
));
213 LLVMTypeRef type
= LLVMIntTypeInContext(ctx
->lc
.context
, bit_size
);
215 return LLVMBuildBitCast(ctx
->lc
.builder
, ptr
, LLVMPointerType(type
, addr_space
), "");
/* Expand each set bit of `mask` into `multiplier` consecutive set bits:
 * e.g. widen_mask(0b101, 2) == 0b110011. Used to widen a per-component
 * writemask when components are split (e.g. 64-bit values as 2x32).
 * NOTE(review): the trailing `return new_mask;` was elided in the
 * corrupted source; without it the function has no return value. */
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
   uint32_t new_mask = 0;
   for (unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
      if (mask & (1u << i))
         new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
   return new_mask;
}
227 static void get_deref_offset(struct libresoc_nir_tran_ctx
*ctx
, nir_deref_instr
*instr
, bool vs_in
,
228 unsigned *vertex_index_out
, LLVMValueRef
*vertex_index_ref
,
229 unsigned *const_out
, LLVMValueRef
*indir_out
)
231 nir_variable
*var
= nir_deref_instr_get_variable(instr
);
233 unsigned idx_lvl
= 1;
235 nir_deref_path_init(&path
, instr
, NULL
);
237 if (vertex_index_out
!= NULL
|| vertex_index_ref
!= NULL
) {
238 if (vertex_index_ref
) {
239 *vertex_index_ref
= get_src(ctx
, path
.path
[idx_lvl
]->arr
.index
);
240 if (vertex_index_out
)
241 *vertex_index_out
= 0;
243 *vertex_index_out
= nir_src_as_uint(path
.path
[idx_lvl
]->arr
.index
);
248 uint32_t const_offset
= 0;
249 LLVMValueRef offset
= NULL
;
251 if (var
->data
.compact
) {
252 assert(instr
->deref_type
== nir_deref_type_array
);
253 const_offset
= nir_src_as_uint(instr
->arr
.index
);
257 for (; path
.path
[idx_lvl
]; ++idx_lvl
) {
258 const struct glsl_type
*parent_type
= path
.path
[idx_lvl
- 1]->type
;
259 if (path
.path
[idx_lvl
]->deref_type
== nir_deref_type_struct
) {
260 unsigned index
= path
.path
[idx_lvl
]->strct
.index
;
262 for (unsigned i
= 0; i
< index
; i
++) {
263 const struct glsl_type
*ft
= glsl_get_struct_field(parent_type
, i
);
264 const_offset
+= glsl_count_attribute_slots(ft
, vs_in
);
266 } else if (path
.path
[idx_lvl
]->deref_type
== nir_deref_type_array
) {
267 unsigned size
= glsl_count_attribute_slots(path
.path
[idx_lvl
]->type
, vs_in
);
268 if (nir_src_is_const(path
.path
[idx_lvl
]->arr
.index
)) {
269 const_offset
+= size
* nir_src_as_uint(path
.path
[idx_lvl
]->arr
.index
);
271 LLVMValueRef array_off
=
272 LLVMBuildMul(ctx
->lc
.builder
, LLVMConstInt(ctx
->lc
.i32
, size
, 0),
273 get_src(ctx
, path
.path
[idx_lvl
]->arr
.index
), "");
275 offset
= LLVMBuildAdd(ctx
->lc
.builder
, offset
, array_off
, "");
280 unreachable("Uhandled deref type in get_deref_instr_offset");
284 nir_deref_path_finish(&path
);
286 if (const_offset
&& offset
)
288 LLVMBuildAdd(ctx
->lc
.builder
, offset
, LLVMConstInt(ctx
->lc
.i32
, const_offset
, 0), "");
290 *const_out
= const_offset
;
/* Size in bytes of one scalar element of `type`; booleans are stored as
 * 32-bit (4 bytes). Only valid for vector/scalar/matrix types. */
static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}
301 static LLVMValueRef
emit_int_cmp(struct libresoc_llvm_context
*lc
, LLVMIntPredicate pred
,
302 LLVMValueRef src0
, LLVMValueRef src1
)
304 LLVMTypeRef src0_type
= LLVMTypeOf(src0
);
305 LLVMTypeRef src1_type
= LLVMTypeOf(src1
);
307 if (LLVMGetTypeKind(src0_type
) == LLVMPointerTypeKind
&&
308 LLVMGetTypeKind(src1_type
) != LLVMPointerTypeKind
) {
309 src1
= LLVMBuildIntToPtr(lc
->builder
, src1
, src0_type
, "");
310 } else if (LLVMGetTypeKind(src1_type
) == LLVMPointerTypeKind
&&
311 LLVMGetTypeKind(src0_type
) != LLVMPointerTypeKind
) {
312 src0
= LLVMBuildIntToPtr(lc
->builder
, src0
, src1_type
, "");
315 LLVMValueRef result
= LLVMBuildICmp(lc
->builder
, pred
, src0
, src1
, "");
316 return LLVMBuildSelect(lc
->builder
, result
, LLVMConstInt(lc
->i32
, 0xFFFFFFFF, false),
320 static LLVMValueRef
emit_float_cmp(struct libresoc_llvm_context
*lc
, LLVMRealPredicate pred
,
321 LLVMValueRef src0
, LLVMValueRef src1
)
324 src0
= to_float(lc
, src0
);
325 src1
= to_float(lc
, src1
);
326 result
= LLVMBuildFCmp(lc
->builder
, pred
, src0
, src1
, "");
327 return LLVMBuildSelect(lc
->builder
, result
, LLVMConstInt(lc
->i32
, 0xFFFFFFFF, false),
331 static LLVMValueRef
emit_intrin_1f_param(struct libresoc_llvm_context
*lc
, const char *intrin
,
332 LLVMTypeRef result_type
, LLVMValueRef src0
)
334 char name
[64], type
[64];
335 LLVMValueRef params
[] = {
339 build_type_name_for_intr(LLVMTypeOf(params
[0]), type
, sizeof(type
));
340 ASSERTED
const int length
= snprintf(name
, sizeof(name
), "%s.%s", intrin
, type
);
341 assert(length
< sizeof(name
));
342 return build_intrinsic(lc
, name
, result_type
, params
, 1, FUNC_ATTR_READNONE
);
345 static LLVMValueRef
emit_intrin_1f_param_scalar(struct libresoc_llvm_context
*lc
, const char *intrin
,
346 LLVMTypeRef result_type
, LLVMValueRef src0
)
348 if (LLVMGetTypeKind(result_type
) != LLVMVectorTypeKind
)
349 return emit_intrin_1f_param(lc
, intrin
, result_type
, src0
);
351 LLVMTypeRef elem_type
= LLVMGetElementType(result_type
);
352 LLVMValueRef ret
= LLVMGetUndef(result_type
);
354 /* Scalarize the intrinsic, because vectors are not supported. */
355 for (unsigned i
= 0; i
< LLVMGetVectorSize(result_type
); i
++) {
356 char name
[64], type
[64];
357 LLVMValueRef params
[] = {
358 to_float(lc
, llvm_extract_elem(lc
, src0
, i
)),
361 build_type_name_for_intr(LLVMTypeOf(params
[0]), type
, sizeof(type
));
362 ASSERTED
const int length
= snprintf(name
, sizeof(name
), "%s.%s", intrin
, type
);
363 assert(length
< sizeof(name
));
364 ret
= LLVMBuildInsertElement(
366 build_intrinsic(lc
, name
, elem_type
, params
, 1, FUNC_ATTR_READNONE
),
367 LLVMConstInt(lc
->i32
, i
, 0), "");
372 static LLVMValueRef
emit_intrin_2f_param(struct libresoc_llvm_context
*ctx
, const char *intrin
,
373 LLVMTypeRef result_type
, LLVMValueRef src0
,
376 char name
[64], type
[64];
377 LLVMValueRef params
[] = {
382 build_type_name_for_intr(LLVMTypeOf(params
[0]), type
, sizeof(type
));
383 ASSERTED
const int length
= snprintf(name
, sizeof(name
), "%s.%s", intrin
, type
);
384 assert(length
< sizeof(name
));
385 return build_intrinsic(ctx
, name
, result_type
, params
, 2, FUNC_ATTR_READNONE
);
388 static LLVMValueRef
emit_intrin_3f_param(struct libresoc_llvm_context
*ctx
, const char *intrin
,
389 LLVMTypeRef result_type
, LLVMValueRef src0
,
390 LLVMValueRef src1
, LLVMValueRef src2
)
392 char name
[64], type
[64];
393 LLVMValueRef params
[] = {
399 build_type_name_for_intr(LLVMTypeOf(params
[0]), type
, sizeof(type
));
400 ASSERTED
const int length
= snprintf(name
, sizeof(name
), "%s.%s", intrin
, type
);
401 assert(length
< sizeof(name
));
402 return build_intrinsic(ctx
, name
, result_type
, params
, 3, FUNC_ATTR_READNONE
);
405 static LLVMValueRef
emit_bcsel(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
, LLVMValueRef src1
,
408 LLVMTypeRef src1_type
= LLVMTypeOf(src1
);
409 LLVMTypeRef src2_type
= LLVMTypeOf(src2
);
411 if (LLVMGetTypeKind(src1_type
) == LLVMPointerTypeKind
&&
412 LLVMGetTypeKind(src2_type
) != LLVMPointerTypeKind
) {
413 src2
= LLVMBuildIntToPtr(ctx
->builder
, src2
, src1_type
, "");
414 } else if (LLVMGetTypeKind(src2_type
) == LLVMPointerTypeKind
&&
415 LLVMGetTypeKind(src1_type
) != LLVMPointerTypeKind
) {
416 src1
= LLVMBuildIntToPtr(ctx
->builder
, src1
, src2_type
, "");
420 LLVMBuildICmp(ctx
->builder
, LLVMIntNE
, src0
, LLVMConstNull(LLVMTypeOf(src0
)), "");
421 return LLVMBuildSelect(ctx
->builder
, v
, to_integer_or_pointer(ctx
, src1
),
422 to_integer_or_pointer(ctx
, src2
), "");
425 static LLVMValueRef
emit_iabs(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
)
427 return build_imax(ctx
, src0
, LLVMBuildNeg(ctx
->builder
, src0
, ""));
430 static LLVMValueRef
emit_uint_carry(struct libresoc_llvm_context
*ctx
, const char *intrin
,
431 LLVMValueRef src0
, LLVMValueRef src1
)
433 LLVMTypeRef ret_type
;
434 LLVMTypeRef types
[] = {ctx
->i32
, ctx
->i1
};
436 LLVMValueRef params
[] = {src0
, src1
};
437 ret_type
= LLVMStructTypeInContext(ctx
->context
, types
, 2, true);
439 res
= build_intrinsic(ctx
, intrin
, ret_type
, params
, 2, FUNC_ATTR_READNONE
);
441 res
= LLVMBuildExtractValue(ctx
->builder
, res
, 1, "");
442 res
= LLVMBuildZExt(ctx
->builder
, res
, ctx
->i32
, "");
446 static LLVMValueRef
emit_b2f(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
, unsigned bitsize
)
448 assert(get_elem_bits(ctx
, LLVMTypeOf(src0
)) == 32);
449 LLVMValueRef result
=
450 LLVMBuildAnd(ctx
->builder
, src0
, const_uint_vec(ctx
, LLVMTypeOf(src0
), 0x3f800000), "");
451 result
= to_float(ctx
, result
);
455 bool vec2
= LLVMGetTypeKind(LLVMTypeOf(result
)) == LLVMVectorTypeKind
;
456 return LLVMBuildFPTrunc(ctx
->builder
, result
, vec2
? ctx
->v2f16
: ctx
->f16
, "");
461 return LLVMBuildFPExt(ctx
->builder
, result
, ctx
->f64
, "");
463 unreachable("Unsupported bit size.");
467 static LLVMValueRef
emit_f2b(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
)
469 src0
= to_float(ctx
, src0
);
470 LLVMValueRef zero
= LLVMConstNull(LLVMTypeOf(src0
));
471 return LLVMBuildSExt(ctx
->builder
, LLVMBuildFCmp(ctx
->builder
, LLVMRealUNE
, src0
, zero
, ""),
475 static LLVMValueRef
emit_b2i(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
, unsigned bitsize
)
477 LLVMValueRef result
= LLVMBuildAnd(ctx
->builder
, src0
, ctx
->i32_1
, "");
481 return LLVMBuildTrunc(ctx
->builder
, result
, ctx
->i8
, "");
483 return LLVMBuildTrunc(ctx
->builder
, result
, ctx
->i16
, "");
487 return LLVMBuildZExt(ctx
->builder
, result
, ctx
->i64
, "");
489 unreachable("Unsupported bit size.");
493 static LLVMValueRef
emit_i2b(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
)
495 LLVMValueRef zero
= LLVMConstNull(LLVMTypeOf(src0
));
496 return LLVMBuildSExt(ctx
->builder
, LLVMBuildICmp(ctx
->builder
, LLVMIntNE
, src0
, zero
, ""),
500 static LLVMValueRef
emit_f2f16(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
)
503 LLVMValueRef cond
= NULL
;
505 src0
= to_float(ctx
, src0
);
506 result
= LLVMBuildFPTrunc(ctx
->builder
, src0
, ctx
->f16
, "");
508 /* need to convert back up to f32 */
509 result
= LLVMBuildFPExt(ctx
->builder
, result
, ctx
->f32
, "");
513 static LLVMValueRef
emit_umul_high(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
,
516 LLVMValueRef dst64
, result
;
517 src0
= LLVMBuildZExt(ctx
->builder
, src0
, ctx
->i64
, "");
518 src1
= LLVMBuildZExt(ctx
->builder
, src1
, ctx
->i64
, "");
520 dst64
= LLVMBuildMul(ctx
->builder
, src0
, src1
, "");
521 dst64
= LLVMBuildLShr(ctx
->builder
, dst64
, LLVMConstInt(ctx
->i64
, 32, false), "");
522 result
= LLVMBuildTrunc(ctx
->builder
, dst64
, ctx
->i32
, "");
526 static LLVMValueRef
emit_imul_high(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
,
529 LLVMValueRef dst64
, result
;
530 src0
= LLVMBuildSExt(ctx
->builder
, src0
, ctx
->i64
, "");
531 src1
= LLVMBuildSExt(ctx
->builder
, src1
, ctx
->i64
, "");
533 dst64
= LLVMBuildMul(ctx
->builder
, src0
, src1
, "");
534 dst64
= LLVMBuildAShr(ctx
->builder
, dst64
, LLVMConstInt(ctx
->i64
, 32, false), "");
535 result
= LLVMBuildTrunc(ctx
->builder
, dst64
, ctx
->i32
, "");
539 static LLVMValueRef
emit_bfm(struct libresoc_llvm_context
*ctx
, LLVMValueRef bits
, LLVMValueRef offset
)
541 /* mask = ((1 << bits) - 1) << offset */
544 LLVMBuildSub(ctx
->builder
, LLVMBuildShl(ctx
->builder
, ctx
->i32_1
, bits
, ""), ctx
->i32_1
, ""),
548 static LLVMValueRef
emit_bitfield_select(struct libresoc_llvm_context
*ctx
, LLVMValueRef mask
,
549 LLVMValueRef insert
, LLVMValueRef base
)
552 * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
553 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
557 LLVMBuildAnd(ctx
->builder
, mask
, LLVMBuildXor(ctx
->builder
, insert
, base
, ""), ""), "");
560 static LLVMValueRef
emit_pack_2x16(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
,
561 LLVMValueRef (*pack
)(struct libresoc_llvm_context
*ctx
,
562 LLVMValueRef args
[2]))
564 LLVMValueRef comp
[2];
566 src0
= to_float(ctx
, src0
);
567 comp
[0] = LLVMBuildExtractElement(ctx
->builder
, src0
, ctx
->i32_0
, "");
568 comp
[1] = LLVMBuildExtractElement(ctx
->builder
, src0
, ctx
->i32_1
, "");
570 return LLVMBuildBitCast(ctx
->builder
, pack(ctx
, comp
), ctx
->i32
, "");
573 static LLVMValueRef
emit_unpack_half_2x16(struct libresoc_llvm_context
*ctx
, LLVMValueRef src0
)
575 LLVMValueRef const16
= LLVMConstInt(ctx
->i32
, 16, false);
576 LLVMValueRef temps
[2], val
;
579 for (i
= 0; i
< 2; i
++) {
580 val
= i
== 1 ? LLVMBuildLShr(ctx
->builder
, src0
, const16
, "") : src0
;
581 val
= LLVMBuildTrunc(ctx
->builder
, val
, ctx
->i16
, "");
582 val
= LLVMBuildBitCast(ctx
->builder
, val
, ctx
->f16
, "");
583 temps
[i
] = LLVMBuildFPExt(ctx
->builder
, val
, ctx
->f32
, "");
585 return build_gather_values(ctx
, temps
, 2);
588 // TODO: enable this when ac_build_ddxy() is added
589 // static LLVMValueRef emit_ddxy(struct libresoc_nir_context *ctx, nir_op op, LLVMValueRef src0)
593 // LLVMValueRef result;
595 // if (op == nir_op_fddx_fine)
596 // mask = TID_MASK_LEFT;
597 // else if (op == nir_op_fddy_fine)
598 // mask = TID_MASK_TOP;
600 // mask = TID_MASK_TOP_LEFT;
602 // /* for DDX we want to next X pixel, DDY next Y pixel. */
603 // if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
608 // result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
612 static void setup_locals(struct libresoc_nir_tran_ctx
*ctx
, struct nir_function
*func
)
616 nir_foreach_function_temp_variable(variable
, func
->impl
)
618 unsigned attrib_count
= glsl_count_attribute_slots(variable
->type
, false);
619 variable
->data
.driver_location
= ctx
->num_locals
* 4;
620 variable
->data
.location_frac
= 0;
621 ctx
->num_locals
+= attrib_count
;
623 ctx
->locals
= malloc(4 * ctx
->num_locals
* sizeof(LLVMValueRef
));
627 for (i
= 0; i
< ctx
->num_locals
; i
++) {
628 for (j
= 0; j
< 4; j
++) {
629 ctx
->locals
[i
* 4 + j
] = build_alloca_undef(&ctx
->lc
, ctx
->lc
.f32
, "temp");
634 static void setup_scratch(struct libresoc_nir_tran_ctx
*ctx
, struct nir_shader
*shader
)
636 if (shader
->scratch_size
== 0)
640 build_alloca_undef(&ctx
->lc
, LLVMArrayType(ctx
->lc
.i8
, shader
->scratch_size
), "scratch");
643 static void setup_constant_data(struct libresoc_nir_tran_ctx
*ctx
, struct nir_shader
*shader
)
645 if (!shader
->constant_data
)
648 LLVMValueRef data
= LLVMConstStringInContext(ctx
->lc
.context
, shader
->constant_data
,
649 shader
->constant_data_size
, true);
650 LLVMTypeRef type
= LLVMArrayType(ctx
->lc
.i8
, shader
->constant_data_size
);
652 unsigned address_space
= 0; //TODO: dummay value
653 LLVMValueRef global
=
654 LLVMAddGlobalInAddressSpace(*(ctx
->lc
.module
), type
, "const_data", address_space
);
656 LLVMSetInitializer(global
, data
);
657 LLVMSetGlobalConstant(global
, true);
658 LLVMSetVisibility(global
, LLVMHiddenVisibility
);
659 ctx
->constant_data
= global
;
662 static LLVMTypeRef
glsl_base_to_llvm_type(struct libresoc_llvm_context
*lc
, enum glsl_base_type type
)
668 case GLSL_TYPE_SUBROUTINE
:
671 case GLSL_TYPE_UINT8
:
673 case GLSL_TYPE_INT16
:
674 case GLSL_TYPE_UINT16
:
676 case GLSL_TYPE_FLOAT
:
678 case GLSL_TYPE_FLOAT16
:
680 case GLSL_TYPE_INT64
:
681 case GLSL_TYPE_UINT64
:
683 case GLSL_TYPE_DOUBLE
:
686 unreachable("unknown GLSL type");
690 static LLVMTypeRef
glsl_to_llvm_type(struct libresoc_llvm_context
*lc
, const struct glsl_type
*type
)
692 if (glsl_type_is_scalar(type
)) {
693 return glsl_base_to_llvm_type(lc
, glsl_get_base_type(type
));
696 if (glsl_type_is_vector(type
)) {
697 return LLVMVectorType(glsl_base_to_llvm_type(lc
, glsl_get_base_type(type
)),
698 glsl_get_vector_elements(type
));
701 if (glsl_type_is_matrix(type
)) {
702 return LLVMArrayType(glsl_to_llvm_type(lc
, glsl_get_column_type(type
)),
703 glsl_get_matrix_columns(type
));
706 if (glsl_type_is_array(type
)) {
707 return LLVMArrayType(glsl_to_llvm_type(lc
, glsl_get_array_element(type
)),
708 glsl_get_length(type
));
711 assert(glsl_type_is_struct_or_ifc(type
));
713 LLVMTypeRef member_types
[glsl_get_length(type
)];
715 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
716 member_types
[i
] = glsl_to_llvm_type(lc
, glsl_get_struct_field(type
, i
));
719 return LLVMStructTypeInContext(lc
->context
, member_types
, glsl_get_length(type
), false);
722 // static LLVMValueRef visit_load(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr,
725 // LLVMValueRef values[8];
726 // LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
727 // LLVMTypeRef component_type;
728 // unsigned base = nir_intrinsic_base(instr);
729 // unsigned component = nir_intrinsic_component(instr);
730 // unsigned count = instr->dest.ssa.num_components * (instr->dest.ssa.bit_size == 64 ? 2 : 1);
731 // nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
732 // LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
733 // nir_src offset = *nir_get_io_offset_src(instr);
734 // LLVMValueRef indir_index = NULL;
736 // if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
737 // component_type = LLVMGetElementType(dest_type);
739 // component_type = dest_type;
741 // if (nir_src_is_const(offset))
742 // assert(nir_src_as_uint(offset) == 0);
744 // indir_index = get_src(ctx, offset);
746 // if (ctx->stage == MESA_SHADER_TESS_CTRL || (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
747 // LLVMValueRef result = ctx->abi->load_tess_varyings(
748 // ctx->abi, component_type, vertex_index, indir_index, 0, 0, base * 4, component,
749 // instr->num_components, false, false, !is_output);
750 // if (instr->dest.ssa.bit_size == 16) {
751 // result = to_integer(&ctx->lc, result);
752 // result = LLVMBuildTrunc(ctx->lc.builder, result, dest_type, "");
754 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
757 // /* No indirect indexing is allowed after this point. */
758 // assert(!indir_index);
760 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
761 // LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
762 // assert(nir_src_is_const(*vertex_index_src));
764 // return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component, instr->num_components,
765 // nir_src_as_uint(*vertex_index_src), 0, type);
768 // if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
769 // nir_intrinsic_io_semantics(instr).fb_fetch_output)
770 // return ctx->abi->emit_fbfetch(ctx->abi);
772 // /* Other non-fragment cases have inputs and outputs in temporaries. */
773 // if (ctx->stage != MESA_SHADER_FRAGMENT) {
774 // for (unsigned chan = component; chan < count + component; chan++) {
776 // values[chan] = LLVMBuildLoad(ctx->lc.builder, ctx->outputs[base * 4 + chan], "");
778 // values[chan] = ctx->inputs[base * 4 + chan];
779 // if (!values[chan])
780 // values[chan] = LLVMGetUndef(ctx->lc.i32);
783 // LLVMValueRef result = build_varying_gather_values(&ctx->lc, values, count, component);
784 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
787 // /* Fragment shader inputs. */
788 // unsigned vertex_id = 2; /* P0 */
790 // if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
791 // nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
793 // switch (src0[0].i32) {
804 // unreachable("Invalid vertex index");
808 // LLVMValueRef attr_number = LLVMConstInt(ctx->lc.i32, base, false);
810 // for (unsigned chan = 0; chan < count; chan++) {
811 // if (component + chan > 4)
812 // attr_number = LLVMConstInt(ctx->lc.i32, base + 1, false);
813 // LLVMValueRef llvm_chan = LLVMConstInt(ctx->lc.i32, (component + chan) % 4, false);
815 // build_fs_interp_mov(&ctx->lc, LLVMConstInt(ctx->lc.i32, vertex_id, false), llvm_chan,
816 // attr_number, get_arg(&ctx->lc, ctx->args->prim_mask));
817 // values[chan] = LLVMBuildBitCast(ctx->lc.builder, values[chan], ctx->lc.i32, "");
819 // LLVMBuildTruncOrBitCast(ctx->lc.builder, values[chan],
820 // instr->dest.ssa.bit_size == 16 ? ctx->lc.i16 : ctx->lc.i32, "");
823 // LLVMValueRef result = build_gather_values(&ctx->lc, values, count);
824 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
827 static LLVMValueRef
visit_load_shared(struct libresoc_nir_tran_ctx
*ctx
, const nir_intrinsic_instr
*instr
)
829 LLVMValueRef values
[4], derived_ptr
, index
, ret
;
831 LLVMValueRef ptr
= get_memory_ptr(ctx
, instr
->src
[0], instr
->dest
.ssa
.bit_size
);
833 for (int chan
= 0; chan
< instr
->num_components
; chan
++) {
834 index
= LLVMConstInt(ctx
->lc
.i32
, chan
, 0);
835 derived_ptr
= LLVMBuildGEP(ctx
->lc
.builder
, ptr
, &index
, 1, "");
836 values
[chan
] = LLVMBuildLoad(ctx
->lc
.builder
, derived_ptr
, "");
839 ret
= build_gather_values(&ctx
->lc
, values
, instr
->num_components
);
840 return LLVMBuildBitCast(ctx
->lc
.builder
, ret
, get_def_type(ctx
, &instr
->dest
.ssa
), "");
843 static void visit_store_shared(struct libresoc_nir_tran_ctx
*ctx
, const nir_intrinsic_instr
*instr
)
845 LLVMValueRef derived_ptr
, data
, index
;
846 LLVMBuilderRef builder
= ctx
->lc
.builder
;
848 LLVMValueRef ptr
= get_memory_ptr(ctx
, instr
->src
[1], instr
->src
[0].ssa
->bit_size
);
849 LLVMValueRef src
= get_src(ctx
, instr
->src
[0]);
851 int writemask
= nir_intrinsic_write_mask(instr
);
852 for (int chan
= 0; chan
< 4; chan
++) {
853 if (!(writemask
& (1 << chan
))) {
856 data
= llvm_extract_elem(&ctx
->lc
, src
, chan
);
857 index
= LLVMConstInt(ctx
->lc
.i32
, chan
, 0);
858 derived_ptr
= LLVMBuildGEP(builder
, ptr
, &index
, 1, "");
859 LLVMBuildStore(builder
, data
, derived_ptr
);
862 static void visit_load_const(struct libresoc_nir_tran_ctx
*ctx
, const nir_load_const_instr
*instr
)
864 LLVMValueRef values
[4], value
= NULL
;
865 LLVMTypeRef element_type
= LLVMIntTypeInContext(ctx
->lc
.context
, instr
->def
.bit_size
);
867 for (unsigned i
= 0; i
< instr
->def
.num_components
; ++i
) {
868 switch (instr
->def
.bit_size
) {
870 values
[i
] = LLVMConstInt(element_type
, instr
->value
[i
].u8
, false);
873 values
[i
] = LLVMConstInt(element_type
, instr
->value
[i
].u16
, false);
876 values
[i
] = LLVMConstInt(element_type
, instr
->value
[i
].u32
, false);
879 values
[i
] = LLVMConstInt(element_type
, instr
->value
[i
].u64
, false);
882 fprintf(stderr
, "unsupported nir load_const bit_size: %d\n", instr
->def
.bit_size
);
886 if (instr
->def
.num_components
> 1) {
887 value
= LLVMConstVector(values
, instr
->def
.num_components
);
891 ctx
->ssa_defs
[instr
->def
.index
] = value
;
894 static void visit_store_output(struct libresoc_nir_tran_ctx
*ctx
, nir_intrinsic_instr
*instr
)
896 // if (ctx->ac.postponed_kill) {
897 // LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
898 // ac_build_ifcc(&ctx->ac, cond, 7002);
901 unsigned base
= nir_intrinsic_base(instr
);
902 unsigned writemask
= nir_intrinsic_write_mask(instr
);
903 unsigned component
= nir_intrinsic_component(instr
);
904 LLVMValueRef src
= to_float(&ctx
->lc
, get_src(ctx
, instr
->src
[0]));
905 nir_src offset
= *nir_get_io_offset_src(instr
);
906 LLVMValueRef indir_index
= NULL
;
908 if (nir_src_is_const(offset
))
909 assert(nir_src_as_uint(offset
) == 0);
911 indir_index
= get_src(ctx
, offset
);
913 switch (get_elem_bits(&ctx
->lc
, LLVMTypeOf(src
))) {
917 writemask
= widen_mask(writemask
, 2);
918 src
= LLVMBuildBitCast(ctx
->lc
.builder
, src
,
919 LLVMVectorType(ctx
->lc
.f32
, get_llvm_num_components(src
) * 2), "");
922 unreachable("unhandled store_output bit size");
926 writemask
<<= component
;
928 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
929 // nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
930 // LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
932 // ctx->abi->store_tcs_outputs(ctx->abi, NULL, vertex_index, indir_index, 0, src, writemask,
933 // component, base * 4);
937 /* No indirect indexing is allowed after this point. */
938 assert(!indir_index
);
940 for (unsigned chan
= 0; chan
< 8; chan
++) {
941 if (!(writemask
& (1 << chan
)))
944 LLVMValueRef value
= llvm_extract_elem(&ctx
->lc
, src
, chan
- component
);
945 LLVMBuildStore(ctx
->lc
.builder
, value
, ctx
->outputs
[base
* 4 + chan
]);
948 // if (ctx->ac.postponed_kill)
949 // ac_build_endif(&ctx->ac, 7002);
/* Translate a NIR deref-chain instruction into an LLVM pointer value and
 * record it as this instruction's SSA def.  Only shared- and global-memory
 * derefs are materialized; other modes are resolved at the load/store
 * sites.
 *
 * NOTE(review): several 'break;', 'else' and assignment lines were lost in
 * this extraction — marked inline; confirm against the original file. */
static void visit_deref(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr)
{
   if (instr->mode != nir_var_mem_shared && instr->mode != nir_var_mem_global)
      /* NOTE(review): 'return;' lost in extraction */

   LLVMValueRef result = NULL;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      /* Base of the chain: look up the variable's LLVM value. */
      struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
      result = entry->data;
      /* NOTE(review): closing brace and 'break;' lost */
   case nir_deref_type_struct:
      if (instr->mode == nir_var_mem_global) {
         /* Global memory: byte-offset GEP from the field's explicit offset. */
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent),
                                LLVMConstInt(ctx->lc.i32, offset, 0));
      /* NOTE(review): 'else' marker lost — shared memory indexes the LLVM
       * struct type directly by field index. */
         result = build_gep0(&ctx->lc, get_src(ctx, instr->parent),
                             LLVMConstInt(ctx->lc.i32, instr->strct.index, 0));
      /* NOTE(review): 'break;' lost */
   case nir_deref_type_array:
      if (instr->mode == nir_var_mem_global) {
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         unsigned stride = glsl_get_explicit_stride(parent->type);

         /* Row-major matrices and tightly-packed vectors carry no explicit
          * stride; fall back to the scalar element size. */
         if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
             (glsl_type_is_vector(parent->type) && stride == 0))
            stride = type_scalar_size_bytes(parent->type);

         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      /* NOTE(review): 'else result =' lost — shared memory uses a plain
       * [0, index] GEP on the aggregate. */
         build_gep0(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      /* NOTE(review): 'break;' lost */
   case nir_deref_type_ptr_as_array:
      if (instr->mode == nir_var_mem_global) {
         unsigned stride = nir_deref_instr_array_stride(instr);

         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      /* NOTE(review): 'else result =' lost in extraction */
         build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      /* NOTE(review): 'break;' lost */
   case nir_deref_type_cast: {
      result = get_src(ctx, instr->parent);

      /* We can't use the structs from LLVM because the shader
       * specifies its own offsets. */
      LLVMTypeRef pointee_type = ctx->lc.i8;
      if (instr->mode == nir_var_mem_shared)
         pointee_type = glsl_to_llvm_type(&ctx->lc, instr->type);

      unsigned address_space;

      switch (instr->mode) {
      case nir_var_mem_shared:
         /* NOTE(review): address_space assignment and 'break;' lost */
      case nir_var_mem_global:
         /* NOTE(review): address_space assignment, 'break;' and 'default:'
          * label lost */
         unreachable("Unhandled address space");
      }

      LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

      if (LLVMTypeOf(result) != type) {
         if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
            /* Vector-typed pointers are reinterpreted bitwise. */
            result = LLVMBuildBitCast(ctx->lc.builder, result, type, "");
         /* NOTE(review): 'else' marker lost — scalar integers are converted
          * with inttoptr. */
            result = LLVMBuildIntToPtr(ctx->lc.builder, result, type, "");
      /* NOTE(review): closing braces, 'break;' and 'default:' label lost */
      unreachable("Unhandled deref_instr deref type");
   }

   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
1055 static void visit_phi(struct libresoc_nir_tran_ctx
*ctx
, nir_phi_instr
*instr
)
1057 LLVMTypeRef type
= get_def_type(ctx
, &instr
->dest
.ssa
);
1058 LLVMValueRef result
= LLVMBuildPhi(ctx
->lc
.builder
, type
, "");
1060 ctx
->ssa_defs
[instr
->dest
.ssa
.index
] = result
;
1061 _mesa_hash_table_insert(ctx
->phis
, instr
, result
);
/* Return true when an SSA def reaches a shader export: either it is stored
 * through a deref, or it feeds a vec4 whose result in turn reaches an
 * export (checked recursively).
 *
 * NOTE(review): the 'return true;' statements inside both branches and the
 * final 'return false;' were lost in this extraction — confirm against the
 * original file. */
static bool is_def_used_in_an_export(const nir_ssa_def *def)
{
   nir_foreach_use (use_src, def) {
      if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
         if (instr->intrinsic == nir_intrinsic_store_deref)
            /* NOTE(review): 'return true;' lost */
      } else if (use_src->parent_instr->type == nir_instr_type_alu) {
         /* Follow vec4 packing, which is how exports are assembled. */
         nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
         if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
            /* NOTE(review): 'return true;' lost */
   /* NOTE(review): closing braces and final 'return false;' lost */
}
/* Translate a NIR ssa_undef.  If the undef value can reach an export
 * (directly or through a vec4), emit a real LLVM undef; otherwise replace
 * it with zero so garbage cannot leak into exported outputs.
 *
 * NOTE(review): the 'LLVMValueRef undef;' declaration and the 'else'
 * markers were lost in this extraction — confirm against the original. */
static void visit_ssa_undef(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_undef_instr *instr)
{
   unsigned num_components = instr->def.num_components;
   LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size);

   if (/*!ctx->abi->convert_undef_to_zero ||*/ is_def_used_in_an_export(&instr->def)) {
      /* NOTE(review): 'LLVMValueRef undef;' declaration lost here */
      if (num_components == 1)
         undef = LLVMGetUndef(type);
      /* NOTE(review): 'else' marker lost */
         undef = LLVMGetUndef(LLVMVectorType(type, num_components));
      ctx->ssa_defs[instr->def.index] = undef;
   /* NOTE(review): '} else {' marker lost — zero-replacement path below */
      LLVMValueRef zero = LLVMConstInt(type, 0, false);
      if (num_components > 1) {
         /* NOTE(review): gathers 4 copies regardless of num_components —
          * looks suspicious; confirm intent against the original/upstream. */
         zero = build_gather_values_extended(&ctx->lc, &zero, 4, 0, false, false);
      }
      ctx->ssa_defs[instr->def.index] = zero;
}
/* Translate a NIR jump instruction (break / continue); unknown jump kinds
 * are reported to stderr.
 *
 * NOTE(review): the bodies of the break and continue cases (the branch
 * emission and 'break;') and the 'default:' label were lost in this
 * extraction — confirm against the original file. */
static void visit_jump(struct libresoc_llvm_context *lc, const nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
      /* NOTE(review): break-branch emission lost */
   case nir_jump_continue:
      /* NOTE(review): continue-branch emission and 'default:' label lost */
      fprintf(stderr, "Unknown NIR jump instr: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      /* NOTE(review): an abort() presumably followed — confirm */
   }
}
/* Fetch one ALU source as an LLVM value with exactly num_components
 * components, applying the NIR swizzle when it is not the identity:
 * extract for vector->scalar, splat for scalar->vector, shuffle otherwise.
 *
 * NOTE(review): the 'else' marker before the shuffle path and the final
 * 'return value;' were lost in this extraction — confirm. */
static LLVMValueRef get_alu_src(struct libresoc_nir_tran_ctx *ctx, nir_alu_src src,
                                unsigned num_components)
{
   LLVMValueRef value = get_src(ctx, src.src);
   bool need_swizzle = false;

   unsigned src_components = get_llvm_num_components(value);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      /* Swizzle selectors as i32 constants (NIR swizzles have <= 4 lanes). */
      LLVMValueRef masks[] = {LLVMConstInt(ctx->lc.i32, src.swizzle[0], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[1], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[2], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[3], false)};

      if (src_components > 1 && num_components == 1) {
         /* Vector source, scalar use: extract the selected lane. */
         value = LLVMBuildExtractElement(ctx->lc.builder, value, masks[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* Scalar source, vector use: replicate. */
         LLVMValueRef values[] = {value, value, value, value};
         value = build_gather_values(&ctx->lc, values, num_components);
      /* NOTE(review): 'else' marker lost — general case shuffles. */
         LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
         value = LLVMBuildShuffleVector(ctx->lc.builder, value, value, swizzle, "");
   }

   /* Source modifiers are expected to be lowered before this point. */
   assert(!src.negate);
   /* NOTE(review): 'return value;' lost in extraction */
}
/* Translate one NIR ALU instruction to LLVM IR: fetch the swizzled
 * sources, dispatch on the opcode, then store the (integer-or-pointer
 * cast) result as the instruction's SSA def.
 *
 * NOTE(review): this extraction lost the vast majority of the
 * 'case nir_op_*:' labels and every 'break;' in the big opcode switch.
 * The opcode names in the comments below are INFERRED from the LLVM
 * builder / helper calls and must be confirmed against the original. */
static void visit_alu(struct libresoc_nir_tran_ctx *ctx, const nir_alu_instr *instr)
{
   LLVMValueRef src[4], result = NULL;
   unsigned num_components = instr->dest.dest.ssa.num_components;
   unsigned src_components;
   LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

   assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));

   /* Decide how many components each source must supply. */
   switch (instr->op) {
   case nir_op_pack_half_2x16:
   case nir_op_pack_snorm_2x16:
   case nir_op_pack_unorm_2x16:
   case nir_op_unpack_half_2x16:
   case nir_op_cube_face_coord:
   case nir_op_cube_face_index:
      /* NOTE(review): the per-group src_components assignments and the
       * vec2/3/4 group were lost in extraction; the default follows. */
      src_components = num_components;
   }

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      src[i] = get_alu_src(ctx, instr->src[i], src_components);

   switch (instr->op) {
   /* fneg (presumed) */
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFNeg(ctx->lc.builder, src[0], "");
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fneg will be optimized by backend compiler with sign
          * bit removed via XOR. This is probably a LLVM bug. */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
   /* ineg (presumed) */
      result = LLVMBuildNeg(ctx->lc.builder, src[0], "");
   /* inot (presumed) */
      result = LLVMBuildNot(ctx->lc.builder, src[0], "");
   /* iadd (presumed) */
      result = LLVMBuildAdd(ctx->lc.builder, src[0], src[1], "");
   /* fadd (presumed) */
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFAdd(ctx->lc.builder, src[0], src[1], "");
   /* fsub (presumed) */
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFSub(ctx->lc.builder, src[0], src[1], "");
   /* isub (presumed) */
      result = LLVMBuildSub(ctx->lc.builder, src[0], src[1], "");
   /* imul (presumed) */
      result = LLVMBuildMul(ctx->lc.builder, src[0], src[1], "");
   /* signed remainder op — imod or irem, labels lost; confirm */
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
   /* umod (presumed) */
      result = LLVMBuildURem(ctx->lc.builder, src[0], src[1], "");
   /* second signed remainder op — irem or imod, labels lost; confirm */
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
   /* idiv (presumed) */
      result = LLVMBuildSDiv(ctx->lc.builder, src[0], src[1], "");
   /* udiv (presumed) */
      result = LLVMBuildUDiv(ctx->lc.builder, src[0], src[1], "");
   /* fmul (presumed) */
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFMul(ctx->lc.builder, src[0], src[1], "");
   /* frcp (presumed) */
      /* For doubles, we need precise division to pass GLCTS. */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL && get_type_size(def_type) == 8) {
         result = LLVMBuildFDiv(ctx->lc.builder, ctx->lc.f64_1, to_float(&ctx->lc, src[0]), "");
      /* NOTE(review): 'else' marker lost */
         result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rcp",
                                              to_float_type(&ctx->lc, def_type), src[0]);
      // TODO: abi not supported
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                        LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
   /* iand (presumed) */
      result = LLVMBuildAnd(ctx->lc.builder, src[0], src[1], "");
   /* ior (presumed) */
      result = LLVMBuildOr(ctx->lc.builder, src[0], src[1], "");
   /* ixor (presumed) */
      result = LLVMBuildXor(ctx->lc.builder, src[0], src[1], "");
   /* ishl (presumed): match the shift-amount width to the value width */
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildShl(ctx->lc.builder, src[0], src[1], "");
   /* ishr (presumed): same width fixup, arithmetic shift */
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildAShr(ctx->lc.builder, src[0], src[1], "");
   /* ushr (presumed): same width fixup, logical shift */
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildLShr(ctx->lc.builder, src[0], src[1], "");
   /* integer comparisons — ilt/ine/ieq/ige/ult/uge (presumed) */
      result = emit_int_cmp(&ctx->lc, LLVMIntSLT, src[0], src[1]);
      result = emit_int_cmp(&ctx->lc, LLVMIntNE, src[0], src[1]);
      result = emit_int_cmp(&ctx->lc, LLVMIntEQ, src[0], src[1]);
      result = emit_int_cmp(&ctx->lc, LLVMIntSGE, src[0], src[1]);
      result = emit_int_cmp(&ctx->lc, LLVMIntULT, src[0], src[1]);
      result = emit_int_cmp(&ctx->lc, LLVMIntUGE, src[0], src[1]);
   /* float comparisons — feq/fneu/flt/fge (presumed) */
      result = emit_float_cmp(&ctx->lc, LLVMRealOEQ, src[0], src[1]);
      result = emit_float_cmp(&ctx->lc, LLVMRealUNE, src[0], src[1]);
      result = emit_float_cmp(&ctx->lc, LLVMRealOLT, src[0], src[1]);
      result = emit_float_cmp(&ctx->lc, LLVMRealOGE, src[0], src[1]);
   /* fabs (presumed; leading 'result =' lost in extraction) */
      emit_intrin_1f_param(&ctx->lc, "llvm.fabs", to_float_type(&ctx->lc, def_type), src[0]);
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fabs will be optimized by backend compiler with sign
          * bit removed via AND. */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
   /* iabs (presumed) */
      result = emit_iabs(&ctx->lc, src[0]);
   /* imax/imin/umax/umin (presumed) */
      result = build_imax(&ctx->lc, src[0], src[1]);
      result = build_imin(&ctx->lc, src[0], src[1]);
      result = build_umax(&ctx->lc, src[0], src[1]);
      result = build_umin(&ctx->lc, src[0], src[1]);
   /* isign (presumed) */
      result = build_isign(&ctx->lc, src[0]);
   /* fsign (presumed) */
      src[0] = to_float(&ctx->lc, src[0]);
      result = build_fsign(&ctx->lc, src[0]);
   /* ffloor / ftrunc / fceil (presumed; leading 'result =' lost on each) */
      emit_intrin_1f_param(&ctx->lc, "llvm.floor", to_float_type(&ctx->lc, def_type), src[0]);
      emit_intrin_1f_param(&ctx->lc, "llvm.trunc", to_float_type(&ctx->lc, def_type), src[0]);
      emit_intrin_1f_param(&ctx->lc, "llvm.ceil", to_float_type(&ctx->lc, def_type), src[0]);
   case nir_op_fround_even:
      /* NOTE(review): leading 'result =' lost */
      emit_intrin_1f_param(&ctx->lc, "llvm.rint", to_float_type(&ctx->lc, def_type), src[0]);
   /* ffract (presumed) */
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.fract",
                                           to_float_type(&ctx->lc, def_type), src[0]);
   /* fsin/fcos/fsqrt/fexp2/flog2 (presumed; leading 'result =' lost) */
      emit_intrin_1f_param(&ctx->lc, "llvm.sin", to_float_type(&ctx->lc, def_type), src[0]);
      emit_intrin_1f_param(&ctx->lc, "llvm.cos", to_float_type(&ctx->lc, def_type), src[0]);
      emit_intrin_1f_param(&ctx->lc, "llvm.sqrt", to_float_type(&ctx->lc, def_type), src[0]);
      emit_intrin_1f_param(&ctx->lc, "llvm.exp2", to_float_type(&ctx->lc, def_type), src[0]);
      emit_intrin_1f_param(&ctx->lc, "llvm.log2", to_float_type(&ctx->lc, def_type), src[0]);
   /* frsq (presumed) */
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rsq",
                                           to_float_type(&ctx->lc, def_type), src[0]);
      // TODO: abi not enabled
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                        LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
   case nir_op_frexp_exp:
      // TODO: enable this when ac_build_frexp_exp() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_exp(&ctx->lc, src[0], get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])));
      // if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) == 16)
      //    result = LLVMBuildSExt(ctx->lc.builder, result, ctx->lc.i32, "");
   case nir_op_frexp_sig:
      // TODO: enable this when ac_build_frexp_mant() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_mant(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
   // case nir_op_fpow:
   //    result = emit_intrin_2f_param(&ctx->lc, "llvm.pow", to_float_type(&ctx->lc, def_type),
   /* fmax (presumed; trailing argument line lost) */
      result = emit_intrin_2f_param(&ctx->lc, "llvm.maxnum", to_float_type(&ctx->lc, def_type),
   /* fmin (presumed; trailing argument line lost) */
      result = emit_intrin_2f_param(&ctx->lc, "llvm.minnum", to_float_type(&ctx->lc, def_type),
   /* ffma (presumed; leading 'result =' lost) */
      emit_intrin_3f_param(&ctx->lc, "llvm.fmuladd",
                           to_float_type(&ctx->lc, def_type), src[0], src[1], src[2]);
   /* fldexp (presumed): pick the intrinsic by destination width */
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, def_type) == 32)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f32", ctx->lc.f32, src, 2,
                                  FUNC_ATTR_READNONE);
      else if (get_elem_bits(&ctx->lc, def_type) == 16)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f16", ctx->lc.f16, src, 2,
                                  FUNC_ATTR_READNONE);
      /* NOTE(review): 'else' marker lost */
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f64", ctx->lc.f64, src, 2,
                                  FUNC_ATTR_READNONE);
   /* bfm (presumed) */
      result = emit_bfm(&ctx->lc, src[0], src[1]);
   case nir_op_bitfield_select:
      result = emit_bitfield_select(&ctx->lc, src[0], src[1], src[2]);
   /* ubfe (presumed) */
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], false);
   /* ibfe (presumed) */
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], true);
   case nir_op_bitfield_reverse:
      result = build_bitfield_reverse(&ctx->lc, src[0]);
   case nir_op_bit_count:
      result = build_bit_count(&ctx->lc, src[0]);
   /* vec2/vec3/vec4 (labels lost): gather integer-cast sources */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
         src[i] = to_integer(&ctx->lc, src[i]);
      result = build_gather_values(&ctx->lc, src, num_components);
   /* f2i* (presumed) */
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToSI(ctx->lc.builder, src[0], def_type, "");
   /* f2u* (presumed) */
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToUI(ctx->lc.builder, src[0], def_type, "");
   /* i2f* (presumed) */
      result = LLVMBuildSIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
   /* u2f* (presumed) */
      result = LLVMBuildUIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
   case nir_op_f2f16_rtz:
   /* f2f16 / f2fmp labels lost */
      src[0] = to_float(&ctx->lc, src[0]);

      /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
       * all f32->f16 conversions have to round towards zero, because both scalar
       * and vec2 down-conversions have to round equally. */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
         src[0] = to_float(&ctx->lc, src[0]);

         if (LLVMTypeOf(src[0]) == ctx->lc.f64)
            src[0] = LLVMBuildFPTrunc(ctx->lc.builder, src[0], ctx->lc.f32, "");

         /* Fast path conversion. This only works if NIR is vectorized
          * ... */
         if (LLVMTypeOf(src[0]) == ctx->lc.v2f32) {
            LLVMValueRef args[] = {
               llvm_extract_elem(&ctx->lc, src[0], 0),
               llvm_extract_elem(&ctx->lc, src[0], 1),
            result = build_cvt_pkrtz_f16(&ctx->lc, args);
         /* NOTE(review): 'else' marker lost — scalar path below */
            assert(get_llvm_num_components(src[0]) == 1);
            LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->lc.f32)};
            result = build_cvt_pkrtz_f16(&ctx->lc, param);
            result = LLVMBuildExtractElement(ctx->lc.builder, result, ctx->lc.i32_0, "");
      /* NOTE(review): 'else'/'result =' markers lost on the generic path */
         if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
            LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
         /* NOTE(review): 'else result =' lost */
            LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
   case nir_op_f2f16_rtne:
   /* f2f32 / f2f64 labels lost */
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      /* NOTE(review): 'else result =' lost */
         LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
   /* u2u* (labels lost): widen with zext, narrow with trunc */
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildZExt(ctx->lc.builder, src[0], def_type, "");
      /* NOTE(review): 'else' lost */
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
   /* i2i* (labels lost): widen with sext, narrow with trunc */
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildSExt(ctx->lc.builder, src[0], def_type, "");
      /* NOTE(review): 'else' lost */
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
   case nir_op_b32csel:
      result = emit_bcsel(&ctx->lc, src[0], src[1], src[2]);
   case nir_op_find_lsb:
      result = find_lsb(&ctx->lc, ctx->lc.i32, src[0]);
   case nir_op_ufind_msb:
      result = build_umsb(&ctx->lc, src[0], ctx->lc.i32);
   case nir_op_ifind_msb:
      result = build_imsb(&ctx->lc, src[0], ctx->lc.i32);
   case nir_op_uadd_carry:
      result = emit_uint_carry(&ctx->lc, "llvm.uadd.with.overflow.i32", src[0], src[1]);
   case nir_op_usub_borrow:
      result = emit_uint_carry(&ctx->lc, "llvm.usub.with.overflow.i32", src[0], src[1]);
   /* b2f* (labels lost) */
      result = emit_b2f(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
   /* f2b (presumed) */
      result = emit_f2b(&ctx->lc, src[0]);
   /* b2i* (labels lost) */
      result = emit_b2i(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
   /* i2b (presumed) */
      result = emit_i2b(&ctx->lc, src[0]);
   case nir_op_fquantize2f16:
      result = emit_f2f16(&ctx->lc, src[0]);
   case nir_op_umul_high:
      result = emit_umul_high(&ctx->lc, src[0], src[1]);
   case nir_op_imul_high:
      result = emit_imul_high(&ctx->lc, src[0], src[1]);
   case nir_op_pack_half_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pkrtz_f16);
   case nir_op_pack_snorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_i16);
   case nir_op_pack_unorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_u16);
   case nir_op_unpack_half_2x16:
      result = emit_unpack_half_2x16(&ctx->lc, src[0]);
   /* screen-space derivatives — not implemented yet */
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      // TODO: enable this when emit_ddxy() is added
      //result = emit_ddxy(ctx, instr->op, src[0]);
   case nir_op_unpack_64_2x32_split_x: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
   case nir_op_unpack_64_2x32_split_y: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i64, "");
   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i32, "");
   case nir_op_unpack_32_2x16_split_x: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
   case nir_op_unpack_32_2x16_split_y: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
   case nir_op_cube_face_coord: {
      src[0] = to_float(&ctx->lc, src[0]);
      LLVMValueRef results[2];
      /* NOTE(review): 'LLVMValueRef in[3];' declaration lost */
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      results[0] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubesc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      results[1] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubetc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      LLVMValueRef ma = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubema", ctx->lc.f32, in, 3,
                                        FUNC_ATTR_READNONE);
      results[0] = build_fdiv(&ctx->lc, results[0], ma);
      results[1] = build_fdiv(&ctx->lc, results[1], ma);
      LLVMValueRef offset = LLVMConstReal(ctx->lc.f32, 0.5);
      results[0] = LLVMBuildFAdd(ctx->lc.builder, results[0], offset, "");
      results[1] = LLVMBuildFAdd(ctx->lc.builder, results[1], offset, "");
      result = build_gather_values(&ctx->lc, results, 2);
   case nir_op_cube_face_index: {
      src[0] = to_float(&ctx->lc, src[0]);
      /* NOTE(review): 'LLVMValueRef in[3];' declaration lost */
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      result = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubeid", ctx->lc.f32, in, 3,
                               FUNC_ATTR_READNONE);
   /* NOTE(review): 'default:' label lost */
      fprintf(stderr, "Unknown NIR alu instr: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
   }

   assert(instr->dest.dest.is_ssa);
   /* Canonical SSA representation is integer (or pointer). */
   result = to_integer_or_pointer(&ctx->lc, result);
   ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
/* Load a NIR variable (shader input/output, function temporary, or global
 * memory) and return the value bitcast to the destination def's type.
 *
 * NOTE(review): this extraction lost several declarations and control-flow
 * lines ('unsigned idx, comp;', the mode switch header, if(indir_index)
 * guards, 'break;'s) — marked inline; confirm against the original. */
static LLVMValueRef visit_load_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   LLVMValueRef values[8];
   int ve = instr->dest.ssa.num_components;  /* channels to load */
   LLVMValueRef indir_index;
   unsigned const_index;
   unsigned stride = 4;                      /* vec4 slot stride */
   int mode = deref->mode;

   /* NOTE(review): 'unsigned idx, comp;' declarations and a probable
    * 'if (var) {' guard lost here */
   bool vs_in = ctx->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in;
   idx = var->data.driver_location;
   comp = var->data.location_frac;
   mode = var->data.mode;

   get_deref_offset(ctx, deref, vs_in, NULL, NULL, &const_index, &indir_index);

   if (var->data.compact) {
      /* Compact arrays fold the component into the constant index. */
      /* NOTE(review): stride/comp adjustments lost here */
      const_index += comp;
   }

   /* 64-bit values occupy two 32-bit channels. */
   if (instr->dest.ssa.bit_size == 64 &&
       (deref->mode == nir_var_shader_in || deref->mode == nir_var_shader_out ||
        deref->mode == nir_var_function_temp))
      /* NOTE(review): the 've *= 2;' style adjustment and the
       * 'switch (mode) {' header were lost in extraction */

   case nir_var_shader_in:
      /* TODO: remove this after RADV switches to lowered IO */
      // if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
      //    return load_tess_varyings(ctx, instr, true);

      // if (ctx->stage == MESA_SHADER_GEOMETRY) {
      //    LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
      //    LLVMValueRef indir_index;
      //    unsigned const_index, vertex_index;
      //    get_deref_offset(ctx, deref, false, &vertex_index, NULL, &const_index, &indir_index);
      //    assert(indir_index == NULL);
      //    return ctx->abi->load_inputs(ctx->abi, var->data.location, var->data.driver_location,
      //                                 var->data.location_frac, instr->num_components, vertex_index,
      //                                 const_index, type);

      for (unsigned chan = comp; chan < ve + comp; chan++) {
         /* NOTE(review): 'if (indir_index) {' guard and 'unsigned count ='
          * prefix lost — the gather/extract path handles indirect indexing */
            glsl_count_attribute_slots(var->type, ctx->stage == MESA_SHADER_VERTEX);
            LLVMValueRef tmp_vec = build_gather_values_extended(
               &ctx->lc, ctx->inputs + idx + chan, count, stride, false, true);
            values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
         /* NOTE(review): '} else' lost — direct path below */
            values[chan] = ctx->inputs[idx + chan + const_index * stride];
      /* NOTE(review): 'break;' lost */
   case nir_var_function_temp:
      for (unsigned chan = 0; chan < ve; chan++) {
         /* NOTE(review): 'if (indir_index) {' guard lost */
            unsigned count = glsl_count_attribute_slots(var->type, false);
            LLVMValueRef tmp_vec = build_gather_values_extended(
               &ctx->lc, ctx->locals + idx + chan, count, stride, true, true);
            values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
         /* NOTE(review): '} else values[chan] =' lost before this load */
            LLVMBuildLoad(ctx->lc.builder, ctx->locals[idx + chan + const_index * stride], "");
      /* NOTE(review): 'break;' lost */
   case nir_var_shader_out:
      /* TODO: remove this after RADV switches to lowered IO */
      // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
      //    return load_tess_varyings(ctx, instr, false);

      // if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.fb_fetch_output && ctx->abi->emit_fbfetch)
      //    return ctx->abi->emit_fbfetch(ctx->abi);

      for (unsigned chan = comp; chan < ve + comp; chan++) {
         /* NOTE(review): 'if (indir_index) {' guard lost */
            unsigned count = glsl_count_attribute_slots(var->type, false);
            LLVMValueRef tmp_vec = build_gather_values_extended(
               &ctx->lc, ctx->outputs + idx + chan, count, stride, true, true);
            values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
         /* NOTE(review): '} else' lost — direct load below */
            values[chan] = LLVMBuildLoad(ctx->lc.builder,
                                         ctx->outputs[idx + chan + const_index * stride], "");
      /* NOTE(review): 'break;' lost */
   case nir_var_mem_global: {
      LLVMValueRef address = get_src(ctx, instr->src[0]);
      LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
      unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
      unsigned natural_stride = type_scalar_size_bytes(deref->type);
      unsigned stride = explicit_stride ? explicit_stride : natural_stride;
      int elem_size_bytes = get_elem_bits(&ctx->lc, result_type) / 8;
      bool split_loads = false;

      if (stride != natural_stride || split_loads) {
         /* Strided/split case: load each component separately and gather. */
         if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
            result_type = LLVMGetElementType(result_type);

         LLVMTypeRef ptr_type =
            LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
         address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");

         for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
            LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, i * stride / natural_stride, 0);
            /* NOTE(review): 'values[i] =' prefix lost before this load */
               LLVMBuildLoad(ctx->lc.builder, build_gep_ptr(&ctx->lc, address, offset), "");

            if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
               LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
         }
         return build_gather_values(&ctx->lc, values, instr->dest.ssa.num_components);
      /* NOTE(review): '} else {' marker lost — single whole-value load */
         LLVMTypeRef ptr_type =
            LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
         address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
         LLVMValueRef val = LLVMBuildLoad(ctx->lc.builder, address, "");

         if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
            LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
         /* NOTE(review): 'return val;' lost */
   /* NOTE(review): 'default:' label lost */
      unreachable("unhandle variable mode");

   /* NOTE(review): 'LLVMValueRef ret' declaration presumably lost above */
   ret = build_varying_gather_values(&ctx->lc, values, ve, comp);
   return LLVMBuildBitCast(ctx->lc.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
}
1865 static void visit_store_var(struct libresoc_nir_tran_ctx
*ctx
, nir_intrinsic_instr
*instr
)
1867 // if (ctx->lc.postponed_kill) {
1868 // LLVMValueRef cond = LLVMBuildLoad(ctx->lc.builder, ctx->lc.postponed_kill, "");
1869 // ac_build_ifcc(&ctx->lc, cond, 7002);
1872 nir_deref_instr
*deref
= nir_instr_as_deref(instr
->src
[0].ssa
->parent_instr
);
1873 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
1875 LLVMValueRef temp_ptr
, value
;
1878 LLVMValueRef src
= to_float(&ctx
->lc
, get_src(ctx
, instr
->src
[1]));
1879 int writemask
= instr
->const_index
[0];
1880 LLVMValueRef indir_index
;
1881 unsigned const_index
;
1884 get_deref_offset(ctx
, deref
, false, NULL
, NULL
, &const_index
, &indir_index
);
1885 idx
= var
->data
.driver_location
;
1886 comp
= var
->data
.location_frac
;
1888 if (var
->data
.compact
) {
1889 const_index
+= comp
;
1894 if (get_elem_bits(&ctx
->lc
, LLVMTypeOf(src
)) == 64 &&
1895 (deref
->mode
== nir_var_shader_out
|| deref
->mode
== nir_var_function_temp
)) {
1897 src
= LLVMBuildBitCast(ctx
->lc
.builder
, src
,
1898 LLVMVectorType(ctx
->lc
.f32
, get_llvm_num_components(src
) * 2), "");
1900 writemask
= widen_mask(writemask
, 2);
1903 writemask
= writemask
<< comp
;
1905 switch (deref
->mode
) {
1906 case nir_var_shader_out
:
1907 /* TODO: remove this after RADV switches to lowered IO */
1908 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
1909 // LLVMValueRef vertex_index = NULL;
1910 // LLVMValueRef indir_index = NULL;
1911 // unsigned const_index = 0;
1912 // const bool is_patch = var->data.patch ||
1913 // var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
1914 // var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
1916 // get_deref_offset(ctx, deref, false, NULL, is_patch ? NULL : &vertex_index, &const_index,
1919 // ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, const_index, src,
1920 // writemask, var->data.location_frac, var->data.driver_location);
1924 for (unsigned chan
= 0; chan
< 8; chan
++) {
1926 if (!(writemask
& (1 << chan
)))
1929 value
= llvm_extract_elem(&ctx
->lc
, src
, chan
- comp
);
1931 if (var
->data
.compact
)
1934 unsigned count
= glsl_count_attribute_slots(var
->type
, false);
1936 LLVMValueRef tmp_vec
= build_gather_values_extended(
1937 &ctx
->lc
, ctx
->outputs
+ idx
+ chan
, count
, stride
, true, true);
1939 tmp_vec
= LLVMBuildInsertElement(ctx
->lc
.builder
, tmp_vec
, value
, indir_index
, "");
1940 build_store_values_extended(&ctx
->lc
, ctx
->outputs
+ idx
+ chan
, count
, stride
,
1944 temp_ptr
= ctx
->outputs
[idx
+ chan
+ const_index
* stride
];
1946 LLVMBuildStore(ctx
->lc
.builder
, value
, temp_ptr
);
1950 case nir_var_function_temp
:
1951 for (unsigned chan
= 0; chan
< 8; chan
++) {
1952 if (!(writemask
& (1 << chan
)))
1955 value
= llvm_extract_elem(&ctx
->lc
, src
, chan
);
1957 unsigned count
= glsl_count_attribute_slots(var
->type
, false);
1959 LLVMValueRef tmp_vec
= build_gather_values_extended(
1960 &ctx
->lc
, ctx
->locals
+ idx
+ chan
, count
, 4, true, true);
1962 tmp_vec
= LLVMBuildInsertElement(ctx
->lc
.builder
, tmp_vec
, value
, indir_index
, "");
1963 build_store_values_extended(&ctx
->lc
, ctx
->locals
+ idx
+ chan
, count
, 4, tmp_vec
);
1965 temp_ptr
= ctx
->locals
[idx
+ chan
+ const_index
* 4];
1967 LLVMBuildStore(ctx
->lc
.builder
, value
, temp_ptr
);
1972 case nir_var_mem_global
: {
1973 int writemask
= instr
->const_index
[0];
1974 LLVMValueRef address
= get_src(ctx
, instr
->src
[0]);
1975 LLVMValueRef val
= get_src(ctx
, instr
->src
[1]);
1977 unsigned explicit_stride
= glsl_get_explicit_stride(deref
->type
);
1978 unsigned natural_stride
= type_scalar_size_bytes(deref
->type
);
1979 unsigned stride
= explicit_stride
? explicit_stride
: natural_stride
;
1980 int elem_size_bytes
= get_elem_bits(&ctx
->lc
, LLVMTypeOf(val
)) / 8;
1981 bool split_stores
= false;
1983 LLVMTypeRef ptr_type
=
1984 LLVMPointerType(LLVMTypeOf(val
), LLVMGetPointerAddressSpace(LLVMTypeOf(address
)));
1985 address
= LLVMBuildBitCast(ctx
->lc
.builder
, address
, ptr_type
, "");
1987 if (writemask
== (1u << get_llvm_num_components(val
)) - 1 && stride
== natural_stride
&&
1989 LLVMTypeRef ptr_type
=
1990 LLVMPointerType(LLVMTypeOf(val
), LLVMGetPointerAddressSpace(LLVMTypeOf(address
)));
1991 address
= LLVMBuildBitCast(ctx
->lc
.builder
, address
, ptr_type
, "");
1993 val
= LLVMBuildBitCast(ctx
->lc
.builder
, val
, LLVMGetElementType(LLVMTypeOf(address
)), "");
1994 LLVMValueRef store
= LLVMBuildStore(ctx
->lc
.builder
, val
, address
);
1996 if (nir_intrinsic_access(instr
) & (ACCESS_COHERENT
| ACCESS_VOLATILE
))
1997 LLVMSetOrdering(store
, LLVMAtomicOrderingMonotonic
);
1999 LLVMTypeRef val_type
= LLVMTypeOf(val
);
2000 if (LLVMGetTypeKind(LLVMTypeOf(val
)) == LLVMVectorTypeKind
)
2001 val_type
= LLVMGetElementType(val_type
);
2003 LLVMTypeRef ptr_type
=
2004 LLVMPointerType(val_type
, LLVMGetPointerAddressSpace(LLVMTypeOf(address
)));
2005 address
= LLVMBuildBitCast(ctx
->lc
.builder
, address
, ptr_type
, "");
2006 for (unsigned chan
= 0; chan
< 4; chan
++) {
2007 if (!(writemask
& (1 << chan
)))
2010 LLVMValueRef offset
= LLVMConstInt(ctx
->lc
.i32
, chan
* stride
/ natural_stride
, 0);
2012 LLVMValueRef ptr
= build_gep_ptr(&ctx
->lc
, address
, offset
);
2013 LLVMValueRef src
= llvm_extract_elem(&ctx
->lc
, val
, chan
);
2014 src
= LLVMBuildBitCast(ctx
->lc
.builder
, src
, LLVMGetElementType(LLVMTypeOf(ptr
)), "");
2015 LLVMValueRef store
= LLVMBuildStore(ctx
->lc
.builder
, src
, ptr
);
2017 if (nir_intrinsic_access(instr
) & (ACCESS_COHERENT
| ACCESS_VOLATILE
))
2018 LLVMSetOrdering(store
, LLVMAtomicOrderingMonotonic
);
2028 // if (ctx->ac.postponed_kill)
2029 // ac_build_endif(&ctx->ac, 7002);
2032 static void visit_intrinsic(struct libresoc_nir_tran_ctx
*ctx
, nir_intrinsic_instr
*instr
)
2034 LLVMValueRef result
= NULL
;
2036 switch (instr
->intrinsic
) {
2037 case nir_intrinsic_ballot
:
2038 // result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
2039 // if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
2040 // result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
2042 case nir_intrinsic_read_invocation
:
2044 // ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
2046 case nir_intrinsic_read_first_invocation
:
2047 // result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
2049 case nir_intrinsic_load_subgroup_invocation
:
2050 // result = ac_get_thread_id(&ctx->ac);
2052 case nir_intrinsic_load_work_group_id
: {
2053 // LLVMValueRef values[3];
2055 // for (int i = 0; i < 3; i++) {
2056 // values[i] = ctx->args->workgroup_ids[i].used
2057 // ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
2061 // result = ac_build_gather_values(&ctx->ac, values, 3);
2064 case nir_intrinsic_load_base_vertex
:
2065 case nir_intrinsic_load_first_vertex
:
2066 //result = ctx->abi->load_base_vertex(ctx->abi);
2067 result
= LLVMGetParam(ctx
->main_function
, ctx
->args
.base_vertex
.arg_index
);
2069 case nir_intrinsic_load_local_group_size
:
2070 // result = ctx->abi->load_local_group_size(ctx->abi);
2072 case nir_intrinsic_load_vertex_id
:
2073 result
= LLVMBuildAdd(ctx
->lc
.builder
, LLVMGetParam(ctx
->main_function
, ctx
->args
.vertex_id
.arg_index
),
2074 LLVMGetParam(ctx
->main_function
, ctx
->args
.base_vertex
.arg_index
), "");
2076 case nir_intrinsic_load_vertex_id_zero_base
: {
2077 // result = ctx->abi->vertex_id;
2078 result
= LLVMGetParam(ctx
->main_function
, ctx
->args
.vertex_id
.arg_index
);
2081 case nir_intrinsic_load_local_invocation_id
: {
2082 // result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
2085 case nir_intrinsic_load_base_instance
:
2086 // result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
2088 case nir_intrinsic_load_draw_id
:
2089 // result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
2091 case nir_intrinsic_load_view_index
:
2092 // result = ac_get_arg(&ctx->ac, ctx->args->view_index);
2094 case nir_intrinsic_load_invocation_id
:
2095 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2096 // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
2098 // if (ctx->ac.chip_class >= GFX10) {
2100 // LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
2101 // LLVMConstInt(ctx->ac.i32, 127, 0), "");
2103 // result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
2107 case nir_intrinsic_load_primitive_id
:
2108 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
2109 // result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
2110 // } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2111 // result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
2112 // } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
2113 // result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
2115 // fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
2117 // case nir_intrinsic_load_sample_id:
2118 // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
2120 case nir_intrinsic_load_sample_pos
:
2121 // result = load_sample_pos(ctx);
2123 case nir_intrinsic_load_sample_mask_in
:
2124 // result = ctx->abi->load_sample_mask_in(ctx->abi);
2126 case nir_intrinsic_load_frag_coord
: {
2127 // LLVMValueRef values[4] = {
2128 // ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
2129 // ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
2130 // ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
2131 // result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
2134 case nir_intrinsic_load_layer_id
:
2135 // result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
2137 case nir_intrinsic_load_front_face
:
2138 // result = ac_get_arg(&ctx->ac, ctx->args->front_face);
2140 case nir_intrinsic_load_helper_invocation
:
2141 // result = ac_build_load_helper_invocation(&ctx->ac);
2143 case nir_intrinsic_is_helper_invocation
:
2144 // result = ac_build_is_helper_invocation(&ctx->ac);
2146 case nir_intrinsic_load_color0
:
2147 // result = ctx->abi->color0;
2149 case nir_intrinsic_load_color1
:
2150 // result = ctx->abi->color1;
2152 case nir_intrinsic_load_user_data_amd
:
2153 // assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
2154 // result = ctx->abi->user_data;
2156 case nir_intrinsic_load_instance_id
:
2157 // result = ctx->abi->instance_id;
2159 case nir_intrinsic_load_num_work_groups
:
2160 // result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
2162 case nir_intrinsic_load_local_invocation_index
:
2163 // result = visit_load_local_invocation_index(ctx);
2165 case nir_intrinsic_load_subgroup_id
:
2166 // result = visit_load_subgroup_id(ctx);
2168 case nir_intrinsic_load_num_subgroups
:
2169 // result = visit_load_num_subgroups(ctx);
2171 case nir_intrinsic_first_invocation
:
2172 // result = visit_first_invocation(ctx);
2174 case nir_intrinsic_load_push_constant
:
2175 // result = visit_load_push_constant(ctx, instr);
2177 case nir_intrinsic_vulkan_resource_index
: {
2178 // LLVMValueRef index = get_src(ctx, instr->src[0]);
2179 // unsigned desc_set = nir_intrinsic_desc_set(instr);
2180 // unsigned binding = nir_intrinsic_binding(instr);
2182 // result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
2185 case nir_intrinsic_vulkan_resource_reindex
:
2186 // result = visit_vulkan_resource_reindex(ctx, instr);
2188 case nir_intrinsic_store_ssbo
:
2189 // visit_store_ssbo(ctx, instr);
2191 case nir_intrinsic_load_ssbo
:
2192 // result = visit_load_buffer(ctx, instr);
2194 case nir_intrinsic_ssbo_atomic_add
:
2195 case nir_intrinsic_ssbo_atomic_imin
:
2196 case nir_intrinsic_ssbo_atomic_umin
:
2197 case nir_intrinsic_ssbo_atomic_imax
:
2198 case nir_intrinsic_ssbo_atomic_umax
:
2199 case nir_intrinsic_ssbo_atomic_and
:
2200 case nir_intrinsic_ssbo_atomic_or
:
2201 case nir_intrinsic_ssbo_atomic_xor
:
2202 case nir_intrinsic_ssbo_atomic_exchange
:
2203 case nir_intrinsic_ssbo_atomic_comp_swap
:
2204 // result = visit_atomic_ssbo(ctx, instr);
2206 case nir_intrinsic_load_ubo
:
2207 // result = visit_load_ubo_buffer(ctx, instr);
2209 case nir_intrinsic_get_buffer_size
:
2210 // result = visit_get_buffer_size(ctx, instr);
2212 case nir_intrinsic_load_deref
:
2213 result
= visit_load_var(ctx
, instr
);
2215 case nir_intrinsic_store_deref
:
2216 visit_store_var(ctx
, instr
);
2218 case nir_intrinsic_load_input
:
2219 case nir_intrinsic_load_input_vertex
:
2220 case nir_intrinsic_load_per_vertex_input
:
2221 // result = visit_load(ctx, instr, false);
2223 case nir_intrinsic_load_output
:
2224 case nir_intrinsic_load_per_vertex_output
:
2225 // result = visit_load(ctx, instr, true);
2227 case nir_intrinsic_store_output
:
2228 case nir_intrinsic_store_per_vertex_output
:
2229 visit_store_output(ctx
, instr
);
2231 case nir_intrinsic_load_shared
:
2232 result
= visit_load_shared(ctx
, instr
);
2234 case nir_intrinsic_store_shared
:
2235 visit_store_shared(ctx
, instr
);
2237 case nir_intrinsic_bindless_image_samples
:
2238 case nir_intrinsic_image_deref_samples
:
2239 // result = visit_image_samples(ctx, instr);
2241 case nir_intrinsic_bindless_image_load
:
2242 // result = visit_image_load(ctx, instr, true);
2244 case nir_intrinsic_image_deref_load
:
2245 // result = visit_image_load(ctx, instr, false);
2247 case nir_intrinsic_bindless_image_store
:
2248 // visit_image_store(ctx, instr, true);
2250 case nir_intrinsic_image_deref_store
:
2251 // visit_image_store(ctx, instr, false);
2253 case nir_intrinsic_bindless_image_atomic_add
:
2254 case nir_intrinsic_bindless_image_atomic_imin
:
2255 case nir_intrinsic_bindless_image_atomic_umin
:
2256 case nir_intrinsic_bindless_image_atomic_imax
:
2257 case nir_intrinsic_bindless_image_atomic_umax
:
2258 case nir_intrinsic_bindless_image_atomic_and
:
2259 case nir_intrinsic_bindless_image_atomic_or
:
2260 case nir_intrinsic_bindless_image_atomic_xor
:
2261 case nir_intrinsic_bindless_image_atomic_exchange
:
2262 case nir_intrinsic_bindless_image_atomic_comp_swap
:
2263 case nir_intrinsic_bindless_image_atomic_inc_wrap
:
2264 case nir_intrinsic_bindless_image_atomic_dec_wrap
:
2265 // result = visit_image_atomic(ctx, instr, true);
2267 case nir_intrinsic_image_deref_atomic_add
:
2268 case nir_intrinsic_image_deref_atomic_imin
:
2269 case nir_intrinsic_image_deref_atomic_umin
:
2270 case nir_intrinsic_image_deref_atomic_imax
:
2271 case nir_intrinsic_image_deref_atomic_umax
:
2272 case nir_intrinsic_image_deref_atomic_and
:
2273 case nir_intrinsic_image_deref_atomic_or
:
2274 case nir_intrinsic_image_deref_atomic_xor
:
2275 case nir_intrinsic_image_deref_atomic_exchange
:
2276 case nir_intrinsic_image_deref_atomic_comp_swap
:
2277 case nir_intrinsic_image_deref_atomic_inc_wrap
:
2278 case nir_intrinsic_image_deref_atomic_dec_wrap
:
2279 // result = visit_image_atomic(ctx, instr, false);
2281 case nir_intrinsic_bindless_image_size
:
2282 // result = visit_image_size(ctx, instr, true);
2284 case nir_intrinsic_image_deref_size
:
2285 // result = visit_image_size(ctx, instr, false);
2287 case nir_intrinsic_shader_clock
:
2288 // result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
2290 case nir_intrinsic_discard
:
2291 case nir_intrinsic_discard_if
:
2292 // emit_discard(ctx, instr);
2294 case nir_intrinsic_demote
:
2295 case nir_intrinsic_demote_if
:
2296 // emit_demote(ctx, instr);
2298 case nir_intrinsic_memory_barrier
:
2299 case nir_intrinsic_group_memory_barrier
:
2300 case nir_intrinsic_memory_barrier_buffer
:
2301 case nir_intrinsic_memory_barrier_image
:
2302 case nir_intrinsic_memory_barrier_shared
:
2303 // emit_membar(&ctx->ac, instr);
2305 case nir_intrinsic_scoped_barrier
: {
2306 // assert(!(nir_intrinsic_memory_semantics(instr) &
2307 // (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
2309 // nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
2311 // unsigned wait_flags = 0;
2312 // if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
2313 // wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2314 // if (modes & nir_var_mem_shared)
2315 // wait_flags |= AC_WAIT_LGKM;
2318 // ac_build_waitcnt(&ctx->ac, wait_flags);
2320 // if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
2321 // ac_emit_barrier(&ctx->ac, ctx->stage);
2324 case nir_intrinsic_memory_barrier_tcs_patch
:
2326 case nir_intrinsic_control_barrier
:
2327 // ac_emit_barrier(&ctx->ac, ctx->stage);
2329 case nir_intrinsic_shared_atomic_add
:
2330 case nir_intrinsic_shared_atomic_imin
:
2331 case nir_intrinsic_shared_atomic_umin
:
2332 case nir_intrinsic_shared_atomic_imax
:
2333 case nir_intrinsic_shared_atomic_umax
:
2334 case nir_intrinsic_shared_atomic_and
:
2335 case nir_intrinsic_shared_atomic_or
:
2336 case nir_intrinsic_shared_atomic_xor
:
2337 case nir_intrinsic_shared_atomic_exchange
:
2338 case nir_intrinsic_shared_atomic_comp_swap
:
2339 case nir_intrinsic_shared_atomic_fadd
: {
2340 // LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size);
2341 // result = visit_var_atomic(ctx, instr, ptr, 1);
2344 case nir_intrinsic_deref_atomic_add
:
2345 case nir_intrinsic_deref_atomic_imin
:
2346 case nir_intrinsic_deref_atomic_umin
:
2347 case nir_intrinsic_deref_atomic_imax
:
2348 case nir_intrinsic_deref_atomic_umax
:
2349 case nir_intrinsic_deref_atomic_and
:
2350 case nir_intrinsic_deref_atomic_or
:
2351 case nir_intrinsic_deref_atomic_xor
:
2352 case nir_intrinsic_deref_atomic_exchange
:
2353 case nir_intrinsic_deref_atomic_comp_swap
:
2354 case nir_intrinsic_deref_atomic_fadd
: {
2355 // LLVMValueRef ptr = get_src(ctx, instr->src[0]);
2356 // result = visit_var_atomic(ctx, instr, ptr, 1);
2359 case nir_intrinsic_load_barycentric_pixel
:
2360 // result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
2362 case nir_intrinsic_load_barycentric_centroid
:
2363 // result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
2365 case nir_intrinsic_load_barycentric_sample
:
2366 // result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
2368 case nir_intrinsic_load_barycentric_model
:
2369 // result = barycentric_model(ctx);
2371 case nir_intrinsic_load_barycentric_at_offset
: {
2372 // LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
2373 // result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
2376 case nir_intrinsic_load_barycentric_at_sample
: {
2377 // LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
2378 // result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
2381 case nir_intrinsic_load_interpolated_input
: {
2382 /* We assume any indirect loads have been lowered away */
2383 // ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
2385 // assert(offset[0].i32 == 0);
2387 // LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
2388 // unsigned index = nir_intrinsic_base(instr);
2389 // unsigned component = nir_intrinsic_component(instr);
2390 // result = load_interpolated_input(ctx, interp_param, index, component,
2391 // instr->dest.ssa.num_components, instr->dest.ssa.bit_size);
2394 case nir_intrinsic_emit_vertex
:
2395 // ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
2397 case nir_intrinsic_emit_vertex_with_counter
: {
2398 // unsigned stream = nir_intrinsic_stream_id(instr);
2399 // LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
2400 // ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
2403 case nir_intrinsic_end_primitive
:
2404 case nir_intrinsic_end_primitive_with_counter
:
2405 // ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
2407 case nir_intrinsic_load_tess_coord
:
2408 // result = ctx->abi->load_tess_coord(ctx->abi);
2410 case nir_intrinsic_load_tess_level_outer
:
2411 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
2413 case nir_intrinsic_load_tess_level_inner
:
2414 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
2416 case nir_intrinsic_load_tess_level_outer_default
:
2417 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
2419 case nir_intrinsic_load_tess_level_inner_default
:
2420 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
2422 case nir_intrinsic_load_patch_vertices_in
:
2423 // result = ctx->abi->load_patch_vertices_in(ctx->abi);
2425 case nir_intrinsic_vote_all
: {
2426 // LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
2427 // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
2430 case nir_intrinsic_vote_any
: {
2431 // LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
2432 // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
2435 case nir_intrinsic_shuffle
:
2436 // if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
2437 // (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
2439 // ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
2441 // LLVMValueRef src = get_src(ctx, instr->src[0]);
2442 // LLVMValueRef index = get_src(ctx, instr->src[1]);
2443 // LLVMTypeRef type = LLVMTypeOf(src);
2444 // struct waterfall_context wctx;
2445 // LLVMValueRef index_val;
2447 // index_val = enter_waterfall(ctx, &wctx, index, true);
2449 // src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
2451 // result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
2452 // (LLVMValueRef[]){src, index_val}, 2,
2453 // AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
2455 // result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
2457 // result = exit_waterfall(ctx, &wctx, result);
2460 case nir_intrinsic_reduce
:
2461 // result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
2462 // instr->const_index[1]);
2464 case nir_intrinsic_inclusive_scan
:
2466 // ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
2468 case nir_intrinsic_exclusive_scan
:
2470 // ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
2472 case nir_intrinsic_quad_broadcast
: {
2473 // unsigned lane = nir_src_as_uint(instr->src[1]);
2474 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
2477 case nir_intrinsic_quad_swap_horizontal
:
2478 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
2480 case nir_intrinsic_quad_swap_vertical
:
2481 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
2483 case nir_intrinsic_quad_swap_diagonal
:
2484 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
2486 case nir_intrinsic_quad_swizzle_amd
: {
2487 // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
2488 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
2489 // (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
2492 case nir_intrinsic_masked_swizzle_amd
: {
2493 // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
2494 // result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
2497 case nir_intrinsic_write_invocation_amd
:
2498 // result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
2499 // get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
2501 case nir_intrinsic_mbcnt_amd
:
2502 // result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
2504 case nir_intrinsic_load_scratch
: {
2505 LLVMValueRef offset
= get_src(ctx
, instr
->src
[0]);
2506 LLVMValueRef ptr
= build_gep0(&ctx
->lc
, ctx
->scratch
, offset
);
2507 LLVMTypeRef comp_type
= LLVMIntTypeInContext(ctx
->lc
.context
, instr
->dest
.ssa
.bit_size
);
2508 LLVMTypeRef vec_type
= instr
->dest
.ssa
.num_components
== 1
2510 : LLVMVectorType(comp_type
, instr
->dest
.ssa
.num_components
);
2511 unsigned addr_space
= LLVMGetPointerAddressSpace(LLVMTypeOf(ptr
));
2512 ptr
= LLVMBuildBitCast(ctx
->lc
.builder
, ptr
, LLVMPointerType(vec_type
, addr_space
), "");
2513 result
= LLVMBuildLoad(ctx
->lc
.builder
, ptr
, "");
2516 case nir_intrinsic_store_scratch
: {
2517 LLVMValueRef offset
= get_src(ctx
, instr
->src
[1]);
2518 LLVMValueRef ptr
= build_gep0(&ctx
->lc
, ctx
->scratch
, offset
);
2519 LLVMTypeRef comp_type
= LLVMIntTypeInContext(ctx
->lc
.context
, instr
->src
[0].ssa
->bit_size
);
2520 unsigned addr_space
= LLVMGetPointerAddressSpace(LLVMTypeOf(ptr
));
2521 ptr
= LLVMBuildBitCast(ctx
->lc
.builder
, ptr
, LLVMPointerType(comp_type
, addr_space
), "");
2522 LLVMValueRef src
= get_src(ctx
, instr
->src
[0]);
2523 unsigned wrmask
= nir_intrinsic_write_mask(instr
);
2526 u_bit_scan_consecutive_range(&wrmask
, &start
, &count
);
2528 LLVMValueRef offset
= LLVMConstInt(ctx
->lc
.i32
, start
, false);
2529 LLVMValueRef offset_ptr
= LLVMBuildGEP(ctx
->lc
.builder
, ptr
, &offset
, 1, "");
2530 LLVMTypeRef vec_type
= count
== 1 ? comp_type
: LLVMVectorType(comp_type
, count
);
2531 offset_ptr
= LLVMBuildBitCast(ctx
->lc
.builder
, offset_ptr
,
2532 LLVMPointerType(vec_type
, addr_space
), "");
2533 LLVMValueRef offset_src
= extract_components(&ctx
->lc
, src
, start
, count
);
2534 LLVMBuildStore(ctx
->lc
.builder
, offset_src
, offset_ptr
);
2538 case nir_intrinsic_load_constant
: {
2539 unsigned base
= nir_intrinsic_base(instr
);
2540 unsigned range
= nir_intrinsic_range(instr
);
2542 LLVMValueRef offset
= get_src(ctx
, instr
->src
[0]);
2543 offset
= LLVMBuildAdd(ctx
->lc
.builder
, offset
, LLVMConstInt(ctx
->lc
.i32
, base
, false), "");
2545 /* Clamp the offset to avoid out-of-bound access because global
2546 * instructions can't handle them.
2548 LLVMValueRef size
= LLVMConstInt(ctx
->lc
.i32
, base
+ range
, false);
2549 LLVMValueRef cond
= LLVMBuildICmp(ctx
->lc
.builder
, LLVMIntULT
, offset
, size
, "");
2550 offset
= LLVMBuildSelect(ctx
->lc
.builder
, cond
, offset
, size
, "");
2552 LLVMValueRef ptr
= build_gep0(&ctx
->lc
, ctx
->constant_data
, offset
);
2553 LLVMTypeRef comp_type
= LLVMIntTypeInContext(ctx
->lc
.context
, instr
->dest
.ssa
.bit_size
);
2554 LLVMTypeRef vec_type
= instr
->dest
.ssa
.num_components
== 1
2556 : LLVMVectorType(comp_type
, instr
->dest
.ssa
.num_components
);
2557 unsigned addr_space
= LLVMGetPointerAddressSpace(LLVMTypeOf(ptr
));
2558 ptr
= LLVMBuildBitCast(ctx
->lc
.builder
, ptr
, LLVMPointerType(vec_type
, addr_space
), "");
2559 result
= LLVMBuildLoad(ctx
->lc
.builder
, ptr
, "");
2563 fprintf(stderr
, "Unknown intrinsic: ");
2564 nir_print_instr(&instr
->instr
, stderr
);
2565 fprintf(stderr
, "\n");
2569 ctx
->ssa_defs
[instr
->dest
.ssa
.index
] = result
;
2573 static void visit_cf_list(struct libresoc_nir_tran_ctx
*ctx
, struct exec_list
*list
);
2575 static void visit_block(struct libresoc_nir_tran_ctx
*ctx
, nir_block
*block
)
2577 nir_foreach_instr (instr
, block
) {
2578 switch (instr
->type
) {
2579 case nir_instr_type_alu
:
2580 visit_alu(ctx
, nir_instr_as_alu(instr
));
2582 case nir_instr_type_load_const
:
2583 visit_load_const(ctx
, nir_instr_as_load_const(instr
));
2585 case nir_instr_type_intrinsic
:
2586 visit_intrinsic(ctx
, nir_instr_as_intrinsic(instr
));
2588 case nir_instr_type_tex
:
2589 // visit_tex(ctx, nir_instr_as_tex(instr));
2591 case nir_instr_type_phi
:
2592 visit_phi(ctx
, nir_instr_as_phi(instr
));
2594 case nir_instr_type_ssa_undef
:
2595 visit_ssa_undef(ctx
, nir_instr_as_ssa_undef(instr
));
2597 case nir_instr_type_jump
:
2598 visit_jump(&ctx
->lc
, nir_instr_as_jump(instr
));
2600 case nir_instr_type_deref
:
2601 visit_deref(ctx
, nir_instr_as_deref(instr
));
2604 fprintf(stderr
, "Unknown NIR instr type: ");
2605 nir_print_instr(instr
, stderr
);
2606 fprintf(stderr
, "\n");
2612 static void visit_if(struct libresoc_nir_tran_ctx
*ctx
, nir_if
*if_stmt
)
2614 LLVMValueRef value
= get_src(ctx
, if_stmt
->condition
);
2616 nir_block
*then_block
= (nir_block
*)exec_list_get_head(&if_stmt
->then_list
);
2618 build_uif(&ctx
->lc
, value
, then_block
->index
);
2620 visit_cf_list(ctx
, &if_stmt
->then_list
);
2622 if (!exec_list_is_empty(&if_stmt
->else_list
)) {
2623 nir_block
*else_block
= (nir_block
*)exec_list_get_head(&if_stmt
->else_list
);
2625 build_else(&ctx
->lc
, else_block
->index
);
2626 visit_cf_list(ctx
, &if_stmt
->else_list
);
2629 build_endif(&ctx
->lc
, then_block
->index
);
2632 static void visit_loop(struct libresoc_nir_tran_ctx
*ctx
, nir_loop
*loop
)
2634 nir_block
*first_loop_block
= (nir_block
*)exec_list_get_head(&loop
->body
);
2636 build_bgnloop(&ctx
->lc
, first_loop_block
->index
);
2638 visit_cf_list(ctx
, &loop
->body
);
2640 build_endloop(&ctx
->lc
, first_loop_block
->index
);
2643 static void visit_cf_list(struct libresoc_nir_tran_ctx
*ctx
, struct exec_list
*list
)
2645 foreach_list_typed(nir_cf_node
, node
, node
, list
)
2647 switch (node
->type
) {
2648 case nir_cf_node_block
:
2649 visit_block(ctx
, nir_cf_node_as_block(node
));
2652 case nir_cf_node_if
:
2653 visit_if(ctx
, nir_cf_node_as_if(node
));
2656 case nir_cf_node_loop
:
2657 visit_loop(ctx
, nir_cf_node_as_loop(node
));
2666 LLVMModuleRef
libresoc_nir_translate(struct libresoc_llvm
*llvm_ref
, struct nir_shader
*nir
)
2668 struct libresoc_nir_tran_ctx ctx
= {};
2669 struct nir_function
*func
;
2670 char shader_name
[60];
2671 sprintf(shader_name
, "libresoc-shader-%s", gl_shader_stage_name(nir
->info
.stage
));
2672 LLVMModuleRef mod
= LLVMModuleCreateWithNameInContext(shader_name
, llvm_ref
->lc
.context
);
2673 ctx
.lc
.module
= &mod
;
2674 ctx
.lc
= llvm_ref
->lc
;
2675 ctx
.stage
= nir
->info
.stage
;
2676 ctx
.info
= &nir
->info
;
2678 if (ctx
.stage
== MESA_SHADER_VERTEX
) {
2679 add_arg(&ctx
.args
, ARG_SGPR
, 1, ARG_INT
, &ctx
.args
.base_vertex
);
2680 add_arg(&ctx
.args
, ARG_SGPR
, 1, ARG_INT
, &ctx
.args
.start_instance
);
2681 add_arg(&ctx
.args
, ARG_VGPR
, 1, ARG_INT
, &ctx
.args
.vertex_id
);
2683 LLVMTypeRef arg_types
[32];
2684 LLVMTypeRef ret_type
= LLVMVoidTypeInContext(ctx
.lc
.context
);
2685 for (unsigned i
= 0; i
< ctx
.args
.arg_count
; i
++) {
2686 arg_types
[i
] = arg_llvm_type(ctx
.args
.args
[i
].type
, ctx
.args
.args
[i
].size
, &ctx
.lc
);
2689 //TODO: this is zero argument function and returns void
2690 LLVMTypeRef main_function_type
= LLVMFunctionType(ret_type
, arg_types
, ctx
.args
.arg_count
, 0);
2692 LLVMValueRef main_function
= LLVMAddFunction(mod
, "main_function", main_function_type
);
2693 LLVMBasicBlockRef main_function_body
=
2694 LLVMAppendBasicBlockInContext(ctx
.lc
.context
, main_function
, "main_body");
2695 LLVMPositionBuilderAtEnd(ctx
.lc
.builder
, main_function_body
);
2696 ctx
.main_function
= main_function
;
2698 if (!nir
->info
.io_lowered
) {
2699 nir_foreach_shader_out_variable(variable
, nir
)
2701 handle_shader_output_decl(&ctx
, nir
, variable
, ctx
.stage
);
2704 ctx
.defs
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
, _mesa_key_pointer_equal
);
2705 ctx
.phis
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
, _mesa_key_pointer_equal
);
2706 ctx
.vars
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
, _mesa_key_pointer_equal
);
2707 func
= (struct nir_function
*)exec_list_get_head(&nir
->functions
);
2709 nir_index_ssa_defs(func
->impl
);
2710 ctx
.ssa_defs
= calloc(func
->impl
->ssa_alloc
, sizeof(LLVMValueRef
));
2711 setup_locals(&ctx
, func
);
2712 setup_scratch(&ctx
, nir
);
2713 setup_constant_data(&ctx
, nir
);
2715 // if (gl_shader_stage_is_compute(nir->info.stage))
2716 // setup_shared(&ctx, nir);
2717 visit_cf_list(&ctx
, &func
->impl
->body
);
2718 LLVMBuildRetVoid(ctx
.lc
.builder
);
2720 LLVMVerifyModule(mod
, LLVMPrintMessageAction
, &error
);
2721 LLVMDumpModule(mod
);
2722 LLVMDisposeMessage(error
);
2723 LLVMOrcModuleHandle mod_handle
;
2724 LLVMErrorRef error_ref
= LLVMOrcAddEagerlyCompiledIR(llvm_ref
->orc_ref
,
2728 (void *)(llvm_ref
->orc_ref
));
2729 LLVMDumpModule(mod
);
2730 char *def_triple
= LLVMGetDefaultTargetTriple(); // E.g. "x86_64-linux-gnu"
2731 LLVMDisasmContextRef disasm
= LLVMCreateDisasm(def_triple
, NULL
,
2735 LLVMOrcTargetAddress MainAddr
;
2736 LLVMOrcGetSymbolAddress(llvm_ref
->orc_ref
, &MainAddr
,"main_function");
2737 const uint8_t *bytes
= (const uint8_t *)MainAddr
;
2741 uint64_t extent
= 200;
2742 while (pc
< extent
) {
2746 * Print address. We use addresses relative to the start of the function,
2747 * so that between runs.
2751 Size
= LLVMDisasmInstruction(disasm
, (uint8_t *)bytes
+ pc
, extent
- pc
, 0, outline
,
2755 * Print the instruction.
2757 printf("\t%s \n", outline
);
2761 * Stop disassembling on return statements, if there is no record of a
2762 * jump to a successive address.
2764 * XXX: This currently assumes x86
2767 if (Size
== 1 && bytes
[pc
] == 0xc3) {
2783 // LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod");
2784 // LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() };
2785 // LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0);
2786 // LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type);
2787 // LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry");
2788 // LLVMBuilderRef builder = LLVMCreateBuilder();
2789 // LLVMPositionBuilderAtEnd(builder, entry);
2790 // LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp");
2791 // LLVMBuildRet(builder, tmp);
2792 // char *error = NULL;
2793 // LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
2794 // LLVMDumpModule(mod);
2795 // LLVMDisposeMessage(error);
2796 // LLVMOrcModuleHandle mod_handle;
2797 // LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref,
2800 // orc_sym_resolver,
2801 // (void *)(llvm_ref->orc_ref));