Add code to process the NIR shared load/store intrinsics.
[mesa.git] / src / libre-soc / vulkan / libresoc_llvm.c
#include "libresoc_llvm.h"
#include "libresoc_llvm_build.h"

#include <stdio.h>
#include <stdlib.h>
3
4 void InitLLVM(struct libresoc_llvm *llvm_ref)
5 {
6 // LLVMInitializeNativeTarget();
7 // LLVMInitializeNativeAsmPrinter();
8 LLVMInitializeAllAsmPrinters();
9 LLVMInitializeAllTargets();
10 LLVMInitializeAllTargetInfos();
11 LLVMInitializeAllTargetMCs();
12 LLVMInitializeAllDisassemblers();
13 //LLVMLinkInMCJIT();
14 char *def_triple = LLVMGetDefaultTargetTriple(); // E.g. "x86_64-linux-gnu"
15 char *error;
16 LLVMTargetRef target_ref;
17 if (LLVMGetTargetFromTriple(def_triple, &target_ref, &error)) {
18 // Fatal error
19 }
20
21 if (!LLVMTargetHasJIT(target_ref)) {
22 // Fatal error, cannot do JIT on this platform
23 }
24
25 LLVMTargetMachineRef tm_ref =
26 LLVMCreateTargetMachine(target_ref, def_triple, "", "",
27 LLVMCodeGenLevelDefault,
28 LLVMRelocDefault,
29 LLVMCodeModelJITDefault);
30 //assert(tm_ref);
31 LLVMDisposeErrorMessage(def_triple);
32 llvm_ref->orc_ref = LLVMOrcCreateInstance(tm_ref);
33 llvm_ref->lc.context = LLVMContextCreate();
34 llvm_ref->lc.builder = LLVMCreateBuilderInContext(llvm_ref->lc.context);
35 llvm_ref->lc.voidt = LLVMVoidTypeInContext(llvm_ref->lc.context);
36 llvm_ref->lc.i1 = LLVMInt1TypeInContext(llvm_ref->lc.context);
37 llvm_ref->lc.i8 = LLVMInt8TypeInContext(llvm_ref->lc.context);
38 llvm_ref->lc.i16 = LLVMIntTypeInContext(llvm_ref->lc.context, 16);
39 llvm_ref->lc.i32 = LLVMIntTypeInContext(llvm_ref->lc.context, 32);
40 llvm_ref->lc.i64 = LLVMIntTypeInContext(llvm_ref->lc.context, 64);
41 llvm_ref->lc.i128 = LLVMIntTypeInContext(llvm_ref->lc.context, 128);
42 llvm_ref->lc.intptr = llvm_ref->lc.i32;
43 llvm_ref->lc.f16 = LLVMHalfTypeInContext(llvm_ref->lc.context);
44 llvm_ref->lc.f32 = LLVMFloatTypeInContext(llvm_ref->lc.context);
45 llvm_ref->lc.f64 = LLVMDoubleTypeInContext(llvm_ref->lc.context);
46 llvm_ref->lc.v2i16 = LLVMVectorType(llvm_ref->lc.i16, 2);
47 llvm_ref->lc.v4i16 = LLVMVectorType(llvm_ref->lc.i16, 4);
48 llvm_ref->lc.v2f16 = LLVMVectorType(llvm_ref->lc.f16, 2);
49 llvm_ref->lc.v4f16 = LLVMVectorType(llvm_ref->lc.f16, 4);
50 llvm_ref->lc.v2i32 = LLVMVectorType(llvm_ref->lc.i32, 2);
51 llvm_ref->lc.v3i32 = LLVMVectorType(llvm_ref->lc.i32, 3);
52 llvm_ref->lc.v4i32 = LLVMVectorType(llvm_ref->lc.i32, 4);
53 llvm_ref->lc.v2f32 = LLVMVectorType(llvm_ref->lc.f32, 2);
54 llvm_ref->lc.v3f32 = LLVMVectorType(llvm_ref->lc.f32, 3);
55 llvm_ref->lc.v4f32 = LLVMVectorType(llvm_ref->lc.f32, 4);
56 llvm_ref->lc.v8i32 = LLVMVectorType(llvm_ref->lc.i32, 8);
57 // llvm_ref->lc.iN_wavemask = LLVMIntTypeInContext(llvm_ref->lc.context, llvm_ref->lc.wave_size);
58 // llvm_ref->lc.iN_ballotmask = LLVMIntTypeInContext(llvm_ref->lc.context, ballot_mask_bits);
59
60 llvm_ref->lc.i8_0 = LLVMConstInt(llvm_ref->lc.i8, 0, false);
61 llvm_ref->lc.i8_1 = LLVMConstInt(llvm_ref->lc.i8, 1, false);
62 llvm_ref->lc.i16_0 = LLVMConstInt(llvm_ref->lc.i16, 0, false);
63 llvm_ref->lc.i16_1 = LLVMConstInt(llvm_ref->lc.i16, 1, false);
64 llvm_ref->lc.i32_0 = LLVMConstInt(llvm_ref->lc.i32, 0, false);
65 llvm_ref->lc.i32_1 = LLVMConstInt(llvm_ref->lc.i32, 1, false);
66 llvm_ref->lc.i64_0 = LLVMConstInt(llvm_ref->lc.i64, 0, false);
67 llvm_ref->lc.i64_1 = LLVMConstInt(llvm_ref->lc.i64, 1, false);
68 llvm_ref->lc.i128_0 = LLVMConstInt(llvm_ref->lc.i128, 0, false);
69 llvm_ref->lc.i128_1 = LLVMConstInt(llvm_ref->lc.i128, 1, false);
70 llvm_ref->lc.f16_0 = LLVMConstReal(llvm_ref->lc.f16, 0.0);
71 llvm_ref->lc.f16_1 = LLVMConstReal(llvm_ref->lc.f16, 1.0);
72 llvm_ref->lc.f32_0 = LLVMConstReal(llvm_ref->lc.f32, 0.0);
73 llvm_ref->lc.f32_1 = LLVMConstReal(llvm_ref->lc.f32, 1.0);
74 llvm_ref->lc.f64_0 = LLVMConstReal(llvm_ref->lc.f64, 0.0);
75 llvm_ref->lc.f64_1 = LLVMConstReal(llvm_ref->lc.f64, 1.0);
76
77 llvm_ref->lc.i1false = LLVMConstInt(llvm_ref->lc.i1, 0, false);
78 llvm_ref->lc.i1true = LLVMConstInt(llvm_ref->lc.i1, 1, false);
79 llvm_ref->lc.float_mode = 0; //TODO: default value, when required take this value as parameter
80 }
81
82 void DestroyLLVM(struct libresoc_llvm *llvm_ref)
83 {
84 LLVMErrorRef error_ref = LLVMOrcDisposeInstance(llvm_ref->orc_ref);
85 }
86
87 static uint64_t orc_sym_resolver(const char *name, void *ctx)
88 {
89 LLVMOrcJITStackRef orc_ref = (LLVMOrcJITStackRef) (ctx);
90 LLVMOrcTargetAddress address;
91 LLVMOrcGetSymbolAddress(orc_ref, &address, name);
92 return (uint64_t)address;
93 }
94
95 void handle_shader_output_decl(struct libresoc_nir_tran_ctx *ctx,
96 struct nir_shader *nir, struct nir_variable *variable,
97 gl_shader_stage stage)
98 {
99 unsigned output_loc = variable->data.driver_location / 4;
100 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
101
102 /* tess ctrl has it's own load/store paths for outputs */
103 if (stage == MESA_SHADER_TESS_CTRL)
104 return;
105
106 if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
107 stage == MESA_SHADER_GEOMETRY) {
108 int idx = variable->data.location + variable->data.index;
109 if (idx == VARYING_SLOT_CLIP_DIST0) {
110 int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size;
111
112 if (length > 4)
113 attrib_count = 2;
114 else
115 attrib_count = 1;
116 }
117 }
118
119 bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
120 LLVMTypeRef type = is_16bit ? ctx->lc.f16 : ctx->lc.f32;
121 for (unsigned i = 0; i < attrib_count; ++i) {
122 for (unsigned chan = 0; chan < 4; chan++) {
123 ctx->outputs[llvm_reg_index_soa(output_loc + i, chan)] =
124 build_alloca_undef(&ctx->lc, type, "");
125 }
126 }
127 }
128
129 LLVMValueRef extract_components(struct libresoc_llvm_context *ctx, LLVMValueRef value, unsigned start,
130 unsigned channels)
131 {
132 LLVMValueRef chan[channels];
133
134 for (unsigned i = 0; i < channels; i++)
135 chan[i] = llvm_extract_elem(ctx, value, i + start);
136
137 return build_gather_values(ctx, chan, channels);
138 }
139
140 static void build_store_values_extended(struct libresoc_llvm_context *lc, LLVMValueRef *values,
141 unsigned value_count, unsigned value_stride,
142 LLVMValueRef vec)
143 {
144 LLVMBuilderRef builder = lc->builder;
145 unsigned i;
146
147 for (i = 0; i < value_count; i++) {
148 LLVMValueRef ptr = values[i * value_stride];
149 LLVMValueRef index = LLVMConstInt(lc->i32, i, false);
150 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
151 LLVMBuildStore(builder, value, ptr);
152 }
153 }
154
155 static LLVMTypeRef arg_llvm_type(enum arg_type type, unsigned size, struct libresoc_llvm_context *ctx)
156 {
157 if (type == ARG_FLOAT) {
158 return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
159 } else if (type == ARG_INT) {
160 return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
161 } else {
162 LLVMTypeRef ptr_type;
163 switch (type) {
164 case ARG_CONST_PTR:
165 ptr_type = ctx->i8;
166 break;
167 case ARG_CONST_FLOAT_PTR:
168 ptr_type = ctx->f32;
169 break;
170 case ARG_CONST_PTR_PTR:
171 ptr_type = LLVMPointerType(ctx->i8, 0);
172 break;
173 case ARG_CONST_DESC_PTR:
174 ptr_type = ctx->v4i32;
175 break;
176 case ARG_CONST_IMAGE_PTR:
177 ptr_type = ctx->v8i32;
178 break;
179 default:
180 unreachable("unknown arg type");
181 }
182 if (size == 1) {
183 //return ac_array_in_const32_addr_space(ptr_type);
184 return LLVMPointerType(ptr_type, 0); //address space may be wrong
185 } else {
186 assert(size == 2);
187 return LLVMPointerType(ptr_type, 0);
188 }
189 }
190 }
191 static LLVMValueRef get_src(struct libresoc_nir_tran_ctx *ctx, nir_src src)
192 {
193 assert(src.is_ssa);
194 // printf("index %d\n", src.ssa->index);
195 return ctx->ssa_defs[src.ssa->index];
196 }
197
198 static LLVMTypeRef get_def_type(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_def *def)
199 {
200 LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, def->bit_size);
201 if (def->num_components > 1) {
202 type = LLVMVectorType(type, def->num_components);
203 }
204 return type;
205 }
206
207 static LLVMValueRef get_memory_ptr(struct libresoc_nir_tran_ctx *ctx, nir_src src, unsigned bit_size)
208 {
209 LLVMValueRef ptr = get_src(ctx, src);
210 ptr = LLVMBuildGEP(ctx->lc.builder, ctx->lc.lds, &ptr, 1, "");
211 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
212
213 LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, bit_size);
214
215 return LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(type, addr_space), "");
216 }
217
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
   /* Expand every set bit i of `mask` into `multiplier` consecutive set
    * bits starting at i * multiplier, e.g. widen_mask(0b101, 2) ==
    * 0b110011.  Used to widen write masks for wider-than-32-bit stores. */
   uint32_t widened = 0;
   uint32_t group = (1u << multiplier) - 1u;

   for (unsigned bit = 0; bit < 32 && (1u << bit) <= mask; ++bit) {
      if (mask & (1u << bit))
         widened |= group << (bit * multiplier);
   }
   return widened;
}
226
/* Walk the deref chain of `instr` and split the total offset (measured in
 * attribute slots) into a constant part (*const_out) and an optional
 * indirect LLVM value (*indir_out, NULL when the offset is fully constant).
 *
 * When the caller requests a vertex index (per-vertex I/O), the first
 * array level of the chain is consumed as that index rather than being
 * folded into the offset. */
static void get_deref_offset(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr, bool vs_in,
                             unsigned *vertex_index_out, LLVMValueRef *vertex_index_ref,
                             unsigned *const_out, LLVMValueRef *indir_out)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1; /* path.path[0] is the variable deref itself */

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         /* Dynamic vertex index: hand back the LLVM value; the constant
          * slot (if also requested) is zeroed. */
         *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   /* Compact arrays (e.g. clip/cull distances) index scalars directly;
    * their index must be constant here. */
   if (var->data.compact) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         /* Sum the slot counts of all fields preceding the selected one. */
         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += size * nir_src_as_uint(path.path[idx_lvl]->arr.index);
         } else {
            /* Dynamic array index: accumulate size * index into the
             * indirect offset. */
            LLVMValueRef array_off =
               LLVMBuildMul(ctx->lc.builder, LLVMConstInt(ctx->lc.i32, size, 0),
                            get_src(ctx, path.path[idx_lvl]->arr.index), "");
            if (offset)
               offset = LLVMBuildAdd(ctx->lc.builder, offset, array_off, "");
            else
               offset = array_off;
         }
      } else
         unreachable("Uhandled deref type in get_deref_instr_offset");
   }

out:
   nir_deref_path_finish(&path);

   /* NOTE(review): when both parts exist, the constant is folded into the
    * indirect offset AND still reported via *const_out; this mirrors the
    * upstream ac_nir_to_llvm code — confirm callers expect it before
    * changing. */
   if (const_offset && offset)
      offset =
         LLVMBuildAdd(ctx->lc.builder, offset, LLVMConstInt(ctx->lc.i32, const_offset, 0), "");

   *const_out = const_offset;
   *indir_out = offset;
}
293
static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
   /* Scalar element size in bytes; booleans count as 32-bit values. */
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));

   if (glsl_type_is_boolean(type))
      return 4;
   return glsl_get_bit_size(type) / 8;
}
299
300
301 static LLVMValueRef emit_int_cmp(struct libresoc_llvm_context *lc, LLVMIntPredicate pred,
302 LLVMValueRef src0, LLVMValueRef src1)
303 {
304 LLVMTypeRef src0_type = LLVMTypeOf(src0);
305 LLVMTypeRef src1_type = LLVMTypeOf(src1);
306
307 if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
308 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
309 src1 = LLVMBuildIntToPtr(lc->builder, src1, src0_type, "");
310 } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
311 LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
312 src0 = LLVMBuildIntToPtr(lc->builder, src0, src1_type, "");
313 }
314
315 LLVMValueRef result = LLVMBuildICmp(lc->builder, pred, src0, src1, "");
316 return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
317 lc->i32_0, "");
318 }
319
320 static LLVMValueRef emit_float_cmp(struct libresoc_llvm_context *lc, LLVMRealPredicate pred,
321 LLVMValueRef src0, LLVMValueRef src1)
322 {
323 LLVMValueRef result;
324 src0 = to_float(lc, src0);
325 src1 = to_float(lc, src1);
326 result = LLVMBuildFCmp(lc->builder, pred, src0, src1, "");
327 return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
328 lc->i32_0, "");
329 }
330
331 static LLVMValueRef emit_intrin_1f_param(struct libresoc_llvm_context *lc, const char *intrin,
332 LLVMTypeRef result_type, LLVMValueRef src0)
333 {
334 char name[64], type[64];
335 LLVMValueRef params[] = {
336 to_float(lc, src0),
337 };
338
339 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
340 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
341 assert(length < sizeof(name));
342 return build_intrinsic(lc, name, result_type, params, 1, FUNC_ATTR_READNONE);
343 }
344
345 static LLVMValueRef emit_intrin_1f_param_scalar(struct libresoc_llvm_context *lc, const char *intrin,
346 LLVMTypeRef result_type, LLVMValueRef src0)
347 {
348 if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
349 return emit_intrin_1f_param(lc, intrin, result_type, src0);
350
351 LLVMTypeRef elem_type = LLVMGetElementType(result_type);
352 LLVMValueRef ret = LLVMGetUndef(result_type);
353
354 /* Scalarize the intrinsic, because vectors are not supported. */
355 for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
356 char name[64], type[64];
357 LLVMValueRef params[] = {
358 to_float(lc, llvm_extract_elem(lc, src0, i)),
359 };
360
361 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
362 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
363 assert(length < sizeof(name));
364 ret = LLVMBuildInsertElement(
365 lc->builder, ret,
366 build_intrinsic(lc, name, elem_type, params, 1, FUNC_ATTR_READNONE),
367 LLVMConstInt(lc->i32, i, 0), "");
368 }
369 return ret;
370 }
371
372 static LLVMValueRef emit_intrin_2f_param(struct libresoc_llvm_context *ctx, const char *intrin,
373 LLVMTypeRef result_type, LLVMValueRef src0,
374 LLVMValueRef src1)
375 {
376 char name[64], type[64];
377 LLVMValueRef params[] = {
378 to_float(ctx, src0),
379 to_float(ctx, src1),
380 };
381
382 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
383 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
384 assert(length < sizeof(name));
385 return build_intrinsic(ctx, name, result_type, params, 2, FUNC_ATTR_READNONE);
386 }
387
388 static LLVMValueRef emit_intrin_3f_param(struct libresoc_llvm_context *ctx, const char *intrin,
389 LLVMTypeRef result_type, LLVMValueRef src0,
390 LLVMValueRef src1, LLVMValueRef src2)
391 {
392 char name[64], type[64];
393 LLVMValueRef params[] = {
394 to_float(ctx, src0),
395 to_float(ctx, src1),
396 to_float(ctx, src2),
397 };
398
399 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
400 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
401 assert(length < sizeof(name));
402 return build_intrinsic(ctx, name, result_type, params, 3, FUNC_ATTR_READNONE);
403 }
404
405 static LLVMValueRef emit_bcsel(struct libresoc_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
406 LLVMValueRef src2)
407 {
408 LLVMTypeRef src1_type = LLVMTypeOf(src1);
409 LLVMTypeRef src2_type = LLVMTypeOf(src2);
410
411 if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
412 LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
413 src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
414 } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
415 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
416 src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
417 }
418
419 LLVMValueRef v =
420 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, LLVMConstNull(LLVMTypeOf(src0)), "");
421 return LLVMBuildSelect(ctx->builder, v, to_integer_or_pointer(ctx, src1),
422 to_integer_or_pointer(ctx, src2), "");
423 }
424
425 static LLVMValueRef emit_iabs(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
426 {
427 return build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
428 }
429
/* Invoke an add/sub-with-overflow style intrinsic named by `intrin` and
 * return its carry/borrow flag zero-extended to i32 (0 or 1). */
static LLVMValueRef emit_uint_carry(struct libresoc_llvm_context *ctx, const char *intrin,
                                    LLVMValueRef src0, LLVMValueRef src1)
{
   LLVMTypeRef ret_type;
   LLVMTypeRef types[] = {ctx->i32, ctx->i1};
   LLVMValueRef res;
   LLVMValueRef params[] = {src0, src1};
   /* NOTE(review): the {i32, i1} return struct is created packed (last arg
    * true) — verify this matches the actual return type of the intrinsic
    * being called before relying on it. */
   ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);

   res = build_intrinsic(ctx, intrin, ret_type, params, 2, FUNC_ATTR_READNONE);

   /* Field 1 of the result is the i1 carry/borrow flag. */
   res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
   res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
   return res;
}
445
446 static LLVMValueRef emit_b2f(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
447 {
448 assert(get_elem_bits(ctx, LLVMTypeOf(src0)) == 32);
449 LLVMValueRef result =
450 LLVMBuildAnd(ctx->builder, src0, const_uint_vec(ctx, LLVMTypeOf(src0), 0x3f800000), "");
451 result = to_float(ctx, result);
452
453 switch (bitsize) {
454 case 16: {
455 bool vec2 = LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind;
456 return LLVMBuildFPTrunc(ctx->builder, result, vec2 ? ctx->v2f16 : ctx->f16, "");
457 }
458 case 32:
459 return result;
460 case 64:
461 return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
462 default:
463 unreachable("Unsupported bit size.");
464 }
465 }
466
467 static LLVMValueRef emit_f2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
468 {
469 src0 = to_float(ctx, src0);
470 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
471 return LLVMBuildSExt(ctx->builder, LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
472 ctx->i32, "");
473 }
474
475 static LLVMValueRef emit_b2i(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
476 {
477 LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
478
479 switch (bitsize) {
480 case 8:
481 return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
482 case 16:
483 return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
484 case 32:
485 return result;
486 case 64:
487 return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
488 default:
489 unreachable("Unsupported bit size.");
490 }
491 }
492
493 static LLVMValueRef emit_i2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
494 {
495 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
496 return LLVMBuildSExt(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
497 ctx->i32, "");
498 }
499
500 static LLVMValueRef emit_f2f16(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
501 {
502 LLVMValueRef result;
503 LLVMValueRef cond = NULL;
504
505 src0 = to_float(ctx, src0);
506 result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
507
508 /* need to convert back up to f32 */
509 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
510 return result;
511 }
512
513 static LLVMValueRef emit_umul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
514 LLVMValueRef src1)
515 {
516 LLVMValueRef dst64, result;
517 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
518 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
519
520 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
521 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
522 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
523 return result;
524 }
525
526 static LLVMValueRef emit_imul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
527 LLVMValueRef src1)
528 {
529 LLVMValueRef dst64, result;
530 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
531 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
532
533 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
534 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
535 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
536 return result;
537 }
538
539 static LLVMValueRef emit_bfm(struct libresoc_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
540 {
541 /* mask = ((1 << bits) - 1) << offset */
542 return LLVMBuildShl(
543 ctx->builder,
544 LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
545 offset, "");
546 }
547
548 static LLVMValueRef emit_bitfield_select(struct libresoc_llvm_context *ctx, LLVMValueRef mask,
549 LLVMValueRef insert, LLVMValueRef base)
550 {
551 /* Calculate:
552 * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
553 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
554 */
555 return LLVMBuildXor(
556 ctx->builder, base,
557 LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
558 }
559
560 static LLVMValueRef emit_pack_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
561 LLVMValueRef (*pack)(struct libresoc_llvm_context *ctx,
562 LLVMValueRef args[2]))
563 {
564 LLVMValueRef comp[2];
565
566 src0 = to_float(ctx, src0);
567 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
568 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
569
570 return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
571 }
572
573 static LLVMValueRef emit_unpack_half_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
574 {
575 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
576 LLVMValueRef temps[2], val;
577 int i;
578
579 for (i = 0; i < 2; i++) {
580 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
581 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
582 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
583 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
584 }
585 return build_gather_values(ctx, temps, 2);
586 }
587
// TODO: enable this when ac_build_ddxy() is added
589 // static LLVMValueRef emit_ddxy(struct libresoc_nir_context *ctx, nir_op op, LLVMValueRef src0)
590 // {
591 // unsigned mask;
592 // int idx;
593 // LLVMValueRef result;
594
595 // if (op == nir_op_fddx_fine)
596 // mask = TID_MASK_LEFT;
597 // else if (op == nir_op_fddy_fine)
598 // mask = TID_MASK_TOP;
599 // else
600 // mask = TID_MASK_TOP_LEFT;
601
602 // /* for DDX we want to next X pixel, DDY next Y pixel. */
603 // if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
604 // idx = 1;
605 // else
606 // idx = 2;
607
608 // result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
609 // return result;
610 // }
611
612 static void setup_locals(struct libresoc_nir_tran_ctx *ctx, struct nir_function *func)
613 {
614 int i, j;
615 ctx->num_locals = 0;
616 nir_foreach_function_temp_variable(variable, func->impl)
617 {
618 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
619 variable->data.driver_location = ctx->num_locals * 4;
620 variable->data.location_frac = 0;
621 ctx->num_locals += attrib_count;
622 }
623 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
624 if (!ctx->locals)
625 return;
626
627 for (i = 0; i < ctx->num_locals; i++) {
628 for (j = 0; j < 4; j++) {
629 ctx->locals[i * 4 + j] = build_alloca_undef(&ctx->lc, ctx->lc.f32, "temp");
630 }
631 }
632 }
633
634 static void setup_scratch(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
635 {
636 if (shader->scratch_size == 0)
637 return;
638
639 ctx->scratch =
640 build_alloca_undef(&ctx->lc, LLVMArrayType(ctx->lc.i8, shader->scratch_size), "scratch");
641 }
642
643 static void setup_constant_data(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
644 {
645 if (!shader->constant_data)
646 return;
647
648 LLVMValueRef data = LLVMConstStringInContext(ctx->lc.context, shader->constant_data,
649 shader->constant_data_size, true);
650 LLVMTypeRef type = LLVMArrayType(ctx->lc.i8, shader->constant_data_size);
651
652 unsigned address_space = 0; //TODO: dummay value
653 LLVMValueRef global =
654 LLVMAddGlobalInAddressSpace(*(ctx->lc.module), type, "const_data", address_space);
655
656 LLVMSetInitializer(global, data);
657 LLVMSetGlobalConstant(global, true);
658 LLVMSetVisibility(global, LLVMHiddenVisibility);
659 ctx->constant_data = global;
660 }
661
662 static LLVMTypeRef glsl_base_to_llvm_type(struct libresoc_llvm_context *lc, enum glsl_base_type type)
663 {
664 switch (type) {
665 case GLSL_TYPE_INT:
666 case GLSL_TYPE_UINT:
667 case GLSL_TYPE_BOOL:
668 case GLSL_TYPE_SUBROUTINE:
669 return lc->i32;
670 case GLSL_TYPE_INT8:
671 case GLSL_TYPE_UINT8:
672 return lc->i8;
673 case GLSL_TYPE_INT16:
674 case GLSL_TYPE_UINT16:
675 return lc->i16;
676 case GLSL_TYPE_FLOAT:
677 return lc->f32;
678 case GLSL_TYPE_FLOAT16:
679 return lc->f16;
680 case GLSL_TYPE_INT64:
681 case GLSL_TYPE_UINT64:
682 return lc->i64;
683 case GLSL_TYPE_DOUBLE:
684 return lc->f64;
685 default:
686 unreachable("unknown GLSL type");
687 }
688 }
689
690 static LLVMTypeRef glsl_to_llvm_type(struct libresoc_llvm_context *lc, const struct glsl_type *type)
691 {
692 if (glsl_type_is_scalar(type)) {
693 return glsl_base_to_llvm_type(lc, glsl_get_base_type(type));
694 }
695
696 if (glsl_type_is_vector(type)) {
697 return LLVMVectorType(glsl_base_to_llvm_type(lc, glsl_get_base_type(type)),
698 glsl_get_vector_elements(type));
699 }
700
701 if (glsl_type_is_matrix(type)) {
702 return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_column_type(type)),
703 glsl_get_matrix_columns(type));
704 }
705
706 if (glsl_type_is_array(type)) {
707 return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_array_element(type)),
708 glsl_get_length(type));
709 }
710
711 assert(glsl_type_is_struct_or_ifc(type));
712
713 LLVMTypeRef member_types[glsl_get_length(type)];
714
715 for (unsigned i = 0; i < glsl_get_length(type); i++) {
716 member_types[i] = glsl_to_llvm_type(lc, glsl_get_struct_field(type, i));
717 }
718
719 return LLVMStructTypeInContext(lc->context, member_types, glsl_get_length(type), false);
720 }
721
722 // static LLVMValueRef visit_load(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr,
723 // bool is_output)
724 // {
725 // LLVMValueRef values[8];
726 // LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
727 // LLVMTypeRef component_type;
728 // unsigned base = nir_intrinsic_base(instr);
729 // unsigned component = nir_intrinsic_component(instr);
730 // unsigned count = instr->dest.ssa.num_components * (instr->dest.ssa.bit_size == 64 ? 2 : 1);
731 // nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
732 // LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
733 // nir_src offset = *nir_get_io_offset_src(instr);
734 // LLVMValueRef indir_index = NULL;
735
736 // if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
737 // component_type = LLVMGetElementType(dest_type);
738 // else
739 // component_type = dest_type;
740
741 // if (nir_src_is_const(offset))
742 // assert(nir_src_as_uint(offset) == 0);
743 // else
744 // indir_index = get_src(ctx, offset);
745
746 // if (ctx->stage == MESA_SHADER_TESS_CTRL || (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
747 // LLVMValueRef result = ctx->abi->load_tess_varyings(
748 // ctx->abi, component_type, vertex_index, indir_index, 0, 0, base * 4, component,
749 // instr->num_components, false, false, !is_output);
750 // if (instr->dest.ssa.bit_size == 16) {
751 // result = to_integer(&ctx->lc, result);
752 // result = LLVMBuildTrunc(ctx->lc.builder, result, dest_type, "");
753 // }
754 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
755 // }
756
757 // /* No indirect indexing is allowed after this point. */
758 // assert(!indir_index);
759
760 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
761 // LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
762 // assert(nir_src_is_const(*vertex_index_src));
763
764 // return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component, instr->num_components,
765 // nir_src_as_uint(*vertex_index_src), 0, type);
766 // }
767
768 // if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
769 // nir_intrinsic_io_semantics(instr).fb_fetch_output)
770 // return ctx->abi->emit_fbfetch(ctx->abi);
771
772 // /* Other non-fragment cases have inputs and outputs in temporaries. */
773 // if (ctx->stage != MESA_SHADER_FRAGMENT) {
774 // for (unsigned chan = component; chan < count + component; chan++) {
775 // if (is_output) {
776 // values[chan] = LLVMBuildLoad(ctx->lc.builder, ctx->outputs[base * 4 + chan], "");
777 // } else {
778 // values[chan] = ctx->inputs[base * 4 + chan];
779 // if (!values[chan])
780 // values[chan] = LLVMGetUndef(ctx->lc.i32);
781 // }
782 // }
783 // LLVMValueRef result = build_varying_gather_values(&ctx->lc, values, count, component);
784 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
785 // }
786
787 // /* Fragment shader inputs. */
788 // unsigned vertex_id = 2; /* P0 */
789
790 // if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
791 // nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
792
793 // switch (src0[0].i32) {
794 // case 0:
795 // vertex_id = 2;
796 // break;
797 // case 1:
798 // vertex_id = 0;
799 // break;
800 // case 2:
801 // vertex_id = 1;
802 // break;
803 // default:
804 // unreachable("Invalid vertex index");
805 // }
806 // }
807
808 // LLVMValueRef attr_number = LLVMConstInt(ctx->lc.i32, base, false);
809
810 // for (unsigned chan = 0; chan < count; chan++) {
811 // if (component + chan > 4)
812 // attr_number = LLVMConstInt(ctx->lc.i32, base + 1, false);
813 // LLVMValueRef llvm_chan = LLVMConstInt(ctx->lc.i32, (component + chan) % 4, false);
814 // values[chan] =
815 // build_fs_interp_mov(&ctx->lc, LLVMConstInt(ctx->lc.i32, vertex_id, false), llvm_chan,
816 // attr_number, get_arg(&ctx->lc, ctx->args->prim_mask));
817 // values[chan] = LLVMBuildBitCast(ctx->lc.builder, values[chan], ctx->lc.i32, "");
818 // values[chan] =
819 // LLVMBuildTruncOrBitCast(ctx->lc.builder, values[chan],
820 // instr->dest.ssa.bit_size == 16 ? ctx->lc.i16 : ctx->lc.i32, "");
821 // }
822
823 // LLVMValueRef result = build_gather_values(&ctx->lc, values, count);
824 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
825 // }
826
827 static LLVMValueRef visit_load_shared(struct libresoc_nir_tran_ctx *ctx, const nir_intrinsic_instr *instr)
828 {
829 LLVMValueRef values[4], derived_ptr, index, ret;
830
831 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size);
832
833 for (int chan = 0; chan < instr->num_components; chan++) {
834 index = LLVMConstInt(ctx->lc.i32, chan, 0);
835 derived_ptr = LLVMBuildGEP(ctx->lc.builder, ptr, &index, 1, "");
836 values[chan] = LLVMBuildLoad(ctx->lc.builder, derived_ptr, "");
837 }
838
839 ret = build_gather_values(&ctx->lc, values, instr->num_components);
840 return LLVMBuildBitCast(ctx->lc.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
841 }
842
843 static void visit_store_shared(struct libresoc_nir_tran_ctx *ctx, const nir_intrinsic_instr *instr)
844 {
845 LLVMValueRef derived_ptr, data, index;
846 LLVMBuilderRef builder = ctx->lc.builder;
847
848 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size);
849 LLVMValueRef src = get_src(ctx, instr->src[0]);
850
851 int writemask = nir_intrinsic_write_mask(instr);
852 for (int chan = 0; chan < 4; chan++) {
853 if (!(writemask & (1 << chan))) {
854 continue;
855 }
856 data = llvm_extract_elem(&ctx->lc, src, chan);
857 index = LLVMConstInt(ctx->lc.i32, chan, 0);
858 derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
859 LLVMBuildStore(builder, data, derived_ptr);
860 }
861 }
862 static void visit_load_const(struct libresoc_nir_tran_ctx *ctx, const nir_load_const_instr *instr)
863 {
864 LLVMValueRef values[4], value = NULL;
865 LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size);
866
867 for (unsigned i = 0; i < instr->def.num_components; ++i) {
868 switch (instr->def.bit_size) {
869 case 8:
870 values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
871 break;
872 case 16:
873 values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
874 break;
875 case 32:
876 values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
877 break;
878 case 64:
879 values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
880 break;
881 default:
882 fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
883 abort();
884 }
885 }
886 if (instr->def.num_components > 1) {
887 value = LLVMConstVector(values, instr->def.num_components);
888 } else
889 value = values[0];
890
891 ctx->ssa_defs[instr->def.index] = value;
892 }
893
/* Store a value to a shader output slot (ctx->outputs).
 *
 * Only direct addressing is supported: a constant io offset must be 0 and
 * indirect indices are rejected by the assert below.  64-bit sources are
 * bitcast to a twice-as-wide f32 vector with a correspondingly widened
 * writemask before the per-channel stores.
 */
static void visit_store_output(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
{
   // if (ctx->ac.postponed_kill) {
   //    LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
   //    ac_build_ifcc(&ctx->ac, cond, 7002);
   // }

   unsigned base = nir_intrinsic_base(instr);
   unsigned writemask = nir_intrinsic_write_mask(instr);
   unsigned component = nir_intrinsic_component(instr);
   LLVMValueRef src = to_float(&ctx->lc, get_src(ctx, instr->src[0]));
   nir_src offset = *nir_get_io_offset_src(instr);
   LLVMValueRef indir_index = NULL;

   if (nir_src_is_const(offset))
      assert(nir_src_as_uint(offset) == 0);
   else
      indir_index = get_src(ctx, offset);

   switch (get_elem_bits(&ctx->lc, LLVMTypeOf(src))) {
   case 32:
      break;
   case 64:
      /* Each 64-bit channel occupies two 32-bit output slots. */
      writemask = widen_mask(writemask, 2);
      src = LLVMBuildBitCast(ctx->lc.builder, src,
                             LLVMVectorType(ctx->lc.f32, get_llvm_num_components(src) * 2), "");
      break;
   default:
      unreachable("unhandled store_output bit size");
      return;
   }

   /* Shift the mask so it is expressed in absolute channel positions. */
   writemask <<= component;

   // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
   //    nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
   //    LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;

   //    ctx->abi->store_tcs_outputs(ctx->abi, NULL, vertex_index, indir_index, 0, src, writemask,
   //                                component, base * 4);
   //    return;
   // }

   /* No indirect indexing is allowed after this point. */
   assert(!indir_index);

   /* Up to 8 slots because a 64-bit vec4 widens to 8 x f32. */
   for (unsigned chan = 0; chan < 8; chan++) {
      if (!(writemask & (1 << chan)))
         continue;

      LLVMValueRef value = llvm_extract_elem(&ctx->lc, src, chan - component);
      LLVMBuildStore(ctx->lc.builder, value, ctx->outputs[base * 4 + chan]);
   }

   // if (ctx->ac.postponed_kill)
   //    ac_build_endif(&ctx->ac, 7002);
}
951
/* Translate a NIR deref chain node into an LLVM pointer value.
 *
 * Only shared (LDS) and global memory derefs are handled; every other
 * variable mode is lowered elsewhere, so this bails out early for them.
 * Global derefs compute byte offsets explicitly (the shader defines its
 * own layout), while shared derefs use typed GEPs.  The resulting pointer
 * is stored in the SSA-def table under the deref's dest index.
 */
static void visit_deref(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr)
{
   if (instr->mode != nir_var_mem_shared && instr->mode != nir_var_mem_global)
      return;

   LLVMValueRef result = NULL;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      /* Base of the chain: look up the pointer registered for the variable. */
      struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
      result = entry->data;
      break;
   }
   case nir_deref_type_struct:
      if (instr->mode == nir_var_mem_global) {
         /* Global: byte offset of the field within the parent struct. */
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent),
                                LLVMConstInt(ctx->lc.i32, offset, 0));
      } else {
         /* Shared: typed GEP by field index. */
         result = build_gep0(&ctx->lc, get_src(ctx, instr->parent),
                             LLVMConstInt(ctx->lc.i32, instr->strct.index, 0));
      }
      break;
   case nir_deref_type_array:
      if (instr->mode == nir_var_mem_global) {
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         unsigned stride = glsl_get_explicit_stride(parent->type);

         /* Row-major matrices and vectors may have no explicit stride;
          * fall back to the scalar size. */
         if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
             (glsl_type_is_vector(parent->type) && stride == 0))
            stride = type_scalar_size_bytes(parent->type);

         assert(stride > 0);
         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         /* Byte offset = index * stride. */
         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      } else {
         result =
            build_gep0(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_ptr_as_array:
      if (instr->mode == nir_var_mem_global) {
         unsigned stride = nir_deref_instr_array_stride(instr);

         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      } else {
         result =
            build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_cast: {
      result = get_src(ctx, instr->parent);

      /* We can't use the structs from LLVM because the shader
       * specifies its own offsets. */
      LLVMTypeRef pointee_type = ctx->lc.i8;
      if (instr->mode == nir_var_mem_shared)
         pointee_type = glsl_to_llvm_type(&ctx->lc, instr->type);

      unsigned address_space;

      /* NOTE(review): address spaces 1 (shared) and 0 (global) are this
       * backend's convention — confirm against the target's data layout. */
      switch (instr->mode) {
      case nir_var_mem_shared:
         address_space = 1;
         break;
      case nir_var_mem_global:
         address_space = 0;
         break;
      default:
         unreachable("Unhandled address space");
      }

      LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

      /* Re-type the parent pointer if needed: bitcast for pointer-like
       * vectors, inttoptr for integer addresses. */
      if (LLVMTypeOf(result) != type) {
         if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
            result = LLVMBuildBitCast(ctx->lc.builder, result, type, "");
         } else {
            result = LLVMBuildIntToPtr(ctx->lc.builder, result, type, "");
         }
      }
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
1054
1055 static void visit_phi(struct libresoc_nir_tran_ctx *ctx, nir_phi_instr *instr)
1056 {
1057 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
1058 LLVMValueRef result = LLVMBuildPhi(ctx->lc.builder, type, "");
1059
1060 ctx->ssa_defs[instr->dest.ssa.index] = result;
1061 _mesa_hash_table_insert(ctx->phis, instr, result);
1062 }
1063
1064 static bool is_def_used_in_an_export(const nir_ssa_def *def)
1065 {
1066 nir_foreach_use (use_src, def) {
1067 if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
1068 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
1069 if (instr->intrinsic == nir_intrinsic_store_deref)
1070 return true;
1071 } else if (use_src->parent_instr->type == nir_instr_type_alu) {
1072 nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
1073 if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
1074 return true;
1075 }
1076 }
1077 }
1078 return false;
1079 }
1080
1081 static void visit_ssa_undef(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_undef_instr *instr)
1082 {
1083 unsigned num_components = instr->def.num_components;
1084 LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size);
1085
1086 if (/*!ctx->abi->convert_undef_to_zero ||*/ is_def_used_in_an_export(&instr->def)) {
1087 LLVMValueRef undef;
1088
1089 if (num_components == 1)
1090 undef = LLVMGetUndef(type);
1091 else {
1092 undef = LLVMGetUndef(LLVMVectorType(type, num_components));
1093 }
1094 ctx->ssa_defs[instr->def.index] = undef;
1095 } else {
1096 LLVMValueRef zero = LLVMConstInt(type, 0, false);
1097 if (num_components > 1) {
1098 zero = build_gather_values_extended(&ctx->lc, &zero, 4, 0, false, false);
1099 }
1100 ctx->ssa_defs[instr->def.index] = zero;
1101 }
1102 }
1103
1104 static void visit_jump(struct libresoc_llvm_context *lc, const nir_jump_instr *instr)
1105 {
1106 switch (instr->type) {
1107 case nir_jump_break:
1108 build_break(lc);
1109 break;
1110 case nir_jump_continue:
1111 build_continue(lc);
1112 break;
1113 default:
1114 fprintf(stderr, "Unknown NIR jump instr: ");
1115 nir_print_instr(&instr->instr, stderr);
1116 fprintf(stderr, "\n");
1117 abort();
1118 }
1119 }
1120
/* Fetch an ALU source and apply its swizzle so that exactly
 * num_components channels come out, in the requested order.
 *
 * Three shapes are handled: vector -> scalar (extract one element),
 * scalar -> vector (splat), and vector -> vector (shufflevector).
 * Source negate/abs modifiers are expected to have been lowered away
 * by NIR before this point (see the asserts at the end).
 */
static LLVMValueRef get_alu_src(struct libresoc_nir_tran_ctx *ctx, nir_alu_src src,
                                unsigned num_components)
{
   LLVMValueRef value = get_src(ctx, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = get_llvm_num_components(value);
   /* A swizzle is only needed if some channel is not an identity pick. */
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      LLVMValueRef masks[] = {LLVMConstInt(ctx->lc.i32, src.swizzle[0], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[1], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[2], false),
                              LLVMConstInt(ctx->lc.i32, src.swizzle[3], false)};

      if (src_components > 1 && num_components == 1) {
         /* vector -> scalar: extract the single swizzled channel. */
         value = LLVMBuildExtractElement(ctx->lc.builder, value, masks[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* scalar -> vector: splat the scalar. */
         LLVMValueRef values[] = {value, value, value, value};
         value = build_gather_values(&ctx->lc, values, num_components);
      } else {
         /* vector -> vector: shuffle by the swizzle mask. */
         LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
         value = LLVMBuildShuffleVector(ctx->lc.builder, value, value, swizzle, "");
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}
1155
/* Translate a NIR ALU instruction into LLVM IR.
 *
 * The first switch determines how many components each source must be
 * swizzled to (most ops consume num_components, but vec/pack/cube ops
 * differ); the second switch emits the actual LLVM operation.  The
 * result is converted to integer-or-pointer form and recorded in the
 * SSA-def table.  Ops whose helpers are not yet ported leave result
 * NULL and store nothing (see the TODO comments below).
 */
static void visit_alu(struct libresoc_nir_tran_ctx *ctx, const nir_alu_instr *instr)
{
   LLVMValueRef src[4], result = NULL;
   unsigned num_components = instr->dest.dest.ssa.num_components;
   unsigned src_components;
   LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

   assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
   /* Decide the per-source component count for the swizzle in get_alu_src. */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
   case nir_op_pack_snorm_2x16:
   case nir_op_pack_unorm_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord:
   case nir_op_cube_face_index:
      src_components = 3;
      break;
   default:
      src_components = num_components;
      break;
   }
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      src[i] = get_alu_src(ctx, instr->src[i], src_components);

   switch (instr->op) {
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_fneg:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFNeg(ctx->lc.builder, src[0], "");
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fneg will be optimized by backend compiler with sign
          * bit removed via XOR. This is probably a LLVM bug.
          */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_ineg:
      result = LLVMBuildNeg(ctx->lc.builder, src[0], "");
      break;
   case nir_op_inot:
      result = LLVMBuildNot(ctx->lc.builder, src[0], "");
      break;
   case nir_op_iadd:
      result = LLVMBuildAdd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fadd:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFAdd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fsub:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFSub(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_isub:
      result = LLVMBuildSub(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_imul:
      result = LLVMBuildMul(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_imod:
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_umod:
      result = LLVMBuildURem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_irem:
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_idiv:
      result = LLVMBuildSDiv(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_udiv:
      result = LLVMBuildUDiv(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fmul:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFMul(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_frcp:
      /* For doubles, we need precise division to pass GLCTS. */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL && get_type_size(def_type) == 8) {
         result = LLVMBuildFDiv(ctx->lc.builder, ctx->lc.f64_1, to_float(&ctx->lc, src[0]), "");
      } else {
         result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rcp",
                                              to_float_type(&ctx->lc, def_type), src[0]);
      }
      // TODO: abi not supported
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                        LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
      break;
   case nir_op_iand:
      result = LLVMBuildAnd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ior:
      result = LLVMBuildOr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ixor:
      result = LLVMBuildXor(ctx->lc.builder, src[0], src[1], "");
      break;
   /* Shifts: LLVM requires both operands to have the same width, while
    * NIR shift counts are always 32-bit, so extend/truncate src[1]. */
   case nir_op_ishl:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildShl(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ishr:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildAShr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ushr:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildLShr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ilt32:
      result = emit_int_cmp(&ctx->lc, LLVMIntSLT, src[0], src[1]);
      break;
   case nir_op_ine32:
      result = emit_int_cmp(&ctx->lc, LLVMIntNE, src[0], src[1]);
      break;
   case nir_op_ieq32:
      result = emit_int_cmp(&ctx->lc, LLVMIntEQ, src[0], src[1]);
      break;
   case nir_op_ige32:
      result = emit_int_cmp(&ctx->lc, LLVMIntSGE, src[0], src[1]);
      break;
   case nir_op_ult32:
      result = emit_int_cmp(&ctx->lc, LLVMIntULT, src[0], src[1]);
      break;
   case nir_op_uge32:
      result = emit_int_cmp(&ctx->lc, LLVMIntUGE, src[0], src[1]);
      break;
   case nir_op_feq32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOEQ, src[0], src[1]);
      break;
   case nir_op_fneu32:
      /* fneu is unordered-not-equal, hence LLVMRealUNE (true for NaN). */
      result = emit_float_cmp(&ctx->lc, LLVMRealUNE, src[0], src[1]);
      break;
   case nir_op_flt32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOLT, src[0], src[1]);
      break;
   case nir_op_fge32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOGE, src[0], src[1]);
      break;
   case nir_op_fabs:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.fabs", to_float_type(&ctx->lc, def_type), src[0]);
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fabs will be optimized by backend compiler with sign
          * bit removed via AND.
          */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_iabs:
      result = emit_iabs(&ctx->lc, src[0]);
      break;
   case nir_op_imax:
      result = build_imax(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_imin:
      result = build_imin(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_umax:
      result = build_umax(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_umin:
      result = build_umin(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_isign:
      result = build_isign(&ctx->lc, src[0]);
      break;
   case nir_op_fsign:
      src[0] = to_float(&ctx->lc, src[0]);
      result = build_fsign(&ctx->lc, src[0]);
      break;
   case nir_op_ffloor:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.floor", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_ftrunc:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.trunc", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fceil:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.ceil", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fround_even:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.rint", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_ffract:
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.fract",
                                           to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fsin:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.sin", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fcos:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.cos", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fsqrt:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.sqrt", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fexp2:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.exp2", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_flog2:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.log2", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_frsq:
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rsq",
                                           to_float_type(&ctx->lc, def_type), src[0]);
      // TODO: abi not enabled
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                        LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
      break;
   case nir_op_frexp_exp:
      // TODO: enable this when ac_build_frexp_exp() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_exp(&ctx->lc, src[0], get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])));
      // if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) == 16)
      //    result = LLVMBuildSExt(ctx->lc.builder, result, ctx->lc.i32, "");
      break;
   case nir_op_frexp_sig:
      /* NOTE(review): the commented block below also swallowed the
       * nir_op_fpow case label, so fpow currently falls into the default
       * abort() — confirm whether that is intentional before re-enabling. */
      // TODO: enable this when ac_build_frexp_mant() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_mant(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      // break;
      // case nir_op_fpow:
      //    result = emit_intrin_2f_param(&ctx->lc, "llvm.pow", to_float_type(&ctx->lc, def_type),
      //                                  src[0], src[1]);
      break;
   case nir_op_fmax:
      result = emit_intrin_2f_param(&ctx->lc, "llvm.maxnum", to_float_type(&ctx->lc, def_type),
                                    src[0], src[1]);
      break;
   case nir_op_fmin:
      result = emit_intrin_2f_param(&ctx->lc, "llvm.minnum", to_float_type(&ctx->lc, def_type),
                                    src[0], src[1]);
      break;
   case nir_op_ffma:
      result =
         emit_intrin_3f_param(&ctx->lc, "llvm.fmuladd",
                              to_float_type(&ctx->lc, def_type), src[0], src[1], src[2]);
      break;
   case nir_op_ldexp:
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, def_type) == 32)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f32", ctx->lc.f32, src, 2,
                                  FUNC_ATTR_READNONE);
      else if (get_elem_bits(&ctx->lc, def_type) == 16)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f16", ctx->lc.f16, src, 2,
                                  FUNC_ATTR_READNONE);
      else
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f64", ctx->lc.f64, src, 2,
                                  FUNC_ATTR_READNONE);
      break;
   case nir_op_bfm:
      result = emit_bfm(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_bitfield_select:
      result = emit_bitfield_select(&ctx->lc, src[0], src[1], src[2]);
      break;
   case nir_op_ubfe:
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], false);
      break;
   case nir_op_ibfe:
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], true);
      break;
   case nir_op_bitfield_reverse:
      result = build_bitfield_reverse(&ctx->lc, src[0]);
      break;
   case nir_op_bit_count:
      result = build_bit_count(&ctx->lc, src[0]);
      break;
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      /* Sources were fetched as scalars (src_components == 1); gather them. */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
         src[i] = to_integer(&ctx->lc, src[i]);
      result = build_gather_values(&ctx->lc, src, num_components);
      break;
   case nir_op_f2i8:
   case nir_op_f2i16:
   case nir_op_f2i32:
   case nir_op_f2i64:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToSI(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_f2u8:
   case nir_op_f2u16:
   case nir_op_f2u32:
   case nir_op_f2u64:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToUI(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_i2f16:
   case nir_op_i2f32:
   case nir_op_i2f64:
      result = LLVMBuildSIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_u2f16:
   case nir_op_u2f32:
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
      src[0] = to_float(&ctx->lc, src[0]);

      /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
       * all f32->f16 conversions have to round towards zero, because both scalar
       * and vec2 down-conversions have to round equally.
       */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
         src[0] = to_float(&ctx->lc, src[0]);

         if (LLVMTypeOf(src[0]) == ctx->lc.f64)
            src[0] = LLVMBuildFPTrunc(ctx->lc.builder, src[0], ctx->lc.f32, "");

         /* Fast path conversion. This only works if NIR is vectorized
          * to vec2 16.
          */
         if (LLVMTypeOf(src[0]) == ctx->lc.v2f32) {
            LLVMValueRef args[] = {
               llvm_extract_elem(&ctx->lc, src[0], 0),
               llvm_extract_elem(&ctx->lc, src[0], 1),
            };
            result = build_cvt_pkrtz_f16(&ctx->lc, args);
            break;
         }

         assert(get_llvm_num_components(src[0]) == 1);
         LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->lc.f32)};
         result = build_cvt_pkrtz_f16(&ctx->lc, param);
         result = LLVMBuildExtractElement(ctx->lc.builder, result, ctx->lc.i32_0, "");
      } else {
         if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
            result =
               LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
         else
            result =
               LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      }
      break;
   case nir_op_f2f16_rtne:
   case nir_op_f2f32:
   case nir_op_f2f64:
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      else
         result =
            LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_u2u8:
   case nir_op_u2u16:
   case nir_op_u2ump:
   case nir_op_u2u32:
   case nir_op_u2u64:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildZExt(ctx->lc.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_i2i8:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildSExt(ctx->lc.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_b32csel:
      result = emit_bcsel(&ctx->lc, src[0], src[1], src[2]);
      break;
   case nir_op_find_lsb:
      result = find_lsb(&ctx->lc, ctx->lc.i32, src[0]);
      break;
   case nir_op_ufind_msb:
      result = build_umsb(&ctx->lc, src[0], ctx->lc.i32);
      break;
   case nir_op_ifind_msb:
      result = build_imsb(&ctx->lc, src[0], ctx->lc.i32);
      break;
   case nir_op_uadd_carry:
      result = emit_uint_carry(&ctx->lc, "llvm.uadd.with.overflow.i32", src[0], src[1]);
      break;
   case nir_op_usub_borrow:
      result = emit_uint_carry(&ctx->lc, "llvm.usub.with.overflow.i32", src[0], src[1]);
      break;
   case nir_op_b2f16:
   case nir_op_b2f32:
   case nir_op_b2f64:
      result = emit_b2f(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_f2b32:
      result = emit_f2b(&ctx->lc, src[0]);
      break;
   case nir_op_b2i8:
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      result = emit_b2i(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_i2b32:
      result = emit_i2b(&ctx->lc, src[0]);
      break;
   case nir_op_fquantize2f16:
      result = emit_f2f16(&ctx->lc, src[0]);
      break;
   case nir_op_umul_high:
      result = emit_umul_high(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_imul_high:
      result = emit_imul_high(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_pack_half_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pkrtz_f16);
      break;
   case nir_op_pack_snorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_i16);
      break;
   case nir_op_pack_unorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_u16);
      break;
   case nir_op_unpack_half_2x16:
      result = emit_unpack_half_2x16(&ctx->lc, src[0]);
      break;
   case nir_op_fddx:
   case nir_op_fddy:
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      // TODO: enable this when emit_ddxy() is added
      //result = emit_ddxy(ctx, instr->op, src[0]);
      break;

   case nir_op_unpack_64_2x32_split_x: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
      break;
   }

   case nir_op_unpack_64_2x32_split_y: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
      break;
   }

   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i64, "");
      break;
   }

   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i32, "");
      break;
   }

   case nir_op_unpack_32_2x16_split_x: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
      break;
   }

   case nir_op_unpack_32_2x16_split_y: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
      break;
   }

   case nir_op_cube_face_coord: {
      /* sc/tc divided by the major axis, then biased from [-1,1] to [0,1]. */
      src[0] = to_float(&ctx->lc, src[0]);
      LLVMValueRef results[2];
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      results[0] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubesc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      results[1] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubetc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      LLVMValueRef ma = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubema", ctx->lc.f32, in, 3,
                                        FUNC_ATTR_READNONE);
      results[0] = build_fdiv(&ctx->lc, results[0], ma);
      results[1] = build_fdiv(&ctx->lc, results[1], ma);
      LLVMValueRef offset = LLVMConstReal(ctx->lc.f32, 0.5);
      results[0] = LLVMBuildFAdd(ctx->lc.builder, results[0], offset, "");
      results[1] = LLVMBuildFAdd(ctx->lc.builder, results[1], offset, "");
      result = build_gather_values(&ctx->lc, results, 2);
      break;
   }

   case nir_op_cube_face_index: {
      src[0] = to_float(&ctx->lc, src[0]);
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      result = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubeid", ctx->lc.f32, in, 3,
                               FUNC_ATTR_READNONE);
      break;
   }

   default:
      fprintf(stderr, "Unknown NIR alu instr: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      abort();
   }

   /* Record the result (if any) in canonical integer/pointer form. */
   if (result) {
      assert(instr->dest.dest.is_ssa);
      result = to_integer_or_pointer(&ctx->lc, result);
      ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
   }
}
1715
1716 static LLVMValueRef visit_load_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
1717 {
1718 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1719 nir_variable *var = nir_deref_instr_get_variable(deref);
1720
1721 LLVMValueRef values[8];
1722 int idx = 0;
1723 int ve = instr->dest.ssa.num_components;
1724 unsigned comp = 0;
1725 LLVMValueRef indir_index;
1726 LLVMValueRef ret;
1727 unsigned const_index;
1728 unsigned stride = 4;
1729 int mode = deref->mode;
1730
1731 if (var) {
1732 bool vs_in = ctx->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in;
1733 idx = var->data.driver_location;
1734 comp = var->data.location_frac;
1735 mode = var->data.mode;
1736
1737 get_deref_offset(ctx, deref, vs_in, NULL, NULL, &const_index, &indir_index);
1738
1739 if (var->data.compact) {
1740 stride = 1;
1741 const_index += comp;
1742 comp = 0;
1743 }
1744 }
1745
1746 if (instr->dest.ssa.bit_size == 64 &&
1747 (deref->mode == nir_var_shader_in || deref->mode == nir_var_shader_out ||
1748 deref->mode == nir_var_function_temp))
1749 ve *= 2;
1750
1751 switch (mode) {
1752 case nir_var_shader_in:
1753 /* TODO: remove this after RADV switches to lowered IO */
1754 // if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
1755 // return load_tess_varyings(ctx, instr, true);
1756 // }
1757
1758 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
1759 // LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
1760 // LLVMValueRef indir_index;
1761 // unsigned const_index, vertex_index;
1762 // get_deref_offset(ctx, deref, false, &vertex_index, NULL, &const_index, &indir_index);
1763 // assert(indir_index == NULL);
1764
1765 // return ctx->abi->load_inputs(ctx->abi, var->data.location, var->data.driver_location,
1766 // var->data.location_frac, instr->num_components, vertex_index,
1767 // const_index, type);
1768 // }
1769
1770 for (unsigned chan = comp; chan < ve + comp; chan++) {
1771 if (indir_index) {
1772 unsigned count =
1773 glsl_count_attribute_slots(var->type, ctx->stage == MESA_SHADER_VERTEX);
1774 count -= chan / 4;
1775 LLVMValueRef tmp_vec = build_gather_values_extended(
1776 &ctx->lc, ctx->inputs + idx + chan, count, stride, false, true);
1777
1778 values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
1779 } else
1780 values[chan] = ctx->inputs[idx + chan + const_index * stride];
1781 }
1782 break;
1783 case nir_var_function_temp:
1784 for (unsigned chan = 0; chan < ve; chan++) {
1785 if (indir_index) {
1786 unsigned count = glsl_count_attribute_slots(var->type, false);
1787 count -= chan / 4;
1788 LLVMValueRef tmp_vec = build_gather_values_extended(
1789 &ctx->lc, ctx->locals + idx + chan, count, stride, true, true);
1790
1791 values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
1792 } else {
1793 values[chan] =
1794 LLVMBuildLoad(ctx->lc.builder, ctx->locals[idx + chan + const_index * stride], "");
1795 }
1796 }
1797 break;
1798 case nir_var_shader_out:
1799 /* TODO: remove this after RADV switches to lowered IO */
1800 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
1801 // return load_tess_varyings(ctx, instr, false);
1802 // }
1803
1804 // if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.fb_fetch_output && ctx->abi->emit_fbfetch)
1805 // return ctx->abi->emit_fbfetch(ctx->abi);
1806
1807 for (unsigned chan = comp; chan < ve + comp; chan++) {
1808 if (indir_index) {
1809 unsigned count = glsl_count_attribute_slots(var->type, false);
1810 count -= chan / 4;
1811 LLVMValueRef tmp_vec = build_gather_values_extended(
1812 &ctx->lc, ctx->outputs + idx + chan, count, stride, true, true);
1813
1814 values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
1815 } else {
1816 values[chan] = LLVMBuildLoad(ctx->lc.builder,
1817 ctx->outputs[idx + chan + const_index * stride], "");
1818 }
1819 }
1820 break;
1821 case nir_var_mem_global: {
1822 LLVMValueRef address = get_src(ctx, instr->src[0]);
1823 LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
1824 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
1825 unsigned natural_stride = type_scalar_size_bytes(deref->type);
1826 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
1827 int elem_size_bytes = get_elem_bits(&ctx->lc, result_type) / 8;
1828 bool split_loads = false;
1829
1830 if (stride != natural_stride || split_loads) {
1831 if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
1832 result_type = LLVMGetElementType(result_type);
1833
1834 LLVMTypeRef ptr_type =
1835 LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
1836 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
1837
1838 for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
1839 LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, i * stride / natural_stride, 0);
1840 values[i] =
1841 LLVMBuildLoad(ctx->lc.builder, build_gep_ptr(&ctx->lc, address, offset), "");
1842
1843 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
1844 LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
1845 }
1846 return build_gather_values(&ctx->lc, values, instr->dest.ssa.num_components);
1847 } else {
1848 LLVMTypeRef ptr_type =
1849 LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
1850 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
1851 LLVMValueRef val = LLVMBuildLoad(ctx->lc.builder, address, "");
1852
1853 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
1854 LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
1855 return val;
1856 }
1857 }
1858 default:
1859 unreachable("unhandle variable mode");
1860 }
1861 ret = build_varying_gather_values(&ctx->lc, values, ve, comp);
1862 return LLVMBuildBitCast(ctx->lc.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
1863 }
1864
1865 static void visit_store_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
1866 {
1867 // if (ctx->lc.postponed_kill) {
1868 // LLVMValueRef cond = LLVMBuildLoad(ctx->lc.builder, ctx->lc.postponed_kill, "");
1869 // ac_build_ifcc(&ctx->lc, cond, 7002);
1870 // }
1871
1872 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1873 nir_variable *var = nir_deref_instr_get_variable(deref);
1874
1875 LLVMValueRef temp_ptr, value;
1876 int idx = 0;
1877 unsigned comp = 0;
1878 LLVMValueRef src = to_float(&ctx->lc, get_src(ctx, instr->src[1]));
1879 int writemask = instr->const_index[0];
1880 LLVMValueRef indir_index;
1881 unsigned const_index;
1882
1883 if (var) {
1884 get_deref_offset(ctx, deref, false, NULL, NULL, &const_index, &indir_index);
1885 idx = var->data.driver_location;
1886 comp = var->data.location_frac;
1887
1888 if (var->data.compact) {
1889 const_index += comp;
1890 comp = 0;
1891 }
1892 }
1893
1894 if (get_elem_bits(&ctx->lc, LLVMTypeOf(src)) == 64 &&
1895 (deref->mode == nir_var_shader_out || deref->mode == nir_var_function_temp)) {
1896
1897 src = LLVMBuildBitCast(ctx->lc.builder, src,
1898 LLVMVectorType(ctx->lc.f32, get_llvm_num_components(src) * 2), "");
1899
1900 writemask = widen_mask(writemask, 2);
1901 }
1902
1903 writemask = writemask << comp;
1904
1905 switch (deref->mode) {
1906 case nir_var_shader_out:
1907 /* TODO: remove this after RADV switches to lowered IO */
1908 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
1909 // LLVMValueRef vertex_index = NULL;
1910 // LLVMValueRef indir_index = NULL;
1911 // unsigned const_index = 0;
1912 // const bool is_patch = var->data.patch ||
1913 // var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
1914 // var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
1915
1916 // get_deref_offset(ctx, deref, false, NULL, is_patch ? NULL : &vertex_index, &const_index,
1917 // &indir_index);
1918
1919 // ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, const_index, src,
1920 // writemask, var->data.location_frac, var->data.driver_location);
1921 // break;
1922 // }
1923
1924 for (unsigned chan = 0; chan < 8; chan++) {
1925 int stride = 4;
1926 if (!(writemask & (1 << chan)))
1927 continue;
1928
1929 value = llvm_extract_elem(&ctx->lc, src, chan - comp);
1930
1931 if (var->data.compact)
1932 stride = 1;
1933 if (indir_index) {
1934 unsigned count = glsl_count_attribute_slots(var->type, false);
1935 count -= chan / 4;
1936 LLVMValueRef tmp_vec = build_gather_values_extended(
1937 &ctx->lc, ctx->outputs + idx + chan, count, stride, true, true);
1938
1939 tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
1940 build_store_values_extended(&ctx->lc, ctx->outputs + idx + chan, count, stride,
1941 tmp_vec);
1942
1943 } else {
1944 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
1945
1946 LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
1947 }
1948 }
1949 break;
1950 case nir_var_function_temp:
1951 for (unsigned chan = 0; chan < 8; chan++) {
1952 if (!(writemask & (1 << chan)))
1953 continue;
1954
1955 value = llvm_extract_elem(&ctx->lc, src, chan);
1956 if (indir_index) {
1957 unsigned count = glsl_count_attribute_slots(var->type, false);
1958 count -= chan / 4;
1959 LLVMValueRef tmp_vec = build_gather_values_extended(
1960 &ctx->lc, ctx->locals + idx + chan, count, 4, true, true);
1961
1962 tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
1963 build_store_values_extended(&ctx->lc, ctx->locals + idx + chan, count, 4, tmp_vec);
1964 } else {
1965 temp_ptr = ctx->locals[idx + chan + const_index * 4];
1966
1967 LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
1968 }
1969 }
1970 break;
1971
1972 case nir_var_mem_global: {
1973 int writemask = instr->const_index[0];
1974 LLVMValueRef address = get_src(ctx, instr->src[0]);
1975 LLVMValueRef val = get_src(ctx, instr->src[1]);
1976
1977 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
1978 unsigned natural_stride = type_scalar_size_bytes(deref->type);
1979 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
1980 int elem_size_bytes = get_elem_bits(&ctx->lc, LLVMTypeOf(val)) / 8;
1981 bool split_stores = false;
1982
1983 LLVMTypeRef ptr_type =
1984 LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
1985 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
1986
1987 if (writemask == (1u << get_llvm_num_components(val)) - 1 && stride == natural_stride &&
1988 !split_stores) {
1989 LLVMTypeRef ptr_type =
1990 LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
1991 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
1992
1993 val = LLVMBuildBitCast(ctx->lc.builder, val, LLVMGetElementType(LLVMTypeOf(address)), "");
1994 LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, val, address);
1995
1996 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
1997 LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
1998 } else {
1999 LLVMTypeRef val_type = LLVMTypeOf(val);
2000 if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
2001 val_type = LLVMGetElementType(val_type);
2002
2003 LLVMTypeRef ptr_type =
2004 LLVMPointerType(val_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2005 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
2006 for (unsigned chan = 0; chan < 4; chan++) {
2007 if (!(writemask & (1 << chan)))
2008 continue;
2009
2010 LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, chan * stride / natural_stride, 0);
2011
2012 LLVMValueRef ptr = build_gep_ptr(&ctx->lc, address, offset);
2013 LLVMValueRef src = llvm_extract_elem(&ctx->lc, val, chan);
2014 src = LLVMBuildBitCast(ctx->lc.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), "");
2015 LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, src, ptr);
2016
2017 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
2018 LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
2019 }
2020 }
2021 break;
2022 }
2023 default:
2024 abort();
2025 break;
2026 }
2027
2028 // if (ctx->ac.postponed_kill)
2029 // ac_build_endif(&ctx->ac, 7002);
2030 }
2031
2032 static void visit_intrinsic(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
2033 {
2034 LLVMValueRef result = NULL;
2035
2036 switch (instr->intrinsic) {
2037 case nir_intrinsic_ballot:
2038 // result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
2039 // if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
2040 // result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
2041 break;
2042 case nir_intrinsic_read_invocation:
2043 // result =
2044 // ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
2045 break;
2046 case nir_intrinsic_read_first_invocation:
2047 // result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
2048 break;
2049 case nir_intrinsic_load_subgroup_invocation:
2050 // result = ac_get_thread_id(&ctx->ac);
2051 break;
2052 case nir_intrinsic_load_work_group_id: {
2053 // LLVMValueRef values[3];
2054
2055 // for (int i = 0; i < 3; i++) {
2056 // values[i] = ctx->args->workgroup_ids[i].used
2057 // ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
2058 // : ctx->ac.i32_0;
2059 // }
2060
2061 // result = ac_build_gather_values(&ctx->ac, values, 3);
2062 break;
2063 }
2064 case nir_intrinsic_load_base_vertex:
2065 case nir_intrinsic_load_first_vertex:
2066 //result = ctx->abi->load_base_vertex(ctx->abi);
2067 result = LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index);
2068 break;
2069 case nir_intrinsic_load_local_group_size:
2070 // result = ctx->abi->load_local_group_size(ctx->abi);
2071 break;
2072 case nir_intrinsic_load_vertex_id:
2073 result = LLVMBuildAdd(ctx->lc.builder, LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index),
2074 LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index), "");
2075 break;
2076 case nir_intrinsic_load_vertex_id_zero_base: {
2077 // result = ctx->abi->vertex_id;
2078 result = LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index);
2079 break;
2080 }
2081 case nir_intrinsic_load_local_invocation_id: {
2082 // result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
2083 break;
2084 }
2085 case nir_intrinsic_load_base_instance:
2086 // result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
2087 break;
2088 case nir_intrinsic_load_draw_id:
2089 // result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
2090 break;
2091 case nir_intrinsic_load_view_index:
2092 // result = ac_get_arg(&ctx->ac, ctx->args->view_index);
2093 break;
2094 case nir_intrinsic_load_invocation_id:
2095 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2096 // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
2097 // } else {
2098 // if (ctx->ac.chip_class >= GFX10) {
2099 // result =
2100 // LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
2101 // LLVMConstInt(ctx->ac.i32, 127, 0), "");
2102 // } else {
2103 // result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
2104 // }
2105 // }
2106 break;
2107 case nir_intrinsic_load_primitive_id:
2108 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
2109 // result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
2110 // } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2111 // result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
2112 // } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
2113 // result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
2114 // } else
2115 // fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
2116 // break;
2117 // case nir_intrinsic_load_sample_id:
2118 // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
2119 break;
2120 case nir_intrinsic_load_sample_pos:
2121 // result = load_sample_pos(ctx);
2122 break;
2123 case nir_intrinsic_load_sample_mask_in:
2124 // result = ctx->abi->load_sample_mask_in(ctx->abi);
2125 break;
2126 case nir_intrinsic_load_frag_coord: {
2127 // LLVMValueRef values[4] = {
2128 // ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
2129 // ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
2130 // ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
2131 // result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
2132 break;
2133 }
2134 case nir_intrinsic_load_layer_id:
2135 // result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
2136 break;
2137 case nir_intrinsic_load_front_face:
2138 // result = ac_get_arg(&ctx->ac, ctx->args->front_face);
2139 break;
2140 case nir_intrinsic_load_helper_invocation:
2141 // result = ac_build_load_helper_invocation(&ctx->ac);
2142 break;
2143 case nir_intrinsic_is_helper_invocation:
2144 // result = ac_build_is_helper_invocation(&ctx->ac);
2145 break;
2146 case nir_intrinsic_load_color0:
2147 // result = ctx->abi->color0;
2148 break;
2149 case nir_intrinsic_load_color1:
2150 // result = ctx->abi->color1;
2151 break;
2152 case nir_intrinsic_load_user_data_amd:
2153 // assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
2154 // result = ctx->abi->user_data;
2155 break;
2156 case nir_intrinsic_load_instance_id:
2157 // result = ctx->abi->instance_id;
2158 break;
2159 case nir_intrinsic_load_num_work_groups:
2160 // result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
2161 break;
2162 case nir_intrinsic_load_local_invocation_index:
2163 // result = visit_load_local_invocation_index(ctx);
2164 break;
2165 case nir_intrinsic_load_subgroup_id:
2166 // result = visit_load_subgroup_id(ctx);
2167 break;
2168 case nir_intrinsic_load_num_subgroups:
2169 // result = visit_load_num_subgroups(ctx);
2170 break;
2171 case nir_intrinsic_first_invocation:
2172 // result = visit_first_invocation(ctx);
2173 break;
2174 case nir_intrinsic_load_push_constant:
2175 // result = visit_load_push_constant(ctx, instr);
2176 break;
2177 case nir_intrinsic_vulkan_resource_index: {
2178 // LLVMValueRef index = get_src(ctx, instr->src[0]);
2179 // unsigned desc_set = nir_intrinsic_desc_set(instr);
2180 // unsigned binding = nir_intrinsic_binding(instr);
2181
2182 // result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
2183 break;
2184 }
2185 case nir_intrinsic_vulkan_resource_reindex:
2186 // result = visit_vulkan_resource_reindex(ctx, instr);
2187 break;
2188 case nir_intrinsic_store_ssbo:
2189 // visit_store_ssbo(ctx, instr);
2190 break;
2191 case nir_intrinsic_load_ssbo:
2192 // result = visit_load_buffer(ctx, instr);
2193 break;
2194 case nir_intrinsic_ssbo_atomic_add:
2195 case nir_intrinsic_ssbo_atomic_imin:
2196 case nir_intrinsic_ssbo_atomic_umin:
2197 case nir_intrinsic_ssbo_atomic_imax:
2198 case nir_intrinsic_ssbo_atomic_umax:
2199 case nir_intrinsic_ssbo_atomic_and:
2200 case nir_intrinsic_ssbo_atomic_or:
2201 case nir_intrinsic_ssbo_atomic_xor:
2202 case nir_intrinsic_ssbo_atomic_exchange:
2203 case nir_intrinsic_ssbo_atomic_comp_swap:
2204 // result = visit_atomic_ssbo(ctx, instr);
2205 break;
2206 case nir_intrinsic_load_ubo:
2207 // result = visit_load_ubo_buffer(ctx, instr);
2208 break;
2209 case nir_intrinsic_get_buffer_size:
2210 // result = visit_get_buffer_size(ctx, instr);
2211 break;
2212 case nir_intrinsic_load_deref:
2213 result = visit_load_var(ctx, instr);
2214 break;
2215 case nir_intrinsic_store_deref:
2216 visit_store_var(ctx, instr);
2217 break;
2218 case nir_intrinsic_load_input:
2219 case nir_intrinsic_load_input_vertex:
2220 case nir_intrinsic_load_per_vertex_input:
2221 // result = visit_load(ctx, instr, false);
2222 break;
2223 case nir_intrinsic_load_output:
2224 case nir_intrinsic_load_per_vertex_output:
2225 // result = visit_load(ctx, instr, true);
2226 break;
2227 case nir_intrinsic_store_output:
2228 case nir_intrinsic_store_per_vertex_output:
2229 visit_store_output(ctx, instr);
2230 break;
2231 case nir_intrinsic_load_shared:
2232 result = visit_load_shared(ctx, instr);
2233 break;
2234 case nir_intrinsic_store_shared:
2235 visit_store_shared(ctx, instr);
2236 break;
2237 case nir_intrinsic_bindless_image_samples:
2238 case nir_intrinsic_image_deref_samples:
2239 // result = visit_image_samples(ctx, instr);
2240 break;
2241 case nir_intrinsic_bindless_image_load:
2242 // result = visit_image_load(ctx, instr, true);
2243 break;
2244 case nir_intrinsic_image_deref_load:
2245 // result = visit_image_load(ctx, instr, false);
2246 break;
2247 case nir_intrinsic_bindless_image_store:
2248 // visit_image_store(ctx, instr, true);
2249 break;
2250 case nir_intrinsic_image_deref_store:
2251 // visit_image_store(ctx, instr, false);
2252 break;
2253 case nir_intrinsic_bindless_image_atomic_add:
2254 case nir_intrinsic_bindless_image_atomic_imin:
2255 case nir_intrinsic_bindless_image_atomic_umin:
2256 case nir_intrinsic_bindless_image_atomic_imax:
2257 case nir_intrinsic_bindless_image_atomic_umax:
2258 case nir_intrinsic_bindless_image_atomic_and:
2259 case nir_intrinsic_bindless_image_atomic_or:
2260 case nir_intrinsic_bindless_image_atomic_xor:
2261 case nir_intrinsic_bindless_image_atomic_exchange:
2262 case nir_intrinsic_bindless_image_atomic_comp_swap:
2263 case nir_intrinsic_bindless_image_atomic_inc_wrap:
2264 case nir_intrinsic_bindless_image_atomic_dec_wrap:
2265 // result = visit_image_atomic(ctx, instr, true);
2266 break;
2267 case nir_intrinsic_image_deref_atomic_add:
2268 case nir_intrinsic_image_deref_atomic_imin:
2269 case nir_intrinsic_image_deref_atomic_umin:
2270 case nir_intrinsic_image_deref_atomic_imax:
2271 case nir_intrinsic_image_deref_atomic_umax:
2272 case nir_intrinsic_image_deref_atomic_and:
2273 case nir_intrinsic_image_deref_atomic_or:
2274 case nir_intrinsic_image_deref_atomic_xor:
2275 case nir_intrinsic_image_deref_atomic_exchange:
2276 case nir_intrinsic_image_deref_atomic_comp_swap:
2277 case nir_intrinsic_image_deref_atomic_inc_wrap:
2278 case nir_intrinsic_image_deref_atomic_dec_wrap:
2279 // result = visit_image_atomic(ctx, instr, false);
2280 break;
2281 case nir_intrinsic_bindless_image_size:
2282 // result = visit_image_size(ctx, instr, true);
2283 break;
2284 case nir_intrinsic_image_deref_size:
2285 // result = visit_image_size(ctx, instr, false);
2286 break;
2287 case nir_intrinsic_shader_clock:
2288 // result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
2289 break;
2290 case nir_intrinsic_discard:
2291 case nir_intrinsic_discard_if:
2292 // emit_discard(ctx, instr);
2293 break;
2294 case nir_intrinsic_demote:
2295 case nir_intrinsic_demote_if:
2296 // emit_demote(ctx, instr);
2297 break;
2298 case nir_intrinsic_memory_barrier:
2299 case nir_intrinsic_group_memory_barrier:
2300 case nir_intrinsic_memory_barrier_buffer:
2301 case nir_intrinsic_memory_barrier_image:
2302 case nir_intrinsic_memory_barrier_shared:
2303 // emit_membar(&ctx->ac, instr);
2304 break;
2305 case nir_intrinsic_scoped_barrier: {
2306 // assert(!(nir_intrinsic_memory_semantics(instr) &
2307 // (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
2308
2309 // nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
2310
2311 // unsigned wait_flags = 0;
2312 // if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
2313 // wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2314 // if (modes & nir_var_mem_shared)
2315 // wait_flags |= AC_WAIT_LGKM;
2316
2317 // if (wait_flags)
2318 // ac_build_waitcnt(&ctx->ac, wait_flags);
2319
2320 // if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
2321 // ac_emit_barrier(&ctx->ac, ctx->stage);
2322 break;
2323 }
2324 case nir_intrinsic_memory_barrier_tcs_patch:
2325 break;
2326 case nir_intrinsic_control_barrier:
2327 // ac_emit_barrier(&ctx->ac, ctx->stage);
2328 break;
2329 case nir_intrinsic_shared_atomic_add:
2330 case nir_intrinsic_shared_atomic_imin:
2331 case nir_intrinsic_shared_atomic_umin:
2332 case nir_intrinsic_shared_atomic_imax:
2333 case nir_intrinsic_shared_atomic_umax:
2334 case nir_intrinsic_shared_atomic_and:
2335 case nir_intrinsic_shared_atomic_or:
2336 case nir_intrinsic_shared_atomic_xor:
2337 case nir_intrinsic_shared_atomic_exchange:
2338 case nir_intrinsic_shared_atomic_comp_swap:
2339 case nir_intrinsic_shared_atomic_fadd: {
2340 // LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size);
2341 // result = visit_var_atomic(ctx, instr, ptr, 1);
2342 break;
2343 }
2344 case nir_intrinsic_deref_atomic_add:
2345 case nir_intrinsic_deref_atomic_imin:
2346 case nir_intrinsic_deref_atomic_umin:
2347 case nir_intrinsic_deref_atomic_imax:
2348 case nir_intrinsic_deref_atomic_umax:
2349 case nir_intrinsic_deref_atomic_and:
2350 case nir_intrinsic_deref_atomic_or:
2351 case nir_intrinsic_deref_atomic_xor:
2352 case nir_intrinsic_deref_atomic_exchange:
2353 case nir_intrinsic_deref_atomic_comp_swap:
2354 case nir_intrinsic_deref_atomic_fadd: {
2355 // LLVMValueRef ptr = get_src(ctx, instr->src[0]);
2356 // result = visit_var_atomic(ctx, instr, ptr, 1);
2357 break;
2358 }
2359 case nir_intrinsic_load_barycentric_pixel:
2360 // result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
2361 break;
2362 case nir_intrinsic_load_barycentric_centroid:
2363 // result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
2364 break;
2365 case nir_intrinsic_load_barycentric_sample:
2366 // result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
2367 break;
2368 case nir_intrinsic_load_barycentric_model:
2369 // result = barycentric_model(ctx);
2370 break;
2371 case nir_intrinsic_load_barycentric_at_offset: {
2372 // LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
2373 // result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
2374 break;
2375 }
2376 case nir_intrinsic_load_barycentric_at_sample: {
2377 // LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
2378 // result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
2379 break;
2380 }
2381 case nir_intrinsic_load_interpolated_input: {
2382 /* We assume any indirect loads have been lowered away */
2383 // ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
2384 // assert(offset);
2385 // assert(offset[0].i32 == 0);
2386
2387 // LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
2388 // unsigned index = nir_intrinsic_base(instr);
2389 // unsigned component = nir_intrinsic_component(instr);
2390 // result = load_interpolated_input(ctx, interp_param, index, component,
2391 // instr->dest.ssa.num_components, instr->dest.ssa.bit_size);
2392 break;
2393 }
2394 case nir_intrinsic_emit_vertex:
2395 // ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
2396 break;
2397 case nir_intrinsic_emit_vertex_with_counter: {
2398 // unsigned stream = nir_intrinsic_stream_id(instr);
2399 // LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
2400 // ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
2401 break;
2402 }
2403 case nir_intrinsic_end_primitive:
2404 case nir_intrinsic_end_primitive_with_counter:
2405 // ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
2406 break;
2407 case nir_intrinsic_load_tess_coord:
2408 // result = ctx->abi->load_tess_coord(ctx->abi);
2409 break;
2410 case nir_intrinsic_load_tess_level_outer:
2411 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
2412 break;
2413 case nir_intrinsic_load_tess_level_inner:
2414 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
2415 break;
2416 case nir_intrinsic_load_tess_level_outer_default:
2417 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
2418 break;
2419 case nir_intrinsic_load_tess_level_inner_default:
2420 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
2421 break;
2422 case nir_intrinsic_load_patch_vertices_in:
2423 // result = ctx->abi->load_patch_vertices_in(ctx->abi);
2424 break;
2425 case nir_intrinsic_vote_all: {
2426 // LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
2427 // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
2428 break;
2429 }
2430 case nir_intrinsic_vote_any: {
2431 // LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
2432 // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
2433 break;
2434 }
2435 case nir_intrinsic_shuffle:
2436 // if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
2437 // (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
2438 // result =
2439 // ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
2440 // } else {
2441 // LLVMValueRef src = get_src(ctx, instr->src[0]);
2442 // LLVMValueRef index = get_src(ctx, instr->src[1]);
2443 // LLVMTypeRef type = LLVMTypeOf(src);
2444 // struct waterfall_context wctx;
2445 // LLVMValueRef index_val;
2446
2447 // index_val = enter_waterfall(ctx, &wctx, index, true);
2448
2449 // src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
2450
2451 // result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
2452 // (LLVMValueRef[]){src, index_val}, 2,
2453 // AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
2454
2455 // result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
2456
2457 // result = exit_waterfall(ctx, &wctx, result);
2458 // }
2459 break;
2460 case nir_intrinsic_reduce:
2461 // result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
2462 // instr->const_index[1]);
2463 break;
2464 case nir_intrinsic_inclusive_scan:
2465 // result =
2466 // ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
2467 break;
2468 case nir_intrinsic_exclusive_scan:
2469 // result =
2470 // ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
2471 break;
2472 case nir_intrinsic_quad_broadcast: {
2473 // unsigned lane = nir_src_as_uint(instr->src[1]);
2474 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
2475 break;
2476 }
2477 case nir_intrinsic_quad_swap_horizontal:
2478 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
2479 break;
2480 case nir_intrinsic_quad_swap_vertical:
2481 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
2482 break;
2483 case nir_intrinsic_quad_swap_diagonal:
2484 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
2485 break;
2486 case nir_intrinsic_quad_swizzle_amd: {
2487 // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
2488 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
2489 // (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
2490 break;
2491 }
2492 case nir_intrinsic_masked_swizzle_amd: {
2493 // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
2494 // result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
2495 break;
2496 }
2497 case nir_intrinsic_write_invocation_amd:
2498 // result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
2499 // get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
2500 break;
2501 case nir_intrinsic_mbcnt_amd:
2502 // result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
2503 break;
2504 case nir_intrinsic_load_scratch: {
2505 LLVMValueRef offset = get_src(ctx, instr->src[0]);
2506 LLVMValueRef ptr = build_gep0(&ctx->lc, ctx->scratch, offset);
2507 LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
2508 LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
2509 ? comp_type
2510 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
2511 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2512 ptr = LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
2513 result = LLVMBuildLoad(ctx->lc.builder, ptr, "");
2514 break;
2515 }
2516 case nir_intrinsic_store_scratch: {
2517 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2518 LLVMValueRef ptr = build_gep0(&ctx->lc, ctx->scratch, offset);
2519 LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->lc.context, instr->src[0].ssa->bit_size);
2520 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2521 ptr = LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
2522 LLVMValueRef src = get_src(ctx, instr->src[0]);
2523 unsigned wrmask = nir_intrinsic_write_mask(instr);
2524 while (wrmask) {
2525 int start, count;
2526 u_bit_scan_consecutive_range(&wrmask, &start, &count);
2527
2528 LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, start, false);
2529 LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->lc.builder, ptr, &offset, 1, "");
2530 LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
2531 offset_ptr = LLVMBuildBitCast(ctx->lc.builder, offset_ptr,
2532 LLVMPointerType(vec_type, addr_space), "");
2533 LLVMValueRef offset_src = extract_components(&ctx->lc, src, start, count);
2534 LLVMBuildStore(ctx->lc.builder, offset_src, offset_ptr);
2535 }
2536 break;
2537 }
2538 case nir_intrinsic_load_constant: {
2539 unsigned base = nir_intrinsic_base(instr);
2540 unsigned range = nir_intrinsic_range(instr);
2541
2542 LLVMValueRef offset = get_src(ctx, instr->src[0]);
2543 offset = LLVMBuildAdd(ctx->lc.builder, offset, LLVMConstInt(ctx->lc.i32, base, false), "");
2544
2545 /* Clamp the offset to avoid out-of-bound access because global
2546 * instructions can't handle them.
2547 */
2548 LLVMValueRef size = LLVMConstInt(ctx->lc.i32, base + range, false);
2549 LLVMValueRef cond = LLVMBuildICmp(ctx->lc.builder, LLVMIntULT, offset, size, "");
2550 offset = LLVMBuildSelect(ctx->lc.builder, cond, offset, size, "");
2551
2552 LLVMValueRef ptr = build_gep0(&ctx->lc, ctx->constant_data, offset);
2553 LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
2554 LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
2555 ? comp_type
2556 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
2557 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2558 ptr = LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
2559 result = LLVMBuildLoad(ctx->lc.builder, ptr, "");
2560 break;
2561 }
2562 default:
2563 fprintf(stderr, "Unknown intrinsic: ");
2564 nir_print_instr(&instr->instr, stderr);
2565 fprintf(stderr, "\n");
2566 break;
2567 }
2568 if (result) {
2569 ctx->ssa_defs[instr->dest.ssa.index] = result;
2570 }
2571 }
2572
2573 static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list);
2574
2575 static void visit_block(struct libresoc_nir_tran_ctx *ctx, nir_block *block)
2576 {
2577 nir_foreach_instr (instr, block) {
2578 switch (instr->type) {
2579 case nir_instr_type_alu:
2580 visit_alu(ctx, nir_instr_as_alu(instr));
2581 break;
2582 case nir_instr_type_load_const:
2583 visit_load_const(ctx, nir_instr_as_load_const(instr));
2584 break;
2585 case nir_instr_type_intrinsic:
2586 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
2587 break;
2588 case nir_instr_type_tex:
2589 // visit_tex(ctx, nir_instr_as_tex(instr));
2590 break;
2591 case nir_instr_type_phi:
2592 visit_phi(ctx, nir_instr_as_phi(instr));
2593 break;
2594 case nir_instr_type_ssa_undef:
2595 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
2596 break;
2597 case nir_instr_type_jump:
2598 visit_jump(&ctx->lc, nir_instr_as_jump(instr));
2599 break;
2600 case nir_instr_type_deref:
2601 visit_deref(ctx, nir_instr_as_deref(instr));
2602 break;
2603 default:
2604 fprintf(stderr, "Unknown NIR instr type: ");
2605 nir_print_instr(instr, stderr);
2606 fprintf(stderr, "\n");
2607 abort();
2608 }
2609 }
2610 }
2611
2612 static void visit_if(struct libresoc_nir_tran_ctx *ctx, nir_if *if_stmt)
2613 {
2614 LLVMValueRef value = get_src(ctx, if_stmt->condition);
2615
2616 nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list);
2617
2618 build_uif(&ctx->lc, value, then_block->index);
2619
2620 visit_cf_list(ctx, &if_stmt->then_list);
2621
2622 if (!exec_list_is_empty(&if_stmt->else_list)) {
2623 nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list);
2624
2625 build_else(&ctx->lc, else_block->index);
2626 visit_cf_list(ctx, &if_stmt->else_list);
2627 }
2628
2629 build_endif(&ctx->lc, then_block->index);
2630 }
2631
2632 static void visit_loop(struct libresoc_nir_tran_ctx *ctx, nir_loop *loop)
2633 {
2634 nir_block *first_loop_block = (nir_block *)exec_list_get_head(&loop->body);
2635
2636 build_bgnloop(&ctx->lc, first_loop_block->index);
2637
2638 visit_cf_list(ctx, &loop->body);
2639
2640 build_endloop(&ctx->lc, first_loop_block->index);
2641 }
2642
2643 static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list)
2644 {
2645 foreach_list_typed(nir_cf_node, node, node, list)
2646 {
2647 switch (node->type) {
2648 case nir_cf_node_block:
2649 visit_block(ctx, nir_cf_node_as_block(node));
2650 break;
2651
2652 case nir_cf_node_if:
2653 visit_if(ctx, nir_cf_node_as_if(node));
2654 break;
2655
2656 case nir_cf_node_loop:
2657 visit_loop(ctx, nir_cf_node_as_loop(node));
2658 break;
2659
2660 default:
2661 assert(0);
2662 }
2663 }
2664 }
2665
2666 LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir)
2667 {
2668 struct libresoc_nir_tran_ctx ctx = {};
2669 struct nir_function *func;
2670 char shader_name[60];
2671 sprintf(shader_name, "libresoc-shader-%s", gl_shader_stage_name(nir->info.stage));
2672 LLVMModuleRef mod = LLVMModuleCreateWithNameInContext(shader_name, llvm_ref->lc.context);
2673 ctx.lc.module = &mod;
2674 ctx.lc = llvm_ref->lc;
2675 ctx.stage = nir->info.stage;
2676 ctx.info = &nir->info;
2677
2678 if (ctx.stage == MESA_SHADER_VERTEX) {
2679 add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.base_vertex);
2680 add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.start_instance);
2681 add_arg(&ctx.args, ARG_VGPR, 1, ARG_INT, &ctx.args.vertex_id);
2682 }
2683 LLVMTypeRef arg_types[32];
2684 LLVMTypeRef ret_type = LLVMVoidTypeInContext(ctx.lc.context);
2685 for (unsigned i = 0; i < ctx.args.arg_count; i++) {
2686 arg_types[i] = arg_llvm_type(ctx.args.args[i].type, ctx.args.args[i].size, &ctx.lc);
2687 }
2688
2689 //TODO: this is zero argument function and returns void
2690 LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, ctx.args.arg_count, 0);
2691
2692 LLVMValueRef main_function = LLVMAddFunction(mod, "main_function", main_function_type);
2693 LLVMBasicBlockRef main_function_body =
2694 LLVMAppendBasicBlockInContext(ctx.lc.context, main_function, "main_body");
2695 LLVMPositionBuilderAtEnd(ctx.lc.builder, main_function_body);
2696 ctx.main_function = main_function;
2697
2698 if (!nir->info.io_lowered) {
2699 nir_foreach_shader_out_variable(variable, nir)
2700 {
2701 handle_shader_output_decl(&ctx, nir, variable, ctx.stage);
2702 }
2703 }
2704 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2705 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2706 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2707 func = (struct nir_function *)exec_list_get_head(&nir->functions);
2708
2709 nir_index_ssa_defs(func->impl);
2710 ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
2711 setup_locals(&ctx, func);
2712 setup_scratch(&ctx, nir);
2713 setup_constant_data(&ctx, nir);
2714
2715 // if (gl_shader_stage_is_compute(nir->info.stage))
2716 // setup_shared(&ctx, nir);
2717 visit_cf_list(&ctx, &func->impl->body);
2718 LLVMBuildRetVoid(ctx.lc.builder);
2719 char *error = NULL;
2720 LLVMVerifyModule(mod, LLVMPrintMessageAction, &error);
2721 LLVMDumpModule(mod);
2722 LLVMDisposeMessage(error);
2723 LLVMOrcModuleHandle mod_handle;
2724 LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref,
2725 &mod_handle,
2726 mod,
2727 orc_sym_resolver,
2728 (void *)(llvm_ref->orc_ref));
2729 LLVMDumpModule(mod);
2730 char *def_triple = LLVMGetDefaultTargetTriple(); // E.g. "x86_64-linux-gnu"
2731 LLVMDisasmContextRef disasm = LLVMCreateDisasm(def_triple, NULL,
2732 0, NULL,
2733 NULL);
2734 if (disasm) {
2735 LLVMOrcTargetAddress MainAddr;
2736 LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr ,"main_function");
2737 const uint8_t *bytes = (const uint8_t *)MainAddr;
2738 char outline[1024];
2739 uint64_t pc;
2740 pc = 0;
2741 uint64_t extent = 200;
2742 while (pc < extent) {
2743 size_t Size;
2744
2745 /*
2746 * Print address. We use addresses relative to the start of the function,
2747 * so that between runs.
2748 */
2749
2750
2751 Size = LLVMDisasmInstruction(disasm, (uint8_t *)bytes + pc, extent - pc, 0, outline,
2752 sizeof outline);
2753
2754 /*
2755 * Print the instruction.
2756 */
2757 printf("\t%s \n", outline);
2758
2759
2760 /*
2761 * Stop disassembling on return statements, if there is no record of a
2762 * jump to a successive address.
2763 *
2764 * XXX: This currently assumes x86
2765 */
2766
2767 if (Size == 1 && bytes[pc] == 0xc3) {
2768 break;
2769 }
2770
2771 /*
2772 * Advance.
2773 */
2774
2775 pc += Size;
2776
2777 if (pc >= extent) {
2778 break;
2779 }
2780 }
2781 }
2782 return mod;
2783 // LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod");
2784 // LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() };
2785 // LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0);
2786 // LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type);
2787 // LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry");
2788 // LLVMBuilderRef builder = LLVMCreateBuilder();
2789 // LLVMPositionBuilderAtEnd(builder, entry);
2790 // LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp");
2791 // LLVMBuildRet(builder, tmp);
2792 // char *error = NULL;
2793 // LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
2794 // LLVMDumpModule(mod);
2795 // LLVMDisposeMessage(error);
2796 // LLVMOrcModuleHandle mod_handle;
2797 // LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref,
2798 // &mod_handle,
2799 // mod,
2800 // orc_sym_resolver,
2801 // (void *)(llvm_ref->orc_ref));
2802 }