Updated code generation so that, for vertex shaders, the output position is written last...
[mesa.git] / src / libre-soc / vulkan / libresoc_llvm.c
#include <stdio.h>

#include "libresoc_llvm.h"
#include "libresoc_llvm_build.h"
3
4 void InitLLVM(struct libresoc_llvm *llvm_ref)
5 {
6 // LLVMInitializeNativeTarget();
7 // LLVMInitializeNativeAsmPrinter();
8 LLVMInitializeAllAsmPrinters();
9 LLVMInitializeAllTargets();
10 LLVMInitializeAllTargetInfos();
11 LLVMInitializeAllTargetMCs();
12 LLVMInitializeAllDisassemblers();
13 //LLVMLinkInMCJIT();
14 char *def_triple = LLVMGetDefaultTargetTriple(); // E.g. "x86_64-linux-gnu"
15 char *error;
16 LLVMTargetRef target_ref;
17 if (LLVMGetTargetFromTriple(def_triple, &target_ref, &error)) {
18 // Fatal error
19 }
20
21 if (!LLVMTargetHasJIT(target_ref)) {
22 // Fatal error, cannot do JIT on this platform
23 }
24
25 LLVMTargetMachineRef tm_ref =
26 LLVMCreateTargetMachine(target_ref, def_triple, "", "",
27 LLVMCodeGenLevelDefault,
28 LLVMRelocDefault,
29 LLVMCodeModelJITDefault);
30 //assert(tm_ref);
31 LLVMDisposeErrorMessage(def_triple);
32 llvm_ref->orc_ref = LLVMOrcCreateInstance(tm_ref);
33 llvm_ref->lc.context = LLVMContextCreate();
34 llvm_ref->lc.builder = LLVMCreateBuilderInContext(llvm_ref->lc.context);
35 llvm_ref->lc.voidt = LLVMVoidTypeInContext(llvm_ref->lc.context);
36 llvm_ref->lc.i1 = LLVMInt1TypeInContext(llvm_ref->lc.context);
37 llvm_ref->lc.i8 = LLVMInt8TypeInContext(llvm_ref->lc.context);
38 llvm_ref->lc.i16 = LLVMIntTypeInContext(llvm_ref->lc.context, 16);
39 llvm_ref->lc.i32 = LLVMIntTypeInContext(llvm_ref->lc.context, 32);
40 llvm_ref->lc.i64 = LLVMIntTypeInContext(llvm_ref->lc.context, 64);
41 llvm_ref->lc.i128 = LLVMIntTypeInContext(llvm_ref->lc.context, 128);
42 llvm_ref->lc.intptr = llvm_ref->lc.i32;
43 llvm_ref->lc.f16 = LLVMHalfTypeInContext(llvm_ref->lc.context);
44 llvm_ref->lc.f32 = LLVMFloatTypeInContext(llvm_ref->lc.context);
45 llvm_ref->lc.f64 = LLVMDoubleTypeInContext(llvm_ref->lc.context);
46 llvm_ref->lc.v2i16 = LLVMVectorType(llvm_ref->lc.i16, 2);
47 llvm_ref->lc.v4i16 = LLVMVectorType(llvm_ref->lc.i16, 4);
48 llvm_ref->lc.v2f16 = LLVMVectorType(llvm_ref->lc.f16, 2);
49 llvm_ref->lc.v4f16 = LLVMVectorType(llvm_ref->lc.f16, 4);
50 llvm_ref->lc.v2i32 = LLVMVectorType(llvm_ref->lc.i32, 2);
51 llvm_ref->lc.v3i32 = LLVMVectorType(llvm_ref->lc.i32, 3);
52 llvm_ref->lc.v4i32 = LLVMVectorType(llvm_ref->lc.i32, 4);
53 llvm_ref->lc.v2f32 = LLVMVectorType(llvm_ref->lc.f32, 2);
54 llvm_ref->lc.v3f32 = LLVMVectorType(llvm_ref->lc.f32, 3);
55 llvm_ref->lc.v4f32 = LLVMVectorType(llvm_ref->lc.f32, 4);
56 llvm_ref->lc.v8i32 = LLVMVectorType(llvm_ref->lc.i32, 8);
57 // llvm_ref->lc.iN_wavemask = LLVMIntTypeInContext(llvm_ref->lc.context, llvm_ref->lc.wave_size);
58 // llvm_ref->lc.iN_ballotmask = LLVMIntTypeInContext(llvm_ref->lc.context, ballot_mask_bits);
59
60 llvm_ref->lc.i8_0 = LLVMConstInt(llvm_ref->lc.i8, 0, false);
61 llvm_ref->lc.i8_1 = LLVMConstInt(llvm_ref->lc.i8, 1, false);
62 llvm_ref->lc.i16_0 = LLVMConstInt(llvm_ref->lc.i16, 0, false);
63 llvm_ref->lc.i16_1 = LLVMConstInt(llvm_ref->lc.i16, 1, false);
64 llvm_ref->lc.i32_0 = LLVMConstInt(llvm_ref->lc.i32, 0, false);
65 llvm_ref->lc.i32_1 = LLVMConstInt(llvm_ref->lc.i32, 1, false);
66 llvm_ref->lc.i64_0 = LLVMConstInt(llvm_ref->lc.i64, 0, false);
67 llvm_ref->lc.i64_1 = LLVMConstInt(llvm_ref->lc.i64, 1, false);
68 llvm_ref->lc.i128_0 = LLVMConstInt(llvm_ref->lc.i128, 0, false);
69 llvm_ref->lc.i128_1 = LLVMConstInt(llvm_ref->lc.i128, 1, false);
70 llvm_ref->lc.f16_0 = LLVMConstReal(llvm_ref->lc.f16, 0.0);
71 llvm_ref->lc.f16_1 = LLVMConstReal(llvm_ref->lc.f16, 1.0);
72 llvm_ref->lc.f32_0 = LLVMConstReal(llvm_ref->lc.f32, 0.0);
73 llvm_ref->lc.f32_1 = LLVMConstReal(llvm_ref->lc.f32, 1.0);
74 llvm_ref->lc.f64_0 = LLVMConstReal(llvm_ref->lc.f64, 0.0);
75 llvm_ref->lc.f64_1 = LLVMConstReal(llvm_ref->lc.f64, 1.0);
76
77 llvm_ref->lc.i1false = LLVMConstInt(llvm_ref->lc.i1, 0, false);
78 llvm_ref->lc.i1true = LLVMConstInt(llvm_ref->lc.i1, 1, false);
79 llvm_ref->lc.float_mode = 0; //TODO: default value, when required take this value as parameter
80 }
81
82 void DestroyLLVM(struct libresoc_llvm *llvm_ref)
83 {
84 LLVMErrorRef error_ref = LLVMOrcDisposeInstance(llvm_ref->orc_ref);
85 }
86
87 static uint64_t orc_sym_resolver(const char *name, void *ctx)
88 {
89 LLVMOrcJITStackRef orc_ref = (LLVMOrcJITStackRef) (ctx);
90 LLVMOrcTargetAddress address;
91 LLVMOrcGetSymbolAddress(orc_ref, &address, name);
92 return (uint64_t)address;
93 }
94
95 void handle_shader_output_decl(struct libresoc_nir_tran_ctx *ctx,
96 struct nir_shader *nir, struct nir_variable *variable,
97 gl_shader_stage stage)
98 {
99 unsigned output_loc = variable->data.driver_location / 4;
100 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
101
102 /* tess ctrl has it's own load/store paths for outputs */
103 if (stage == MESA_SHADER_TESS_CTRL)
104 return;
105
106 if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
107 stage == MESA_SHADER_GEOMETRY) {
108 int idx = variable->data.location + variable->data.index;
109 if (idx == VARYING_SLOT_CLIP_DIST0) {
110 int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size;
111
112 if (length > 4)
113 attrib_count = 2;
114 else
115 attrib_count = 1;
116 }
117 }
118
119 bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
120 LLVMTypeRef type = is_16bit ? ctx->lc.f16 : ctx->lc.f32;
121 for (unsigned i = 0; i < attrib_count; ++i) {
122 for (unsigned chan = 0; chan < 4; chan++) {
123 ctx->outputs[llvm_reg_index_soa(output_loc + i, chan)] =
124 build_alloca_undef(&ctx->lc, type, "");
125 }
126 }
127 }
128
129 LLVMValueRef extract_components(struct libresoc_llvm_context *ctx, LLVMValueRef value, unsigned start,
130 unsigned channels)
131 {
132 LLVMValueRef chan[channels];
133
134 for (unsigned i = 0; i < channels; i++)
135 chan[i] = llvm_extract_elem(ctx, value, i + start);
136
137 return build_gather_values(ctx, chan, channels);
138 }
139
140 static void build_store_values_extended(struct libresoc_llvm_context *lc, LLVMValueRef *values,
141 unsigned value_count, unsigned value_stride,
142 LLVMValueRef vec)
143 {
144 LLVMBuilderRef builder = lc->builder;
145 unsigned i;
146
147 for (i = 0; i < value_count; i++) {
148 LLVMValueRef ptr = values[i * value_stride];
149 LLVMValueRef index = LLVMConstInt(lc->i32, i, false);
150 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
151 LLVMBuildStore(builder, value, ptr);
152 }
153 }
154
155 static LLVMTypeRef arg_llvm_type(enum arg_type type, unsigned size, struct libresoc_llvm_context *ctx)
156 {
157 if (type == ARG_FLOAT) {
158 return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
159 } else if (type == ARG_INT) {
160 return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
161 } else {
162 LLVMTypeRef ptr_type;
163 switch (type) {
164 case ARG_CONST_PTR:
165 ptr_type = ctx->i8;
166 break;
167 case ARG_CONST_FLOAT_PTR:
168 ptr_type = ctx->f32;
169 break;
170 case ARG_CONST_PTR_PTR:
171 ptr_type = LLVMPointerType(ctx->i8, 0);
172 break;
173 case ARG_CONST_DESC_PTR:
174 ptr_type = ctx->v4i32;
175 break;
176 case ARG_CONST_IMAGE_PTR:
177 ptr_type = ctx->v8i32;
178 break;
179 default:
180 unreachable("unknown arg type");
181 }
182 if (size == 1) {
183 //return ac_array_in_const32_addr_space(ptr_type);
184 return LLVMPointerType(ptr_type, 0); //address space may be wrong
185 } else {
186 assert(size == 2);
187 return LLVMPointerType(ptr_type, 0);
188 }
189 }
190 }
191 static LLVMValueRef get_src(struct libresoc_nir_tran_ctx *ctx, nir_src src)
192 {
193 assert(src.is_ssa);
194 // printf("index %d\n", src.ssa->index);
195 return ctx->ssa_defs[src.ssa->index];
196 }
197
198 static LLVMTypeRef get_def_type(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_def *def)
199 {
200 LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, def->bit_size);
201 if (def->num_components > 1) {
202 type = LLVMVectorType(type, def->num_components);
203 }
204 return type;
205 }
206
207 static LLVMValueRef get_memory_ptr(struct libresoc_nir_tran_ctx *ctx, nir_src src, unsigned bit_size)
208 {
209 LLVMValueRef ptr = get_src(ctx, src);
210 ptr = LLVMBuildGEP(ctx->lc.builder, ctx->lc.lds, &ptr, 1, "");
211 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
212
213 LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, bit_size);
214
215 return LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(type, addr_space), "");
216 }
217
/* Expand each set bit i of `mask` into `multiplier` consecutive set bits
 * starting at bit i * multiplier, e.g. widen_mask(0b101, 2) == 0b110011.
 * Used to widen write masks when components are split (64-bit stores).
 */
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
   uint32_t out = 0;
   for (unsigned bit = 0; bit < 32 && (1u << bit) <= mask; ++bit) {
      if ((mask >> bit) & 1u)
         out |= ((1u << multiplier) - 1u) << (bit * multiplier);
   }
   return out;
}
226
/* Compute the offset, in attribute slots, addressed by a deref chain.
 *
 * Walks the deref path for `instr` and splits the total offset into a
 * constant part (*const_out) and an optional dynamic LLVM i32 part
 * (*indir_out, NULL when the offset is fully constant). When the caller
 * passes vertex_index_out/vertex_index_ref, the first array index of the
 * path is treated as a per-vertex index (TCS/TES/GS style I/O) and is
 * returned separately instead of being folded into the offset. `vs_in`
 * is forwarded to glsl_count_attribute_slots() to select vertex-input
 * slot counting.
 */
static void get_deref_offset(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr, bool vs_in,
                             unsigned *vertex_index_out, LLVMValueRef *vertex_index_ref,
                             unsigned *const_out, LLVMValueRef *indir_out)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1; /* path.path[0] is the deref_var itself; start after it */

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      /* Consume the leading array index as the vertex index, either as a
       * dynamic LLVM value or as a constant.
       */
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   if (var->data.compact) {
      /* Compact variables (e.g. clip/cull distance arrays) are a flat
       * array: the (constant) element index is the whole offset.
       */
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         /* Struct member: sum the slot counts of all preceding fields. */
         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += size * nir_src_as_uint(path.path[idx_lvl]->arr.index);
         } else {
            /* Dynamic array index: accumulate size * index on the LLVM
             * side.
             */
            LLVMValueRef array_off =
               LLVMBuildMul(ctx->lc.builder, LLVMConstInt(ctx->lc.i32, size, 0),
                            get_src(ctx, path.path[idx_lvl]->arr.index), "");
            if (offset)
               offset = LLVMBuildAdd(ctx->lc.builder, offset, array_off, "");
            else
               offset = array_off;
         }
      } else
         unreachable("Uhandled deref type in get_deref_instr_offset");
   }

out:
   nir_deref_path_finish(&path);

   /* NOTE(review): when both parts are non-zero, the constant part is
    * folded into `offset` here, yet *const_out is still set below —
    * callers appear to treat indir_out as authoritative when non-NULL;
    * confirm against the call sites.
    */
   if (const_offset && offset)
      offset =
         LLVMBuildAdd(ctx->lc.builder, offset, LLVMConstInt(ctx->lc.i32, const_offset, 0), "");

   *const_out = const_offset;
   *indir_out = offset;
}
293
/* Size in bytes of one scalar element of `type`; booleans are stored as
 * 32-bit values.
 */
static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
   if (glsl_type_is_boolean(type))
      return 4;
   return glsl_get_bit_size(type) / 8;
}
299
300
301 static LLVMValueRef emit_int_cmp(struct libresoc_llvm_context *lc, LLVMIntPredicate pred,
302 LLVMValueRef src0, LLVMValueRef src1)
303 {
304 LLVMTypeRef src0_type = LLVMTypeOf(src0);
305 LLVMTypeRef src1_type = LLVMTypeOf(src1);
306
307 if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
308 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
309 src1 = LLVMBuildIntToPtr(lc->builder, src1, src0_type, "");
310 } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
311 LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
312 src0 = LLVMBuildIntToPtr(lc->builder, src0, src1_type, "");
313 }
314
315 LLVMValueRef result = LLVMBuildICmp(lc->builder, pred, src0, src1, "");
316 return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
317 lc->i32_0, "");
318 }
319
320 static LLVMValueRef emit_float_cmp(struct libresoc_llvm_context *lc, LLVMRealPredicate pred,
321 LLVMValueRef src0, LLVMValueRef src1)
322 {
323 LLVMValueRef result;
324 src0 = to_float(lc, src0);
325 src1 = to_float(lc, src1);
326 result = LLVMBuildFCmp(lc->builder, pred, src0, src1, "");
327 return LLVMBuildSelect(lc->builder, result, LLVMConstInt(lc->i32, 0xFFFFFFFF, false),
328 lc->i32_0, "");
329 }
330
331 static LLVMValueRef emit_intrin_1f_param(struct libresoc_llvm_context *lc, const char *intrin,
332 LLVMTypeRef result_type, LLVMValueRef src0)
333 {
334 char name[64], type[64];
335 LLVMValueRef params[] = {
336 to_float(lc, src0),
337 };
338
339 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
340 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
341 assert(length < sizeof(name));
342 return build_intrinsic(lc, name, result_type, params, 1, FUNC_ATTR_READNONE);
343 }
344
345 static LLVMValueRef emit_intrin_1f_param_scalar(struct libresoc_llvm_context *lc, const char *intrin,
346 LLVMTypeRef result_type, LLVMValueRef src0)
347 {
348 if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
349 return emit_intrin_1f_param(lc, intrin, result_type, src0);
350
351 LLVMTypeRef elem_type = LLVMGetElementType(result_type);
352 LLVMValueRef ret = LLVMGetUndef(result_type);
353
354 /* Scalarize the intrinsic, because vectors are not supported. */
355 for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
356 char name[64], type[64];
357 LLVMValueRef params[] = {
358 to_float(lc, llvm_extract_elem(lc, src0, i)),
359 };
360
361 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
362 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
363 assert(length < sizeof(name));
364 ret = LLVMBuildInsertElement(
365 lc->builder, ret,
366 build_intrinsic(lc, name, elem_type, params, 1, FUNC_ATTR_READNONE),
367 LLVMConstInt(lc->i32, i, 0), "");
368 }
369 return ret;
370 }
371
372 static LLVMValueRef emit_intrin_2f_param(struct libresoc_llvm_context *ctx, const char *intrin,
373 LLVMTypeRef result_type, LLVMValueRef src0,
374 LLVMValueRef src1)
375 {
376 char name[64], type[64];
377 LLVMValueRef params[] = {
378 to_float(ctx, src0),
379 to_float(ctx, src1),
380 };
381
382 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
383 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
384 assert(length < sizeof(name));
385 return build_intrinsic(ctx, name, result_type, params, 2, FUNC_ATTR_READNONE);
386 }
387
388 static LLVMValueRef emit_intrin_3f_param(struct libresoc_llvm_context *ctx, const char *intrin,
389 LLVMTypeRef result_type, LLVMValueRef src0,
390 LLVMValueRef src1, LLVMValueRef src2)
391 {
392 char name[64], type[64];
393 LLVMValueRef params[] = {
394 to_float(ctx, src0),
395 to_float(ctx, src1),
396 to_float(ctx, src2),
397 };
398
399 build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
400 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
401 assert(length < sizeof(name));
402 return build_intrinsic(ctx, name, result_type, params, 3, FUNC_ATTR_READNONE);
403 }
404
405 static LLVMValueRef emit_bcsel(struct libresoc_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
406 LLVMValueRef src2)
407 {
408 LLVMTypeRef src1_type = LLVMTypeOf(src1);
409 LLVMTypeRef src2_type = LLVMTypeOf(src2);
410
411 if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
412 LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
413 src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
414 } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
415 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
416 src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
417 }
418
419 LLVMValueRef v =
420 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, LLVMConstNull(LLVMTypeOf(src0)), "");
421 return LLVMBuildSelect(ctx->builder, v, to_integer_or_pointer(ctx, src1),
422 to_integer_or_pointer(ctx, src2), "");
423 }
424
425 static LLVMValueRef emit_iabs(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
426 {
427 return build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
428 }
429
430 static LLVMValueRef emit_uint_carry(struct libresoc_llvm_context *ctx, const char *intrin,
431 LLVMValueRef src0, LLVMValueRef src1)
432 {
433 LLVMTypeRef ret_type;
434 LLVMTypeRef types[] = {ctx->i32, ctx->i1};
435 LLVMValueRef res;
436 LLVMValueRef params[] = {src0, src1};
437 ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);
438
439 res = build_intrinsic(ctx, intrin, ret_type, params, 2, FUNC_ATTR_READNONE);
440
441 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
442 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
443 return res;
444 }
445
446 static LLVMValueRef emit_b2f(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
447 {
448 assert(get_elem_bits(ctx, LLVMTypeOf(src0)) == 32);
449 LLVMValueRef result =
450 LLVMBuildAnd(ctx->builder, src0, const_uint_vec(ctx, LLVMTypeOf(src0), 0x3f800000), "");
451 result = to_float(ctx, result);
452
453 switch (bitsize) {
454 case 16: {
455 bool vec2 = LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind;
456 return LLVMBuildFPTrunc(ctx->builder, result, vec2 ? ctx->v2f16 : ctx->f16, "");
457 }
458 case 32:
459 return result;
460 case 64:
461 return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
462 default:
463 unreachable("Unsupported bit size.");
464 }
465 }
466
467 static LLVMValueRef emit_f2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
468 {
469 src0 = to_float(ctx, src0);
470 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
471 return LLVMBuildSExt(ctx->builder, LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
472 ctx->i32, "");
473 }
474
475 static LLVMValueRef emit_b2i(struct libresoc_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
476 {
477 LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
478
479 switch (bitsize) {
480 case 8:
481 return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
482 case 16:
483 return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
484 case 32:
485 return result;
486 case 64:
487 return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
488 default:
489 unreachable("Unsupported bit size.");
490 }
491 }
492
493 static LLVMValueRef emit_i2b(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
494 {
495 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
496 return LLVMBuildSExt(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
497 ctx->i32, "");
498 }
499
500 static LLVMValueRef emit_f2f16(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
501 {
502 LLVMValueRef result;
503 LLVMValueRef cond = NULL;
504
505 src0 = to_float(ctx, src0);
506 result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
507
508 /* need to convert back up to f32 */
509 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
510 return result;
511 }
512
513 static LLVMValueRef emit_umul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
514 LLVMValueRef src1)
515 {
516 LLVMValueRef dst64, result;
517 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
518 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
519
520 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
521 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
522 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
523 return result;
524 }
525
526 static LLVMValueRef emit_imul_high(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
527 LLVMValueRef src1)
528 {
529 LLVMValueRef dst64, result;
530 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
531 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
532
533 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
534 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
535 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
536 return result;
537 }
538
539 static LLVMValueRef emit_bfm(struct libresoc_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
540 {
541 /* mask = ((1 << bits) - 1) << offset */
542 return LLVMBuildShl(
543 ctx->builder,
544 LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
545 offset, "");
546 }
547
548 static LLVMValueRef emit_bitfield_select(struct libresoc_llvm_context *ctx, LLVMValueRef mask,
549 LLVMValueRef insert, LLVMValueRef base)
550 {
551 /* Calculate:
552 * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
553 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
554 */
555 return LLVMBuildXor(
556 ctx->builder, base,
557 LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
558 }
559
560 static LLVMValueRef emit_pack_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0,
561 LLVMValueRef (*pack)(struct libresoc_llvm_context *ctx,
562 LLVMValueRef args[2]))
563 {
564 LLVMValueRef comp[2];
565
566 src0 = to_float(ctx, src0);
567 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
568 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
569
570 return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
571 }
572
573 static LLVMValueRef emit_unpack_half_2x16(struct libresoc_llvm_context *ctx, LLVMValueRef src0)
574 {
575 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
576 LLVMValueRef temps[2], val;
577 int i;
578
579 for (i = 0; i < 2; i++) {
580 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
581 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
582 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
583 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
584 }
585 return build_gather_values(ctx, temps, 2);
586 }
587
// TODO: enable this when ac_build_ddxy() is added
589 // static LLVMValueRef emit_ddxy(struct libresoc_nir_context *ctx, nir_op op, LLVMValueRef src0)
590 // {
591 // unsigned mask;
592 // int idx;
593 // LLVMValueRef result;
594
595 // if (op == nir_op_fddx_fine)
596 // mask = TID_MASK_LEFT;
597 // else if (op == nir_op_fddy_fine)
598 // mask = TID_MASK_TOP;
599 // else
600 // mask = TID_MASK_TOP_LEFT;
601
602 // /* for DDX we want to next X pixel, DDY next Y pixel. */
603 // if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
604 // idx = 1;
605 // else
606 // idx = 2;
607
608 // result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
609 // return result;
610 // }
611
612 static void setup_locals(struct libresoc_nir_tran_ctx *ctx, struct nir_function *func)
613 {
614 int i, j;
615 ctx->num_locals = 0;
616 nir_foreach_function_temp_variable(variable, func->impl)
617 {
618 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
619 variable->data.driver_location = ctx->num_locals * 4;
620 variable->data.location_frac = 0;
621 ctx->num_locals += attrib_count;
622 }
623 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
624 if (!ctx->locals)
625 return;
626
627 for (i = 0; i < ctx->num_locals; i++) {
628 for (j = 0; j < 4; j++) {
629 ctx->locals[i * 4 + j] = build_alloca_undef(&ctx->lc, ctx->lc.f32, "temp");
630 }
631 }
632 }
633
634 static void setup_scratch(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
635 {
636 if (shader->scratch_size == 0)
637 return;
638
639 ctx->scratch =
640 build_alloca_undef(&ctx->lc, LLVMArrayType(ctx->lc.i8, shader->scratch_size), "scratch");
641 }
642
643 static void setup_constant_data(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *shader)
644 {
645 if (!shader->constant_data)
646 return;
647
648 LLVMValueRef data = LLVMConstStringInContext(ctx->lc.context, shader->constant_data,
649 shader->constant_data_size, true);
650 LLVMTypeRef type = LLVMArrayType(ctx->lc.i8, shader->constant_data_size);
651
652 unsigned address_space = 0; //TODO: dummay value
653 LLVMValueRef global =
654 LLVMAddGlobalInAddressSpace(*(ctx->lc.module), type, "const_data", address_space);
655
656 LLVMSetInitializer(global, data);
657 LLVMSetGlobalConstant(global, true);
658 LLVMSetVisibility(global, LLVMHiddenVisibility);
659 ctx->constant_data = global;
660 }
661
662 static LLVMTypeRef glsl_base_to_llvm_type(struct libresoc_llvm_context *lc, enum glsl_base_type type)
663 {
664 switch (type) {
665 case GLSL_TYPE_INT:
666 case GLSL_TYPE_UINT:
667 case GLSL_TYPE_BOOL:
668 case GLSL_TYPE_SUBROUTINE:
669 return lc->i32;
670 case GLSL_TYPE_INT8:
671 case GLSL_TYPE_UINT8:
672 return lc->i8;
673 case GLSL_TYPE_INT16:
674 case GLSL_TYPE_UINT16:
675 return lc->i16;
676 case GLSL_TYPE_FLOAT:
677 return lc->f32;
678 case GLSL_TYPE_FLOAT16:
679 return lc->f16;
680 case GLSL_TYPE_INT64:
681 case GLSL_TYPE_UINT64:
682 return lc->i64;
683 case GLSL_TYPE_DOUBLE:
684 return lc->f64;
685 default:
686 unreachable("unknown GLSL type");
687 }
688 }
689
690 static LLVMTypeRef glsl_to_llvm_type(struct libresoc_llvm_context *lc, const struct glsl_type *type)
691 {
692 if (glsl_type_is_scalar(type)) {
693 return glsl_base_to_llvm_type(lc, glsl_get_base_type(type));
694 }
695
696 if (glsl_type_is_vector(type)) {
697 return LLVMVectorType(glsl_base_to_llvm_type(lc, glsl_get_base_type(type)),
698 glsl_get_vector_elements(type));
699 }
700
701 if (glsl_type_is_matrix(type)) {
702 return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_column_type(type)),
703 glsl_get_matrix_columns(type));
704 }
705
706 if (glsl_type_is_array(type)) {
707 return LLVMArrayType(glsl_to_llvm_type(lc, glsl_get_array_element(type)),
708 glsl_get_length(type));
709 }
710
711 assert(glsl_type_is_struct_or_ifc(type));
712
713 LLVMTypeRef member_types[glsl_get_length(type)];
714
715 for (unsigned i = 0; i < glsl_get_length(type); i++) {
716 member_types[i] = glsl_to_llvm_type(lc, glsl_get_struct_field(type, i));
717 }
718
719 return LLVMStructTypeInContext(lc->context, member_types, glsl_get_length(type), false);
720 }
721
722 // static LLVMValueRef visit_load(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr,
723 // bool is_output)
724 // {
725 // LLVMValueRef values[8];
726 // LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
727 // LLVMTypeRef component_type;
728 // unsigned base = nir_intrinsic_base(instr);
729 // unsigned component = nir_intrinsic_component(instr);
730 // unsigned count = instr->dest.ssa.num_components * (instr->dest.ssa.bit_size == 64 ? 2 : 1);
731 // nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
732 // LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
733 // nir_src offset = *nir_get_io_offset_src(instr);
734 // LLVMValueRef indir_index = NULL;
735
736 // if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
737 // component_type = LLVMGetElementType(dest_type);
738 // else
739 // component_type = dest_type;
740
741 // if (nir_src_is_const(offset))
742 // assert(nir_src_as_uint(offset) == 0);
743 // else
744 // indir_index = get_src(ctx, offset);
745
746 // if (ctx->stage == MESA_SHADER_TESS_CTRL || (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
747 // LLVMValueRef result = ctx->abi->load_tess_varyings(
748 // ctx->abi, component_type, vertex_index, indir_index, 0, 0, base * 4, component,
749 // instr->num_components, false, false, !is_output);
750 // if (instr->dest.ssa.bit_size == 16) {
751 // result = to_integer(&ctx->lc, result);
752 // result = LLVMBuildTrunc(ctx->lc.builder, result, dest_type, "");
753 // }
754 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
755 // }
756
757 // /* No indirect indexing is allowed after this point. */
758 // assert(!indir_index);
759
760 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
761 // LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
762 // assert(nir_src_is_const(*vertex_index_src));
763
764 // return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component, instr->num_components,
765 // nir_src_as_uint(*vertex_index_src), 0, type);
766 // }
767
768 // if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
769 // nir_intrinsic_io_semantics(instr).fb_fetch_output)
770 // return ctx->abi->emit_fbfetch(ctx->abi);
771
772 // /* Other non-fragment cases have inputs and outputs in temporaries. */
773 // if (ctx->stage != MESA_SHADER_FRAGMENT) {
774 // for (unsigned chan = component; chan < count + component; chan++) {
775 // if (is_output) {
776 // values[chan] = LLVMBuildLoad(ctx->lc.builder, ctx->outputs[base * 4 + chan], "");
777 // } else {
778 // values[chan] = ctx->inputs[base * 4 + chan];
779 // if (!values[chan])
780 // values[chan] = LLVMGetUndef(ctx->lc.i32);
781 // }
782 // }
783 // LLVMValueRef result = build_varying_gather_values(&ctx->lc, values, count, component);
784 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
785 // }
786
787 // /* Fragment shader inputs. */
788 // unsigned vertex_id = 2; /* P0 */
789
790 // if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
791 // nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
792
793 // switch (src0[0].i32) {
794 // case 0:
795 // vertex_id = 2;
796 // break;
797 // case 1:
798 // vertex_id = 0;
799 // break;
800 // case 2:
801 // vertex_id = 1;
802 // break;
803 // default:
804 // unreachable("Invalid vertex index");
805 // }
806 // }
807
808 // LLVMValueRef attr_number = LLVMConstInt(ctx->lc.i32, base, false);
809
810 // for (unsigned chan = 0; chan < count; chan++) {
811 // if (component + chan > 4)
812 // attr_number = LLVMConstInt(ctx->lc.i32, base + 1, false);
813 // LLVMValueRef llvm_chan = LLVMConstInt(ctx->lc.i32, (component + chan) % 4, false);
814 // values[chan] =
815 // build_fs_interp_mov(&ctx->lc, LLVMConstInt(ctx->lc.i32, vertex_id, false), llvm_chan,
816 // attr_number, get_arg(&ctx->lc, ctx->args->prim_mask));
817 // values[chan] = LLVMBuildBitCast(ctx->lc.builder, values[chan], ctx->lc.i32, "");
818 // values[chan] =
819 // LLVMBuildTruncOrBitCast(ctx->lc.builder, values[chan],
820 // instr->dest.ssa.bit_size == 16 ? ctx->lc.i16 : ctx->lc.i32, "");
821 // }
822
823 // LLVMValueRef result = build_gather_values(&ctx->lc, values, count);
824 // return LLVMBuildBitCast(ctx->lc.builder, result, dest_type, "");
825 // }
826
827 static LLVMValueRef visit_load_shared(struct libresoc_nir_tran_ctx *ctx, const nir_intrinsic_instr *instr)
828 {
829 LLVMValueRef values[4], derived_ptr, index, ret;
830
831 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size);
832
833 for (int chan = 0; chan < instr->num_components; chan++) {
834 index = LLVMConstInt(ctx->lc.i32, chan, 0);
835 derived_ptr = LLVMBuildGEP(ctx->lc.builder, ptr, &index, 1, "");
836 values[chan] = LLVMBuildLoad(ctx->lc.builder, derived_ptr, "");
837 }
838
839 ret = build_gather_values(&ctx->lc, values, instr->num_components);
840 return LLVMBuildBitCast(ctx->lc.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
841 }
842
843 static void visit_store_shared(struct libresoc_nir_tran_ctx *ctx, const nir_intrinsic_instr *instr)
844 {
845 LLVMValueRef derived_ptr, data, index;
846 LLVMBuilderRef builder = ctx->lc.builder;
847
848 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size);
849 LLVMValueRef src = get_src(ctx, instr->src[0]);
850
851 int writemask = nir_intrinsic_write_mask(instr);
852 for (int chan = 0; chan < 4; chan++) {
853 if (!(writemask & (1 << chan))) {
854 continue;
855 }
856 data = llvm_extract_elem(&ctx->lc, src, chan);
857 index = LLVMConstInt(ctx->lc.i32, chan, 0);
858 derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
859 LLVMBuildStore(builder, data, derived_ptr);
860 }
861 }
862 static void visit_load_const(struct libresoc_nir_tran_ctx *ctx, const nir_load_const_instr *instr)
863 {
864 LLVMValueRef values[4], value = NULL;
865 LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size);
866
867 for (unsigned i = 0; i < instr->def.num_components; ++i) {
868 switch (instr->def.bit_size) {
869 case 8:
870 values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
871 break;
872 case 16:
873 values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
874 break;
875 case 32:
876 values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
877 break;
878 case 64:
879 values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
880 break;
881 default:
882 fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
883 abort();
884 }
885 }
886 if (instr->def.num_components > 1) {
887 value = LLVMConstVector(values, instr->def.num_components);
888 } else
889 value = values[0];
890
891 ctx->ssa_defs[instr->def.index] = value;
892 }
893
894 static void visit_store_output(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
895 {
896 // if (ctx->ac.postponed_kill) {
897 // LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
898 // ac_build_ifcc(&ctx->ac, cond, 7002);
899 // }
900
901 unsigned base = nir_intrinsic_base(instr);
902 unsigned writemask = nir_intrinsic_write_mask(instr);
903 unsigned component = nir_intrinsic_component(instr);
904 LLVMValueRef src = to_float(&ctx->lc, get_src(ctx, instr->src[0]));
905 nir_src offset = *nir_get_io_offset_src(instr);
906 LLVMValueRef indir_index = NULL;
907
908 if (nir_src_is_const(offset))
909 assert(nir_src_as_uint(offset) == 0);
910 else
911 indir_index = get_src(ctx, offset);
912
913 switch (get_elem_bits(&ctx->lc, LLVMTypeOf(src))) {
914 case 32:
915 break;
916 case 64:
917 writemask = widen_mask(writemask, 2);
918 src = LLVMBuildBitCast(ctx->lc.builder, src,
919 LLVMVectorType(ctx->lc.f32, get_llvm_num_components(src) * 2), "");
920 break;
921 default:
922 unreachable("unhandled store_output bit size");
923 return;
924 }
925
926 writemask <<= component;
927
928 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
929 // nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
930 // LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
931
932 // ctx->abi->store_tcs_outputs(ctx->abi, NULL, vertex_index, indir_index, 0, src, writemask,
933 // component, base * 4);
934 // return;
935 // }
936
937 /* No indirect indexing is allowed after this point. */
938 assert(!indir_index);
939
940 for (unsigned chan = 0; chan < 8; chan++) {
941 if (!(writemask & (1 << chan)))
942 continue;
943
944 LLVMValueRef value = llvm_extract_elem(&ctx->lc, src, chan - component);
945 LLVMBuildStore(ctx->lc.builder, value, ctx->outputs[base * 4 + chan]);
946 }
947
948 // if (ctx->ac.postponed_kill)
949 // ac_build_endif(&ctx->ac, 7002);
950 }
951
/* Translate a NIR deref-chain instruction into an LLVM pointer value.
 * Only shared (LDS) and global memory derefs are materialized here; all
 * other variable modes return early and are handled elsewhere.  Global
 * memory addressing is byte-based (explicit offsets, i64 arithmetic),
 * while shared memory is typed and indexed with GEPs. */
static void visit_deref(struct libresoc_nir_tran_ctx *ctx, nir_deref_instr *instr)
{
   if (instr->mode != nir_var_mem_shared && instr->mode != nir_var_mem_global)
      return;

   LLVMValueRef result = NULL;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      /* Root of the chain: look up the LLVM value registered for the
       * variable.  NOTE(review): entry is not NULL-checked — assumes
       * every deref'd var was inserted into ctx->vars beforehand. */
      struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
      result = entry->data;
      break;
   }
   case nir_deref_type_struct:
      if (instr->mode == nir_var_mem_global) {
         /* Global memory: use the explicit byte offset from the GLSL
          * struct layout. */
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent),
                                LLVMConstInt(ctx->lc.i32, offset, 0));
      } else {
         /* Shared memory is typed: index the struct field directly. */
         result = build_gep0(&ctx->lc, get_src(ctx, instr->parent),
                             LLVMConstInt(ctx->lc.i32, instr->strct.index, 0));
      }
      break;
   case nir_deref_type_array:
      if (instr->mode == nir_var_mem_global) {
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         unsigned stride = glsl_get_explicit_stride(parent->type);

         /* Row-major matrices and stride-less vectors fall back to the
          * scalar size as the element stride. */
         if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
             (glsl_type_is_vector(parent->type) && stride == 0))
            stride = type_scalar_size_bytes(parent->type);

         assert(stride > 0);
         LLVMValueRef index = get_src(ctx, instr->arr.index);
         /* Byte offsets are computed in 64-bit; widen the index first. */
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      } else {
         result =
            build_gep0(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_ptr_as_array:
      if (instr->mode == nir_var_mem_global) {
         /* Like the array case, but the stride comes from the deref
          * itself rather than the parent GLSL type. */
         unsigned stride = nir_deref_instr_array_stride(instr);

         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->lc.i64)
            index = LLVMBuildZExt(ctx->lc.builder, index, ctx->lc.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->lc.builder, index, LLVMConstInt(ctx->lc.i64, stride, 0), "");

         result = build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), offset);
      } else {
         result =
            build_gep_ptr(&ctx->lc, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_cast: {
      result = get_src(ctx, instr->parent);

      /* We can't use the structs from LLVM because the shader
       * specifies its own offsets. */
      LLVMTypeRef pointee_type = ctx->lc.i8;
      if (instr->mode == nir_var_mem_shared)
         pointee_type = glsl_to_llvm_type(&ctx->lc, instr->type);

      unsigned address_space;

      switch (instr->mode) {
      case nir_var_mem_shared:
         address_space = 1;
         break;
      case nir_var_mem_global:
         address_space = 0;
         break;
      default:
         unreachable("Unhandled address space");
      }

      LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

      /* The parent value may arrive as a vector (fat pointer) or as an
       * integer address; coerce either into the expected pointer type. */
      if (LLVMTypeOf(result) != type) {
         if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
            result = LLVMBuildBitCast(ctx->lc.builder, result, type, "");
         } else {
            result = LLVMBuildIntToPtr(ctx->lc.builder, result, type, "");
         }
      }
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
1054
1055 static void visit_phi(struct libresoc_nir_tran_ctx *ctx, nir_phi_instr *instr)
1056 {
1057 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
1058 LLVMValueRef result = LLVMBuildPhi(ctx->lc.builder, type, "");
1059
1060 ctx->ssa_defs[instr->dest.ssa.index] = result;
1061 _mesa_hash_table_insert(ctx->phis, instr, result);
1062 }
1063
1064 static bool is_def_used_in_an_export(const nir_ssa_def *def)
1065 {
1066 nir_foreach_use (use_src, def) {
1067 if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
1068 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
1069 if (instr->intrinsic == nir_intrinsic_store_deref)
1070 return true;
1071 } else if (use_src->parent_instr->type == nir_instr_type_alu) {
1072 nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
1073 if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
1074 return true;
1075 }
1076 }
1077 }
1078 return false;
1079 }
1080
1081 static void visit_ssa_undef(struct libresoc_nir_tran_ctx *ctx, const nir_ssa_undef_instr *instr)
1082 {
1083 unsigned num_components = instr->def.num_components;
1084 LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->def.bit_size);
1085
1086 if (/*!ctx->abi->convert_undef_to_zero ||*/ is_def_used_in_an_export(&instr->def)) {
1087 LLVMValueRef undef;
1088
1089 if (num_components == 1)
1090 undef = LLVMGetUndef(type);
1091 else {
1092 undef = LLVMGetUndef(LLVMVectorType(type, num_components));
1093 }
1094 ctx->ssa_defs[instr->def.index] = undef;
1095 } else {
1096 LLVMValueRef zero = LLVMConstInt(type, 0, false);
1097 if (num_components > 1) {
1098 zero = build_gather_values_extended(&ctx->lc, &zero, 4, 0, false, false);
1099 }
1100 ctx->ssa_defs[instr->def.index] = zero;
1101 }
1102 }
1103
1104 static void visit_jump(struct libresoc_llvm_context *lc, const nir_jump_instr *instr)
1105 {
1106 switch (instr->type) {
1107 case nir_jump_break:
1108 build_break(lc);
1109 break;
1110 case nir_jump_continue:
1111 build_continue(lc);
1112 break;
1113 default:
1114 fprintf(stderr, "Unknown NIR jump instr: ");
1115 nir_print_instr(&instr->instr, stderr);
1116 fprintf(stderr, "\n");
1117 abort();
1118 }
1119 }
1120
1121 static LLVMValueRef get_alu_src(struct libresoc_nir_tran_ctx *ctx, nir_alu_src src,
1122 unsigned num_components)
1123 {
1124 LLVMValueRef value = get_src(ctx, src.src);
1125 bool need_swizzle = false;
1126
1127 assert(value);
1128 unsigned src_components = get_llvm_num_components(value);
1129 for (unsigned i = 0; i < num_components; ++i) {
1130 assert(src.swizzle[i] < src_components);
1131 if (src.swizzle[i] != i)
1132 need_swizzle = true;
1133 }
1134
1135 if (need_swizzle || num_components != src_components) {
1136 LLVMValueRef masks[] = {LLVMConstInt(ctx->lc.i32, src.swizzle[0], false),
1137 LLVMConstInt(ctx->lc.i32, src.swizzle[1], false),
1138 LLVMConstInt(ctx->lc.i32, src.swizzle[2], false),
1139 LLVMConstInt(ctx->lc.i32, src.swizzle[3], false)};
1140
1141 if (src_components > 1 && num_components == 1) {
1142 value = LLVMBuildExtractElement(ctx->lc.builder, value, masks[0], "");
1143 } else if (src_components == 1 && num_components > 1) {
1144 LLVMValueRef values[] = {value, value, value, value};
1145 value = build_gather_values(&ctx->lc, values, num_components);
1146 } else {
1147 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
1148 value = LLVMBuildShuffleVector(ctx->lc.builder, value, value, swizzle, "");
1149 }
1150 }
1151 assert(!src.negate);
1152 assert(!src.abs);
1153 return value;
1154 }
1155
/* Translate one NIR ALU instruction into LLVM IR.
 *
 * The first switch determines how many components each *source* has: a
 * few opcodes (vecN construction, 2x16 packs, cube-face ops) consume
 * sources whose component count differs from the destination's.  Sources
 * are then fetched/swizzled via get_alu_src().  The second switch emits
 * the actual operation.  Finally the result is normalized back to an
 * integer/pointer representation and stored in the SSA-def table. */
static void visit_alu(struct libresoc_nir_tran_ctx *ctx, const nir_alu_instr *instr)
{
   LLVMValueRef src[4], result = NULL;
   unsigned num_components = instr->dest.dest.ssa.num_components;
   unsigned src_components;
   LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

   assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
   /* Per-opcode source component counts (see function comment). */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
   case nir_op_pack_snorm_2x16:
   case nir_op_pack_unorm_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord:
   case nir_op_cube_face_index:
      src_components = 3;
      break;
   default:
      src_components = num_components;
      break;
   }
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      src[i] = get_alu_src(ctx, instr->src[i], src_components);

   switch (instr->op) {
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_fneg:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFNeg(ctx->lc.builder, src[0], "");
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fneg will be optimized by backend compiler with sign
          * bit removed via XOR. This is probably a LLVM bug.
          */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_ineg:
      result = LLVMBuildNeg(ctx->lc.builder, src[0], "");
      break;
   case nir_op_inot:
      result = LLVMBuildNot(ctx->lc.builder, src[0], "");
      break;
   case nir_op_iadd:
      result = LLVMBuildAdd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fadd:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFAdd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fsub:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFSub(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_isub:
      result = LLVMBuildSub(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_imul:
      result = LLVMBuildMul(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_imod:
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_umod:
      result = LLVMBuildURem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_irem:
      result = LLVMBuildSRem(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_idiv:
      result = LLVMBuildSDiv(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_udiv:
      result = LLVMBuildUDiv(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_fmul:
      src[0] = to_float(&ctx->lc, src[0]);
      src[1] = to_float(&ctx->lc, src[1]);
      result = LLVMBuildFMul(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_frcp:
      /* For doubles, we need precise division to pass GLCTS. */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL && get_type_size(def_type) == 8) {
         result = LLVMBuildFDiv(ctx->lc.builder, ctx->lc.f64_1, to_float(&ctx->lc, src[0]), "");
      } else {
         result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rcp",
                                              to_float_type(&ctx->lc, def_type), src[0]);
      }
      // TODO: abi not supported
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                           LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
      break;
   case nir_op_iand:
      result = LLVMBuildAnd(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ior:
      result = LLVMBuildOr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ixor:
      result = LLVMBuildXor(ctx->lc.builder, src[0], src[1], "");
      break;
   /* For the three shift ops the shift-amount operand may have a
    * different width than the value; coerce it to match first. */
   case nir_op_ishl:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildShl(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ishr:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildAShr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ushr:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) <
          get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildZExt(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      else if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[1])) >
               get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])))
         src[1] = LLVMBuildTrunc(ctx->lc.builder, src[1], LLVMTypeOf(src[0]), "");
      result = LLVMBuildLShr(ctx->lc.builder, src[0], src[1], "");
      break;
   case nir_op_ilt32:
      result = emit_int_cmp(&ctx->lc, LLVMIntSLT, src[0], src[1]);
      break;
   case nir_op_ine32:
      result = emit_int_cmp(&ctx->lc, LLVMIntNE, src[0], src[1]);
      break;
   case nir_op_ieq32:
      result = emit_int_cmp(&ctx->lc, LLVMIntEQ, src[0], src[1]);
      break;
   case nir_op_ige32:
      result = emit_int_cmp(&ctx->lc, LLVMIntSGE, src[0], src[1]);
      break;
   case nir_op_ult32:
      result = emit_int_cmp(&ctx->lc, LLVMIntULT, src[0], src[1]);
      break;
   case nir_op_uge32:
      result = emit_int_cmp(&ctx->lc, LLVMIntUGE, src[0], src[1]);
      break;
   case nir_op_feq32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOEQ, src[0], src[1]);
      break;
   case nir_op_fneu32:
      /* Unordered NE: true if either operand is NaN, matching NIR. */
      result = emit_float_cmp(&ctx->lc, LLVMRealUNE, src[0], src[1]);
      break;
   case nir_op_flt32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOLT, src[0], src[1]);
      break;
   case nir_op_fge32:
      result = emit_float_cmp(&ctx->lc, LLVMRealOGE, src[0], src[1]);
      break;
   case nir_op_fabs:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.fabs", to_float_type(&ctx->lc, def_type), src[0]);
      if (ctx->lc.float_mode == FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fabs will be optimized by backend compiler with sign
          * bit removed via AND.
          */
         result = build_canonicalize(&ctx->lc, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_iabs:
      result = emit_iabs(&ctx->lc, src[0]);
      break;
   case nir_op_imax:
      result = build_imax(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_imin:
      result = build_imin(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_umax:
      result = build_umax(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_umin:
      result = build_umin(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_isign:
      result = build_isign(&ctx->lc, src[0]);
      break;
   case nir_op_fsign:
      src[0] = to_float(&ctx->lc, src[0]);
      result = build_fsign(&ctx->lc, src[0]);
      break;
   case nir_op_ffloor:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.floor", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_ftrunc:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.trunc", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fceil:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.ceil", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fround_even:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.rint", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_ffract:
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.fract",
                                           to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fsin:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.sin", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fcos:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.cos", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fsqrt:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.sqrt", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_fexp2:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.exp2", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_flog2:
      result =
         emit_intrin_1f_param(&ctx->lc, "llvm.log2", to_float_type(&ctx->lc, def_type), src[0]);
      break;
   case nir_op_frsq:
      result = emit_intrin_1f_param_scalar(&ctx->lc, "llvm.amdgcn.rsq",
                                           to_float_type(&ctx->lc, def_type), src[0]);
      // TODO: abi not enabled
      // if (ctx->abi->clamp_div_by_zero)
      //    result = build_fmin(&ctx->lc, result,
      //                           LLVMConstReal(to_float_type(&ctx->lc, def_type), FLT_MAX));
      break;
   case nir_op_frexp_exp:
      /* NOTE(review): result stays NULL here, so no SSA def is written
       * for this opcode until the helper below is ported. */
      // TODO: enable this when ac_build_frexp_exp() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_exp(&ctx->lc, src[0], get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])));
      // if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) == 16)
      //    result = LLVMBuildSExt(ctx->lc.builder, result, ctx->lc.i32, "");
      break;
   case nir_op_frexp_sig:
      // TODO: enable this when ac_build_frexp_mant() is added
      // src[0] = to_float(&ctx->lc, src[0]);
      // result = ac_build_frexp_mant(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      // break;
      // case nir_op_fpow:
      //    result = emit_intrin_2f_param(&ctx->lc, "llvm.pow", to_float_type(&ctx->lc, def_type),
      //                                     src[0], src[1]);
      break;
   case nir_op_fmax:
      result = emit_intrin_2f_param(&ctx->lc, "llvm.maxnum", to_float_type(&ctx->lc, def_type),
                                    src[0], src[1]);
      break;
   case nir_op_fmin:
      result = emit_intrin_2f_param(&ctx->lc, "llvm.minnum", to_float_type(&ctx->lc, def_type),
                                    src[0], src[1]);
      break;
   case nir_op_ffma:
      result =
         emit_intrin_3f_param(&ctx->lc, "llvm.fmuladd",
                              to_float_type(&ctx->lc, def_type), src[0], src[1], src[2]);
      break;
   case nir_op_ldexp:
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, def_type) == 32)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f32", ctx->lc.f32, src, 2,
                                  FUNC_ATTR_READNONE);
      else if (get_elem_bits(&ctx->lc, def_type) == 16)
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f16", ctx->lc.f16, src, 2,
                                  FUNC_ATTR_READNONE);
      else
         result = build_intrinsic(&ctx->lc, "llvm.amdgcn.ldexp.f64", ctx->lc.f64, src, 2,
                                  FUNC_ATTR_READNONE);
      break;
   case nir_op_bfm:
      result = emit_bfm(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_bitfield_select:
      result = emit_bitfield_select(&ctx->lc, src[0], src[1], src[2]);
      break;
   case nir_op_ubfe:
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], false);
      break;
   case nir_op_ibfe:
      result = build_bfe(&ctx->lc, src[0], src[1], src[2], true);
      break;
   case nir_op_bitfield_reverse:
      result = build_bitfield_reverse(&ctx->lc, src[0]);
      break;
   case nir_op_bit_count:
      result = build_bit_count(&ctx->lc, src[0]);
      break;
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      /* Sources were fetched as scalars; gather into one vector. */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
         src[i] = to_integer(&ctx->lc, src[i]);
      result = build_gather_values(&ctx->lc, src, num_components);
      break;
   case nir_op_f2i8:
   case nir_op_f2i16:
   case nir_op_f2i32:
   case nir_op_f2i64:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToSI(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_f2u8:
   case nir_op_f2u16:
   case nir_op_f2u32:
   case nir_op_f2u64:
      src[0] = to_float(&ctx->lc, src[0]);
      result = LLVMBuildFPToUI(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_i2f16:
   case nir_op_i2f32:
   case nir_op_i2f64:
      result = LLVMBuildSIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_u2f16:
   case nir_op_u2f32:
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
      src[0] = to_float(&ctx->lc, src[0]);

      /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
       * all f32->f16 conversions have to round towards zero, because both scalar
       * and vec2 down-conversions have to round equally.
       */
      if (ctx->lc.float_mode == FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
         src[0] = to_float(&ctx->lc, src[0]);

         if (LLVMTypeOf(src[0]) == ctx->lc.f64)
            src[0] = LLVMBuildFPTrunc(ctx->lc.builder, src[0], ctx->lc.f32, "");

         /* Fast path conversion. This only works if NIR is vectorized
          * to vec2 16.
          */
         if (LLVMTypeOf(src[0]) == ctx->lc.v2f32) {
            LLVMValueRef args[] = {
               llvm_extract_elem(&ctx->lc, src[0], 0),
               llvm_extract_elem(&ctx->lc, src[0], 1),
            };
            result = build_cvt_pkrtz_f16(&ctx->lc, args);
            break;
         }

         assert(get_llvm_num_components(src[0]) == 1);
         /* Scalar: pack with an undef second lane, keep element 0. */
         LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->lc.f32)};
         result = build_cvt_pkrtz_f16(&ctx->lc, param);
         result = LLVMBuildExtractElement(ctx->lc.builder, result, ctx->lc.i32_0, "");
      } else {
         if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
            result =
               LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
         else
            result =
               LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      }
      break;
   case nir_op_f2f16_rtne:
   case nir_op_f2f32:
   case nir_op_f2f64:
      src[0] = to_float(&ctx->lc, src[0]);
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildFPExt(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      else
         result =
            LLVMBuildFPTrunc(ctx->lc.builder, src[0], to_float_type(&ctx->lc, def_type), "");
      break;
   case nir_op_u2u8:
   case nir_op_u2u16:
   case nir_op_u2ump:
   case nir_op_u2u32:
   case nir_op_u2u64:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildZExt(ctx->lc.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_i2i8:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      if (get_elem_bits(&ctx->lc, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->lc, def_type))
         result = LLVMBuildSExt(ctx->lc.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->lc.builder, src[0], def_type, "");
      break;
   case nir_op_b32csel:
      result = emit_bcsel(&ctx->lc, src[0], src[1], src[2]);
      break;
   case nir_op_find_lsb:
      result = find_lsb(&ctx->lc, ctx->lc.i32, src[0]);
      break;
   case nir_op_ufind_msb:
      result = build_umsb(&ctx->lc, src[0], ctx->lc.i32);
      break;
   case nir_op_ifind_msb:
      result = build_imsb(&ctx->lc, src[0], ctx->lc.i32);
      break;
   case nir_op_uadd_carry:
      result = emit_uint_carry(&ctx->lc, "llvm.uadd.with.overflow.i32", src[0], src[1]);
      break;
   case nir_op_usub_borrow:
      result = emit_uint_carry(&ctx->lc, "llvm.usub.with.overflow.i32", src[0], src[1]);
      break;
   case nir_op_b2f16:
   case nir_op_b2f32:
   case nir_op_b2f64:
      result = emit_b2f(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_f2b32:
      result = emit_f2b(&ctx->lc, src[0]);
      break;
   case nir_op_b2i8:
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      result = emit_b2i(&ctx->lc, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_i2b32:
      result = emit_i2b(&ctx->lc, src[0]);
      break;
   case nir_op_fquantize2f16:
      result = emit_f2f16(&ctx->lc, src[0]);
      break;
   case nir_op_umul_high:
      result = emit_umul_high(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_imul_high:
      result = emit_imul_high(&ctx->lc, src[0], src[1]);
      break;
   case nir_op_pack_half_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pkrtz_f16);
      break;
   case nir_op_pack_snorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_i16);
      break;
   case nir_op_pack_unorm_2x16:
      result = emit_pack_2x16(&ctx->lc, src[0], build_cvt_pknorm_u16);
      break;
   case nir_op_unpack_half_2x16:
      result = emit_unpack_half_2x16(&ctx->lc, src[0]);
      break;
   case nir_op_fddx:
   case nir_op_fddy:
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      /* NOTE(review): derivatives are silently dropped (result stays
       * NULL) until the helper below is ported. */
      // TODO: enable this when emit_ddxy() is added
      //result = emit_ddxy(ctx, instr->op, src[0]);
      break;

   case nir_op_unpack_64_2x32_split_x: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
      break;
   }

   case nir_op_unpack_64_2x32_split_y: {
      assert(get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i32, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
      break;
   }

   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i64, "");
      break;
   }

   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = build_gather_values(&ctx->lc, src, 2);
      result = LLVMBuildBitCast(ctx->lc.builder, tmp, ctx->lc.i32, "");
      break;
   }

   case nir_op_unpack_32_2x16_split_x: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_0, "");
      break;
   }

   case nir_op_unpack_32_2x16_split_y: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->lc.builder, src[0], ctx->lc.v2i16, "");
      result = LLVMBuildExtractElement(ctx->lc.builder, tmp, ctx->lc.i32_1, "");
      break;
   }

   case nir_op_cube_face_coord: {
      /* sc/tc are divided by the major axis and biased to [0,1]. */
      src[0] = to_float(&ctx->lc, src[0]);
      LLVMValueRef results[2];
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      results[0] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubesc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      results[1] = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubetc", ctx->lc.f32, in, 3,
                                   FUNC_ATTR_READNONE);
      LLVMValueRef ma = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubema", ctx->lc.f32, in, 3,
                                        FUNC_ATTR_READNONE);
      results[0] = build_fdiv(&ctx->lc, results[0], ma);
      results[1] = build_fdiv(&ctx->lc, results[1], ma);
      LLVMValueRef offset = LLVMConstReal(ctx->lc.f32, 0.5);
      results[0] = LLVMBuildFAdd(ctx->lc.builder, results[0], offset, "");
      results[1] = LLVMBuildFAdd(ctx->lc.builder, results[1], offset, "");
      result = build_gather_values(&ctx->lc, results, 2);
      break;
   }

   case nir_op_cube_face_index: {
      src[0] = to_float(&ctx->lc, src[0]);
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = llvm_extract_elem(&ctx->lc, src[0], chan);
      result = build_intrinsic(&ctx->lc, "llvm.amdgcn.cubeid", ctx->lc.f32, in, 3,
                               FUNC_ATTR_READNONE);
      break;
   }

   default:
      fprintf(stderr, "Unknown NIR alu instr: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      abort();
   }

   /* SSA defs are stored in integer/pointer form; cases that left result
    * NULL (unported TODOs above) write nothing. */
   if (result) {
      assert(instr->dest.dest.is_ssa);
      result = to_integer_or_pointer(&ctx->lc, result);
      ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
   }
}
1715
1716 static LLVMValueRef visit_load_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
1717 {
1718 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1719 nir_variable *var = nir_deref_instr_get_variable(deref);
1720
1721 LLVMValueRef values[8];
1722 int idx = 0;
1723 int ve = instr->dest.ssa.num_components;
1724 unsigned comp = 0;
1725 LLVMValueRef indir_index;
1726 LLVMValueRef ret;
1727 unsigned const_index;
1728 unsigned stride = 4;
1729 int mode = deref->mode;
1730
1731 if (var) {
1732 bool vs_in = ctx->stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in;
1733 idx = var->data.driver_location;
1734 comp = var->data.location_frac;
1735 mode = var->data.mode;
1736
1737 get_deref_offset(ctx, deref, vs_in, NULL, NULL, &const_index, &indir_index);
1738
1739 if (var->data.compact) {
1740 stride = 1;
1741 const_index += comp;
1742 comp = 0;
1743 }
1744 }
1745
1746 if (instr->dest.ssa.bit_size == 64 &&
1747 (deref->mode == nir_var_shader_in || deref->mode == nir_var_shader_out ||
1748 deref->mode == nir_var_function_temp))
1749 ve *= 2;
1750
1751 switch (mode) {
1752 case nir_var_shader_in:
1753 /* TODO: remove this after RADV switches to lowered IO */
1754 // if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
1755 // return load_tess_varyings(ctx, instr, true);
1756 // }
1757
1758 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
1759 // LLVMTypeRef type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
1760 // LLVMValueRef indir_index;
1761 // unsigned const_index, vertex_index;
1762 // get_deref_offset(ctx, deref, false, &vertex_index, NULL, &const_index, &indir_index);
1763 // assert(indir_index == NULL);
1764
1765 // return ctx->abi->load_inputs(ctx->abi, var->data.location, var->data.driver_location,
1766 // var->data.location_frac, instr->num_components, vertex_index,
1767 // const_index, type);
1768 // }
1769
1770 for (unsigned chan = comp; chan < ve + comp; chan++) {
1771 if (indir_index) {
1772 unsigned count =
1773 glsl_count_attribute_slots(var->type, ctx->stage == MESA_SHADER_VERTEX);
1774 count -= chan / 4;
1775 LLVMValueRef tmp_vec = build_gather_values_extended(
1776 &ctx->lc, ctx->inputs + idx + chan, count, stride, false, true);
1777
1778 values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
1779 } else
1780 values[chan] = ctx->inputs[idx + chan + const_index * stride];
1781 }
1782 break;
1783 case nir_var_function_temp:
1784 for (unsigned chan = 0; chan < ve; chan++) {
1785 if (indir_index) {
1786 unsigned count = glsl_count_attribute_slots(var->type, false);
1787 count -= chan / 4;
1788 LLVMValueRef tmp_vec = build_gather_values_extended(
1789 &ctx->lc, ctx->locals + idx + chan, count, stride, true, true);
1790
1791 values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
1792 } else {
1793 values[chan] =
1794 LLVMBuildLoad(ctx->lc.builder, ctx->locals[idx + chan + const_index * stride], "");
1795 }
1796 }
1797 break;
1798 case nir_var_shader_out:
1799 /* TODO: remove this after RADV switches to lowered IO */
1800 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
1801 // return load_tess_varyings(ctx, instr, false);
1802 // }
1803
1804 // if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.fb_fetch_output && ctx->abi->emit_fbfetch)
1805 // return ctx->abi->emit_fbfetch(ctx->abi);
1806
1807 for (unsigned chan = comp; chan < ve + comp; chan++) {
1808 if (indir_index) {
1809 unsigned count = glsl_count_attribute_slots(var->type, false);
1810 count -= chan / 4;
1811 LLVMValueRef tmp_vec = build_gather_values_extended(
1812 &ctx->lc, ctx->outputs + idx + chan, count, stride, true, true);
1813
1814 values[chan] = LLVMBuildExtractElement(ctx->lc.builder, tmp_vec, indir_index, "");
1815 } else {
1816 values[chan] = LLVMBuildLoad(ctx->lc.builder,
1817 ctx->outputs[idx + chan + const_index * stride], "");
1818 }
1819 }
1820 break;
1821 case nir_var_mem_global: {
1822 LLVMValueRef address = get_src(ctx, instr->src[0]);
1823 LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
1824 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
1825 unsigned natural_stride = type_scalar_size_bytes(deref->type);
1826 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
1827 int elem_size_bytes = get_elem_bits(&ctx->lc, result_type) / 8;
1828 bool split_loads = false;
1829
1830 if (stride != natural_stride || split_loads) {
1831 if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
1832 result_type = LLVMGetElementType(result_type);
1833
1834 LLVMTypeRef ptr_type =
1835 LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
1836 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
1837
1838 for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
1839 LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, i * stride / natural_stride, 0);
1840 values[i] =
1841 LLVMBuildLoad(ctx->lc.builder, build_gep_ptr(&ctx->lc, address, offset), "");
1842
1843 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
1844 LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
1845 }
1846 return build_gather_values(&ctx->lc, values, instr->dest.ssa.num_components);
1847 } else {
1848 LLVMTypeRef ptr_type =
1849 LLVMPointerType(result_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
1850 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
1851 LLVMValueRef val = LLVMBuildLoad(ctx->lc.builder, address, "");
1852
1853 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
1854 LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
1855 return val;
1856 }
1857 }
1858 default:
1859 unreachable("unhandle variable mode");
1860 }
1861 ret = build_varying_gather_values(&ctx->lc, values, ve, comp);
1862 return LLVMBuildBitCast(ctx->lc.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
1863 }
1864
1865 static void visit_store_var(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
1866 {
1867 // if (ctx->lc.postponed_kill) {
1868 // LLVMValueRef cond = LLVMBuildLoad(ctx->lc.builder, ctx->lc.postponed_kill, "");
1869 // ac_build_ifcc(&ctx->lc, cond, 7002);
1870 // }
1871
1872 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1873 nir_variable *var = nir_deref_instr_get_variable(deref);
1874
1875 LLVMValueRef temp_ptr, value;
1876 int idx = 0;
1877 unsigned comp = 0;
1878 LLVMValueRef src = to_float(&ctx->lc, get_src(ctx, instr->src[1]));
1879 int writemask = instr->const_index[0];
1880 LLVMValueRef indir_index;
1881 unsigned const_index;
1882
1883 if (var) {
1884 get_deref_offset(ctx, deref, false, NULL, NULL, &const_index, &indir_index);
1885 idx = var->data.driver_location;
1886 comp = var->data.location_frac;
1887
1888 if (var->data.compact) {
1889 const_index += comp;
1890 comp = 0;
1891 }
1892 }
1893
1894 if (get_elem_bits(&ctx->lc, LLVMTypeOf(src)) == 64 &&
1895 (deref->mode == nir_var_shader_out || deref->mode == nir_var_function_temp)) {
1896
1897 src = LLVMBuildBitCast(ctx->lc.builder, src,
1898 LLVMVectorType(ctx->lc.f32, get_llvm_num_components(src) * 2), "");
1899
1900 writemask = widen_mask(writemask, 2);
1901 }
1902
1903 writemask = writemask << comp;
1904
1905 switch (deref->mode) {
1906 case nir_var_shader_out:
1907 if (ctx->stage == MESA_SHADER_VERTEX && var->data.location == VARYING_SLOT_POS) {
1908
1909 LLVMValueRef vertexCache = LLVMGetParam(ctx->main_function, 2);
1910 LLVMValueRef idx[3];
1911
1912 idx[0] = LLVMConstInt(ctx->lc.i32, 0, false);
1913 idx[1] = LLVMConstInt(ctx->lc.i32, 0, false);
1914 LLVMValueRef outPos = LLVMConstInBoundsGEP(vertexCache,
1915 idx,
1916 2);
1917 LLVMBuildStore(ctx->lc.builder, src, outPos);
1918 break;
1919 }
1920 if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.location == FRAG_RESULT_DATA0) {
1921
1922 LLVMValueRef outColor = LLVMGetParam(ctx->main_function, 4);
1923 // LLVMValueRef idx[3];
1924
1925 // idx[0] = LLVMConstInt(ctx->lc.i32, 0, false);
1926 // idx[1] = LLVMConstInt(ctx->lc.i32, 0, false);
1927 // LLVMValueRef outPos = LLVMConstInBoundsGEP(vertexCache,
1928 // idx,
1929 // 2);
1930 LLVMBuildStore(ctx->lc.builder, src, outColor);
1931 break;
1932 }
1933 /* TODO: remove this after RADV switches to lowered IO */
1934 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
1935 // LLVMValueRef vertex_index = NULL;
1936 // LLVMValueRef indir_index = NULL;
1937 // unsigned const_index = 0;
1938 // const bool is_patch = var->data.patch ||
1939 // var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
1940 // var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
1941
1942 // get_deref_offset(ctx, deref, false, NULL, is_patch ? NULL : &vertex_index, &const_index,
1943 // &indir_index);
1944
1945 // ctx->abi->store_tcs_outputs(ctx->abi, var, vertex_index, indir_index, const_index, src,
1946 // writemask, var->data.location_frac, var->data.driver_location);
1947 // break;
1948 // }
1949
1950 for (unsigned chan = 0; chan < 8; chan++) {
1951 int stride = 4;
1952 if (!(writemask & (1 << chan)))
1953 continue;
1954
1955 value = llvm_extract_elem(&ctx->lc, src, chan - comp);
1956
1957 if (var->data.compact)
1958 stride = 1;
1959 if (indir_index) {
1960 unsigned count = glsl_count_attribute_slots(var->type, false);
1961 count -= chan / 4;
1962 LLVMValueRef tmp_vec = build_gather_values_extended(
1963 &ctx->lc, ctx->outputs + idx + chan, count, stride, true, true);
1964
1965 tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
1966 build_store_values_extended(&ctx->lc, ctx->outputs + idx + chan, count, stride,
1967 tmp_vec);
1968
1969 } else {
1970 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
1971
1972 LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
1973 }
1974 }
1975 break;
1976 case nir_var_function_temp:
1977 for (unsigned chan = 0; chan < 8; chan++) {
1978 if (!(writemask & (1 << chan)))
1979 continue;
1980
1981 value = llvm_extract_elem(&ctx->lc, src, chan);
1982 if (indir_index) {
1983 unsigned count = glsl_count_attribute_slots(var->type, false);
1984 count -= chan / 4;
1985 LLVMValueRef tmp_vec = build_gather_values_extended(
1986 &ctx->lc, ctx->locals + idx + chan, count, 4, true, true);
1987
1988 tmp_vec = LLVMBuildInsertElement(ctx->lc.builder, tmp_vec, value, indir_index, "");
1989 build_store_values_extended(&ctx->lc, ctx->locals + idx + chan, count, 4, tmp_vec);
1990 } else {
1991 temp_ptr = ctx->locals[idx + chan + const_index * 4];
1992
1993 LLVMBuildStore(ctx->lc.builder, value, temp_ptr);
1994 }
1995 }
1996 break;
1997
1998 case nir_var_mem_global: {
1999 int writemask = instr->const_index[0];
2000 LLVMValueRef address = get_src(ctx, instr->src[0]);
2001 LLVMValueRef val = get_src(ctx, instr->src[1]);
2002
2003 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
2004 unsigned natural_stride = type_scalar_size_bytes(deref->type);
2005 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
2006 int elem_size_bytes = get_elem_bits(&ctx->lc, LLVMTypeOf(val)) / 8;
2007 bool split_stores = false;
2008
2009 LLVMTypeRef ptr_type =
2010 LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2011 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
2012
2013 if (writemask == (1u << get_llvm_num_components(val)) - 1 && stride == natural_stride &&
2014 !split_stores) {
2015 LLVMTypeRef ptr_type =
2016 LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2017 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
2018
2019 val = LLVMBuildBitCast(ctx->lc.builder, val, LLVMGetElementType(LLVMTypeOf(address)), "");
2020 LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, val, address);
2021
2022 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
2023 LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
2024 } else {
2025 LLVMTypeRef val_type = LLVMTypeOf(val);
2026 if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
2027 val_type = LLVMGetElementType(val_type);
2028
2029 LLVMTypeRef ptr_type =
2030 LLVMPointerType(val_type, LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2031 address = LLVMBuildBitCast(ctx->lc.builder, address, ptr_type, "");
2032 for (unsigned chan = 0; chan < 4; chan++) {
2033 if (!(writemask & (1 << chan)))
2034 continue;
2035
2036 LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, chan * stride / natural_stride, 0);
2037
2038 LLVMValueRef ptr = build_gep_ptr(&ctx->lc, address, offset);
2039 LLVMValueRef src = llvm_extract_elem(&ctx->lc, val, chan);
2040 src = LLVMBuildBitCast(ctx->lc.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), "");
2041 LLVMValueRef store = LLVMBuildStore(ctx->lc.builder, src, ptr);
2042
2043 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
2044 LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
2045 }
2046 }
2047 break;
2048 }
2049 default:
2050 abort();
2051 break;
2052 }
2053
2054 // if (ctx->ac.postponed_kill)
2055 // ac_build_endif(&ctx->ac, 7002);
2056 }
2057
2058 static void visit_intrinsic(struct libresoc_nir_tran_ctx *ctx, nir_intrinsic_instr *instr)
2059 {
2060 LLVMValueRef result = NULL;
2061
2062 switch (instr->intrinsic) {
2063 case nir_intrinsic_ballot:
2064 // result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
2065 // if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
2066 // result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
2067 break;
2068 case nir_intrinsic_read_invocation:
2069 // result =
2070 // ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
2071 break;
2072 case nir_intrinsic_read_first_invocation:
2073 // result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
2074 break;
2075 case nir_intrinsic_load_subgroup_invocation:
2076 // result = ac_get_thread_id(&ctx->ac);
2077 break;
2078 case nir_intrinsic_load_work_group_id: {
2079 // LLVMValueRef values[3];
2080
2081 // for (int i = 0; i < 3; i++) {
2082 // values[i] = ctx->args->workgroup_ids[i].used
2083 // ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
2084 // : ctx->ac.i32_0;
2085 // }
2086
2087 // result = ac_build_gather_values(&ctx->ac, values, 3);
2088 break;
2089 }
2090 case nir_intrinsic_load_base_vertex:
2091 case nir_intrinsic_load_first_vertex:
2092 //result = ctx->abi->load_base_vertex(ctx->abi);
2093 result = LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index);
2094 break;
2095 case nir_intrinsic_load_local_group_size:
2096 // result = ctx->abi->load_local_group_size(ctx->abi);
2097 break;
2098 case nir_intrinsic_load_vertex_id:
2099 result = LLVMGetParam(ctx->main_function, 1);
2100 // result = LLVMBuildAdd(ctx->lc.builder, LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index),
2101 // LLVMGetParam(ctx->main_function, ctx->args.base_vertex.arg_index), "");
2102 break;
2103 case nir_intrinsic_load_vertex_id_zero_base: {
2104 // result = ctx->abi->vertex_id;
2105 result = LLVMGetParam(ctx->main_function, ctx->args.vertex_id.arg_index);
2106 break;
2107 }
2108 case nir_intrinsic_load_local_invocation_id: {
2109 // result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
2110 break;
2111 }
2112 case nir_intrinsic_load_base_instance:
2113 // result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
2114 break;
2115 case nir_intrinsic_load_draw_id:
2116 // result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
2117 break;
2118 case nir_intrinsic_load_view_index:
2119 // result = ac_get_arg(&ctx->ac, ctx->args->view_index);
2120 break;
2121 case nir_intrinsic_load_invocation_id:
2122 // if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2123 // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
2124 // } else {
2125 // if (ctx->ac.chip_class >= GFX10) {
2126 // result =
2127 // LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
2128 // LLVMConstInt(ctx->ac.i32, 127, 0), "");
2129 // } else {
2130 // result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
2131 // }
2132 // }
2133 break;
2134 case nir_intrinsic_load_primitive_id:
2135 // if (ctx->stage == MESA_SHADER_GEOMETRY) {
2136 // result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
2137 // } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2138 // result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
2139 // } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
2140 // result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
2141 // } else
2142 // fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
2143 // break;
2144 // case nir_intrinsic_load_sample_id:
2145 // result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
2146 break;
2147 case nir_intrinsic_load_sample_pos:
2148 // result = load_sample_pos(ctx);
2149 break;
2150 case nir_intrinsic_load_sample_mask_in:
2151 // result = ctx->abi->load_sample_mask_in(ctx->abi);
2152 break;
2153 case nir_intrinsic_load_frag_coord: {
2154 // LLVMValueRef values[4] = {
2155 // ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
2156 // ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
2157 // ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
2158 // result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
2159 break;
2160 }
2161 case nir_intrinsic_load_layer_id:
2162 // result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
2163 break;
2164 case nir_intrinsic_load_front_face:
2165 // result = ac_get_arg(&ctx->ac, ctx->args->front_face);
2166 break;
2167 case nir_intrinsic_load_helper_invocation:
2168 // result = ac_build_load_helper_invocation(&ctx->ac);
2169 break;
2170 case nir_intrinsic_is_helper_invocation:
2171 // result = ac_build_is_helper_invocation(&ctx->ac);
2172 break;
2173 case nir_intrinsic_load_color0:
2174 // result = ctx->abi->color0;
2175 break;
2176 case nir_intrinsic_load_color1:
2177 // result = ctx->abi->color1;
2178 break;
2179 case nir_intrinsic_load_user_data_amd:
2180 // assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
2181 // result = ctx->abi->user_data;
2182 break;
2183 case nir_intrinsic_load_instance_id:
2184 // result = ctx->abi->instance_id;
2185 break;
2186 case nir_intrinsic_load_num_work_groups:
2187 // result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
2188 break;
2189 case nir_intrinsic_load_local_invocation_index:
2190 // result = visit_load_local_invocation_index(ctx);
2191 break;
2192 case nir_intrinsic_load_subgroup_id:
2193 // result = visit_load_subgroup_id(ctx);
2194 break;
2195 case nir_intrinsic_load_num_subgroups:
2196 // result = visit_load_num_subgroups(ctx);
2197 break;
2198 case nir_intrinsic_first_invocation:
2199 // result = visit_first_invocation(ctx);
2200 break;
2201 case nir_intrinsic_load_push_constant:
2202 // result = visit_load_push_constant(ctx, instr);
2203 break;
2204 case nir_intrinsic_vulkan_resource_index: {
2205 // LLVMValueRef index = get_src(ctx, instr->src[0]);
2206 // unsigned desc_set = nir_intrinsic_desc_set(instr);
2207 // unsigned binding = nir_intrinsic_binding(instr);
2208
2209 // result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
2210 break;
2211 }
2212 case nir_intrinsic_vulkan_resource_reindex:
2213 // result = visit_vulkan_resource_reindex(ctx, instr);
2214 break;
2215 case nir_intrinsic_store_ssbo:
2216 // visit_store_ssbo(ctx, instr);
2217 break;
2218 case nir_intrinsic_load_ssbo:
2219 // result = visit_load_buffer(ctx, instr);
2220 break;
2221 case nir_intrinsic_ssbo_atomic_add:
2222 case nir_intrinsic_ssbo_atomic_imin:
2223 case nir_intrinsic_ssbo_atomic_umin:
2224 case nir_intrinsic_ssbo_atomic_imax:
2225 case nir_intrinsic_ssbo_atomic_umax:
2226 case nir_intrinsic_ssbo_atomic_and:
2227 case nir_intrinsic_ssbo_atomic_or:
2228 case nir_intrinsic_ssbo_atomic_xor:
2229 case nir_intrinsic_ssbo_atomic_exchange:
2230 case nir_intrinsic_ssbo_atomic_comp_swap:
2231 // result = visit_atomic_ssbo(ctx, instr);
2232 break;
2233 case nir_intrinsic_load_ubo:
2234 // result = visit_load_ubo_buffer(ctx, instr);
2235 break;
2236 case nir_intrinsic_get_buffer_size:
2237 // result = visit_get_buffer_size(ctx, instr);
2238 break;
2239 case nir_intrinsic_load_deref:
2240 result = visit_load_var(ctx, instr);
2241 break;
2242 case nir_intrinsic_store_deref:
2243 visit_store_var(ctx, instr);
2244 break;
2245 case nir_intrinsic_load_input:
2246 case nir_intrinsic_load_input_vertex:
2247 case nir_intrinsic_load_per_vertex_input:
2248 // result = visit_load(ctx, instr, false);
2249 break;
2250 case nir_intrinsic_load_output:
2251 case nir_intrinsic_load_per_vertex_output:
2252 // result = visit_load(ctx, instr, true);
2253 break;
2254 case nir_intrinsic_store_output:
2255 case nir_intrinsic_store_per_vertex_output:
2256 visit_store_output(ctx, instr);
2257 break;
2258 case nir_intrinsic_load_shared:
2259 result = visit_load_shared(ctx, instr);
2260 break;
2261 case nir_intrinsic_store_shared:
2262 visit_store_shared(ctx, instr);
2263 break;
2264 case nir_intrinsic_bindless_image_samples:
2265 case nir_intrinsic_image_deref_samples:
2266 // result = visit_image_samples(ctx, instr);
2267 break;
2268 case nir_intrinsic_bindless_image_load:
2269 // result = visit_image_load(ctx, instr, true);
2270 break;
2271 case nir_intrinsic_image_deref_load:
2272 // result = visit_image_load(ctx, instr, false);
2273 break;
2274 case nir_intrinsic_bindless_image_store:
2275 // visit_image_store(ctx, instr, true);
2276 break;
2277 case nir_intrinsic_image_deref_store:
2278 // visit_image_store(ctx, instr, false);
2279 break;
2280 case nir_intrinsic_bindless_image_atomic_add:
2281 case nir_intrinsic_bindless_image_atomic_imin:
2282 case nir_intrinsic_bindless_image_atomic_umin:
2283 case nir_intrinsic_bindless_image_atomic_imax:
2284 case nir_intrinsic_bindless_image_atomic_umax:
2285 case nir_intrinsic_bindless_image_atomic_and:
2286 case nir_intrinsic_bindless_image_atomic_or:
2287 case nir_intrinsic_bindless_image_atomic_xor:
2288 case nir_intrinsic_bindless_image_atomic_exchange:
2289 case nir_intrinsic_bindless_image_atomic_comp_swap:
2290 case nir_intrinsic_bindless_image_atomic_inc_wrap:
2291 case nir_intrinsic_bindless_image_atomic_dec_wrap:
2292 // result = visit_image_atomic(ctx, instr, true);
2293 break;
2294 case nir_intrinsic_image_deref_atomic_add:
2295 case nir_intrinsic_image_deref_atomic_imin:
2296 case nir_intrinsic_image_deref_atomic_umin:
2297 case nir_intrinsic_image_deref_atomic_imax:
2298 case nir_intrinsic_image_deref_atomic_umax:
2299 case nir_intrinsic_image_deref_atomic_and:
2300 case nir_intrinsic_image_deref_atomic_or:
2301 case nir_intrinsic_image_deref_atomic_xor:
2302 case nir_intrinsic_image_deref_atomic_exchange:
2303 case nir_intrinsic_image_deref_atomic_comp_swap:
2304 case nir_intrinsic_image_deref_atomic_inc_wrap:
2305 case nir_intrinsic_image_deref_atomic_dec_wrap:
2306 // result = visit_image_atomic(ctx, instr, false);
2307 break;
2308 case nir_intrinsic_bindless_image_size:
2309 // result = visit_image_size(ctx, instr, true);
2310 break;
2311 case nir_intrinsic_image_deref_size:
2312 // result = visit_image_size(ctx, instr, false);
2313 break;
2314 case nir_intrinsic_shader_clock:
2315 // result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
2316 break;
2317 case nir_intrinsic_discard:
2318 case nir_intrinsic_discard_if:
2319 // emit_discard(ctx, instr);
2320 break;
2321 case nir_intrinsic_demote:
2322 case nir_intrinsic_demote_if:
2323 // emit_demote(ctx, instr);
2324 break;
2325 case nir_intrinsic_memory_barrier:
2326 case nir_intrinsic_group_memory_barrier:
2327 case nir_intrinsic_memory_barrier_buffer:
2328 case nir_intrinsic_memory_barrier_image:
2329 case nir_intrinsic_memory_barrier_shared:
2330 // emit_membar(&ctx->ac, instr);
2331 break;
2332 case nir_intrinsic_scoped_barrier: {
2333 // assert(!(nir_intrinsic_memory_semantics(instr) &
2334 // (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
2335
2336 // nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
2337
2338 // unsigned wait_flags = 0;
2339 // if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
2340 // wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2341 // if (modes & nir_var_mem_shared)
2342 // wait_flags |= AC_WAIT_LGKM;
2343
2344 // if (wait_flags)
2345 // ac_build_waitcnt(&ctx->ac, wait_flags);
2346
2347 // if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
2348 // ac_emit_barrier(&ctx->ac, ctx->stage);
2349 break;
2350 }
2351 case nir_intrinsic_memory_barrier_tcs_patch:
2352 break;
2353 case nir_intrinsic_control_barrier:
2354 // ac_emit_barrier(&ctx->ac, ctx->stage);
2355 break;
2356 case nir_intrinsic_shared_atomic_add:
2357 case nir_intrinsic_shared_atomic_imin:
2358 case nir_intrinsic_shared_atomic_umin:
2359 case nir_intrinsic_shared_atomic_imax:
2360 case nir_intrinsic_shared_atomic_umax:
2361 case nir_intrinsic_shared_atomic_and:
2362 case nir_intrinsic_shared_atomic_or:
2363 case nir_intrinsic_shared_atomic_xor:
2364 case nir_intrinsic_shared_atomic_exchange:
2365 case nir_intrinsic_shared_atomic_comp_swap:
2366 case nir_intrinsic_shared_atomic_fadd: {
2367 // LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size);
2368 // result = visit_var_atomic(ctx, instr, ptr, 1);
2369 break;
2370 }
2371 case nir_intrinsic_deref_atomic_add:
2372 case nir_intrinsic_deref_atomic_imin:
2373 case nir_intrinsic_deref_atomic_umin:
2374 case nir_intrinsic_deref_atomic_imax:
2375 case nir_intrinsic_deref_atomic_umax:
2376 case nir_intrinsic_deref_atomic_and:
2377 case nir_intrinsic_deref_atomic_or:
2378 case nir_intrinsic_deref_atomic_xor:
2379 case nir_intrinsic_deref_atomic_exchange:
2380 case nir_intrinsic_deref_atomic_comp_swap:
2381 case nir_intrinsic_deref_atomic_fadd: {
2382 // LLVMValueRef ptr = get_src(ctx, instr->src[0]);
2383 // result = visit_var_atomic(ctx, instr, ptr, 1);
2384 break;
2385 }
2386 case nir_intrinsic_load_barycentric_pixel:
2387 // result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
2388 break;
2389 case nir_intrinsic_load_barycentric_centroid:
2390 // result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
2391 break;
2392 case nir_intrinsic_load_barycentric_sample:
2393 // result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
2394 break;
2395 case nir_intrinsic_load_barycentric_model:
2396 // result = barycentric_model(ctx);
2397 break;
2398 case nir_intrinsic_load_barycentric_at_offset: {
2399 // LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
2400 // result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
2401 break;
2402 }
2403 case nir_intrinsic_load_barycentric_at_sample: {
2404 // LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
2405 // result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
2406 break;
2407 }
2408 case nir_intrinsic_load_interpolated_input: {
2409 /* We assume any indirect loads have been lowered away */
2410 // ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
2411 // assert(offset);
2412 // assert(offset[0].i32 == 0);
2413
2414 // LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
2415 // unsigned index = nir_intrinsic_base(instr);
2416 // unsigned component = nir_intrinsic_component(instr);
2417 // result = load_interpolated_input(ctx, interp_param, index, component,
2418 // instr->dest.ssa.num_components, instr->dest.ssa.bit_size);
2419 break;
2420 }
2421 case nir_intrinsic_emit_vertex:
2422 // ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
2423 break;
2424 case nir_intrinsic_emit_vertex_with_counter: {
2425 // unsigned stream = nir_intrinsic_stream_id(instr);
2426 // LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
2427 // ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
2428 break;
2429 }
2430 case nir_intrinsic_end_primitive:
2431 case nir_intrinsic_end_primitive_with_counter:
2432 // ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
2433 break;
2434 case nir_intrinsic_load_tess_coord:
2435 // result = ctx->abi->load_tess_coord(ctx->abi);
2436 break;
2437 case nir_intrinsic_load_tess_level_outer:
2438 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
2439 break;
2440 case nir_intrinsic_load_tess_level_inner:
2441 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
2442 break;
2443 case nir_intrinsic_load_tess_level_outer_default:
2444 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
2445 break;
2446 case nir_intrinsic_load_tess_level_inner_default:
2447 // result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
2448 break;
2449 case nir_intrinsic_load_patch_vertices_in:
2450 // result = ctx->abi->load_patch_vertices_in(ctx->abi);
2451 break;
2452 case nir_intrinsic_vote_all: {
2453 // LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
2454 // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
2455 break;
2456 }
2457 case nir_intrinsic_vote_any: {
2458 // LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
2459 // result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
2460 break;
2461 }
2462 case nir_intrinsic_shuffle:
2463 // if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
2464 // (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
2465 // result =
2466 // ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
2467 // } else {
2468 // LLVMValueRef src = get_src(ctx, instr->src[0]);
2469 // LLVMValueRef index = get_src(ctx, instr->src[1]);
2470 // LLVMTypeRef type = LLVMTypeOf(src);
2471 // struct waterfall_context wctx;
2472 // LLVMValueRef index_val;
2473
2474 // index_val = enter_waterfall(ctx, &wctx, index, true);
2475
2476 // src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
2477
2478 // result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
2479 // (LLVMValueRef[]){src, index_val}, 2,
2480 // AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
2481
2482 // result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
2483
2484 // result = exit_waterfall(ctx, &wctx, result);
2485 // }
2486 break;
2487 case nir_intrinsic_reduce:
2488 // result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
2489 // instr->const_index[1]);
2490 break;
2491 case nir_intrinsic_inclusive_scan:
2492 // result =
2493 // ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
2494 break;
2495 case nir_intrinsic_exclusive_scan:
2496 // result =
2497 // ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
2498 break;
2499 case nir_intrinsic_quad_broadcast: {
2500 // unsigned lane = nir_src_as_uint(instr->src[1]);
2501 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
2502 break;
2503 }
2504 case nir_intrinsic_quad_swap_horizontal:
2505 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
2506 break;
2507 case nir_intrinsic_quad_swap_vertical:
2508 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
2509 break;
2510 case nir_intrinsic_quad_swap_diagonal:
2511 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
2512 break;
2513 case nir_intrinsic_quad_swizzle_amd: {
2514 // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
2515 // result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
2516 // (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
2517 break;
2518 }
2519 case nir_intrinsic_masked_swizzle_amd: {
2520 // uint32_t mask = nir_intrinsic_swizzle_mask(instr);
2521 // result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
2522 break;
2523 }
2524 case nir_intrinsic_write_invocation_amd:
2525 // result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
2526 // get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
2527 break;
2528 case nir_intrinsic_mbcnt_amd:
2529 // result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
2530 break;
2531 case nir_intrinsic_load_scratch: {
2532 LLVMValueRef offset = get_src(ctx, instr->src[0]);
2533 LLVMValueRef ptr = build_gep0(&ctx->lc, ctx->scratch, offset);
2534 LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
2535 LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
2536 ? comp_type
2537 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
2538 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2539 ptr = LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
2540 result = LLVMBuildLoad(ctx->lc.builder, ptr, "");
2541 break;
2542 }
2543 case nir_intrinsic_store_scratch: {
2544 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2545 LLVMValueRef ptr = build_gep0(&ctx->lc, ctx->scratch, offset);
2546 LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->lc.context, instr->src[0].ssa->bit_size);
2547 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2548 ptr = LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
2549 LLVMValueRef src = get_src(ctx, instr->src[0]);
2550 unsigned wrmask = nir_intrinsic_write_mask(instr);
2551 while (wrmask) {
2552 int start, count;
2553 u_bit_scan_consecutive_range(&wrmask, &start, &count);
2554
2555 LLVMValueRef offset = LLVMConstInt(ctx->lc.i32, start, false);
2556 LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->lc.builder, ptr, &offset, 1, "");
2557 LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
2558 offset_ptr = LLVMBuildBitCast(ctx->lc.builder, offset_ptr,
2559 LLVMPointerType(vec_type, addr_space), "");
2560 LLVMValueRef offset_src = extract_components(&ctx->lc, src, start, count);
2561 LLVMBuildStore(ctx->lc.builder, offset_src, offset_ptr);
2562 }
2563 break;
2564 }
2565 case nir_intrinsic_load_constant: {
2566 unsigned base = nir_intrinsic_base(instr);
2567 unsigned range = nir_intrinsic_range(instr);
2568
2569 LLVMValueRef offset = get_src(ctx, instr->src[0]);
2570 offset = LLVMBuildAdd(ctx->lc.builder, offset, LLVMConstInt(ctx->lc.i32, base, false), "");
2571
2572 /* Clamp the offset to avoid out-of-bound access because global
2573 * instructions can't handle them.
2574 */
2575 LLVMValueRef size = LLVMConstInt(ctx->lc.i32, base + range, false);
2576 LLVMValueRef cond = LLVMBuildICmp(ctx->lc.builder, LLVMIntULT, offset, size, "");
2577 offset = LLVMBuildSelect(ctx->lc.builder, cond, offset, size, "");
2578
2579 LLVMValueRef ptr = build_gep0(&ctx->lc, ctx->constant_data, offset);
2580 LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->lc.context, instr->dest.ssa.bit_size);
2581 LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
2582 ? comp_type
2583 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
2584 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2585 ptr = LLVMBuildBitCast(ctx->lc.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
2586 result = LLVMBuildLoad(ctx->lc.builder, ptr, "");
2587 break;
2588 }
2589 default:
2590 fprintf(stderr, "Unknown intrinsic: ");
2591 nir_print_instr(&instr->instr, stderr);
2592 fprintf(stderr, "\n");
2593 break;
2594 }
2595 if (result) {
2596 ctx->ssa_defs[instr->dest.ssa.index] = result;
2597 }
2598 }
2599
2600 static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list);
2601
2602 static void visit_block(struct libresoc_nir_tran_ctx *ctx, nir_block *block)
2603 {
2604 nir_foreach_instr (instr, block) {
2605 switch (instr->type) {
2606 case nir_instr_type_alu:
2607 visit_alu(ctx, nir_instr_as_alu(instr));
2608 break;
2609 case nir_instr_type_load_const:
2610 visit_load_const(ctx, nir_instr_as_load_const(instr));
2611 break;
2612 case nir_instr_type_intrinsic:
2613 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
2614 break;
2615 case nir_instr_type_tex:
2616 // visit_tex(ctx, nir_instr_as_tex(instr));
2617 break;
2618 case nir_instr_type_phi:
2619 visit_phi(ctx, nir_instr_as_phi(instr));
2620 break;
2621 case nir_instr_type_ssa_undef:
2622 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
2623 break;
2624 case nir_instr_type_jump:
2625 visit_jump(&ctx->lc, nir_instr_as_jump(instr));
2626 break;
2627 case nir_instr_type_deref:
2628 visit_deref(ctx, nir_instr_as_deref(instr));
2629 break;
2630 default:
2631 fprintf(stderr, "Unknown NIR instr type: ");
2632 nir_print_instr(instr, stderr);
2633 fprintf(stderr, "\n");
2634 abort();
2635 }
2636 }
2637 }
2638
2639 static void visit_if(struct libresoc_nir_tran_ctx *ctx, nir_if *if_stmt)
2640 {
2641 LLVMValueRef value = get_src(ctx, if_stmt->condition);
2642
2643 nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list);
2644
2645 build_uif(&ctx->lc, value, then_block->index);
2646
2647 visit_cf_list(ctx, &if_stmt->then_list);
2648
2649 if (!exec_list_is_empty(&if_stmt->else_list)) {
2650 nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list);
2651
2652 build_else(&ctx->lc, else_block->index);
2653 visit_cf_list(ctx, &if_stmt->else_list);
2654 }
2655
2656 build_endif(&ctx->lc, then_block->index);
2657 }
2658
2659 static void visit_loop(struct libresoc_nir_tran_ctx *ctx, nir_loop *loop)
2660 {
2661 nir_block *first_loop_block = (nir_block *)exec_list_get_head(&loop->body);
2662
2663 build_bgnloop(&ctx->lc, first_loop_block->index);
2664
2665 visit_cf_list(ctx, &loop->body);
2666
2667 build_endloop(&ctx->lc, first_loop_block->index);
2668 }
2669
2670 static void visit_cf_list(struct libresoc_nir_tran_ctx *ctx, struct exec_list *list)
2671 {
2672 foreach_list_typed(nir_cf_node, node, node, list)
2673 {
2674 switch (node->type) {
2675 case nir_cf_node_block:
2676 visit_block(ctx, nir_cf_node_as_block(node));
2677 break;
2678
2679 case nir_cf_node_if:
2680 visit_if(ctx, nir_cf_node_as_if(node));
2681 break;
2682
2683 case nir_cf_node_loop:
2684 visit_loop(ctx, nir_cf_node_as_loop(node));
2685 break;
2686
2687 default:
2688 assert(0);
2689 }
2690 }
2691 }
2692
2693 LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir)
2694 {
2695 struct libresoc_nir_tran_ctx ctx = {};
2696 struct nir_function *func;
2697 char shader_name[60];
2698 sprintf(shader_name, "libresoc-shader-%s", gl_shader_stage_name(nir->info.stage));
2699 LLVMModuleRef mod = LLVMModuleCreateWithNameInContext(shader_name, llvm_ref->lc.context);
2700 ctx.lc.module = &mod;
2701 ctx.lc = llvm_ref->lc;
2702 ctx.stage = nir->info.stage;
2703 ctx.info = &nir->info;
2704
2705 if (ctx.stage == MESA_SHADER_VERTEX) {
2706 add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.base_vertex);
2707 add_arg(&ctx.args, ARG_SGPR, 1, ARG_INT, &ctx.args.start_instance);
2708 add_arg(&ctx.args, ARG_VGPR, 1, ARG_INT, &ctx.args.vertex_id);
2709 }
2710 LLVMTypeRef arg_types[32];
2711 LLVMTypeRef ret_type = LLVMVoidTypeInContext(ctx.lc.context);
2712 for (unsigned i = 0; i < ctx.args.arg_count; i++) {
2713 arg_types[i] = arg_llvm_type(ctx.args.args[i].type, ctx.args.args[i].size, &ctx.lc);
2714 }
2715
2716 LLVMTypeRef main_function_type;
2717 if(nir->info.stage == MESA_SHADER_VERTEX) {
2718 LLVMTypeRef arg_types[32];
2719 LLVMTypeRef floatType = LLVMFloatTypeInContext(llvm_ref->lc.context);
2720 LLVMTypeRef float4 = LLVMVectorType(floatType, 4);
2721 LLVMTypeRef f4arrayType = LLVMArrayType(float4, 10);
2722 arg_types[0] = float4;
2723 arg_types[1] = f4arrayType;
2724 LLVMTypeRef vertexCacheType = LLVMStructTypeInContext(llvm_ref->lc.context, arg_types, 2, false);
2725 LLVMTypeRef gpuStatePtrType = LLVMPointerType(LLVMStructCreateNamed(llvm_ref->lc.context, "GPUState"), 0);
2726 arg_types[0] = gpuStatePtrType;
2727 arg_types[1] = LLVMIntTypeInContext(llvm_ref->lc.context, 32);
2728 arg_types[2] = LLVMPointerType(vertexCacheType, 0);
2729 main_function_type = LLVMFunctionType(ret_type, arg_types, 3, 0);
2730 } else {
2731
2732 LLVMTypeRef floatType = LLVMFloatTypeInContext(llvm_ref->lc.context);
2733 LLVMTypeRef float4 = LLVMVectorType(floatType, 4);
2734 LLVMTypeRef f4arrayType = LLVMArrayType(float4, 10);
2735 arg_types[0] = float4;
2736 arg_types[1] = f4arrayType;
2737 LLVMTypeRef vertexCacheType = LLVMStructTypeInContext(llvm_ref->lc.context, arg_types, 2, false);
2738 arg_types[3] = LLVMPointerType(vertexCacheType, 0);
2739 LLVMTypeRef gpuStatePtrType = LLVMPointerType(LLVMStructCreateNamed(llvm_ref->lc.context, "GPUState"), 0);
2740 arg_types[0] = gpuStatePtrType;
2741 arg_types[1] = LLVMFloatTypeInContext(llvm_ref->lc.context);
2742 arg_types[2] = LLVMPointerType(float4, 0);
2743 arg_types[4] = LLVMPointerType(float4, 0);
2744 main_function_type = LLVMFunctionType(ret_type, arg_types, 3, 0);
2745 //TODO: this is zero argument function and returns void
2746 main_function_type = LLVMFunctionType(ret_type, arg_types, 5, 0);
2747 }
2748 LLVMValueRef main_function = LLVMAddFunction(mod, gl_shader_stage_name(nir->info.stage), main_function_type);
2749 LLVMBasicBlockRef main_function_body =
2750 LLVMAppendBasicBlockInContext(ctx.lc.context, main_function, "main_body");
2751 LLVMPositionBuilderAtEnd(ctx.lc.builder, main_function_body);
2752 ctx.main_function = main_function;
2753
2754 if (!nir->info.io_lowered) {
2755 nir_foreach_shader_out_variable(variable, nir)
2756 {
2757 handle_shader_output_decl(&ctx, nir, variable, ctx.stage);
2758 }
2759 }
2760 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2761 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2762 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2763 func = (struct nir_function *)exec_list_get_head(&nir->functions);
2764
2765 nir_index_ssa_defs(func->impl);
2766 ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
2767 setup_locals(&ctx, func);
2768 setup_scratch(&ctx, nir);
2769 setup_constant_data(&ctx, nir);
2770
2771 // if (gl_shader_stage_is_compute(nir->info.stage))
2772 // setup_shared(&ctx, nir);
2773 visit_cf_list(&ctx, &func->impl->body);
2774 LLVMBuildRetVoid(ctx.lc.builder);
2775 char *error = NULL;
2776 LLVMVerifyModule(mod, LLVMPrintMessageAction, &error);
2777 LLVMDumpModule(mod);
2778 LLVMDisposeMessage(error);
2779 LLVMOrcModuleHandle mod_handle;
2780 LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref,
2781 &mod_handle,
2782 mod,
2783 orc_sym_resolver,
2784 (void *)(llvm_ref->orc_ref));
2785 LLVMDumpModule(mod);
2786 char *def_triple = LLVMGetDefaultTargetTriple(); // E.g. "x86_64-linux-gnu"
2787 LLVMDisasmContextRef disasm = LLVMCreateDisasm(def_triple, NULL,
2788 0, NULL,
2789 NULL);
2790 if (disasm) {
2791 LLVMOrcTargetAddress MainAddr;
2792 LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr ,gl_shader_stage_name(nir->info.stage));
2793 // if(nir->info.stage == MESA_SHADER_VERTEX)
2794 // {
2795 // pipeline->vs = (VertexShader)MainAddr;
2796 // }
2797 // else if(nir->info.stage == MESA_SHADER_FRAGMENT)
2798 // {
2799 // pipeline->fs = (FragmentShader)MainAddr;
2800 // }
2801 const uint8_t *bytes = (const uint8_t *)MainAddr;
2802 char outline[1024];
2803 uint64_t pc;
2804 pc = 0;
2805 uint64_t extent = 200;
2806 while (pc < extent) {
2807 size_t Size;
2808
2809 /*
2810 * Print address. We use addresses relative to the start of the function,
2811 * so that between runs.
2812 */
2813
2814
2815 Size = LLVMDisasmInstruction(disasm, (uint8_t *)bytes + pc, extent - pc, 0, outline,
2816 sizeof outline);
2817
2818 /*
2819 * Print the instruction.
2820 */
2821 printf("\t%s \n", outline);
2822
2823
2824 /*
2825 * Stop disassembling on return statements, if there is no record of a
2826 * jump to a successive address.
2827 *
2828 * XXX: This currently assumes x86
2829 */
2830
2831 if (Size == 1 && bytes[pc] == 0xc3) {
2832 break;
2833 }
2834
2835 /*
2836 * Advance.
2837 */
2838
2839 pc += Size;
2840
2841 if (pc >= extent) {
2842 break;
2843 }
2844 }
2845 }
2846 return mod;
2847 // LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod");
2848 // LLVMTypeRef param_types[] = { LLVMInt32Type(), LLVMInt32Type() };
2849 // LLVMTypeRef ret_type = LLVMFunctionType(LLVMInt32Type(), param_types, 2, 0);
2850 // LLVMValueRef sum = LLVMAddFunction(mod, "sum", ret_type);
2851 // LLVMBasicBlockRef entry = LLVMAppendBasicBlock(sum, "entry");
2852 // LLVMBuilderRef builder = LLVMCreateBuilder();
2853 // LLVMPositionBuilderAtEnd(builder, entry);
2854 // LLVMValueRef tmp = LLVMBuildAdd(builder, LLVMGetParam(sum, 0), LLVMGetParam(sum, 1), "tmp");
2855 // LLVMBuildRet(builder, tmp);
2856 // char *error = NULL;
2857 // LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
2858 // LLVMDumpModule(mod);
2859 // LLVMDisposeMessage(error);
2860 // LLVMOrcModuleHandle mod_handle;
2861 // LLVMErrorRef error_ref = LLVMOrcAddEagerlyCompiledIR(llvm_ref->orc_ref,
2862 // &mod_handle,
2863 // mod,
2864 // orc_sym_resolver,
2865 // (void *)(llvm_ref->orc_ref));
2866 }
2867
2868 Shader GetFuncPointer(struct libresoc_llvm *llvm_ref, const char *name) {
2869 LLVMOrcTargetAddress MainAddr;
2870 LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr , name);
2871 return (Shader)MainAddr;
2872 }