radeon/llvm: Use llvm.AMDIL.exp intrinsic again for now
[mesa.git] / src / gallium / drivers / radeon / radeon_setup_tgsi_llvm.c
1 /*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 */
26 #include "radeon_llvm.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <llvm-c/Core.h>
41 #include <llvm-c/Transforms/Scalar.h>
42
43 static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx)
44 {
45 return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
46 }
47
48 static struct radeon_llvm_branch * get_current_branch(
49 struct radeon_llvm_context * ctx)
50 {
51 return ctx->branch_depth > 0 ?
52 ctx->branch + (ctx->branch_depth - 1) : NULL;
53 }
54
/*
 * Map a (register index, channel) pair to a flat slot number, using four
 * consecutive slots per register.
 */
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	const unsigned channels_per_reg = 4;

	return index * channels_per_reg + chan;
}
59
60 static LLVMValueRef emit_swizzle(
61 struct lp_build_tgsi_context * bld_base,
62 LLVMValueRef value,
63 unsigned swizzle_x,
64 unsigned swizzle_y,
65 unsigned swizzle_z,
66 unsigned swizzle_w)
67 {
68 LLVMValueRef swizzles[4];
69 LLVMTypeRef i32t =
70 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
71
72 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
73 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
74 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
75 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
76
77 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
78 value,
79 LLVMGetUndef(LLVMTypeOf(value)),
80 LLVMConstVector(swizzles, 4), "");
81 }
82
83 static struct tgsi_declaration_range
84 get_array_range(struct lp_build_tgsi_context *bld_base,
85 unsigned File, const struct tgsi_ind_register *reg)
86 {
87 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
88
89 if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
90 reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
91 struct tgsi_declaration_range range;
92 range.First = 0;
93 range.Last = bld_base->info->file_max[File];
94 return range;
95 }
96
97 return ctx->arrays[reg->ArrayID - 1];
98 }
99
100 static LLVMValueRef
101 emit_array_index(
102 struct lp_build_tgsi_soa_context *bld,
103 const struct tgsi_ind_register *reg,
104 unsigned offset)
105 {
106 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
107
108 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
109 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
110 }
111
112 LLVMValueRef
113 radeon_llvm_emit_fetch_double(
114 struct lp_build_tgsi_context *bld_base,
115 LLVMValueRef ptr,
116 LLVMValueRef ptr2)
117 {
118 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
119 LLVMValueRef result;
120
121 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
122
123 result = LLVMBuildInsertElement(builder,
124 result,
125 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
126 bld_base->int_bld.zero, "");
127 result = LLVMBuildInsertElement(builder,
128 result,
129 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
130 bld_base->int_bld.one, "");
131 return bitcast(bld_base, TGSI_TYPE_DOUBLE, result);
132 }
133
134 static LLVMValueRef
135 emit_array_fetch(
136 struct lp_build_tgsi_context *bld_base,
137 unsigned File, enum tgsi_opcode_type type,
138 struct tgsi_declaration_range range,
139 unsigned swizzle)
140 {
141 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
142 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
143 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
144
145 unsigned i, size = range.Last - range.First + 1;
146 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
147 LLVMValueRef result = LLVMGetUndef(vec);
148
149 struct tgsi_full_src_register tmp_reg = {};
150 tmp_reg.Register.File = File;
151
152 for (i = 0; i < size; ++i) {
153 tmp_reg.Register.Index = i + range.First;
154 LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
155 result = LLVMBuildInsertElement(builder, result, temp,
156 lp_build_const_int32(gallivm, i), "");
157 }
158 return result;
159 }
160
161 static bool uses_temp_indirect_addressing(
162 struct lp_build_tgsi_context *bld_base)
163 {
164 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
165 return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
166 }
167
168 LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
169 const struct tgsi_full_src_register *reg,
170 enum tgsi_opcode_type type,
171 unsigned swizzle)
172 {
173 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
174 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
175 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
176 LLVMValueRef result = NULL, ptr, ptr2;
177
178 if (swizzle == ~0) {
179 LLVMValueRef values[TGSI_NUM_CHANNELS];
180 unsigned chan;
181 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
182 values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
183 }
184 return lp_build_gather_values(bld_base->base.gallivm, values,
185 TGSI_NUM_CHANNELS);
186 }
187
188 if (reg->Register.Indirect) {
189 struct tgsi_declaration_range range = get_array_range(bld_base,
190 reg->Register.File, &reg->Indirect);
191 return LLVMBuildExtractElement(builder,
192 emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
193 emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
194 "");
195 }
196
197 switch(reg->Register.File) {
198 case TGSI_FILE_IMMEDIATE: {
199 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
200 if (type == TGSI_TYPE_DOUBLE) {
201 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
202 result = LLVMConstInsertElement(result,
203 bld->immediates[reg->Register.Index][swizzle],
204 bld_base->int_bld.zero);
205 result = LLVMConstInsertElement(result,
206 bld->immediates[reg->Register.Index][swizzle + 1],
207 bld_base->int_bld.one);
208 return LLVMConstBitCast(result, ctype);
209 } else {
210 return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
211 }
212 }
213
214 case TGSI_FILE_INPUT:
215 result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
216 if (type == TGSI_TYPE_DOUBLE) {
217 ptr = result;
218 ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
219 return radeon_llvm_emit_fetch_double(bld_base, ptr, ptr2);
220 }
221 break;
222
223 case TGSI_FILE_TEMPORARY:
224 if (reg->Register.Index >= ctx->temps_count)
225 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
226 if (uses_temp_indirect_addressing(bld_base)) {
227 ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
228 break;
229 }
230 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
231 if (type == TGSI_TYPE_DOUBLE) {
232 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
233 return radeon_llvm_emit_fetch_double(bld_base,
234 LLVMBuildLoad(builder, ptr, ""),
235 LLVMBuildLoad(builder, ptr2, ""));
236 }
237 result = LLVMBuildLoad(builder, ptr, "");
238 break;
239
240 case TGSI_FILE_OUTPUT:
241 ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
242 if (type == TGSI_TYPE_DOUBLE) {
243 ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
244 return radeon_llvm_emit_fetch_double(bld_base,
245 LLVMBuildLoad(builder, ptr, ""),
246 LLVMBuildLoad(builder, ptr2, ""));
247 }
248 result = LLVMBuildLoad(builder, ptr, "");
249 break;
250
251 default:
252 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
253 }
254
255 return bitcast(bld_base, type, result);
256 }
257
258 static LLVMValueRef fetch_system_value(
259 struct lp_build_tgsi_context * bld_base,
260 const struct tgsi_full_src_register *reg,
261 enum tgsi_opcode_type type,
262 unsigned swizzle)
263 {
264 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
265 struct gallivm_state *gallivm = bld_base->base.gallivm;
266
267 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
268 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
269 cval = LLVMBuildExtractElement(gallivm->builder, cval,
270 lp_build_const_int32(gallivm, swizzle), "");
271 }
272 return bitcast(bld_base, type, cval);
273 }
274
275 static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
276 LLVMTypeRef type,
277 const char *name)
278 {
279 LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
280 LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
281 return ptr;
282 }
283
284 static void emit_declaration(
285 struct lp_build_tgsi_context * bld_base,
286 const struct tgsi_full_declaration *decl)
287 {
288 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
289 unsigned first, last, i, idx;
290 switch(decl->Declaration.File) {
291 case TGSI_FILE_ADDRESS:
292 {
293 unsigned idx;
294 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
295 unsigned chan;
296 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
297 ctx->soa.addr[idx][chan] = si_build_alloca_undef(
298 &ctx->gallivm,
299 ctx->soa.bld_base.uint_bld.elem_type, "");
300 }
301 }
302 break;
303 }
304
305 case TGSI_FILE_TEMPORARY:
306 if (decl->Declaration.Array) {
307 if (!ctx->arrays) {
308 int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
309 ctx->arrays = MALLOC(sizeof(ctx->arrays[0]) * size);
310 }
311
312 ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
313 }
314 if (uses_temp_indirect_addressing(bld_base)) {
315 lp_emit_declaration_soa(bld_base, decl);
316 break;
317 }
318 first = decl->Range.First;
319 last = decl->Range.Last;
320 if (!ctx->temps_count) {
321 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
322 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
323 }
324 for (idx = first; idx <= last; idx++) {
325 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
326 ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
327 si_build_alloca_undef(bld_base->base.gallivm,
328 bld_base->base.vec_type,
329 "temp");
330 }
331 }
332 break;
333
334 case TGSI_FILE_INPUT:
335 {
336 unsigned idx;
337 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
338 if (ctx->load_input)
339 ctx->load_input(ctx, idx, decl);
340 }
341 }
342 break;
343
344 case TGSI_FILE_SYSTEM_VALUE:
345 {
346 unsigned idx;
347 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
348 ctx->load_system_value(ctx, idx, decl);
349 }
350 }
351 break;
352
353 case TGSI_FILE_OUTPUT:
354 {
355 unsigned idx;
356 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
357 unsigned chan;
358 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
359 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
360 ctx->soa.outputs[idx][chan] = si_build_alloca_undef(
361 &ctx->gallivm,
362 ctx->soa.bld_base.base.elem_type, "");
363 }
364 }
365
366 ctx->output_reg_count = MAX2(ctx->output_reg_count,
367 decl->Range.Last + 1);
368 break;
369 }
370
371 default:
372 break;
373 }
374 }
375
376 LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
377 LLVMValueRef value)
378 {
379 struct lp_build_emit_data clamp_emit_data;
380
381 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
382 clamp_emit_data.arg_count = 3;
383 clamp_emit_data.args[0] = value;
384 clamp_emit_data.args[2] = bld_base->base.one;
385 clamp_emit_data.args[1] = bld_base->base.zero;
386
387 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
388 &clamp_emit_data);
389 }
390
391 void radeon_llvm_emit_store(
392 struct lp_build_tgsi_context * bld_base,
393 const struct tgsi_full_instruction * inst,
394 const struct tgsi_opcode_info * info,
395 LLVMValueRef dst[4])
396 {
397 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
398 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
399 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
400 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
401 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
402 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
403 unsigned chan, chan_index;
404 boolean is_vec_store = FALSE;
405 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
406
407 if (dst[0]) {
408 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
409 is_vec_store = (k == LLVMVectorTypeKind);
410 }
411
412 if (is_vec_store) {
413 LLVMValueRef values[4] = {};
414 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
415 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
416 values[chan] = LLVMBuildExtractElement(gallivm->builder,
417 dst[0], index, "");
418 }
419 bld_base->emit_store(bld_base, inst, info, values);
420 return;
421 }
422
423 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
424 LLVMValueRef value = dst[chan_index];
425
426 if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
427 continue;
428 if (inst->Instruction.Saturate)
429 value = radeon_llvm_saturate(bld_base, value);
430
431 if (reg->Register.File == TGSI_FILE_ADDRESS) {
432 temp_ptr = bld->addr[reg->Register.Index][chan_index];
433 LLVMBuildStore(builder, value, temp_ptr);
434 continue;
435 }
436
437 if (dtype != TGSI_TYPE_DOUBLE)
438 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
439
440 if (reg->Register.Indirect) {
441 struct tgsi_declaration_range range = get_array_range(bld_base,
442 reg->Register.File, &reg->Indirect);
443
444 unsigned i, size = range.Last - range.First + 1;
445 LLVMValueRef array = LLVMBuildInsertElement(builder,
446 emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
447 value, emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");
448
449 for (i = 0; i < size; ++i) {
450 switch(reg->Register.File) {
451 case TGSI_FILE_OUTPUT:
452 temp_ptr = bld->outputs[i + range.First][chan_index];
453 break;
454
455 case TGSI_FILE_TEMPORARY:
456 if (range.First + i >= ctx->temps_count)
457 continue;
458 if (uses_temp_indirect_addressing(bld_base))
459 temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
460 else
461 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
462 break;
463
464 default:
465 return;
466 }
467 value = LLVMBuildExtractElement(builder, array,
468 lp_build_const_int32(gallivm, i), "");
469 LLVMBuildStore(builder, value, temp_ptr);
470 }
471
472 } else {
473 switch(reg->Register.File) {
474 case TGSI_FILE_OUTPUT:
475 temp_ptr = bld->outputs[reg->Register.Index][chan_index];
476 if (dtype == TGSI_TYPE_DOUBLE)
477 temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
478 break;
479
480 case TGSI_FILE_TEMPORARY:
481 if (reg->Register.Index >= ctx->temps_count)
482 continue;
483 if (uses_temp_indirect_addressing(bld_base)) {
484 temp_ptr = NULL;
485 break;
486 }
487 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
488 if (dtype == TGSI_TYPE_DOUBLE)
489 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
490
491 break;
492
493 default:
494 return;
495 }
496 if (dtype != TGSI_TYPE_DOUBLE)
497 LLVMBuildStore(builder, value, temp_ptr);
498 else {
499 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
500 LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
501 LLVMValueRef val2;
502 value = LLVMBuildExtractElement(builder, ptr,
503 bld_base->uint_bld.zero, "");
504 val2 = LLVMBuildExtractElement(builder, ptr,
505 bld_base->uint_bld.one, "");
506
507 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
508 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
509 }
510 }
511 }
512 }
513
514 static void bgnloop_emit(
515 const struct lp_build_tgsi_action * action,
516 struct lp_build_tgsi_context * bld_base,
517 struct lp_build_emit_data * emit_data)
518 {
519 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
520 struct gallivm_state * gallivm = bld_base->base.gallivm;
521 LLVMBasicBlockRef loop_block;
522 LLVMBasicBlockRef endloop_block;
523 endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
524 ctx->main_fn, "ENDLOOP");
525 loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
526 endloop_block, "LOOP");
527 LLVMBuildBr(gallivm->builder, loop_block);
528 LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
529
530 if (++ctx->loop_depth > ctx->loop_depth_max) {
531 unsigned new_max = ctx->loop_depth_max << 1;
532
533 if (!new_max)
534 new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
535
536 ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
537 sizeof(ctx->loop[0]),
538 new_max * sizeof(ctx->loop[0]));
539 ctx->loop_depth_max = new_max;
540 }
541
542 ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
543 ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
544 }
545
546 static void brk_emit(
547 const struct lp_build_tgsi_action * action,
548 struct lp_build_tgsi_context * bld_base,
549 struct lp_build_emit_data * emit_data)
550 {
551 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
552 struct gallivm_state * gallivm = bld_base->base.gallivm;
553 struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
554
555 LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
556 }
557
558 static void cont_emit(
559 const struct lp_build_tgsi_action * action,
560 struct lp_build_tgsi_context * bld_base,
561 struct lp_build_emit_data * emit_data)
562 {
563 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
564 struct gallivm_state * gallivm = bld_base->base.gallivm;
565 struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
566
567 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
568 }
569
570 static void else_emit(
571 const struct lp_build_tgsi_action * action,
572 struct lp_build_tgsi_context * bld_base,
573 struct lp_build_emit_data * emit_data)
574 {
575 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
576 struct gallivm_state * gallivm = bld_base->base.gallivm;
577 struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
578 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
579
580 /* We need to add a terminator to the current block if the previous
581 * instruction was an ENDIF.Example:
582 * IF
583 * [code]
584 * IF
585 * [code]
586 * ELSE
587 * [code]
588 * ENDIF <--
589 * ELSE<--
590 * [code]
591 * ENDIF
592 */
593
594 if (current_block != current_branch->if_block) {
595 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
596 }
597 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
598 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
599 }
600 current_branch->has_else = 1;
601 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
602 }
603
604 static void endif_emit(
605 const struct lp_build_tgsi_action * action,
606 struct lp_build_tgsi_context * bld_base,
607 struct lp_build_emit_data * emit_data)
608 {
609 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
610 struct gallivm_state * gallivm = bld_base->base.gallivm;
611 struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
612 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
613
614 /* If we have consecutive ENDIF instructions, then the first ENDIF
615 * will not have a terminator, so we need to add one. */
616 if (current_block != current_branch->if_block
617 && current_block != current_branch->else_block
618 && !LLVMGetBasicBlockTerminator(current_block)) {
619
620 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
621 }
622 if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
623 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
624 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
625 }
626
627 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
628 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
629 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
630 }
631
632 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
633 ctx->branch_depth--;
634 }
635
636 static void endloop_emit(
637 const struct lp_build_tgsi_action * action,
638 struct lp_build_tgsi_context * bld_base,
639 struct lp_build_emit_data * emit_data)
640 {
641 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
642 struct gallivm_state * gallivm = bld_base->base.gallivm;
643 struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
644
645 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
646 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
647 }
648
649 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
650 ctx->loop_depth--;
651 }
652
653 static void if_cond_emit(
654 const struct lp_build_tgsi_action * action,
655 struct lp_build_tgsi_context * bld_base,
656 struct lp_build_emit_data * emit_data,
657 LLVMValueRef cond)
658 {
659 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
660 struct gallivm_state * gallivm = bld_base->base.gallivm;
661 LLVMBasicBlockRef if_block, else_block, endif_block;
662
663 endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
664 ctx->main_fn, "ENDIF");
665 if_block = LLVMInsertBasicBlockInContext(gallivm->context,
666 endif_block, "IF");
667 else_block = LLVMInsertBasicBlockInContext(gallivm->context,
668 endif_block, "ELSE");
669 LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
670 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
671
672 if (++ctx->branch_depth > ctx->branch_depth_max) {
673 unsigned new_max = ctx->branch_depth_max << 1;
674
675 if (!new_max)
676 new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
677
678 ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
679 sizeof(ctx->branch[0]),
680 new_max * sizeof(ctx->branch[0]));
681 ctx->branch_depth_max = new_max;
682 }
683
684 ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
685 ctx->branch[ctx->branch_depth - 1].if_block = if_block;
686 ctx->branch[ctx->branch_depth - 1].else_block = else_block;
687 ctx->branch[ctx->branch_depth - 1].has_else = 0;
688 }
689
690 static void if_emit(
691 const struct lp_build_tgsi_action * action,
692 struct lp_build_tgsi_context * bld_base,
693 struct lp_build_emit_data * emit_data)
694 {
695 struct gallivm_state * gallivm = bld_base->base.gallivm;
696 LLVMValueRef cond;
697
698 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
699 emit_data->args[0],
700 bld_base->base.zero, "");
701
702 if_cond_emit(action, bld_base, emit_data, cond);
703 }
704
705 static void uif_emit(
706 const struct lp_build_tgsi_action * action,
707 struct lp_build_tgsi_context * bld_base,
708 struct lp_build_emit_data * emit_data)
709 {
710 struct gallivm_state * gallivm = bld_base->base.gallivm;
711 LLVMValueRef cond;
712
713 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
714 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
715 bld_base->int_bld.zero, "");
716
717 if_cond_emit(action, bld_base, emit_data, cond);
718 }
719
720 static void kill_if_fetch_args(
721 struct lp_build_tgsi_context * bld_base,
722 struct lp_build_emit_data * emit_data)
723 {
724 const struct tgsi_full_instruction * inst = emit_data->inst;
725 struct gallivm_state *gallivm = bld_base->base.gallivm;
726 LLVMBuilderRef builder = gallivm->builder;
727 unsigned i;
728 LLVMValueRef conds[TGSI_NUM_CHANNELS];
729
730 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
731 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
732 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
733 bld_base->base.zero, "");
734 }
735
736 /* Or the conditions together */
737 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
738 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
739 }
740
741 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
742 emit_data->arg_count = 1;
743 emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
744 lp_build_const_float(gallivm, -1.0f),
745 bld_base->base.zero, "");
746 }
747
748 static void kil_emit(
749 const struct lp_build_tgsi_action * action,
750 struct lp_build_tgsi_context * bld_base,
751 struct lp_build_emit_data * emit_data)
752 {
753 unsigned i;
754 for (i = 0; i < emit_data->arg_count; i++) {
755 emit_data->output[i] = lp_build_intrinsic_unary(
756 bld_base->base.gallivm->builder,
757 action->intr_name,
758 emit_data->dst_type, emit_data->args[i]);
759 }
760 }
761
762 static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
763 LLVMValueRef *in, LLVMValueRef *out)
764 {
765 struct gallivm_state * gallivm = bld_base->base.gallivm;
766 LLVMBuilderRef builder = gallivm->builder;
767 LLVMTypeRef type = bld_base->base.elem_type;
768 LLVMValueRef coords[4];
769 LLVMValueRef mad_args[3];
770 LLVMValueRef v, cube_vec;
771 unsigned i;
772
773 cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
774 v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
775 &cube_vec, 1, LLVMReadNoneAttribute);
776
777 for (i = 0; i < 4; ++i)
778 coords[i] = LLVMBuildExtractElement(builder, v,
779 lp_build_const_int32(gallivm, i), "");
780
781 coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
782 type, &coords[2], 1, LLVMReadNoneAttribute);
783 coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
784
785 mad_args[1] = coords[2];
786 mad_args[2] = LLVMConstReal(type, 1.5);
787
788 mad_args[0] = coords[0];
789 coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
790 mad_args[0], mad_args[1], mad_args[2]);
791
792 mad_args[0] = coords[1];
793 coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
794 mad_args[0], mad_args[1], mad_args[2]);
795
796 /* apply xyz = yxw swizzle to cooords */
797 out[0] = coords[1];
798 out[1] = coords[0];
799 out[2] = coords[3];
800 }
801
802 void radeon_llvm_emit_prepare_cube_coords(
803 struct lp_build_tgsi_context * bld_base,
804 struct lp_build_emit_data * emit_data,
805 LLVMValueRef *coords_arg,
806 LLVMValueRef *derivs_arg)
807 {
808
809 unsigned target = emit_data->inst->Texture.Texture;
810 unsigned opcode = emit_data->inst->Instruction.Opcode;
811 struct gallivm_state * gallivm = bld_base->base.gallivm;
812 LLVMBuilderRef builder = gallivm->builder;
813 LLVMValueRef coords[4];
814 unsigned i;
815
816 radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
817
818 if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
819 LLVMValueRef derivs[4];
820 int axis;
821
822 /* Convert cube derivatives to 2D derivatives. */
823 for (axis = 0; axis < 2; axis++) {
824 LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
825
826 /* Shift the cube coordinates by the derivatives to get
827 * the cube coordinates of the "neighboring pixel".
828 */
829 for (i = 0; i < 3; i++)
830 shifted_cube_coords[i] =
831 LLVMBuildFAdd(builder, coords_arg[i],
832 derivs_arg[axis*3+i], "");
833 shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
834
835 /* Project the shifted cube coordinates onto the face. */
836 radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
837 shifted_coords);
838
839 /* Subtract both sets of 2D coordinates to get 2D derivatives.
840 * This won't work if the shifted coordinates ended up
841 * in a different face.
842 */
843 for (i = 0; i < 2; i++)
844 derivs[axis * 2 + i] =
845 LLVMBuildFSub(builder, shifted_coords[i],
846 coords[i], "");
847 }
848
849 memcpy(derivs_arg, derivs, sizeof(derivs));
850 }
851
852 if (target == TGSI_TEXTURE_CUBE_ARRAY ||
853 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
854 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
855 /* coords_arg.w component - array_index for cube arrays */
856 coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
857 coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
858 }
859
860 /* Preserve compare/lod/bias. Put it in coords.w. */
861 if (opcode == TGSI_OPCODE_TEX2 ||
862 opcode == TGSI_OPCODE_TXB2 ||
863 opcode == TGSI_OPCODE_TXL2) {
864 coords[3] = coords_arg[4];
865 } else if (opcode == TGSI_OPCODE_TXB ||
866 opcode == TGSI_OPCODE_TXL ||
867 target == TGSI_TEXTURE_SHADOWCUBE) {
868 coords[3] = coords_arg[3];
869 }
870
871 memcpy(coords_arg, coords, sizeof(coords));
872 }
873
874 static void emit_icmp(
875 const struct lp_build_tgsi_action * action,
876 struct lp_build_tgsi_context * bld_base,
877 struct lp_build_emit_data * emit_data)
878 {
879 unsigned pred;
880 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
881 LLVMContextRef context = bld_base->base.gallivm->context;
882
883 switch (emit_data->inst->Instruction.Opcode) {
884 case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
885 case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
886 case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
887 case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
888 case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
889 case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
890 default:
891 assert(!"unknown instruction");
892 pred = 0;
893 break;
894 }
895
896 LLVMValueRef v = LLVMBuildICmp(builder, pred,
897 emit_data->args[0], emit_data->args[1],"");
898
899 v = LLVMBuildSExtOrBitCast(builder, v,
900 LLVMInt32TypeInContext(context), "");
901
902 emit_data->output[emit_data->chan] = v;
903 }
904
905 static void emit_ucmp(
906 const struct lp_build_tgsi_action * action,
907 struct lp_build_tgsi_context * bld_base,
908 struct lp_build_emit_data * emit_data)
909 {
910 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
911
912 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
913 bld_base->uint_bld.elem_type, "");
914
915 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
916 bld_base->uint_bld.zero, "");
917
918 emit_data->output[emit_data->chan] =
919 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
920 }
921
922 static void emit_cmp(const struct lp_build_tgsi_action *action,
923 struct lp_build_tgsi_context *bld_base,
924 struct lp_build_emit_data *emit_data)
925 {
926 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
927 LLVMValueRef cond, *args = emit_data->args;
928
929 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
930 bld_base->base.zero, "");
931
932 emit_data->output[emit_data->chan] =
933 LLVMBuildSelect(builder, cond, args[1], args[2], "");
934 }
935
936 static void emit_set_cond(
937 const struct lp_build_tgsi_action *action,
938 struct lp_build_tgsi_context * bld_base,
939 struct lp_build_emit_data * emit_data)
940 {
941 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
942 LLVMRealPredicate pred;
943 LLVMValueRef cond;
944
945 /* Use ordered for everything but NE (which is usual for
946 * float comparisons)
947 */
948 switch (emit_data->inst->Instruction.Opcode) {
949 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
950 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
951 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
952 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
953 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
954 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
955 default: assert(!"unknown instruction"); pred = 0; break;
956 }
957
958 cond = LLVMBuildFCmp(builder,
959 pred, emit_data->args[0], emit_data->args[1], "");
960
961 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
962 cond, bld_base->base.one, bld_base->base.zero, "");
963 }
964
965 static void emit_fcmp(
966 const struct lp_build_tgsi_action *action,
967 struct lp_build_tgsi_context * bld_base,
968 struct lp_build_emit_data * emit_data)
969 {
970 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
971 LLVMContextRef context = bld_base->base.gallivm->context;
972 LLVMRealPredicate pred;
973
974 /* Use ordered for everything but NE (which is usual for
975 * float comparisons)
976 */
977 switch (emit_data->inst->Instruction.Opcode) {
978 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
979 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
980 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
981 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
982 default: assert(!"unknown instruction"); pred = 0; break;
983 }
984
985 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
986 emit_data->args[0], emit_data->args[1],"");
987
988 v = LLVMBuildSExtOrBitCast(builder, v,
989 LLVMInt32TypeInContext(context), "");
990
991 emit_data->output[emit_data->chan] = v;
992 }
993
994 static void emit_dcmp(
995 const struct lp_build_tgsi_action *action,
996 struct lp_build_tgsi_context * bld_base,
997 struct lp_build_emit_data * emit_data)
998 {
999 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1000 LLVMContextRef context = bld_base->base.gallivm->context;
1001 LLVMRealPredicate pred;
1002
1003 /* Use ordered for everything but NE (which is usual for
1004 * float comparisons)
1005 */
1006 switch (emit_data->inst->Instruction.Opcode) {
1007 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
1008 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
1009 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
1010 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
1011 default: assert(!"unknown instruction"); pred = 0; break;
1012 }
1013
1014 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1015 emit_data->args[0], emit_data->args[1],"");
1016
1017 v = LLVMBuildSExtOrBitCast(builder, v,
1018 LLVMInt32TypeInContext(context), "");
1019
1020 emit_data->output[emit_data->chan] = v;
1021 }
1022
1023 static void emit_not(
1024 const struct lp_build_tgsi_action * action,
1025 struct lp_build_tgsi_context * bld_base,
1026 struct lp_build_emit_data * emit_data)
1027 {
1028 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1029 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
1030 emit_data->args[0]);
1031 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
1032 }
1033
1034 static void emit_arl(
1035 const struct lp_build_tgsi_action * action,
1036 struct lp_build_tgsi_context * bld_base,
1037 struct lp_build_emit_data * emit_data)
1038 {
1039 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1040 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
1041 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1042 floor_index, bld_base->base.int_elem_type , "");
1043 }
1044
1045 static void emit_and(
1046 const struct lp_build_tgsi_action * action,
1047 struct lp_build_tgsi_context * bld_base,
1048 struct lp_build_emit_data * emit_data)
1049 {
1050 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1051 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1052 emit_data->args[0], emit_data->args[1], "");
1053 }
1054
1055 static void emit_or(
1056 const struct lp_build_tgsi_action * action,
1057 struct lp_build_tgsi_context * bld_base,
1058 struct lp_build_emit_data * emit_data)
1059 {
1060 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1061 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1062 emit_data->args[0], emit_data->args[1], "");
1063 }
1064
1065 static void emit_uadd(
1066 const struct lp_build_tgsi_action * action,
1067 struct lp_build_tgsi_context * bld_base,
1068 struct lp_build_emit_data * emit_data)
1069 {
1070 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1071 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1072 emit_data->args[0], emit_data->args[1], "");
1073 }
1074
1075 static void emit_udiv(
1076 const struct lp_build_tgsi_action * action,
1077 struct lp_build_tgsi_context * bld_base,
1078 struct lp_build_emit_data * emit_data)
1079 {
1080 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1081 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1082 emit_data->args[0], emit_data->args[1], "");
1083 }
1084
1085 static void emit_idiv(
1086 const struct lp_build_tgsi_action * action,
1087 struct lp_build_tgsi_context * bld_base,
1088 struct lp_build_emit_data * emit_data)
1089 {
1090 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1091 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1092 emit_data->args[0], emit_data->args[1], "");
1093 }
1094
1095 static void emit_mod(
1096 const struct lp_build_tgsi_action * action,
1097 struct lp_build_tgsi_context * bld_base,
1098 struct lp_build_emit_data * emit_data)
1099 {
1100 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1101 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1102 emit_data->args[0], emit_data->args[1], "");
1103 }
1104
1105 static void emit_umod(
1106 const struct lp_build_tgsi_action * action,
1107 struct lp_build_tgsi_context * bld_base,
1108 struct lp_build_emit_data * emit_data)
1109 {
1110 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1111 emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1112 emit_data->args[0], emit_data->args[1], "");
1113 }
1114
1115 static void emit_shl(
1116 const struct lp_build_tgsi_action * action,
1117 struct lp_build_tgsi_context * bld_base,
1118 struct lp_build_emit_data * emit_data)
1119 {
1120 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1121 emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1122 emit_data->args[0], emit_data->args[1], "");
1123 }
1124
1125 static void emit_ushr(
1126 const struct lp_build_tgsi_action * action,
1127 struct lp_build_tgsi_context * bld_base,
1128 struct lp_build_emit_data * emit_data)
1129 {
1130 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1131 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1132 emit_data->args[0], emit_data->args[1], "");
1133 }
1134 static void emit_ishr(
1135 const struct lp_build_tgsi_action * action,
1136 struct lp_build_tgsi_context * bld_base,
1137 struct lp_build_emit_data * emit_data)
1138 {
1139 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1140 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1141 emit_data->args[0], emit_data->args[1], "");
1142 }
1143
1144 static void emit_xor(
1145 const struct lp_build_tgsi_action * action,
1146 struct lp_build_tgsi_context * bld_base,
1147 struct lp_build_emit_data * emit_data)
1148 {
1149 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1150 emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1151 emit_data->args[0], emit_data->args[1], "");
1152 }
1153
1154 static void emit_ssg(
1155 const struct lp_build_tgsi_action * action,
1156 struct lp_build_tgsi_context * bld_base,
1157 struct lp_build_emit_data * emit_data)
1158 {
1159 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1160
1161 LLVMValueRef cmp, val;
1162
1163 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
1164 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
1165 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
1166 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
1167 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
1168 } else { // float SSG
1169 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
1170 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
1171 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
1172 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
1173 }
1174
1175 emit_data->output[emit_data->chan] = val;
1176 }
1177
1178 static void emit_ineg(
1179 const struct lp_build_tgsi_action * action,
1180 struct lp_build_tgsi_context * bld_base,
1181 struct lp_build_emit_data * emit_data)
1182 {
1183 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1184 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1185 emit_data->args[0], "");
1186 }
1187
1188 static void emit_dneg(
1189 const struct lp_build_tgsi_action * action,
1190 struct lp_build_tgsi_context * bld_base,
1191 struct lp_build_emit_data * emit_data)
1192 {
1193 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1194 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
1195 emit_data->args[0], "");
1196 }
1197
1198 static void emit_frac(
1199 const struct lp_build_tgsi_action * action,
1200 struct lp_build_tgsi_context * bld_base,
1201 struct lp_build_emit_data * emit_data)
1202 {
1203 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1204 char *intr;
1205
1206 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
1207 intr = "llvm.floor.f32";
1208 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
1209 intr = "llvm.floor.f64";
1210 else {
1211 assert(0);
1212 return;
1213 }
1214
1215 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
1216 &emit_data->args[0], 1,
1217 LLVMReadNoneAttribute);
1218 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
1219 emit_data->args[0], floor, "");
1220 }
1221
1222 static void emit_f2i(
1223 const struct lp_build_tgsi_action * action,
1224 struct lp_build_tgsi_context * bld_base,
1225 struct lp_build_emit_data * emit_data)
1226 {
1227 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1228 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1229 emit_data->args[0], bld_base->int_bld.elem_type, "");
1230 }
1231
1232 static void emit_f2u(
1233 const struct lp_build_tgsi_action * action,
1234 struct lp_build_tgsi_context * bld_base,
1235 struct lp_build_emit_data * emit_data)
1236 {
1237 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1238 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1239 emit_data->args[0], bld_base->uint_bld.elem_type, "");
1240 }
1241
1242 static void emit_i2f(
1243 const struct lp_build_tgsi_action * action,
1244 struct lp_build_tgsi_context * bld_base,
1245 struct lp_build_emit_data * emit_data)
1246 {
1247 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1248 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1249 emit_data->args[0], bld_base->base.elem_type, "");
1250 }
1251
1252 static void emit_u2f(
1253 const struct lp_build_tgsi_action * action,
1254 struct lp_build_tgsi_context * bld_base,
1255 struct lp_build_emit_data * emit_data)
1256 {
1257 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1258 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1259 emit_data->args[0], bld_base->base.elem_type, "");
1260 }
1261
1262 static void emit_immediate(struct lp_build_tgsi_context * bld_base,
1263 const struct tgsi_full_immediate *imm)
1264 {
1265 unsigned i;
1266 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
1267
1268 for (i = 0; i < 4; ++i) {
1269 ctx->soa.immediates[ctx->soa.num_immediates][i] =
1270 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
1271 }
1272
1273 ctx->soa.num_immediates++;
1274 }
1275
1276 void
1277 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
1278 struct lp_build_tgsi_context *bld_base,
1279 struct lp_build_emit_data *emit_data)
1280 {
1281 struct lp_build_context * base = &bld_base->base;
1282 emit_data->output[emit_data->chan] =
1283 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
1284 emit_data->dst_type, emit_data->args,
1285 emit_data->arg_count, LLVMReadNoneAttribute);
1286 }
1287
1288 static void emit_bfi(const struct lp_build_tgsi_action * action,
1289 struct lp_build_tgsi_context * bld_base,
1290 struct lp_build_emit_data * emit_data)
1291 {
1292 struct gallivm_state *gallivm = bld_base->base.gallivm;
1293 LLVMBuilderRef builder = gallivm->builder;
1294 LLVMValueRef bfi_args[3];
1295
1296 // Calculate the bitmask: (((1 << src3) - 1) << src2
1297 bfi_args[0] = LLVMBuildShl(builder,
1298 LLVMBuildSub(builder,
1299 LLVMBuildShl(builder,
1300 bld_base->int_bld.one,
1301 emit_data->args[3], ""),
1302 bld_base->int_bld.one, ""),
1303 emit_data->args[2], "");
1304
1305 bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
1306 emit_data->args[2], "");
1307
1308 bfi_args[2] = emit_data->args[0];
1309
1310 /* Calculate:
1311 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
1312 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1313 */
1314 emit_data->output[emit_data->chan] =
1315 LLVMBuildXor(builder, bfi_args[2],
1316 LLVMBuildAnd(builder, bfi_args[0],
1317 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
1318 ""), ""), "");
1319 }
1320
1321 /* this is ffs in C */
1322 static void emit_lsb(const struct lp_build_tgsi_action * action,
1323 struct lp_build_tgsi_context * bld_base,
1324 struct lp_build_emit_data * emit_data)
1325 {
1326 struct gallivm_state *gallivm = bld_base->base.gallivm;
1327 LLVMValueRef args[2] = {
1328 emit_data->args[0],
1329
1330 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
1331 * add special code to check for x=0. The reason is that
1332 * the LLVM behavior for x=0 is different from what we
1333 * need here.
1334 *
1335 * The hardware already implements the correct behavior.
1336 */
1337 lp_build_const_int32(gallivm, 1)
1338 };
1339
1340 emit_data->output[emit_data->chan] =
1341 lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
1342 emit_data->dst_type, args, Elements(args),
1343 LLVMReadNoneAttribute);
1344 }
1345
1346 /* Find the last bit set. */
1347 static void emit_umsb(const struct lp_build_tgsi_action * action,
1348 struct lp_build_tgsi_context * bld_base,
1349 struct lp_build_emit_data * emit_data)
1350 {
1351 struct gallivm_state *gallivm = bld_base->base.gallivm;
1352 LLVMBuilderRef builder = gallivm->builder;
1353 LLVMValueRef args[2] = {
1354 emit_data->args[0],
1355 /* Don't generate code for handling zero: */
1356 lp_build_const_int32(gallivm, 1)
1357 };
1358
1359 LLVMValueRef msb =
1360 lp_build_intrinsic(builder, "llvm.ctlz.i32",
1361 emit_data->dst_type, args, Elements(args),
1362 LLVMReadNoneAttribute);
1363
1364 /* The HW returns the last bit index from MSB, but TGSI wants
1365 * the index from LSB. Invert it by doing "31 - msb". */
1366 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1367 msb, "");
1368
1369 /* Check for zero: */
1370 emit_data->output[emit_data->chan] =
1371 LLVMBuildSelect(builder,
1372 LLVMBuildICmp(builder, LLVMIntEQ, args[0],
1373 bld_base->uint_bld.zero, ""),
1374 lp_build_const_int32(gallivm, -1), msb, "");
1375 }
1376
1377 /* Find the last bit opposite of the sign bit. */
1378 static void emit_imsb(const struct lp_build_tgsi_action * action,
1379 struct lp_build_tgsi_context * bld_base,
1380 struct lp_build_emit_data * emit_data)
1381 {
1382 struct gallivm_state *gallivm = bld_base->base.gallivm;
1383 LLVMBuilderRef builder = gallivm->builder;
1384 LLVMValueRef arg = emit_data->args[0];
1385
1386 LLVMValueRef msb =
1387 lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
1388 emit_data->dst_type, &arg, 1,
1389 LLVMReadNoneAttribute);
1390
1391 /* The HW returns the last bit index from MSB, but TGSI wants
1392 * the index from LSB. Invert it by doing "31 - msb". */
1393 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1394 msb, "");
1395
1396 /* If arg == 0 || arg == -1 (0xffffffff), return -1. */
1397 LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
1398
1399 LLVMValueRef cond =
1400 LLVMBuildOr(builder,
1401 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1402 bld_base->uint_bld.zero, ""),
1403 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1404 all_ones, ""), "");
1405
1406 emit_data->output[emit_data->chan] =
1407 LLVMBuildSelect(builder, cond, all_ones, msb, "");
1408 }
1409
1410 static void emit_iabs(const struct lp_build_tgsi_action *action,
1411 struct lp_build_tgsi_context *bld_base,
1412 struct lp_build_emit_data *emit_data)
1413 {
1414 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1415
1416 emit_data->output[emit_data->chan] =
1417 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
1418 emit_data->args[0],
1419 LLVMBuildNeg(builder,
1420 emit_data->args[0], ""));
1421 }
1422
1423 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
1424 struct lp_build_tgsi_context *bld_base,
1425 struct lp_build_emit_data *emit_data)
1426 {
1427 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1428 LLVMIntPredicate op;
1429
1430 switch (emit_data->info->opcode) {
1431 default:
1432 assert(0);
1433 case TGSI_OPCODE_IMAX:
1434 op = LLVMIntSGT;
1435 break;
1436 case TGSI_OPCODE_IMIN:
1437 op = LLVMIntSLT;
1438 break;
1439 case TGSI_OPCODE_UMAX:
1440 op = LLVMIntUGT;
1441 break;
1442 case TGSI_OPCODE_UMIN:
1443 op = LLVMIntULT;
1444 break;
1445 }
1446
1447 emit_data->output[emit_data->chan] =
1448 LLVMBuildSelect(builder,
1449 LLVMBuildICmp(builder, op, emit_data->args[0],
1450 emit_data->args[1], ""),
1451 emit_data->args[0],
1452 emit_data->args[1], "");
1453 }
1454
1455 void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
1456 {
1457 struct lp_type type;
1458
1459 /* Initialize the gallivm object:
1460 * We are only using the module, context, and builder fields of this struct.
1461 * This should be enough for us to be able to pass our gallivm struct to the
1462 * helper functions in the gallivm module.
1463 */
1464 memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
1465 memset(&ctx->soa, 0, sizeof(ctx->soa));
1466 ctx->gallivm.context = LLVMContextCreate();
1467 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1468 ctx->gallivm.context);
1469 ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
1470
1471 struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
1472
1473 type.floating = TRUE;
1474 type.fixed = FALSE;
1475 type.sign = TRUE;
1476 type.norm = FALSE;
1477 type.width = 32;
1478 type.length = 1;
1479
1480 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1481 lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1482 lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1483 {
1484 struct lp_type dbl_type;
1485 dbl_type = type;
1486 dbl_type.width *= 2;
1487 lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
1488 }
1489
1490 bld_base->soa = 1;
1491 bld_base->emit_store = radeon_llvm_emit_store;
1492 bld_base->emit_swizzle = emit_swizzle;
1493 bld_base->emit_declaration = emit_declaration;
1494 bld_base->emit_immediate = emit_immediate;
1495
1496 bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
1497 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
1498 bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
1499 bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
1500 bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1501
1502 /* Allocate outputs */
1503 ctx->soa.outputs = ctx->outputs;
1504
1505 lp_set_default_actions(bld_base);
1506
1507 bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
1508 bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
1509 bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
1510 bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
1511 bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
1512 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1513 bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
1514 bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
1515 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1516 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
1517 bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
1518 bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
1519 bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
1520 bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
1521 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1522 bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
1523 bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
1524 bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
1525 bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
1526 bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
1527 bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
1528 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
1529 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
1530 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
1531 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
1532 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
1533 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
1534 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
1535 bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
1536 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
1537 bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
1538 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1539 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1540 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1541 bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
1542 bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
1543 bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
1544 bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
1545 bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
1546 bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
1547 bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
1548 bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
1549 bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
1550 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
1551 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
1552 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
1553 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
1554 bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
1555 bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
1556 bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
1557 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
1558 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1559 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1560 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
1561 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
1562 bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
1563 bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
1564 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
1565 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
1566 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
1567 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
1568 bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
1569 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
1570 bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
1571 bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
1572 bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
1573 bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
1574 bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
1575 bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
1576 bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
1577 bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
1578 bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
1579 bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
1580 bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
1581 bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
1582 bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
1583 bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
1584 bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
1585 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
1586 bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
1587 bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
1588 bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
1589 bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
1590 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
1591 bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
1592 bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
1593 bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
1594 bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
1595 bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
1596 bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
1597 bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
1598 bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
1599 bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
1600 bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
1601 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
1602 bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
1603 bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
1604 bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
1605 bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
1606 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
1607 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
1608 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
1609 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
1610 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
1611 bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
1612 bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
1613 bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
1614 bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
1615 bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
1616 bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
1617 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
1618 }
1619
1620 void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
1621 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1622 {
1623 LLVMTypeRef main_fn_type;
1624 LLVMBasicBlockRef main_fn_body;
1625
1626 /* Setup the function */
1627 main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
1628 ParamTypes, ParamCount, 0);
1629 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
1630 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1631 ctx->main_fn, "main_body");
1632 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1633 }
1634
1635 void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
1636 {
1637 struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
1638 /* End the main function with Return*/
1639 LLVMBuildRetVoid(gallivm->builder);
1640
1641 /* Create the pass manager */
1642 ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
1643 gallivm->module);
1644
1645 /* This pass should eliminate all the load and store instructions */
1646 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1647
1648 /* Add some optimization passes */
1649 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1650 LLVMAddLICMPass(gallivm->passmgr);
1651 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1652 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1653 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1654
1655 /* Run the pass */
1656 LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
1657
1658 LLVMDisposeBuilder(gallivm->builder);
1659 LLVMDisposePassManager(gallivm->passmgr);
1660
1661 }
1662
1663 void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
1664 {
1665 LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
1666 LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
1667 FREE(ctx->arrays);
1668 ctx->arrays = NULL;
1669 FREE(ctx->temps);
1670 ctx->temps = NULL;
1671 ctx->temps_count = 0;
1672 FREE(ctx->loop);
1673 ctx->loop = NULL;
1674 ctx->loop_depth_max = 0;
1675 FREE(ctx->branch);
1676 ctx->branch = NULL;
1677 ctx->branch_depth_max = 0;
1678 }