radeonsi: convert to 64-bitness checks instead of doubles.
[mesa.git] src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
1 /*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 */
26 #include "radeon_llvm.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_misc.h"
34 #include "gallivm/lp_bld_swizzle.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "util/u_math.h"
38 #include "util/u_memory.h"
39 #include "util/u_debug.h"
40
41 #include <llvm-c/Core.h>
42 #include <llvm-c/Transforms/Scalar.h>
43
44 static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx)
45 {
46 return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
47 }
48
49 static struct radeon_llvm_branch * get_current_branch(
50 struct radeon_llvm_context * ctx)
51 {
52 return ctx->branch_depth > 0 ?
53 ctx->branch + (ctx->branch_depth - 1) : NULL;
54 }
55
56 unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
57 {
58 return (index * 4) + chan;
59 }
60
61 static LLVMValueRef emit_swizzle(
62 struct lp_build_tgsi_context * bld_base,
63 LLVMValueRef value,
64 unsigned swizzle_x,
65 unsigned swizzle_y,
66 unsigned swizzle_z,
67 unsigned swizzle_w)
68 {
69 LLVMValueRef swizzles[4];
70 LLVMTypeRef i32t =
71 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
72
73 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
74 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
75 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
76 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
77
78 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
79 value,
80 LLVMGetUndef(LLVMTypeOf(value)),
81 LLVMConstVector(swizzles, 4), "");
82 }
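/* Example: the four swizzle selectors simply become a constant shuffle mask,
 * e.g. a .wzyx swizzle of a 4-wide value calls emit_swizzle(bld_base, value,
 * 3, 2, 1, 0) and emits a shufflevector of (value, undef) with mask
 * <3, 2, 1, 0>.
 */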
83
84 static struct tgsi_declaration_range
85 get_array_range(struct lp_build_tgsi_context *bld_base,
86 unsigned File, const struct tgsi_ind_register *reg)
87 {
88 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
89
90 if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
91 reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
92 struct tgsi_declaration_range range;
93 range.First = 0;
94 range.Last = bld_base->info->file_max[File];
95 return range;
96 }
97
98 return ctx->arrays[reg->ArrayID - 1];
99 }
100
101 static LLVMValueRef
102 emit_array_index(
103 struct lp_build_tgsi_soa_context *bld,
104 const struct tgsi_ind_register *reg,
105 unsigned offset)
106 {
107 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
108
109 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
110 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
111 }
112
113 LLVMValueRef
114 radeon_llvm_emit_fetch_64bit(
115 struct lp_build_tgsi_context *bld_base,
116 enum tgsi_opcode_type type,
117 LLVMValueRef ptr,
118 LLVMValueRef ptr2)
119 {
120 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
121 LLVMValueRef result;
122
123 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
124
125 result = LLVMBuildInsertElement(builder,
126 result,
127 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
128 bld_base->int_bld.zero, "");
129 result = LLVMBuildInsertElement(builder,
130 result,
131 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
132 bld_base->int_bld.one, "");
133 return bitcast(bld_base, type, result);
134 }
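/* Example: for a 64-bit source the two 32-bit channel values (ptr and ptr2,
 * i.e. channel "swizzle" and channel "swizzle + 1") are packed into a
 * <2 x i32> vector, with the first channel supplying the low dword and the
 * second the high dword, and the vector is then bitcast to the requested
 * 64-bit type (f64 for TGSI_TYPE_DOUBLE).
 */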
135
136 static LLVMValueRef
137 emit_array_fetch(
138 struct lp_build_tgsi_context *bld_base,
139 unsigned File, enum tgsi_opcode_type type,
140 struct tgsi_declaration_range range,
141 unsigned swizzle)
142 {
143 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
144 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
145 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
146
147 unsigned i, size = range.Last - range.First + 1;
148 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
149 LLVMValueRef result = LLVMGetUndef(vec);
150
151 struct tgsi_full_src_register tmp_reg = {};
152 tmp_reg.Register.File = File;
153
154 for (i = 0; i < size; ++i) {
155 tmp_reg.Register.Index = i + range.First;
156 LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
157 result = LLVMBuildInsertElement(builder, result, temp,
158 lp_build_const_int32(gallivm, i), "");
159 }
160 return result;
161 }
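/* Example: for an indirectly addressed TEMP range covering TEMP[4]..TEMP[11],
 * all 8 elements are gathered into a single 8-wide vector so the caller can
 * pick the dynamically indexed element with one extractelement; see
 * radeon_llvm_emit_fetch() below.
 */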
162
163 static bool uses_temp_indirect_addressing(
164 struct lp_build_tgsi_context *bld_base)
165 {
166 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
167 return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
168 }
169
170 LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
171 const struct tgsi_full_src_register *reg,
172 enum tgsi_opcode_type type,
173 unsigned swizzle)
174 {
175 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
176 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
177 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
178 LLVMValueRef result = NULL, ptr, ptr2;
179
180 if (swizzle == ~0) {
181 LLVMValueRef values[TGSI_NUM_CHANNELS];
182 unsigned chan;
183 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
184 values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
185 }
186 return lp_build_gather_values(bld_base->base.gallivm, values,
187 TGSI_NUM_CHANNELS);
188 }
189
190 if (reg->Register.Indirect) {
191 struct tgsi_declaration_range range = get_array_range(bld_base,
192 reg->Register.File, &reg->Indirect);
193 return LLVMBuildExtractElement(builder,
194 emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
195 emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
196 "");
197 }
198
199 switch(reg->Register.File) {
200 case TGSI_FILE_IMMEDIATE: {
201 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
202 if (tgsi_type_is_64bit(type)) {
203 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
204 result = LLVMConstInsertElement(result,
205 bld->immediates[reg->Register.Index][swizzle],
206 bld_base->int_bld.zero);
207 result = LLVMConstInsertElement(result,
208 bld->immediates[reg->Register.Index][swizzle + 1],
209 bld_base->int_bld.one);
210 return LLVMConstBitCast(result, ctype);
211 } else {
212 return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
213 }
214 }
215
216 case TGSI_FILE_INPUT:
217 result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
218 if (tgsi_type_is_64bit(type)) {
219 ptr = result;
220 ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
221 return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
222 }
223 break;
224
225 case TGSI_FILE_TEMPORARY:
226 if (reg->Register.Index >= ctx->temps_count)
227 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
228 if (uses_temp_indirect_addressing(bld_base)) {
229 ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
230 break;
231 }
232 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
233 if (tgsi_type_is_64bit(type)) {
234 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
235 return radeon_llvm_emit_fetch_64bit(bld_base, type,
236 LLVMBuildLoad(builder, ptr, ""),
237 LLVMBuildLoad(builder, ptr2, ""));
238 }
239 result = LLVMBuildLoad(builder, ptr, "");
240 break;
241
242 case TGSI_FILE_OUTPUT:
243 ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
244 if (tgsi_type_is_64bit(type)) {
245 ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
246 return radeon_llvm_emit_fetch_64bit(bld_base, type,
247 LLVMBuildLoad(builder, ptr, ""),
248 LLVMBuildLoad(builder, ptr2, ""));
249 }
250 result = LLVMBuildLoad(builder, ptr, "");
251 break;
252
253 default:
254 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
255 }
256
257 return bitcast(bld_base, type, result);
258 }
259
260 static LLVMValueRef fetch_system_value(
261 struct lp_build_tgsi_context * bld_base,
262 const struct tgsi_full_src_register *reg,
263 enum tgsi_opcode_type type,
264 unsigned swizzle)
265 {
266 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
267 struct gallivm_state *gallivm = bld_base->base.gallivm;
268
269 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
270 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
271 cval = LLVMBuildExtractElement(gallivm->builder, cval,
272 lp_build_const_int32(gallivm, swizzle), "");
273 }
274 return bitcast(bld_base, type, cval);
275 }
276
277 static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
278 LLVMTypeRef type,
279 const char *name)
280 {
281 LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
282 LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
283 return ptr;
284 }
285
286 static void emit_declaration(
287 struct lp_build_tgsi_context * bld_base,
288 const struct tgsi_full_declaration *decl)
289 {
290 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
291 unsigned first, last, i, idx;
292 switch(decl->Declaration.File) {
293 case TGSI_FILE_ADDRESS:
294 {
295 unsigned idx;
296 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
297 unsigned chan;
298 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
299 ctx->soa.addr[idx][chan] = si_build_alloca_undef(
300 &ctx->gallivm,
301 ctx->soa.bld_base.uint_bld.elem_type, "");
302 }
303 }
304 break;
305 }
306
307 case TGSI_FILE_TEMPORARY:
308 if (decl->Declaration.Array) {
309 if (!ctx->arrays) {
310 int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
311 ctx->arrays = MALLOC(sizeof(ctx->arrays[0]) * size);
312 }
313
314 ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
315 }
316 if (uses_temp_indirect_addressing(bld_base)) {
317 lp_emit_declaration_soa(bld_base, decl);
318 break;
319 }
320 first = decl->Range.First;
321 last = decl->Range.Last;
322 if (!ctx->temps_count) {
323 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
324 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
325 }
326 for (idx = first; idx <= last; idx++) {
327 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
328 ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
329 si_build_alloca_undef(bld_base->base.gallivm,
330 bld_base->base.vec_type,
331 "temp");
332 }
333 }
334 break;
335
336 case TGSI_FILE_INPUT:
337 {
338 unsigned idx;
339 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
340 if (ctx->load_input)
341 ctx->load_input(ctx, idx, decl);
342 }
343 }
344 break;
345
346 case TGSI_FILE_SYSTEM_VALUE:
347 {
348 unsigned idx;
349 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
350 ctx->load_system_value(ctx, idx, decl);
351 }
352 }
353 break;
354
355 case TGSI_FILE_OUTPUT:
356 {
357 unsigned idx;
358 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
359 unsigned chan;
360 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
361 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
362 ctx->soa.outputs[idx][chan] = si_build_alloca_undef(
363 &ctx->gallivm,
364 ctx->soa.bld_base.base.elem_type, "");
365 }
366 }
367 break;
368 }
369
370 case TGSI_FILE_MEMORY:
371 ctx->declare_memory_region(ctx, decl);
372 break;
373
374 default:
375 break;
376 }
377 }
378
379 LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
380 LLVMValueRef value)
381 {
382 struct lp_build_emit_data clamp_emit_data;
383
384 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
385 clamp_emit_data.arg_count = 3;
386 clamp_emit_data.args[0] = value;
387 clamp_emit_data.args[2] = bld_base->base.one;
388 clamp_emit_data.args[1] = bld_base->base.zero;
389
390 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
391 &clamp_emit_data);
392 }
393
394 void radeon_llvm_emit_store(
395 struct lp_build_tgsi_context * bld_base,
396 const struct tgsi_full_instruction * inst,
397 const struct tgsi_opcode_info * info,
398 LLVMValueRef dst[4])
399 {
400 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
401 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
402 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
403 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
404 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
405 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
406 unsigned chan, chan_index;
407 boolean is_vec_store = FALSE;
408 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
409
410 if (dst[0]) {
411 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
412 is_vec_store = (k == LLVMVectorTypeKind);
413 }
414
415 if (is_vec_store) {
416 LLVMValueRef values[4] = {};
417 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
418 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
419 values[chan] = LLVMBuildExtractElement(gallivm->builder,
420 dst[0], index, "");
421 }
422 bld_base->emit_store(bld_base, inst, info, values);
423 return;
424 }
425
426 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
427 LLVMValueRef value = dst[chan_index];
428
429 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
430 continue;
431 if (inst->Instruction.Saturate)
432 value = radeon_llvm_saturate(bld_base, value);
433
434 if (reg->Register.File == TGSI_FILE_ADDRESS) {
435 temp_ptr = bld->addr[reg->Register.Index][chan_index];
436 LLVMBuildStore(builder, value, temp_ptr);
437 continue;
438 }
439
440 if (!tgsi_type_is_64bit(dtype))
441 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
442
443 if (reg->Register.Indirect) {
444 struct tgsi_declaration_range range = get_array_range(bld_base,
445 reg->Register.File, &reg->Indirect);
446
447 unsigned i, size = range.Last - range.First + 1;
448 LLVMValueRef array = LLVMBuildInsertElement(builder,
449 emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
450 value, emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");
451
452 for (i = 0; i < size; ++i) {
453 switch(reg->Register.File) {
454 case TGSI_FILE_OUTPUT:
455 temp_ptr = bld->outputs[i + range.First][chan_index];
456 break;
457
458 case TGSI_FILE_TEMPORARY:
459 if (range.First + i >= ctx->temps_count)
460 continue;
461 if (uses_temp_indirect_addressing(bld_base))
462 temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
463 else
464 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
465 break;
466
467 default:
468 return;
469 }
470 value = LLVMBuildExtractElement(builder, array,
471 lp_build_const_int32(gallivm, i), "");
472 LLVMBuildStore(builder, value, temp_ptr);
473 }
474
475 } else {
476 switch(reg->Register.File) {
477 case TGSI_FILE_OUTPUT:
478 temp_ptr = bld->outputs[reg->Register.Index][chan_index];
479 if (tgsi_type_is_64bit(dtype))
480 temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
481 break;
482
483 case TGSI_FILE_TEMPORARY:
484 if (reg->Register.Index >= ctx->temps_count)
485 continue;
486 if (uses_temp_indirect_addressing(bld_base)) {
487 temp_ptr = NULL;
488 break;
489 }
490 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
491 if (tgsi_type_is_64bit(dtype))
492 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
493
494 break;
495
496 default:
497 return;
498 }
499 if (!tgsi_type_is_64bit(dtype))
500 LLVMBuildStore(builder, value, temp_ptr);
501 else {
502 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
503 LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
504 LLVMValueRef val2;
505 value = LLVMBuildExtractElement(builder, ptr,
506 bld_base->uint_bld.zero, "");
507 val2 = LLVMBuildExtractElement(builder, ptr,
508 bld_base->uint_bld.one, "");
509
510 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
511 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
512 }
513 }
514 }
515 }
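/* Example of the 64-bit store path above: an instruction with a double
 * destination writing TEMP[0].xy produces one 64-bit value for chan_index 0
 * (chan_index 1 is skipped); it is bitcast to <2 x i32> and split so the low
 * dword is stored to channel x and the high dword to channel y.
 */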
516
517 static void bgnloop_emit(
518 const struct lp_build_tgsi_action * action,
519 struct lp_build_tgsi_context * bld_base,
520 struct lp_build_emit_data * emit_data)
521 {
522 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
523 struct gallivm_state * gallivm = bld_base->base.gallivm;
524 LLVMBasicBlockRef loop_block;
525 LLVMBasicBlockRef endloop_block;
526 endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
527 ctx->main_fn, "ENDLOOP");
528 loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
529 endloop_block, "LOOP");
530 LLVMBuildBr(gallivm->builder, loop_block);
531 LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
532
533 if (++ctx->loop_depth > ctx->loop_depth_max) {
534 unsigned new_max = ctx->loop_depth_max << 1;
535
536 if (!new_max)
537 new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
538
539 ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
540 sizeof(ctx->loop[0]),
541 new_max * sizeof(ctx->loop[0]));
542 ctx->loop_depth_max = new_max;
543 }
544
545 ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
546 ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
547 }
548
549 static void brk_emit(
550 const struct lp_build_tgsi_action * action,
551 struct lp_build_tgsi_context * bld_base,
552 struct lp_build_emit_data * emit_data)
553 {
554 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
555 struct gallivm_state * gallivm = bld_base->base.gallivm;
556 struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
557
558 LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
559 }
560
561 static void cont_emit(
562 const struct lp_build_tgsi_action * action,
563 struct lp_build_tgsi_context * bld_base,
564 struct lp_build_emit_data * emit_data)
565 {
566 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
567 struct gallivm_state * gallivm = bld_base->base.gallivm;
568 struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
569
570 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
571 }
572
573 static void else_emit(
574 const struct lp_build_tgsi_action * action,
575 struct lp_build_tgsi_context * bld_base,
576 struct lp_build_emit_data * emit_data)
577 {
578 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
579 struct gallivm_state * gallivm = bld_base->base.gallivm;
580 struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
581 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
582
583 /* We need to add a terminator to the current block if the previous
584 * instruction was an ENDIF. Example:
585 * IF
586 * [code]
587 * IF
588 * [code]
589 * ELSE
590 * [code]
591 * ENDIF <--
592 * ELSE <--
593 * [code]
594 * ENDIF
595 */
596
597 if (current_block != current_branch->if_block) {
598 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
599 }
600 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
601 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
602 }
603 current_branch->has_else = 1;
604 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
605 }
606
607 static void endif_emit(
608 const struct lp_build_tgsi_action * action,
609 struct lp_build_tgsi_context * bld_base,
610 struct lp_build_emit_data * emit_data)
611 {
612 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
613 struct gallivm_state * gallivm = bld_base->base.gallivm;
614 struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
615 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
616
617 /* If we have consecutive ENDIF instructions, then the first ENDIF
618 * will not have a terminator, so we need to add one. */
619 if (current_block != current_branch->if_block
620 && current_block != current_branch->else_block
621 && !LLVMGetBasicBlockTerminator(current_block)) {
622
623 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
624 }
625 if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
626 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
627 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
628 }
629
630 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
631 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
632 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
633 }
634
635 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
636 ctx->branch_depth--;
637 }
638
639 static void endloop_emit(
640 const struct lp_build_tgsi_action * action,
641 struct lp_build_tgsi_context * bld_base,
642 struct lp_build_emit_data * emit_data)
643 {
644 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
645 struct gallivm_state * gallivm = bld_base->base.gallivm;
646 struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
647
648 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
649 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
650 }
651
652 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
653 ctx->loop_depth--;
654 }
655
656 static void if_cond_emit(
657 const struct lp_build_tgsi_action * action,
658 struct lp_build_tgsi_context * bld_base,
659 struct lp_build_emit_data * emit_data,
660 LLVMValueRef cond)
661 {
662 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
663 struct gallivm_state * gallivm = bld_base->base.gallivm;
664 LLVMBasicBlockRef if_block, else_block, endif_block;
665
666 endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
667 ctx->main_fn, "ENDIF");
668 if_block = LLVMInsertBasicBlockInContext(gallivm->context,
669 endif_block, "IF");
670 else_block = LLVMInsertBasicBlockInContext(gallivm->context,
671 endif_block, "ELSE");
672 LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
673 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
674
675 if (++ctx->branch_depth > ctx->branch_depth_max) {
676 unsigned new_max = ctx->branch_depth_max << 1;
677
678 if (!new_max)
679 new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
680
681 ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
682 sizeof(ctx->branch[0]),
683 new_max * sizeof(ctx->branch[0]));
684 ctx->branch_depth_max = new_max;
685 }
686
687 ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
688 ctx->branch[ctx->branch_depth - 1].if_block = if_block;
689 ctx->branch[ctx->branch_depth - 1].else_block = else_block;
690 ctx->branch[ctx->branch_depth - 1].has_else = 0;
691 }
692
693 static void if_emit(
694 const struct lp_build_tgsi_action * action,
695 struct lp_build_tgsi_context * bld_base,
696 struct lp_build_emit_data * emit_data)
697 {
698 struct gallivm_state * gallivm = bld_base->base.gallivm;
699 LLVMValueRef cond;
700
701 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
702 emit_data->args[0],
703 bld_base->base.zero, "");
704
705 if_cond_emit(action, bld_base, emit_data, cond);
706 }
707
708 static void uif_emit(
709 const struct lp_build_tgsi_action * action,
710 struct lp_build_tgsi_context * bld_base,
711 struct lp_build_emit_data * emit_data)
712 {
713 struct gallivm_state * gallivm = bld_base->base.gallivm;
714 LLVMValueRef cond;
715
716 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
717 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
718 bld_base->int_bld.zero, "");
719
720 if_cond_emit(action, bld_base, emit_data, cond);
721 }
722
723 static void kill_if_fetch_args(
724 struct lp_build_tgsi_context * bld_base,
725 struct lp_build_emit_data * emit_data)
726 {
727 const struct tgsi_full_instruction * inst = emit_data->inst;
728 struct gallivm_state *gallivm = bld_base->base.gallivm;
729 LLVMBuilderRef builder = gallivm->builder;
730 unsigned i;
731 LLVMValueRef conds[TGSI_NUM_CHANNELS];
732
733 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
734 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
735 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
736 bld_base->base.zero, "");
737 }
738
739 /* Or the conditions together */
740 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
741 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
742 }
743
744 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
745 emit_data->arg_count = 1;
746 emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
747 lp_build_const_float(gallivm, -1.0f),
748 bld_base->base.zero, "");
749 }
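/* Example: KILL_IF discards the pixel when any source channel is negative.
 * The per-channel "< 0" results are OR'd together and folded into a single
 * float argument (-1.0 = kill, 0.0 = keep) for the llvm.AMDGPU.kill
 * intrinsic emitted by kil_emit() below.
 */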
750
751 static void kil_emit(
752 const struct lp_build_tgsi_action * action,
753 struct lp_build_tgsi_context * bld_base,
754 struct lp_build_emit_data * emit_data)
755 {
756 unsigned i;
757 for (i = 0; i < emit_data->arg_count; i++) {
758 emit_data->output[i] = lp_build_intrinsic_unary(
759 bld_base->base.gallivm->builder,
760 action->intr_name,
761 emit_data->dst_type, emit_data->args[i]);
762 }
763 }
764
765 static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
766 LLVMValueRef *in, LLVMValueRef *out)
767 {
768 struct gallivm_state * gallivm = bld_base->base.gallivm;
769 LLVMBuilderRef builder = gallivm->builder;
770 LLVMTypeRef type = bld_base->base.elem_type;
771 LLVMValueRef coords[4];
772 LLVMValueRef mad_args[3];
773 LLVMValueRef v, cube_vec;
774 unsigned i;
775
776 cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
777 v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
778 &cube_vec, 1, LLVMReadNoneAttribute);
779
780 for (i = 0; i < 4; ++i)
781 coords[i] = LLVMBuildExtractElement(builder, v,
782 lp_build_const_int32(gallivm, i), "");
783
784 coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
785 type, &coords[2], 1, LLVMReadNoneAttribute);
786 coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
787
788 mad_args[1] = coords[2];
789 mad_args[2] = LLVMConstReal(type, 1.5);
790
791 mad_args[0] = coords[0];
792 coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
793 mad_args[0], mad_args[1], mad_args[2]);
794
795 mad_args[0] = coords[1];
796 coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
797 mad_args[0], mad_args[1], mad_args[2]);
798
799 /* apply xyz = yxw swizzle to coords */
800 out[0] = coords[1];
801 out[1] = coords[0];
802 out[2] = coords[3];
803 }
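/* Sketch of what the code above computes, assuming the usual v_cube*
 * component order (tc, sc, ma, face id) in the llvm.AMDGPU.cube result:
 *   out.x = sc / |ma| + 1.5
 *   out.y = tc / |ma| + 1.5
 *   out.z = face id
 * i.e. a 2D face coordinate plus the index of the selected cube face.
 */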
804
805 void radeon_llvm_emit_prepare_cube_coords(
806 struct lp_build_tgsi_context * bld_base,
807 struct lp_build_emit_data * emit_data,
808 LLVMValueRef *coords_arg,
809 LLVMValueRef *derivs_arg)
810 {
811
812 unsigned target = emit_data->inst->Texture.Texture;
813 unsigned opcode = emit_data->inst->Instruction.Opcode;
814 struct gallivm_state * gallivm = bld_base->base.gallivm;
815 LLVMBuilderRef builder = gallivm->builder;
816 LLVMValueRef coords[4];
817 unsigned i;
818
819 radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
820
821 if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
822 LLVMValueRef derivs[4];
823 int axis;
824
825 /* Convert cube derivatives to 2D derivatives. */
826 for (axis = 0; axis < 2; axis++) {
827 LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
828
829 /* Shift the cube coordinates by the derivatives to get
830 * the cube coordinates of the "neighboring pixel".
831 */
832 for (i = 0; i < 3; i++)
833 shifted_cube_coords[i] =
834 LLVMBuildFAdd(builder, coords_arg[i],
835 derivs_arg[axis*3+i], "");
836 shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
837
838 /* Project the shifted cube coordinates onto the face. */
839 radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
840 shifted_coords);
841
842 /* Subtract both sets of 2D coordinates to get 2D derivatives.
843 * This won't work if the shifted coordinates ended up
844 * in a different face.
845 */
846 for (i = 0; i < 2; i++)
847 derivs[axis * 2 + i] =
848 LLVMBuildFSub(builder, shifted_coords[i],
849 coords[i], "");
850 }
851
852 memcpy(derivs_arg, derivs, sizeof(derivs));
853 }
854
855 if (target == TGSI_TEXTURE_CUBE_ARRAY ||
856 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
857 /* For cube arrays, coord.z = array_index * 8 + face, where the array
858 * index is taken from the coords_arg.w component. */
859 coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
860 coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
861 }
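/* E.g. array_index 2 on face 3 yields coord.z = 2 * 8 + 3 = 19. */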
862
863 /* Preserve compare/lod/bias. Put it in coords.w. */
864 if (opcode == TGSI_OPCODE_TEX2 ||
865 opcode == TGSI_OPCODE_TXB2 ||
866 opcode == TGSI_OPCODE_TXL2) {
867 coords[3] = coords_arg[4];
868 } else if (opcode == TGSI_OPCODE_TXB ||
869 opcode == TGSI_OPCODE_TXL ||
870 target == TGSI_TEXTURE_SHADOWCUBE) {
871 coords[3] = coords_arg[3];
872 }
873
874 memcpy(coords_arg, coords, sizeof(coords));
875 }
876
877 static void emit_icmp(
878 const struct lp_build_tgsi_action * action,
879 struct lp_build_tgsi_context * bld_base,
880 struct lp_build_emit_data * emit_data)
881 {
882 unsigned pred;
883 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
884 LLVMContextRef context = bld_base->base.gallivm->context;
885
886 switch (emit_data->inst->Instruction.Opcode) {
887 case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
888 case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
889 case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
890 case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
891 case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
892 case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
893 default:
894 assert(!"unknown instruction");
895 pred = 0;
896 break;
897 }
898
899 LLVMValueRef v = LLVMBuildICmp(builder, pred,
900 emit_data->args[0], emit_data->args[1],"");
901
902 v = LLVMBuildSExtOrBitCast(builder, v,
903 LLVMInt32TypeInContext(context), "");
904
905 emit_data->output[emit_data->chan] = v;
906 }
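/* Example: USEQ on equal operands yields i1 true, which the sign-extend
 * turns into the TGSI integer-boolean convention of all ones (0xffffffff);
 * false becomes 0.
 */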
907
908 static void emit_ucmp(
909 const struct lp_build_tgsi_action * action,
910 struct lp_build_tgsi_context * bld_base,
911 struct lp_build_emit_data * emit_data)
912 {
913 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
914
915 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
916 bld_base->uint_bld.elem_type, "");
917
918 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
919 bld_base->uint_bld.zero, "");
920
921 emit_data->output[emit_data->chan] =
922 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
923 }
924
925 static void emit_cmp(const struct lp_build_tgsi_action *action,
926 struct lp_build_tgsi_context *bld_base,
927 struct lp_build_emit_data *emit_data)
928 {
929 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
930 LLVMValueRef cond, *args = emit_data->args;
931
932 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
933 bld_base->base.zero, "");
934
935 emit_data->output[emit_data->chan] =
936 LLVMBuildSelect(builder, cond, args[1], args[2], "");
937 }
938
939 static void emit_set_cond(
940 const struct lp_build_tgsi_action *action,
941 struct lp_build_tgsi_context * bld_base,
942 struct lp_build_emit_data * emit_data)
943 {
944 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
945 LLVMRealPredicate pred;
946 LLVMValueRef cond;
947
948 /* Use an ordered predicate for everything except NE, which uses an
949 * unordered compare (the usual convention for float comparisons).
950 */
951 switch (emit_data->inst->Instruction.Opcode) {
952 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
953 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
954 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
955 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
956 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
957 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
958 default: assert(!"unknown instruction"); pred = 0; break;
959 }
960
961 cond = LLVMBuildFCmp(builder,
962 pred, emit_data->args[0], emit_data->args[1], "");
963
964 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
965 cond, bld_base->base.one, bld_base->base.zero, "");
966 }
967
968 static void emit_fcmp(
969 const struct lp_build_tgsi_action *action,
970 struct lp_build_tgsi_context * bld_base,
971 struct lp_build_emit_data * emit_data)
972 {
973 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
974 LLVMContextRef context = bld_base->base.gallivm->context;
975 LLVMRealPredicate pred;
976
977 /* Use an ordered predicate for everything except NE, which uses an
978 * unordered compare (the usual convention for float comparisons).
979 */
980 switch (emit_data->inst->Instruction.Opcode) {
981 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
982 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
983 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
984 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
985 default: assert(!"unknown instruction"); pred = 0; break;
986 }
987
988 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
989 emit_data->args[0], emit_data->args[1],"");
990
991 v = LLVMBuildSExtOrBitCast(builder, v,
992 LLVMInt32TypeInContext(context), "");
993
994 emit_data->output[emit_data->chan] = v;
995 }
996
997 static void emit_dcmp(
998 const struct lp_build_tgsi_action *action,
999 struct lp_build_tgsi_context * bld_base,
1000 struct lp_build_emit_data * emit_data)
1001 {
1002 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1003 LLVMContextRef context = bld_base->base.gallivm->context;
1004 LLVMRealPredicate pred;
1005
1006 /* Use an ordered predicate for everything except NE, which uses an
1007 * unordered compare (the usual convention for float comparisons).
1008 */
1009 switch (emit_data->inst->Instruction.Opcode) {
1010 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
1011 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
1012 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
1013 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
1014 default: assert(!"unknown instruction"); pred = 0; break;
1015 }
1016
1017 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1018 emit_data->args[0], emit_data->args[1],"");
1019
1020 v = LLVMBuildSExtOrBitCast(builder, v,
1021 LLVMInt32TypeInContext(context), "");
1022
1023 emit_data->output[emit_data->chan] = v;
1024 }
1025
1026 static void emit_not(
1027 const struct lp_build_tgsi_action * action,
1028 struct lp_build_tgsi_context * bld_base,
1029 struct lp_build_emit_data * emit_data)
1030 {
1031 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1032 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
1033 emit_data->args[0]);
1034 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
1035 }
1036
1037 static void emit_arl(
1038 const struct lp_build_tgsi_action * action,
1039 struct lp_build_tgsi_context * bld_base,
1040 struct lp_build_emit_data * emit_data)
1041 {
1042 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1043 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
1044 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1045 floor_index, bld_base->base.int_elem_type , "");
1046 }
1047
1048 static void emit_and(
1049 const struct lp_build_tgsi_action * action,
1050 struct lp_build_tgsi_context * bld_base,
1051 struct lp_build_emit_data * emit_data)
1052 {
1053 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1054 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1055 emit_data->args[0], emit_data->args[1], "");
1056 }
1057
1058 static void emit_or(
1059 const struct lp_build_tgsi_action * action,
1060 struct lp_build_tgsi_context * bld_base,
1061 struct lp_build_emit_data * emit_data)
1062 {
1063 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1064 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1065 emit_data->args[0], emit_data->args[1], "");
1066 }
1067
1068 static void emit_uadd(
1069 const struct lp_build_tgsi_action * action,
1070 struct lp_build_tgsi_context * bld_base,
1071 struct lp_build_emit_data * emit_data)
1072 {
1073 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1074 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1075 emit_data->args[0], emit_data->args[1], "");
1076 }
1077
1078 static void emit_udiv(
1079 const struct lp_build_tgsi_action * action,
1080 struct lp_build_tgsi_context * bld_base,
1081 struct lp_build_emit_data * emit_data)
1082 {
1083 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1084 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1085 emit_data->args[0], emit_data->args[1], "");
1086 }
1087
1088 static void emit_idiv(
1089 const struct lp_build_tgsi_action * action,
1090 struct lp_build_tgsi_context * bld_base,
1091 struct lp_build_emit_data * emit_data)
1092 {
1093 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1094 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1095 emit_data->args[0], emit_data->args[1], "");
1096 }
1097
1098 static void emit_mod(
1099 const struct lp_build_tgsi_action * action,
1100 struct lp_build_tgsi_context * bld_base,
1101 struct lp_build_emit_data * emit_data)
1102 {
1103 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1104 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1105 emit_data->args[0], emit_data->args[1], "");
1106 }
1107
1108 static void emit_umod(
1109 const struct lp_build_tgsi_action * action,
1110 struct lp_build_tgsi_context * bld_base,
1111 struct lp_build_emit_data * emit_data)
1112 {
1113 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1114 emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1115 emit_data->args[0], emit_data->args[1], "");
1116 }
1117
1118 static void emit_shl(
1119 const struct lp_build_tgsi_action * action,
1120 struct lp_build_tgsi_context * bld_base,
1121 struct lp_build_emit_data * emit_data)
1122 {
1123 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1124 emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1125 emit_data->args[0], emit_data->args[1], "");
1126 }
1127
1128 static void emit_ushr(
1129 const struct lp_build_tgsi_action * action,
1130 struct lp_build_tgsi_context * bld_base,
1131 struct lp_build_emit_data * emit_data)
1132 {
1133 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1134 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1135 emit_data->args[0], emit_data->args[1], "");
1136 }
1137 static void emit_ishr(
1138 const struct lp_build_tgsi_action * action,
1139 struct lp_build_tgsi_context * bld_base,
1140 struct lp_build_emit_data * emit_data)
1141 {
1142 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1143 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1144 emit_data->args[0], emit_data->args[1], "");
1145 }
1146
1147 static void emit_xor(
1148 const struct lp_build_tgsi_action * action,
1149 struct lp_build_tgsi_context * bld_base,
1150 struct lp_build_emit_data * emit_data)
1151 {
1152 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1153 emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1154 emit_data->args[0], emit_data->args[1], "");
1155 }
1156
1157 static void emit_ssg(
1158 const struct lp_build_tgsi_action * action,
1159 struct lp_build_tgsi_context * bld_base,
1160 struct lp_build_emit_data * emit_data)
1161 {
1162 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1163
1164 LLVMValueRef cmp, val;
1165
1166 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
1167 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
1168 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
1169 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
1170 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
1171 } else { // float SSG
1172 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
1173 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
1174 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
1175 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
1176 }
1177
1178 emit_data->output[emit_data->chan] = val;
1179 }
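/* Worked example for the integer path: ISSG(-5) fails the "> 0" test, so the
 * first select keeps -5; "-5 >= 0" also fails, so the result is -1. ISSG(0)
 * keeps 0 through both selects, and ISSG(7) becomes 1 at the first select.
 * The float path behaves the same with 1.0 / 0.0 / -1.0.
 */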
1180
1181 static void emit_ineg(
1182 const struct lp_build_tgsi_action * action,
1183 struct lp_build_tgsi_context * bld_base,
1184 struct lp_build_emit_data * emit_data)
1185 {
1186 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1187 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1188 emit_data->args[0], "");
1189 }
1190
1191 static void emit_dneg(
1192 const struct lp_build_tgsi_action * action,
1193 struct lp_build_tgsi_context * bld_base,
1194 struct lp_build_emit_data * emit_data)
1195 {
1196 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1197 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
1198 emit_data->args[0], "");
1199 }
1200
1201 static void emit_frac(
1202 const struct lp_build_tgsi_action * action,
1203 struct lp_build_tgsi_context * bld_base,
1204 struct lp_build_emit_data * emit_data)
1205 {
1206 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1207 char *intr;
1208
1209 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
1210 intr = "llvm.floor.f32";
1211 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
1212 intr = "llvm.floor.f64";
1213 else {
1214 assert(0);
1215 return;
1216 }
1217
1218 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
1219 &emit_data->args[0], 1,
1220 LLVMReadNoneAttribute);
1221 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
1222 emit_data->args[0], floor, "");
1223 }
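/* Example: FRC computes x - floor(x), so FRC(1.25) = 0.25 and
 * FRC(-1.25) = -1.25 - (-2.0) = 0.75; DFRAC is the same at double precision.
 */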
1224
1225 static void emit_f2i(
1226 const struct lp_build_tgsi_action * action,
1227 struct lp_build_tgsi_context * bld_base,
1228 struct lp_build_emit_data * emit_data)
1229 {
1230 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1231 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1232 emit_data->args[0], bld_base->int_bld.elem_type, "");
1233 }
1234
1235 static void emit_f2u(
1236 const struct lp_build_tgsi_action * action,
1237 struct lp_build_tgsi_context * bld_base,
1238 struct lp_build_emit_data * emit_data)
1239 {
1240 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1241 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1242 emit_data->args[0], bld_base->uint_bld.elem_type, "");
1243 }
1244
1245 static void emit_i2f(
1246 const struct lp_build_tgsi_action * action,
1247 struct lp_build_tgsi_context * bld_base,
1248 struct lp_build_emit_data * emit_data)
1249 {
1250 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1251 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1252 emit_data->args[0], bld_base->base.elem_type, "");
1253 }
1254
1255 static void emit_u2f(
1256 const struct lp_build_tgsi_action * action,
1257 struct lp_build_tgsi_context * bld_base,
1258 struct lp_build_emit_data * emit_data)
1259 {
1260 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1261 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1262 emit_data->args[0], bld_base->base.elem_type, "");
1263 }
1264
1265 static void emit_immediate(struct lp_build_tgsi_context * bld_base,
1266 const struct tgsi_full_immediate *imm)
1267 {
1268 unsigned i;
1269 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
1270
1271 for (i = 0; i < 4; ++i) {
1272 ctx->soa.immediates[ctx->soa.num_immediates][i] =
1273 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
1274 }
1275
1276 ctx->soa.num_immediates++;
1277 }
1278
1279 void
1280 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
1281 struct lp_build_tgsi_context *bld_base,
1282 struct lp_build_emit_data *emit_data)
1283 {
1284 struct lp_build_context * base = &bld_base->base;
1285 emit_data->output[emit_data->chan] =
1286 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
1287 emit_data->dst_type, emit_data->args,
1288 emit_data->arg_count, LLVMReadNoneAttribute);
1289 }
1290
1291 static void emit_bfi(const struct lp_build_tgsi_action * action,
1292 struct lp_build_tgsi_context * bld_base,
1293 struct lp_build_emit_data * emit_data)
1294 {
1295 struct gallivm_state *gallivm = bld_base->base.gallivm;
1296 LLVMBuilderRef builder = gallivm->builder;
1297 LLVMValueRef bfi_args[3];
1298
1299 // Calculate the bitmask: ((1 << src3) - 1) << src2
1300 bfi_args[0] = LLVMBuildShl(builder,
1301 LLVMBuildSub(builder,
1302 LLVMBuildShl(builder,
1303 bld_base->int_bld.one,
1304 emit_data->args[3], ""),
1305 bld_base->int_bld.one, ""),
1306 emit_data->args[2], "");
1307
1308 bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
1309 emit_data->args[2], "");
1310
1311 bfi_args[2] = emit_data->args[0];
1312
1313 /* Calculate:
1314 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
1315 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1316 */
1317 emit_data->output[emit_data->chan] =
1318 LLVMBuildXor(builder, bfi_args[2],
1319 LLVMBuildAnd(builder, bfi_args[0],
1320 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
1321 ""), ""), "");
1322 }
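/* Worked example: BFI with base (src0) = 0xAAAAAAAA, insert (src1) = 0x5,
 * offset (src2) = 8 and width (src3) = 4 builds the mask
 * ((1 << 4) - 1) << 8 = 0x00000F00 and produces
 * (0xAAAAAAAA & ~0x00000F00) | (0x5 << 8) = 0xAAAAA5AA.
 */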
1323
1324 /* this is ffs in C */
1325 static void emit_lsb(const struct lp_build_tgsi_action * action,
1326 struct lp_build_tgsi_context * bld_base,
1327 struct lp_build_emit_data * emit_data)
1328 {
1329 struct gallivm_state *gallivm = bld_base->base.gallivm;
1330 LLVMValueRef args[2] = {
1331 emit_data->args[0],
1332
1333 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
1334 * add special code to check for x=0. The reason is that
1335 * the LLVM behavior for x=0 is different from what we
1336 * need here.
1337 *
1338 * The hardware already implements the correct behavior.
1339 */
1340 lp_build_const_int32(gallivm, 1)
1341 };
1342
1343 emit_data->output[emit_data->chan] =
1344 lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
1345 emit_data->dst_type, args, ARRAY_SIZE(args),
1346 LLVMReadNoneAttribute);
1347 }
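/* Example: LSB(0x00000008) = cttz(0x8) = 3, the bit index of the lowest set
 * bit counted from the LSB.
 */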
1348
1349 /* Find the last bit set. */
1350 static void emit_umsb(const struct lp_build_tgsi_action * action,
1351 struct lp_build_tgsi_context * bld_base,
1352 struct lp_build_emit_data * emit_data)
1353 {
1354 struct gallivm_state *gallivm = bld_base->base.gallivm;
1355 LLVMBuilderRef builder = gallivm->builder;
1356 LLVMValueRef args[2] = {
1357 emit_data->args[0],
1358 /* Don't generate code for handling zero: */
1359 lp_build_const_int32(gallivm, 1)
1360 };
1361
1362 LLVMValueRef msb =
1363 lp_build_intrinsic(builder, "llvm.ctlz.i32",
1364 emit_data->dst_type, args, ARRAY_SIZE(args),
1365 LLVMReadNoneAttribute);
1366
1367 /* The HW returns the last bit index from MSB, but TGSI wants
1368 * the index from LSB. Invert it by doing "31 - msb". */
1369 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1370 msb, "");
1371
1372 /* Check for zero: */
1373 emit_data->output[emit_data->chan] =
1374 LLVMBuildSelect(builder,
1375 LLVMBuildICmp(builder, LLVMIntEQ, args[0],
1376 bld_base->uint_bld.zero, ""),
1377 lp_build_const_int32(gallivm, -1), msb, "");
1378 }
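/* Example: UMSB(0x00010000) = 31 - ctlz(0x00010000) = 31 - 15 = 16, and the
 * final select maps an input of 0 to -1.
 */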
1379
1380 /* Find the last bit opposite of the sign bit. */
1381 static void emit_imsb(const struct lp_build_tgsi_action * action,
1382 struct lp_build_tgsi_context * bld_base,
1383 struct lp_build_emit_data * emit_data)
1384 {
1385 struct gallivm_state *gallivm = bld_base->base.gallivm;
1386 LLVMBuilderRef builder = gallivm->builder;
1387 LLVMValueRef arg = emit_data->args[0];
1388
1389 LLVMValueRef msb =
1390 lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
1391 emit_data->dst_type, &arg, 1,
1392 LLVMReadNoneAttribute);
1393
1394 /* The HW returns the last bit index from MSB, but TGSI wants
1395 * the index from LSB. Invert it by doing "31 - msb". */
1396 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1397 msb, "");
1398
1399 /* If arg == 0 || arg == -1 (0xffffffff), return -1. */
1400 LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
1401
1402 LLVMValueRef cond =
1403 LLVMBuildOr(builder,
1404 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1405 bld_base->uint_bld.zero, ""),
1406 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1407 all_ones, ""), "");
1408
1409 emit_data->output[emit_data->chan] =
1410 LLVMBuildSelect(builder, cond, all_ones, msb, "");
1411 }
1412
1413 static void emit_iabs(const struct lp_build_tgsi_action *action,
1414 struct lp_build_tgsi_context *bld_base,
1415 struct lp_build_emit_data *emit_data)
1416 {
1417 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1418
1419 emit_data->output[emit_data->chan] =
1420 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
1421 emit_data->args[0],
1422 LLVMBuildNeg(builder,
1423 emit_data->args[0], ""));
1424 }
1425
1426 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
1427 struct lp_build_tgsi_context *bld_base,
1428 struct lp_build_emit_data *emit_data)
1429 {
1430 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1431 LLVMIntPredicate op;
1432
1433 switch (emit_data->info->opcode) {
1434 default:
1435 assert(0);
1436 case TGSI_OPCODE_IMAX:
1437 op = LLVMIntSGT;
1438 break;
1439 case TGSI_OPCODE_IMIN:
1440 op = LLVMIntSLT;
1441 break;
1442 case TGSI_OPCODE_UMAX:
1443 op = LLVMIntUGT;
1444 break;
1445 case TGSI_OPCODE_UMIN:
1446 op = LLVMIntULT;
1447 break;
1448 }
1449
1450 emit_data->output[emit_data->chan] =
1451 LLVMBuildSelect(builder,
1452 LLVMBuildICmp(builder, op, emit_data->args[0],
1453 emit_data->args[1], ""),
1454 emit_data->args[0],
1455 emit_data->args[1], "");
1456 }
1457
1458 static void pk2h_fetch_args(struct lp_build_tgsi_context * bld_base,
1459 struct lp_build_emit_data * emit_data)
1460 {
1461 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1462 0, TGSI_CHAN_X);
1463 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
1464 0, TGSI_CHAN_Y);
1465 }
1466
1467 static void emit_pk2h(const struct lp_build_tgsi_action *action,
1468 struct lp_build_tgsi_context *bld_base,
1469 struct lp_build_emit_data *emit_data)
1470 {
1471 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1472 LLVMContextRef context = bld_base->base.gallivm->context;
1473 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1474 LLVMTypeRef fp16, i16;
1475 LLVMValueRef const16, comp[2];
1476 unsigned i;
1477
1478 fp16 = LLVMHalfTypeInContext(context);
1479 i16 = LLVMInt16TypeInContext(context);
1480 const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1481
1482 for (i = 0; i < 2; i++) {
1483 comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
1484 comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
1485 comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
1486 }
1487
1488 comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
1489 comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
1490
1491 emit_data->output[emit_data->chan] = comp[0];
1492 }
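/* Example: PK2H(1.0, 2.0) packs half(1.0) = 0x3C00 into the low 16 bits and
 * half(2.0) = 0x4000 into the high 16 bits, giving 0x40003C00.
 */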
1493
1494 static void up2h_fetch_args(struct lp_build_tgsi_context * bld_base,
1495 struct lp_build_emit_data * emit_data)
1496 {
1497 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1498 0, TGSI_CHAN_X);
1499 }
1500
1501 static void emit_up2h(const struct lp_build_tgsi_action *action,
1502 struct lp_build_tgsi_context *bld_base,
1503 struct lp_build_emit_data *emit_data)
1504 {
1505 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1506 LLVMContextRef context = bld_base->base.gallivm->context;
1507 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1508 LLVMTypeRef fp16, i16;
1509 LLVMValueRef const16, input, val;
1510 unsigned i;
1511
1512 fp16 = LLVMHalfTypeInContext(context);
1513 i16 = LLVMInt16TypeInContext(context);
1514 const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1515 input = emit_data->args[0];
1516
1517 for (i = 0; i < 2; i++) {
1518 val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
1519 val = LLVMBuildTrunc(builder, val, i16, "");
1520 val = LLVMBuildBitCast(builder, val, fp16, "");
1521 emit_data->output[i] =
1522 LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
1523 }
1524 }
1525
1526 void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
1527 {
1528 struct lp_type type;
1529
1530 /* Initialize the gallivm object:
1531 * We are only using the module, context, and builder fields of this struct.
1532 * This should be enough for us to be able to pass our gallivm struct to the
1533 * helper functions in the gallivm module.
1534 */
1535 memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
1536 memset(&ctx->soa, 0, sizeof(ctx->soa));
1537 ctx->gallivm.context = LLVMContextCreate();
1538 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1539 ctx->gallivm.context);
1540 LLVMSetTarget(ctx->gallivm.module, triple);
1541 ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
1542
1543 struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
1544
1545 type.floating = TRUE;
1546 type.fixed = FALSE;
1547 type.sign = TRUE;
1548 type.norm = FALSE;
1549 type.width = 32;
1550 type.length = 1;
1551
1552 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1553 lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1554 lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1555 {
1556 struct lp_type dbl_type;
1557 dbl_type = type;
1558 dbl_type.width *= 2;
1559 lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
1560 }
1561
1562 bld_base->soa = 1;
1563 bld_base->emit_store = radeon_llvm_emit_store;
1564 bld_base->emit_swizzle = emit_swizzle;
1565 bld_base->emit_declaration = emit_declaration;
1566 bld_base->emit_immediate = emit_immediate;
1567
1568 bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
1569 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
1570 bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
1571 bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
1572 bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1573
1574 /* Allocate outputs */
1575 ctx->soa.outputs = ctx->outputs;
1576
1577 lp_set_default_actions(bld_base);
1578
	bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
	bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
	bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
	bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
	bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
	bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
	bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
	bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
	bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
	bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
	bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
	bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
}

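/*
 * Create the shader's "main" LLVM function from the given parameter and
 * return element types and leave the builder positioned at the start of
 * its body.  A hypothetical caller might do something like:
 *
 *	LLVMTypeRef params[2];
 *	params[0] = LLVMPointerType(LLVMInt8TypeInContext(ctx->gallivm.context), 0);
 *	params[1] = LLVMInt32TypeInContext(ctx->gallivm.context);
 *	radeon_llvm_create_func(ctx, NULL, 0, params, 2);
 *
 * (Illustrative only; real callers pass the driver's actual parameter types.)
 */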
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
			     LLVMTypeRef *return_types, unsigned num_return_elems,
			     LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
	LLVMTypeRef main_fn_type, ret_type;
	LLVMBasicBlockRef main_fn_body;

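	/* Multiple return values are packed into an anonymous struct (the
	 * trailing 'true' requests a packed layout); with no return
	 * elements the function simply returns void.
	 */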
	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
						   return_types,
						   num_return_elems, true);
	else
		ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);

	/* Set up the function and make "main_body" the insertion point. */
	ctx->return_type = ret_type;
	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
	main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
			ctx->main_fn, "main_body");
	LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
}

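/*
 * Run a small per-function optimization pipeline over the generated IR
 * (mem2reg, scalar replacement of aggregates, LICM, aggressive DCE, CFG
 * simplification, instruction combining) and release the per-compile
 * builder and pass manager.  The module itself stays alive until
 * radeon_llvm_dispose() so it can still be handed to the backend.
 */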
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
	struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
	const char *triple = LLVMGetTarget(gallivm->module);
	LLVMTargetLibraryInfoRef target_library_info;

	/* Create the pass manager */
	gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
							gallivm->module);

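	/* Tell the passes which library calls are available for this target
	 * triple, so optimizations that reason about library functions stay
	 * on safe ground.
	 */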
	target_library_info = gallivm_create_target_library_info(triple);
	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);

	/* mem2reg promotes the translator's allocas to SSA values, removing
	 * most of the loads and stores generated for virtual registers. */
	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);

	/* Add some optimization passes */
	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
	LLVMAddLICMPass(gallivm->passmgr);
	LLVMAddAggressiveDCEPass(gallivm->passmgr);
	LLVMAddCFGSimplificationPass(gallivm->passmgr);
	LLVMAddInstructionCombiningPass(gallivm->passmgr);

	/* Run the passes on the shader's main function */
	LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);

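	/* The builder and pass manager are only needed while building this
	 * shader; the module and context are released later, in
	 * radeon_llvm_dispose().
	 */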
	LLVMDisposeBuilder(gallivm->builder);
	LLVMDisposePassManager(gallivm->passmgr);
	gallivm_dispose_target_library_info(target_library_info);
}

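/*
 * Tear down the per-shader LLVM objects and free the bookkeeping arrays
 * (temporaries, loop and branch stacks) allocated while the TGSI shader
 * was being translated.
 */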
void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
{
	LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
	LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
	FREE(ctx->arrays);
	ctx->arrays = NULL;
	FREE(ctx->temps);
	ctx->temps = NULL;
	ctx->temps_count = 0;
	FREE(ctx->loop);
	ctx->loop = NULL;
	ctx->loop_depth_max = 0;
	FREE(ctx->branch);
	ctx->branch = NULL;
	ctx->branch_depth_max = 0;
}