gallivm/nir: allow 64-bit arit ops
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_action.c
1 /**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * Copyright 2007-2008 VMware, Inc.
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial portions
18 * of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 **************************************************************************/
29
30 /**
31 * @file
32 * TGSI to LLVM IR translation.
33 *
34 * @author Jose Fonseca <jfonseca@vmware.com>
35 * @author Tom Stellard <thomas.stellard@amd.com>
36 *
37 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
38 * Brian Paul, and others.
39 */
40
41
42 #include "lp_bld_tgsi_action.h"
43
44 #include "lp_bld_tgsi.h"
45 #include "lp_bld_arit.h"
46 #include "lp_bld_bitarit.h"
47 #include "lp_bld_const.h"
48 #include "lp_bld_conv.h"
49 #include "lp_bld_gather.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_pack.h"
52
53 #include "tgsi/tgsi_exec.h"
54
/* XXX: The CPU only defaults should be replaced by generic ones.  In most
 * cases, the CPU defaults are just wrappers around a function in
 * lp_bld_arit.c and these functions should be inlined here and the CPU
 * generic code should be removed and placed elsewhere.
 */
60
61 /* Default actions */
62
63 /* Generic fetch_arg functions */
64
65 static void scalar_unary_fetch_args(
66 struct lp_build_tgsi_context * bld_base,
67 struct lp_build_emit_data * emit_data)
68 {
69 /* src0.x */
70 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
71 emit_data->arg_count = 1;
72 emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
73 }
74
75 static void scalar_binary_fetch_args(
76 struct lp_build_tgsi_context * bld_base,
77 struct lp_build_emit_data * emit_data)
78 {
79 /* src0.x */
80 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
81 0, TGSI_CHAN_X);
82 /* src1.x */
83 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
84 1, TGSI_CHAN_X);
85 emit_data->arg_count = 2;
86 emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
87 }
88
89 /* TGSI_OPCODE_ADD */
90 static void
91 add_emit(
92 const struct lp_build_tgsi_action * action,
93 struct lp_build_tgsi_context * bld_base,
94 struct lp_build_emit_data * emit_data)
95 {
96 emit_data->output[emit_data->chan] = LLVMBuildFAdd(
97 bld_base->base.gallivm->builder,
98 emit_data->args[0], emit_data->args[1], "");
99 }
100
101 /* TGSI_OPCODE_ARR */
102 static void
103 arr_emit(
104 const struct lp_build_tgsi_action * action,
105 struct lp_build_tgsi_context * bld_base,
106 struct lp_build_emit_data * emit_data)
107 {
108 LLVMValueRef tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ROUND, emit_data->args[0]);
109 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
110 bld_base->uint_bld.vec_type, "");
111 }
112
113 /* DP* Helper */
114
115 static void
116 dp_fetch_args(
117 struct lp_build_tgsi_context * bld_base,
118 struct lp_build_emit_data * emit_data,
119 unsigned dp_components)
120 {
121 unsigned chan, src;
122 for (src = 0; src < 2; src++) {
123 for (chan = 0; chan < dp_components; chan++) {
124 emit_data->args[(src * dp_components) + chan] =
125 lp_build_emit_fetch(bld_base, emit_data->inst, src, chan);
126 }
127 }
128 emit_data->dst_type = bld_base->base.elem_type;
129 }
130
/* TGSI_OPCODE_DP2: fetch two channels from each of src0 and src1. */
static void
dp2_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   const unsigned components = 2;

   dp_fetch_args(bld_base, emit_data, components);
}
139
140 static void
141 dp2_emit(
142 const struct lp_build_tgsi_action * action,
143 struct lp_build_tgsi_context * bld_base,
144 struct lp_build_emit_data * emit_data)
145 {
146 LLVMValueRef tmp0, tmp1;
147 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
148 emit_data->args[0] /* src0.x */,
149 emit_data->args[2] /* src1.x */);
150 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
151 emit_data->args[1] /* src0.y */,
152 emit_data->args[3] /* src1.y */);
153 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
154 TGSI_OPCODE_ADD, tmp0, tmp1);
155 }
156
157 static struct lp_build_tgsi_action dp2_action = {
158 dp2_fetch_args, /* fetch_args */
159 dp2_emit /* emit */
160 };
161
/* TGSI_OPCODE_DP3: fetch three channels from each of src0 and src1. */
static void
dp3_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   const unsigned components = 3;

   dp_fetch_args(bld_base, emit_data, components);
}
170
171 static void
172 dp3_emit(
173 const struct lp_build_tgsi_action * action,
174 struct lp_build_tgsi_context * bld_base,
175 struct lp_build_emit_data * emit_data)
176 {
177 LLVMValueRef tmp0, tmp1;
178 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
179 emit_data->args[0] /* src0.x */,
180 emit_data->args[3] /* src1.x */);
181 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
182 emit_data->args[1] /* src0.y */,
183 emit_data->args[4] /* src1.y */);
184 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0);
185 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
186 emit_data->args[2] /* src0.z */,
187 emit_data->args[5] /* src1.z */);
188 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
189 TGSI_OPCODE_ADD, tmp0, tmp1);
190 }
191
192 static struct lp_build_tgsi_action dp3_action = {
193 dp3_fetch_args, /* fetch_args */
194 dp3_emit /* emit */
195 };
196
/* TGSI_OPCODE_DP4 */
198
/* Fetch four channels from each of src0 and src1 for a 4-component dot product. */
static void
dp4_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   const unsigned components = 4;

   dp_fetch_args(bld_base, emit_data, components);
}
206
207 static void
208 dp4_emit(
209 const struct lp_build_tgsi_action * action,
210 struct lp_build_tgsi_context * bld_base,
211 struct lp_build_emit_data * emit_data)
212 {
213 LLVMValueRef tmp0, tmp1;
214 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
215 emit_data->args[0] /* src0.x */,
216 emit_data->args[4] /* src1.x */);
217 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
218 emit_data->args[1] /* src0.y */,
219 emit_data->args[5] /* src1.y */);
220 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
221 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
222 emit_data->args[2] /* src0.z */,
223 emit_data->args[6] /* src1.z */);
224 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
225 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
226 emit_data->args[3] /* src0.w */,
227 emit_data->args[7] /* src1.w */);
228 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
229 TGSI_OPCODE_ADD, tmp0, tmp1);
230 }
231
232 static struct lp_build_tgsi_action dp4_action = {
233 dp4_fetch_args, /* fetch_args */
234 dp4_emit /* emit */
235 };
236
237 /* TGSI_OPCODE_DST */
238 static void
239 dst_fetch_args(
240 struct lp_build_tgsi_context * bld_base,
241 struct lp_build_emit_data * emit_data)
242 {
243 /* src0.y */
244 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
245 0, TGSI_CHAN_Y);
246 /* src0.z */
247 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
248 0, TGSI_CHAN_Z);
249 /* src1.y */
250 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
251 1, TGSI_CHAN_Y);
252 /* src1.w */
253 emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
254 1, TGSI_CHAN_W);
255 }
256
257 static void
258 dst_emit(
259 const struct lp_build_tgsi_action * action,
260 struct lp_build_tgsi_context * bld_base,
261 struct lp_build_emit_data * emit_data)
262 {
263 /* dst.x */
264 emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
265
266 /* dst.y */
267 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
268 TGSI_OPCODE_MUL,
269 emit_data->args[0] /* src0.y */,
270 emit_data->args[2] /* src1.y */);
271 /* dst.z */
272 emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
273
274 /* dst.w */
275 emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
276 }
277
278 static struct lp_build_tgsi_action dst_action = {
279 dst_fetch_args, /* fetch_args */
280 dst_emit /* emit */
281 };
282
283 /* TGSI_OPCODE_END */
284 static void
285 end_emit(
286 const struct lp_build_tgsi_action * action,
287 struct lp_build_tgsi_context * bld_base,
288 struct lp_build_emit_data * emit_data)
289 {
290 bld_base->pc = -1;
291 }
292
293 /* TGSI_OPCODE_EXP */
294
295 static void
296 exp_emit(
297 const struct lp_build_tgsi_action * action,
298 struct lp_build_tgsi_context * bld_base,
299 struct lp_build_emit_data * emit_data)
300 {
301 LLVMValueRef floor_x;
302
303 /* floor( src0.x ) */
304 floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
305 emit_data->args[0]);
306
307 /* 2 ^ floor( src0.x ) */
308 emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
309 TGSI_OPCODE_EX2, floor_x);
310
311 /* src0.x - floor( src0.x ) */
312 emit_data->output[TGSI_CHAN_Y] =
313 lp_build_sub(&bld_base->base, emit_data->args[0] /* src0.x */, floor_x);
314
315 /* 2 ^ src0.x */
316 emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base,
317 TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */);
318
319 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
320 }
321
322 const struct lp_build_tgsi_action exp_action = {
323 scalar_unary_fetch_args, /* fetch_args */
324 exp_emit /* emit */
325 };
326
327 /* TGSI_OPCODE_FRC */
328
329 static void
330 frc_emit(
331 const struct lp_build_tgsi_action * action,
332 struct lp_build_tgsi_context * bld_base,
333 struct lp_build_emit_data * emit_data)
334 {
335 LLVMValueRef tmp;
336 tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
337 emit_data->args[0]);
338 emit_data->output[emit_data->chan] =
339 lp_build_sub(&bld_base->base, emit_data->args[0], tmp);
340 }
341
342 /* TGSI_OPCODE_KILL_IF */
343
344 static void
345 kil_fetch_args(
346 struct lp_build_tgsi_context * bld_base,
347 struct lp_build_emit_data * emit_data)
348 {
349 /* src0.x */
350 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
351 0, TGSI_CHAN_X);
352 /* src0.y */
353 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
354 0, TGSI_CHAN_Y);
355 /* src0.z */
356 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
357 0, TGSI_CHAN_Z);
358 /* src0.w */
359 emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
360 0, TGSI_CHAN_W);
361 emit_data->arg_count = 4;
362 emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
363 }
364
365 /* TGSI_OPCODE_KILL */
366
367 static void
368 kilp_fetch_args(
369 struct lp_build_tgsi_context * bld_base,
370 struct lp_build_emit_data * emit_data)
371 {
372 emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
373 }
374
375 /* TGSI_OPCODE_LIT */
376
377 static void
378 lit_fetch_args(
379 struct lp_build_tgsi_context * bld_base,
380 struct lp_build_emit_data * emit_data)
381 {
382 /* src0.x */
383 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
384 /* src0.y */
385 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
386 /* src0.w */
387 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
388 emit_data->arg_count = 3;
389 }
390
391 static void
392 lit_emit(
393 const struct lp_build_tgsi_action * action,
394 struct lp_build_tgsi_context * bld_base,
395 struct lp_build_emit_data * emit_data)
396 {
397 LLVMValueRef tmp0, tmp1, tmp2;
398
399 /* dst.x */
400 emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
401
402 /* dst. y */
403 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
404 TGSI_OPCODE_MAX,
405 emit_data->args[0] /* src0.x */,
406 bld_base->base.zero);
407
408 /* dst.z */
409 /* XMM[1] = SrcReg[0].yyyy */
410 tmp1 = emit_data->args[1];
411 /* XMM[1] = max(XMM[1], 0) */
412 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
413 tmp1, bld_base->base.zero);
414 /* XMM[2] = SrcReg[0].wwww */
415 tmp2 = emit_data->args[2];
416 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
417 tmp1, tmp2);
418 tmp0 = emit_data->args[0];
419 emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base,
420 TGSI_OPCODE_CMP,
421 tmp0, bld_base->base.zero, tmp1);
422 /* dst.w */
423 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
424 }
425
426 static struct lp_build_tgsi_action lit_action = {
427 lit_fetch_args, /* fetch_args */
428 lit_emit /* emit */
429 };
430
431 /* TGSI_OPCODE_LOG */
432
433 static void
434 log_emit(
435 const struct lp_build_tgsi_action * action,
436 struct lp_build_tgsi_context * bld_base,
437 struct lp_build_emit_data * emit_data)
438 {
439
440 LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
441
442 /* abs( src0.x) */
443 abs_x = lp_build_abs(&bld_base->base, emit_data->args[0] /* src0.x */);
444
445 /* log( abs( src0.x ) ) */
446 log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
447 abs_x);
448
449 /* floor( log( abs( src0.x ) ) ) */
450 flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
451 log_abs_x);
452 /* dst.x */
453 emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
454
455 /* dst.y */
456 ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
457 flr_log_abs_x);
458
459 /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
460 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
461 TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x);
462
463 /* dst.x */
464 emit_data->output[TGSI_CHAN_Z] = log_abs_x;
465
466 /* dst.w */
467 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
468 }
469
470 static struct lp_build_tgsi_action log_action = {
471 scalar_unary_fetch_args, /* fetch_args */
472 log_emit /* emit */
473 };
474
475 /* TGSI_OPCODE_PK2H */
476
477 static void
478 pk2h_fetch_args(
479 struct lp_build_tgsi_context * bld_base,
480 struct lp_build_emit_data * emit_data)
481 {
482 /* src0.x */
483 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
484 0, TGSI_CHAN_X);
485 /* src0.y */
486 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
487 0, TGSI_CHAN_Y);
488 }
489
490 static void
491 pk2h_emit(
492 const struct lp_build_tgsi_action *action,
493 struct lp_build_tgsi_context *bld_base,
494 struct lp_build_emit_data *emit_data)
495 {
496 struct gallivm_state *gallivm = bld_base->base.gallivm;
497 struct lp_type f16i_t;
498 LLVMValueRef lo, hi, res;
499
500 f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
501 lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
502 hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
503 /* maybe some interleave doubling vector width would be useful... */
504 lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
505 hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
506 res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
507
508 emit_data->output[emit_data->chan] = res;
509 }
510
511 static struct lp_build_tgsi_action pk2h_action = {
512 pk2h_fetch_args, /* fetch_args */
513 pk2h_emit /* emit */
514 };
515
516 /* TGSI_OPCODE_UP2H */
517
518 static void
519 up2h_emit(
520 const struct lp_build_tgsi_action *action,
521 struct lp_build_tgsi_context *bld_base,
522 struct lp_build_emit_data *emit_data)
523 {
524 struct gallivm_state *gallivm = bld_base->base.gallivm;
525 LLVMBuilderRef builder = gallivm->builder;
526 LLVMContextRef context = gallivm->context;
527 LLVMValueRef lo, hi, res[2], arg;
528 unsigned nr = bld_base->base.type.length;
529 LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
530
531 arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
532 lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
533 hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
534 res[0] = lp_build_half_to_float(gallivm, lo);
535 res[1] = lp_build_half_to_float(gallivm, hi);
536
537 emit_data->output[0] = emit_data->output[2] = res[0];
538 emit_data->output[1] = emit_data->output[3] = res[1];
539 }
540
541 static struct lp_build_tgsi_action up2h_action = {
542 scalar_unary_fetch_args, /* fetch_args */
543 up2h_emit /* emit */
544 };
545
546 /* TGSI_OPCODE_LRP */
547
548 static void
549 lrp_emit(
550 const struct lp_build_tgsi_action * action,
551 struct lp_build_tgsi_context * bld_base,
552 struct lp_build_emit_data * emit_data)
553 {
554 struct lp_build_context *bld = &bld_base->base;
555 LLVMValueRef inv, a, b;
556
557 /* This uses the correct version: (1 - t)*a + t*b
558 *
559 * An alternative version is "a + t*(b-a)". The problem is this version
560 * doesn't return "b" for t = 1, because "a + (b-a)" isn't equal to "b"
561 * because of the floating-point rounding.
562 */
563 inv = lp_build_sub(bld, bld_base->base.one, emit_data->args[0]);
564 a = lp_build_mul(bld, emit_data->args[1], emit_data->args[0]);
565 b = lp_build_mul(bld, emit_data->args[2], inv);
566 emit_data->output[emit_data->chan] = lp_build_add(bld, a, b);
567 }
568
569 /* TGSI_OPCODE_MAD */
570
571 static void
572 mad_emit(
573 const struct lp_build_tgsi_action * action,
574 struct lp_build_tgsi_context * bld_base,
575 struct lp_build_emit_data * emit_data)
576 {
577 LLVMValueRef tmp;
578 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
579 emit_data->args[0],
580 emit_data->args[1]);
581 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
582 TGSI_OPCODE_ADD, tmp, emit_data->args[2]);
583 }
584
585 /* TGSI_OPCODE_MOV */
586
587 static void
588 mov_emit(
589 const struct lp_build_tgsi_action * action,
590 struct lp_build_tgsi_context * bld_base,
591 struct lp_build_emit_data * emit_data)
592 {
593 emit_data->output[emit_data->chan] = emit_data->args[0];
594 }
595
596 /* TGSI_OPCODE_MUL */
597 static void
598 mul_emit(
599 const struct lp_build_tgsi_action * action,
600 struct lp_build_tgsi_context * bld_base,
601 struct lp_build_emit_data * emit_data)
602 {
603 emit_data->output[emit_data->chan] = LLVMBuildFMul(
604 bld_base->base.gallivm->builder,
605 emit_data->args[0], emit_data->args[1], "");
606 }
607
608 /*.TGSI_OPCODE_DIV.*/
609 static void fdiv_emit(
610 const struct lp_build_tgsi_action * action,
611 struct lp_build_tgsi_context * bld_base,
612 struct lp_build_emit_data * emit_data)
613 {
614 emit_data->output[emit_data->chan] = LLVMBuildFDiv(
615 bld_base->base.gallivm->builder,
616 emit_data->args[0], emit_data->args[1], "");
617 }
618
619 /*.TGSI_OPCODE_RCP.*/
620 static void rcp_emit(
621 const struct lp_build_tgsi_action * action,
622 struct lp_build_tgsi_context * bld_base,
623 struct lp_build_emit_data * emit_data)
624 {
625 LLVMValueRef one;
626 one = lp_build_const_float(bld_base->base.gallivm, 1.0f);
627 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
628 TGSI_OPCODE_DIV, one, emit_data->args[0]);
629 }
630
631 /* TGSI_OPCODE_POW */
632
633 static void
634 pow_emit(
635 const struct lp_build_tgsi_action * action,
636 struct lp_build_tgsi_context * bld_base,
637 struct lp_build_emit_data * emit_data)
638 {
639 emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
640 emit_data->args[0], emit_data->args[1]);
641 }
642
643 static struct lp_build_tgsi_action pow_action = {
644 scalar_binary_fetch_args, /* fetch_args */
645 pow_emit /* emit */
646 };
647
648 /* TGSI_OPCODE_RSQ */
649
650 static void
651 rsq_emit(
652 const struct lp_build_tgsi_action * action,
653 struct lp_build_tgsi_context * bld_base,
654 struct lp_build_emit_data * emit_data)
655 {
656 if (bld_base->rsq_action.emit) {
657 bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
658 } else {
659 emit_data->output[emit_data->chan] = bld_base->base.undef;
660 }
661 }
662
663 const struct lp_build_tgsi_action rsq_action = {
664 scalar_unary_fetch_args, /* fetch_args */
665 rsq_emit /* emit */
666
667 };
668
669 /* TGSI_OPCODE_SQRT */
670
671 static void
672 sqrt_emit(
673 const struct lp_build_tgsi_action * action,
674 struct lp_build_tgsi_context * bld_base,
675 struct lp_build_emit_data * emit_data)
676 {
677 if (bld_base->sqrt_action.emit) {
678 bld_base->sqrt_action.emit(&bld_base->sqrt_action, bld_base, emit_data);
679 } else {
680 emit_data->output[emit_data->chan] = bld_base->base.undef;
681 }
682 }
683
684 const struct lp_build_tgsi_action sqrt_action = {
685 scalar_unary_fetch_args, /* fetch_args */
686 sqrt_emit /* emit */
687 };
688
689 /* TGSI_OPCODE_F2U */
690 static void
691 f2u_emit(
692 const struct lp_build_tgsi_action * action,
693 struct lp_build_tgsi_context * bld_base,
694 struct lp_build_emit_data * emit_data)
695 {
696 emit_data->output[emit_data->chan] =
697 LLVMBuildFPToUI(bld_base->base.gallivm->builder,
698 emit_data->args[0],
699 bld_base->base.int_vec_type, "");
700 }
701
702 /* TGSI_OPCODE_U2F */
703 static void
704 u2f_emit(
705 const struct lp_build_tgsi_action * action,
706 struct lp_build_tgsi_context * bld_base,
707 struct lp_build_emit_data * emit_data)
708 {
709 emit_data->output[emit_data->chan] =
710 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
711 emit_data->args[0],
712 bld_base->base.vec_type, "");
713 }
714
715 static void
716 umad_emit(
717 const struct lp_build_tgsi_action * action,
718 struct lp_build_tgsi_context * bld_base,
719 struct lp_build_emit_data * emit_data)
720 {
721 LLVMValueRef tmp;
722 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMUL,
723 emit_data->args[0],
724 emit_data->args[1]);
725 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
726 TGSI_OPCODE_UADD, tmp, emit_data->args[2]);
727 }
728
729 /* TGSI_OPCODE_UMUL */
730 static void
731 umul_emit(
732 const struct lp_build_tgsi_action * action,
733 struct lp_build_tgsi_context * bld_base,
734 struct lp_build_emit_data * emit_data)
735 {
736 emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint_bld,
737 emit_data->args[0], emit_data->args[1]);
738 }
739
740 /* TGSI_OPCODE_IMUL_HI */
741 static void
742 imul_hi_emit(
743 const struct lp_build_tgsi_action * action,
744 struct lp_build_tgsi_context * bld_base,
745 struct lp_build_emit_data * emit_data)
746 {
747 struct lp_build_context *int_bld = &bld_base->int_bld;
748 LLVMValueRef hi_bits;
749
750 assert(int_bld->type.width == 32);
751
752 /* low result bits are tossed away */
753 lp_build_mul_32_lohi(int_bld, emit_data->args[0],
754 emit_data->args[1], &hi_bits);
755 emit_data->output[emit_data->chan] = hi_bits;
756 }
757
758 static void
759 imul_hi_emit_cpu(
760 const struct lp_build_tgsi_action * action,
761 struct lp_build_tgsi_context * bld_base,
762 struct lp_build_emit_data * emit_data)
763 {
764 struct lp_build_context *int_bld = &bld_base->int_bld;
765 LLVMValueRef hi_bits;
766
767 assert(int_bld->type.width == 32);
768
769 /* low result bits are tossed away */
770 lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0],
771 emit_data->args[1], &hi_bits);
772 emit_data->output[emit_data->chan] = hi_bits;
773 }
774
775 /* TGSI_OPCODE_UMUL_HI */
776 static void
777 umul_hi_emit(
778 const struct lp_build_tgsi_action * action,
779 struct lp_build_tgsi_context * bld_base,
780 struct lp_build_emit_data * emit_data)
781 {
782 struct lp_build_context *uint_bld = &bld_base->uint_bld;
783 LLVMValueRef hi_bits;
784
785 assert(uint_bld->type.width == 32);
786
787 /* low result bits are tossed away */
788 lp_build_mul_32_lohi(uint_bld, emit_data->args[0],
789 emit_data->args[1], &hi_bits);
790 emit_data->output[emit_data->chan] = hi_bits;
791 }
792
793 static void
794 umul_hi_emit_cpu(
795 const struct lp_build_tgsi_action * action,
796 struct lp_build_tgsi_context * bld_base,
797 struct lp_build_emit_data * emit_data)
798 {
799 struct lp_build_context *uint_bld = &bld_base->uint_bld;
800 LLVMValueRef hi_bits;
801
802 assert(uint_bld->type.width == 32);
803
804 /* low result bits are tossed away */
805 lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0],
806 emit_data->args[1], &hi_bits);
807 emit_data->output[emit_data->chan] = hi_bits;
808 }
809
810 /* TGSI_OPCODE_MAX */
811 static void fmax_emit(
812 const struct lp_build_tgsi_action * action,
813 struct lp_build_tgsi_context * bld_base,
814 struct lp_build_emit_data * emit_data)
815 {
816 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
817 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
818 LLVMBuildFCmp(builder, LLVMRealUGE,
819 emit_data->args[0], emit_data->args[1], ""),
820 emit_data->args[0], emit_data->args[1], "");
821 }
822
823 /* TGSI_OPCODE_MIN */
824 static void fmin_emit(
825 const struct lp_build_tgsi_action * action,
826 struct lp_build_tgsi_context * bld_base,
827 struct lp_build_emit_data * emit_data)
828 {
829 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
830 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
831 LLVMBuildFCmp(builder, LLVMRealUGE,
832 emit_data->args[0], emit_data->args[1], ""),
833 emit_data->args[1], emit_data->args[0], "");
834 }
835
836 /* TGSI_OPCODE_D2F */
837 static void
838 d2f_emit(
839 const struct lp_build_tgsi_action * action,
840 struct lp_build_tgsi_context * bld_base,
841 struct lp_build_emit_data * emit_data)
842 {
843 emit_data->output[emit_data->chan] =
844 LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
845 emit_data->args[0],
846 bld_base->base.vec_type, "");
847 }
848
849 /* TGSI_OPCODE_D2I */
850 static void
851 d2i_emit(
852 const struct lp_build_tgsi_action * action,
853 struct lp_build_tgsi_context * bld_base,
854 struct lp_build_emit_data * emit_data)
855 {
856 emit_data->output[emit_data->chan] =
857 LLVMBuildFPToSI(bld_base->base.gallivm->builder,
858 emit_data->args[0],
859 bld_base->base.int_vec_type, "");
860 }
861
862 /* TGSI_OPCODE_D2U */
863 static void
864 d2u_emit(
865 const struct lp_build_tgsi_action * action,
866 struct lp_build_tgsi_context * bld_base,
867 struct lp_build_emit_data * emit_data)
868 {
869 emit_data->output[emit_data->chan] =
870 LLVMBuildFPToUI(bld_base->base.gallivm->builder,
871 emit_data->args[0],
872 bld_base->base.int_vec_type, "");
873 }
874
875 /* TGSI_OPCODE_F2D */
876 static void
877 f2d_emit(
878 const struct lp_build_tgsi_action * action,
879 struct lp_build_tgsi_context * bld_base,
880 struct lp_build_emit_data * emit_data)
881 {
882 emit_data->output[emit_data->chan] =
883 LLVMBuildFPExt(bld_base->base.gallivm->builder,
884 emit_data->args[0],
885 bld_base->dbl_bld.vec_type, "");
886 }
887
888 /* TGSI_OPCODE_U2D */
889 static void
890 u2d_emit(
891 const struct lp_build_tgsi_action * action,
892 struct lp_build_tgsi_context * bld_base,
893 struct lp_build_emit_data * emit_data)
894 {
895 emit_data->output[emit_data->chan] =
896 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
897 emit_data->args[0],
898 bld_base->dbl_bld.vec_type, "");
899 }
900
901 /* TGSI_OPCODE_I2D */
902 static void
903 i2d_emit(
904 const struct lp_build_tgsi_action * action,
905 struct lp_build_tgsi_context * bld_base,
906 struct lp_build_emit_data * emit_data)
907 {
908 emit_data->output[emit_data->chan] =
909 LLVMBuildSIToFP(bld_base->base.gallivm->builder,
910 emit_data->args[0],
911 bld_base->dbl_bld.vec_type, "");
912 }
913
914 /* TGSI_OPCODE_DMAD */
915 static void
916 dmad_emit(
917 const struct lp_build_tgsi_action * action,
918 struct lp_build_tgsi_context * bld_base,
919 struct lp_build_emit_data * emit_data)
920 {
921 LLVMValueRef tmp;
922 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
923 emit_data->args[0],
924 emit_data->args[1]);
925 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
926 TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
927 }
928
929 /*.TGSI_OPCODE_DRCP.*/
930 static void drcp_emit(
931 const struct lp_build_tgsi_action * action,
932 struct lp_build_tgsi_context * bld_base,
933 struct lp_build_emit_data * emit_data)
934 {
935 LLVMValueRef one;
936 one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
937 emit_data->output[emit_data->chan] = LLVMBuildFDiv(
938 bld_base->base.gallivm->builder,
939 one, emit_data->args[0], "");
940 }
941
942 /* TGSI_OPCODE_DFRAC */
943 static void dfrac_emit(
944 const struct lp_build_tgsi_action * action,
945 struct lp_build_tgsi_context * bld_base,
946 struct lp_build_emit_data * emit_data)
947 {
948 LLVMValueRef tmp;
949 tmp = lp_build_floor(&bld_base->dbl_bld,
950 emit_data->args[0]);
951 emit_data->output[emit_data->chan] = LLVMBuildFSub(bld_base->base.gallivm->builder,
952 emit_data->args[0], tmp, "");
953 }
954
955 static void
956 u64mul_emit(
957 const struct lp_build_tgsi_action * action,
958 struct lp_build_tgsi_context * bld_base,
959 struct lp_build_emit_data * emit_data)
960 {
961 emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint64_bld,
962 emit_data->args[0], emit_data->args[1]);
963 }
964
965 static void
966 u64mod_emit_cpu(
967 const struct lp_build_tgsi_action * action,
968 struct lp_build_tgsi_context * bld_base,
969 struct lp_build_emit_data * emit_data)
970 {
971 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
972 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
973 PIPE_FUNC_EQUAL, emit_data->args[1],
974 bld_base->uint64_bld.zero);
975 /* We want to make sure that we never divide/mod by zero to not
976 * generate sigfpe. We don't want to crash just because the
977 * shader is doing something weird. */
978 LLVMValueRef divisor = LLVMBuildOr(builder,
979 div_mask,
980 emit_data->args[1], "");
981 LLVMValueRef result = lp_build_mod(&bld_base->uint64_bld,
982 emit_data->args[0], divisor);
983 /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
984 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
985 div_mask,
986 result, "");
987 }
988
989 static void
990 i64mod_emit_cpu(
991 const struct lp_build_tgsi_action * action,
992 struct lp_build_tgsi_context * bld_base,
993 struct lp_build_emit_data * emit_data)
994 {
995 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
996 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
997 PIPE_FUNC_EQUAL, emit_data->args[1],
998 bld_base->uint64_bld.zero);
999 /* We want to make sure that we never divide/mod by zero to not
1000 * generate sigfpe. We don't want to crash just because the
1001 * shader is doing something weird. */
1002 LLVMValueRef divisor = LLVMBuildOr(builder,
1003 div_mask,
1004 emit_data->args[1], "");
1005 LLVMValueRef result = lp_build_mod(&bld_base->int64_bld,
1006 emit_data->args[0], divisor);
1007 /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1008 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1009 div_mask,
1010 result, "");
1011 }
1012
1013 static void
1014 u64div_emit_cpu(
1015 const struct lp_build_tgsi_action * action,
1016 struct lp_build_tgsi_context * bld_base,
1017 struct lp_build_emit_data * emit_data)
1018 {
1019
1020 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1021 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
1022 PIPE_FUNC_EQUAL, emit_data->args[1],
1023 bld_base->uint64_bld.zero);
1024 /* We want to make sure that we never divide/mod by zero to not
1025 * generate sigfpe. We don't want to crash just because the
1026 * shader is doing something weird. */
1027 LLVMValueRef divisor = LLVMBuildOr(builder,
1028 div_mask,
1029 emit_data->args[1], "");
1030 LLVMValueRef result = LLVMBuildUDiv(builder,
1031 emit_data->args[0], divisor, "");
1032 /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1033 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1034 div_mask,
1035 result, "");
1036 }
1037
1038 static void
1039 i64div_emit_cpu(
1040 const struct lp_build_tgsi_action * action,
1041 struct lp_build_tgsi_context * bld_base,
1042 struct lp_build_emit_data * emit_data)
1043 {
1044
1045 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1046 LLVMValueRef div_mask = lp_build_cmp(&bld_base->int64_bld,
1047 PIPE_FUNC_EQUAL, emit_data->args[1],
1048 bld_base->int64_bld.zero);
1049 /* We want to make sure that we never divide/mod by zero to not
1050 * generate sigfpe. We don't want to crash just because the
1051 * shader is doing something weird. */
1052 LLVMValueRef divisor = LLVMBuildOr(builder,
1053 div_mask,
1054 emit_data->args[1], "");
1055 LLVMValueRef result = LLVMBuildSDiv(builder,
1056 emit_data->args[0], divisor, "");
1057 /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1058 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1059 div_mask,
1060 result, "");
1061 }
1062
1063 static void
1064 f2u64_emit(
1065 const struct lp_build_tgsi_action * action,
1066 struct lp_build_tgsi_context * bld_base,
1067 struct lp_build_emit_data * emit_data)
1068 {
1069 emit_data->output[emit_data->chan] =
1070 LLVMBuildFPToUI(bld_base->base.gallivm->builder,
1071 emit_data->args[0],
1072 bld_base->uint64_bld.vec_type, "");
1073 }
1074
1075 static void
1076 f2i64_emit(
1077 const struct lp_build_tgsi_action * action,
1078 struct lp_build_tgsi_context * bld_base,
1079 struct lp_build_emit_data * emit_data)
1080 {
1081 emit_data->output[emit_data->chan] =
1082 LLVMBuildFPToSI(bld_base->base.gallivm->builder,
1083 emit_data->args[0],
1084 bld_base->int64_bld.vec_type, "");
1085 }
1086
1087 static void
1088 u2i64_emit(
1089 const struct lp_build_tgsi_action * action,
1090 struct lp_build_tgsi_context * bld_base,
1091 struct lp_build_emit_data * emit_data)
1092 {
1093 emit_data->output[emit_data->chan] =
1094 LLVMBuildZExt(bld_base->base.gallivm->builder,
1095 emit_data->args[0],
1096 bld_base->uint64_bld.vec_type, "");
1097 }
1098
1099 static void
1100 i2i64_emit(
1101 const struct lp_build_tgsi_action * action,
1102 struct lp_build_tgsi_context * bld_base,
1103 struct lp_build_emit_data * emit_data)
1104 {
1105 emit_data->output[emit_data->chan] =
1106 LLVMBuildSExt(bld_base->base.gallivm->builder,
1107 emit_data->args[0],
1108 bld_base->int64_bld.vec_type, "");
1109 }
1110
1111 static void
1112 i642f_emit(
1113 const struct lp_build_tgsi_action * action,
1114 struct lp_build_tgsi_context * bld_base,
1115 struct lp_build_emit_data * emit_data)
1116 {
1117 emit_data->output[emit_data->chan] =
1118 LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1119 emit_data->args[0],
1120 bld_base->base.vec_type, "");
1121 }
1122
1123 static void
1124 u642f_emit(
1125 const struct lp_build_tgsi_action * action,
1126 struct lp_build_tgsi_context * bld_base,
1127 struct lp_build_emit_data * emit_data)
1128 {
1129 emit_data->output[emit_data->chan] =
1130 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1131 emit_data->args[0],
1132 bld_base->base.vec_type, "");
1133 }
1134
1135 static void
1136 i642d_emit(
1137 const struct lp_build_tgsi_action * action,
1138 struct lp_build_tgsi_context * bld_base,
1139 struct lp_build_emit_data * emit_data)
1140 {
1141 emit_data->output[emit_data->chan] =
1142 LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1143 emit_data->args[0],
1144 bld_base->dbl_bld.vec_type, "");
1145 }
1146
1147 static void
1148 u642d_emit(
1149 const struct lp_build_tgsi_action * action,
1150 struct lp_build_tgsi_context * bld_base,
1151 struct lp_build_emit_data * emit_data)
1152 {
1153 emit_data->output[emit_data->chan] =
1154 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1155 emit_data->args[0],
1156 bld_base->dbl_bld.vec_type, "");
1157 }
1158
1159 void
1160 lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
1161 {
1162 bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
1163 bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
1164 bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
1165 bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
1166 bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
1167 bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
1168 bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
1169 bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
1170 bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
1171 bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
1172 bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
1173 bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
1174
1175 bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
1176 bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
1177 bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
1178 bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
1179 bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
1180 bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
1181 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
1182 bld_base->op_actions[TGSI_OPCODE_KILL].fetch_args = kilp_fetch_args;
1183 bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
1184 bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
1185 bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
1186
1187 bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
1188 bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
1189 bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
1190 bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
1191 bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
1192 bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
1193 bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
1194 bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
1195 bld_base->op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit;
1196 bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit;
1197
1198 bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
1199 bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit;
1200 bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
1201 bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
1202 bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
1203 bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit;
1204 bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit;
1205
1206 bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
1207 bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
1208
1209 bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
1210 bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
1211 bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
1212 bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
1213 bld_base->op_actions[TGSI_OPCODE_DDIV].emit = fdiv_emit;
1214
1215 bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
1216 bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
1217 bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
1218
1219 bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
1220 bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
1221 bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
1222
1223 bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
1224
1225 bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
1226 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
1227
1228 bld_base->op_actions[TGSI_OPCODE_U64MUL].emit = u64mul_emit;
1229
1230 bld_base->op_actions[TGSI_OPCODE_F2I64].emit = f2i64_emit;
1231 bld_base->op_actions[TGSI_OPCODE_F2U64].emit = f2u64_emit;
1232
1233 bld_base->op_actions[TGSI_OPCODE_D2I64].emit = f2i64_emit;
1234 bld_base->op_actions[TGSI_OPCODE_D2U64].emit = f2u64_emit;
1235
1236 bld_base->op_actions[TGSI_OPCODE_I2I64].emit = i2i64_emit;
1237 bld_base->op_actions[TGSI_OPCODE_U2I64].emit = u2i64_emit;
1238
1239 bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
1240 bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
1241
1242 bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
1243 bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
1244
1245 bld_base->op_actions[TGSI_OPCODE_I642D].emit = i642d_emit;
1246 bld_base->op_actions[TGSI_OPCODE_U642D].emit = u642d_emit;
1247
1248 }
1249
1250 /* CPU Only default actions */
1251
1252 /* These actions are CPU only, because they could potentially output SSE
1253 * intrinsics.
1254 */
1255
1256 /* TGSI_OPCODE_ADD (CPU Only) */
1257 static void
1258 add_emit_cpu(
1259 const struct lp_build_tgsi_action * action,
1260 struct lp_build_tgsi_context * bld_base,
1261 struct lp_build_emit_data * emit_data)
1262 {
1263 emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base,
1264 emit_data->args[0], emit_data->args[1]);
1265 }
1266
1267 /* TGSI_OPCODE_AND (CPU Only) */
1268 static void
1269 and_emit_cpu(
1270 const struct lp_build_tgsi_action * action,
1271 struct lp_build_tgsi_context * bld_base,
1272 struct lp_build_emit_data * emit_data)
1273 {
1274 emit_data->output[emit_data->chan] = lp_build_and(&bld_base->uint_bld,
1275 emit_data->args[0], emit_data->args[1]);
1276 }
1277
1278 /* TGSI_OPCODE_ARL (CPU Only) */
1279 static void
1280 arl_emit_cpu(
1281 const struct lp_build_tgsi_action * action,
1282 struct lp_build_tgsi_context * bld_base,
1283 struct lp_build_emit_data * emit_data)
1284 {
1285 LLVMValueRef tmp;
1286 tmp = lp_build_floor(&bld_base->base,
1287 emit_data->args[0]);
1288 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
1289 bld_base->uint_bld.vec_type, "");
1290 }
1291
1292 /* TGSI_OPCODE_ARR (CPU Only) */
1293 static void
1294 arr_emit_cpu(
1295 const struct lp_build_tgsi_action * action,
1296 struct lp_build_tgsi_context * bld_base,
1297 struct lp_build_emit_data * emit_data)
1298 {
1299 emit_data->output[emit_data->chan] = lp_build_iround(&bld_base->base, emit_data->args[0]);
1300 }
1301
1302 /* TGSI_OPCODE_CEIL (CPU Only) */
1303 static void
1304 ceil_emit_cpu(
1305 const struct lp_build_tgsi_action * action,
1306 struct lp_build_tgsi_context * bld_base,
1307 struct lp_build_emit_data * emit_data)
1308 {
1309 emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base,
1310 emit_data->args[0]);
1311 }
1312
1313 /* TGSI_OPCODE_CMP (CPU Only) */
1314 static void
1315 cmp_emit_cpu(
1316 const struct lp_build_tgsi_action * action,
1317 struct lp_build_tgsi_context * bld_base,
1318 struct lp_build_emit_data * emit_data)
1319 {
1320 LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
1321 emit_data->args[0], bld_base->base.zero);
1322 emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1323 cond, emit_data->args[1], emit_data->args[2]);
1324 }
1325
1326 /* TGSI_OPCODE_UCMP (CPU Only) */
1327 static void
1328 ucmp_emit_cpu(
1329 const struct lp_build_tgsi_action * action,
1330 struct lp_build_tgsi_context * bld_base,
1331 struct lp_build_emit_data * emit_data)
1332 {
1333 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1334 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1335 LLVMValueRef unsigned_cond =
1336 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
1337 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
1338 unsigned_cond,
1339 uint_bld->zero);
1340 emit_data->output[emit_data->chan] =
1341 lp_build_select(&bld_base->base,
1342 cond, emit_data->args[1], emit_data->args[2]);
1343 }
1344
1345 /* TGSI_OPCODE_COS (CPU Only) */
1346 static void
1347 cos_emit_cpu(
1348 const struct lp_build_tgsi_action * action,
1349 struct lp_build_tgsi_context * bld_base,
1350 struct lp_build_emit_data * emit_data)
1351 {
1352 emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base,
1353 emit_data->args[0]);
1354 }
1355
1356 /* TGSI_OPCODE_DIV (CPU Only) */
1357 static void
1358 div_emit_cpu(
1359 const struct lp_build_tgsi_action * action,
1360 struct lp_build_tgsi_context * bld_base,
1361 struct lp_build_emit_data * emit_data)
1362 {
1363 emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base,
1364 emit_data->args[0], emit_data->args[1]);
1365 }
1366
1367 /* TGSI_OPCODE_EX2 (CPU Only) */
1368 static void
1369 ex2_emit_cpu(
1370 const struct lp_build_tgsi_action * action,
1371 struct lp_build_tgsi_context * bld_base,
1372 struct lp_build_emit_data * emit_data)
1373 {
1374 emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base,
1375 emit_data->args[0]);
1376 }
1377
1378 /* TGSI_OPCODE_F2I (CPU Only) */
1379 static void
1380 f2i_emit_cpu(
1381 const struct lp_build_tgsi_action * action,
1382 struct lp_build_tgsi_context * bld_base,
1383 struct lp_build_emit_data * emit_data)
1384 {
1385 emit_data->output[emit_data->chan] = lp_build_itrunc(&bld_base->base,
1386 emit_data->args[0]);
1387 }
1388
1389 /* TGSI_OPCODE_FSET Helper (CPU Only) */
1390 static void
1391 fset_emit_cpu(
1392 const struct lp_build_tgsi_action * action,
1393 struct lp_build_tgsi_context * bld_base,
1394 struct lp_build_emit_data * emit_data,
1395 unsigned pipe_func)
1396 {
1397 LLVMValueRef cond;
1398
1399 if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1400 cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1401 emit_data->args[0], emit_data->args[1]);
1402 }
1403 else {
1404 cond = lp_build_cmp(&bld_base->base, pipe_func,
1405 emit_data->args[0], emit_data->args[1]);
1406
1407 }
1408 emit_data->output[emit_data->chan] = cond;
1409 }
1410
1411
1412 /* TGSI_OPCODE_FSEQ (CPU Only) */
1413 static void
1414 fseq_emit_cpu(
1415 const struct lp_build_tgsi_action * action,
1416 struct lp_build_tgsi_context * bld_base,
1417 struct lp_build_emit_data * emit_data)
1418 {
1419 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1420 }
1421
1422 /* TGSI_OPCODE_ISGE (CPU Only) */
1423 static void
1424 fsge_emit_cpu(
1425 const struct lp_build_tgsi_action * action,
1426 struct lp_build_tgsi_context * bld_base,
1427 struct lp_build_emit_data * emit_data)
1428 {
1429 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1430 }
1431
1432 /* TGSI_OPCODE_ISLT (CPU Only) */
1433 static void
1434 fslt_emit_cpu(
1435 const struct lp_build_tgsi_action * action,
1436 struct lp_build_tgsi_context * bld_base,
1437 struct lp_build_emit_data * emit_data)
1438 {
1439 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1440 }
1441
1442 /* TGSI_OPCODE_USNE (CPU Only) */
1443
1444 static void
1445 fsne_emit_cpu(
1446 const struct lp_build_tgsi_action * action,
1447 struct lp_build_tgsi_context * bld_base,
1448 struct lp_build_emit_data * emit_data)
1449 {
1450 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1451 }
1452
1453 /* TGSI_OPCODE_FLR (CPU Only) */
1454
1455 static void
1456 flr_emit_cpu(
1457 const struct lp_build_tgsi_action * action,
1458 struct lp_build_tgsi_context * bld_base,
1459 struct lp_build_emit_data * emit_data)
1460 {
1461 emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base,
1462 emit_data->args[0]);
1463 }
1464
1465 /* TGSI_OPCODE_I2F (CPU Only) */
1466 static void
1467 i2f_emit_cpu(
1468 const struct lp_build_tgsi_action * action,
1469 struct lp_build_tgsi_context * bld_base,
1470 struct lp_build_emit_data * emit_data)
1471 {
1472 emit_data->output[emit_data->chan] = lp_build_int_to_float(&bld_base->base,
1473 emit_data->args[0]);
1474 }
1475
1476 /* TGSI_OPCODE_IABS (CPU Only) */
1477 static void
1478 iabs_emit_cpu(
1479 const struct lp_build_tgsi_action * action,
1480 struct lp_build_tgsi_context * bld_base,
1481 struct lp_build_emit_data * emit_data)
1482 {
1483 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int_bld,
1484 emit_data->args[0]);
1485 }
1486
1487 /* TGSI_OPCODE_IDIV (CPU Only) */
1488 static void
1489 idiv_emit_cpu(
1490 const struct lp_build_tgsi_action * action,
1491 struct lp_build_tgsi_context * bld_base,
1492 struct lp_build_emit_data * emit_data)
1493 {
1494 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1495 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1496 PIPE_FUNC_EQUAL, emit_data->args[1],
1497 bld_base->uint_bld.zero);
1498 /* We want to make sure that we never divide/mod by zero to not
1499 * generate sigfpe. We don't want to crash just because the
1500 * shader is doing something weird. */
1501 LLVMValueRef divisor = LLVMBuildOr(builder,
1502 div_mask,
1503 emit_data->args[1], "");
1504 LLVMValueRef result = lp_build_div(&bld_base->int_bld,
1505 emit_data->args[0], divisor);
1506 LLVMValueRef not_div_mask = LLVMBuildNot(builder,
1507 div_mask,"");
1508 /* idiv by zero doesn't have a guaranteed return value chose 0 for now. */
1509 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1510 not_div_mask,
1511 result, "");
1512 }
1513
1514 /* TGSI_OPCODE_INEG (CPU Only) */
1515 static void
1516 ineg_emit_cpu(
1517 const struct lp_build_tgsi_action * action,
1518 struct lp_build_tgsi_context * bld_base,
1519 struct lp_build_emit_data * emit_data)
1520 {
1521 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int_bld,
1522 bld_base->int_bld.zero,
1523 emit_data->args[0]);
1524 }
1525
1526 /* TGSI_OPCODE_ISET Helper (CPU Only) */
1527 static void
1528 iset_emit_cpu(
1529 const struct lp_build_tgsi_action * action,
1530 struct lp_build_tgsi_context * bld_base,
1531 struct lp_build_emit_data * emit_data,
1532 unsigned pipe_func)
1533 {
1534 LLVMValueRef cond = lp_build_cmp(&bld_base->int_bld, pipe_func,
1535 emit_data->args[0], emit_data->args[1]);
1536 emit_data->output[emit_data->chan] = cond;
1537 }
1538
1539 /* TGSI_OPCODE_IMAX (CPU Only) */
1540 static void
1541 imax_emit_cpu(
1542 const struct lp_build_tgsi_action * action,
1543 struct lp_build_tgsi_context * bld_base,
1544 struct lp_build_emit_data * emit_data)
1545 {
1546 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int_bld,
1547 emit_data->args[0], emit_data->args[1]);
1548 }
1549
1550 /* TGSI_OPCODE_IMIN (CPU Only) */
1551 static void
1552 imin_emit_cpu(
1553 const struct lp_build_tgsi_action * action,
1554 struct lp_build_tgsi_context * bld_base,
1555 struct lp_build_emit_data * emit_data)
1556 {
1557 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int_bld,
1558 emit_data->args[0], emit_data->args[1]);
1559 }
1560
1561 /* TGSI_OPCODE_ISGE (CPU Only) */
1562 static void
1563 isge_emit_cpu(
1564 const struct lp_build_tgsi_action * action,
1565 struct lp_build_tgsi_context * bld_base,
1566 struct lp_build_emit_data * emit_data)
1567 {
1568 iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1569 }
1570
1571 /* TGSI_OPCODE_ISHR (CPU Only) */
1572 static void
1573 ishr_emit_cpu(
1574 const struct lp_build_tgsi_action * action,
1575 struct lp_build_tgsi_context * bld_base,
1576 struct lp_build_emit_data * emit_data)
1577 {
1578 struct lp_build_context *int_bld = &bld_base->int_bld;
1579 LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
1580 int_bld->type.width - 1);
1581 LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
1582 emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
1583 masked_count);
1584 }
1585
1586 /* TGSI_OPCODE_ISLT (CPU Only) */
1587 static void
1588 islt_emit_cpu(
1589 const struct lp_build_tgsi_action * action,
1590 struct lp_build_tgsi_context * bld_base,
1591 struct lp_build_emit_data * emit_data)
1592 {
1593 iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1594 }
1595
1596
1597 /* TGSI_OPCODE_ISSG (CPU Only) */
1598 static void
1599 issg_emit_cpu(
1600 const struct lp_build_tgsi_action * action,
1601 struct lp_build_tgsi_context * bld_base,
1602 struct lp_build_emit_data * emit_data)
1603 {
1604 emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int_bld,
1605 emit_data->args[0]);
1606 }
1607
1608 /* TGSI_OPCODE_LG2 (CPU Only) */
1609 static void
1610 lg2_emit_cpu(
1611 const struct lp_build_tgsi_action * action,
1612 struct lp_build_tgsi_context * bld_base,
1613 struct lp_build_emit_data * emit_data)
1614 {
1615 emit_data->output[emit_data->chan] = lp_build_log2_safe(&bld_base->base,
1616 emit_data->args[0]);
1617 }
1618
1619 /* TGSI_OPCODE_LOG (CPU Only) */
1620 static void
1621 log_emit_cpu(
1622 const struct lp_build_tgsi_action * action,
1623 struct lp_build_tgsi_context * bld_base,
1624 struct lp_build_emit_data * emit_data)
1625 {
1626 LLVMValueRef p_floor_log2;
1627 LLVMValueRef p_exp;
1628 LLVMValueRef p_log2;
1629 LLVMValueRef src0 = emit_data->args[0];
1630
1631 lp_build_log2_approx(&bld_base->base, src0,
1632 &p_exp, &p_floor_log2, &p_log2, FALSE);
1633
1634 emit_data->output[TGSI_CHAN_X] = p_floor_log2;
1635
1636 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
1637 TGSI_OPCODE_DIV,
1638 src0, p_exp);
1639 emit_data->output[TGSI_CHAN_Z] = p_log2;
1640
1641 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
1642
1643 }
1644
1645 /* TGSI_OPCODE_MAD (CPU Only) */
1646
1647 static void
1648 mad_emit_cpu(
1649 const struct lp_build_tgsi_action * action,
1650 struct lp_build_tgsi_context * bld_base,
1651 struct lp_build_emit_data * emit_data)
1652 {
1653 emit_data->output[emit_data->chan] =
1654 lp_build_mad(&bld_base->base,
1655 emit_data->args[0], emit_data->args[1], emit_data->args[2]);
1656 }
1657
1658 /* TGSI_OPCODE_MAX (CPU Only) */
1659
1660 static void
1661 max_emit_cpu(
1662 const struct lp_build_tgsi_action * action,
1663 struct lp_build_tgsi_context * bld_base,
1664 struct lp_build_emit_data * emit_data)
1665 {
1666 emit_data->output[emit_data->chan] =
1667 lp_build_max_ext(&bld_base->base,
1668 emit_data->args[0], emit_data->args[1],
1669 GALLIVM_NAN_RETURN_OTHER);
1670 }
1671
1672 /* TGSI_OPCODE_MIN (CPU Only) */
1673 static void
1674 min_emit_cpu(
1675 const struct lp_build_tgsi_action * action,
1676 struct lp_build_tgsi_context * bld_base,
1677 struct lp_build_emit_data * emit_data)
1678 {
1679 emit_data->output[emit_data->chan] =
1680 lp_build_min_ext(&bld_base->base,
1681 emit_data->args[0], emit_data->args[1],
1682 GALLIVM_NAN_RETURN_OTHER);
1683 }
1684
1685 /* TGSI_OPCODE_MOD (CPU Only) */
1686 static void
1687 mod_emit_cpu(
1688 const struct lp_build_tgsi_action * action,
1689 struct lp_build_tgsi_context * bld_base,
1690 struct lp_build_emit_data * emit_data)
1691 {
1692 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1693 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1694 PIPE_FUNC_EQUAL, emit_data->args[1],
1695 bld_base->uint_bld.zero);
1696 /* We want to make sure that we never divide/mod by zero to not
1697 * generate sigfpe. We don't want to crash just because the
1698 * shader is doing something weird. */
1699 LLVMValueRef divisor = LLVMBuildOr(builder,
1700 div_mask,
1701 emit_data->args[1], "");
1702 LLVMValueRef result = lp_build_mod(&bld_base->int_bld,
1703 emit_data->args[0], divisor);
1704 /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1705 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1706 div_mask,
1707 result, "");
1708 }
1709
1710 /* TGSI_OPCODE_NOT */
1711 static void
1712 not_emit_cpu(
1713 const struct lp_build_tgsi_action * action,
1714 struct lp_build_tgsi_context * bld_base,
1715 struct lp_build_emit_data * emit_data)
1716 {
1717 emit_data->output[emit_data->chan] = lp_build_not(&bld_base->uint_bld,
1718 emit_data->args[0]);
1719 }
1720
1721 /* TGSI_OPCODE_OR (CPU Only) */
1722 static void
1723 or_emit_cpu(
1724 const struct lp_build_tgsi_action * action,
1725 struct lp_build_tgsi_context * bld_base,
1726 struct lp_build_emit_data * emit_data)
1727 {
1728 emit_data->output[emit_data->chan] = lp_build_or(&bld_base->uint_bld,
1729 emit_data->args[0], emit_data->args[1]);
1730 }
1731
1732 /* TGSI_OPCODE_POW (CPU Only) */
1733 static void
1734 pow_emit_cpu(
1735 const struct lp_build_tgsi_action * action,
1736 struct lp_build_tgsi_context * bld_base,
1737 struct lp_build_emit_data * emit_data)
1738 {
1739 emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
1740 emit_data->args[0], emit_data->args[1]);
1741 }
1742
1743
1744 /* TGSI_OPCODE_RCP (CPU Only) */
1745
1746 static void
1747 rcp_emit_cpu(
1748 const struct lp_build_tgsi_action * action,
1749 struct lp_build_tgsi_context * bld_base,
1750 struct lp_build_emit_data * emit_data)
1751 {
1752 emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base,
1753 emit_data->args[0]);
1754 }
1755
1756 /* Reciprical squareroot (CPU Only) */
1757 static void
1758 recip_sqrt_emit_cpu(
1759 const struct lp_build_tgsi_action * action,
1760 struct lp_build_tgsi_context * bld_base,
1761 struct lp_build_emit_data * emit_data)
1762 {
1763 emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base,
1764 emit_data->args[0]);
1765 }
1766
1767 static void
1768 sqrt_emit_cpu(
1769 const struct lp_build_tgsi_action * action,
1770 struct lp_build_tgsi_context * bld_base,
1771 struct lp_build_emit_data * emit_data)
1772 {
1773 emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->base,
1774 emit_data->args[0]);
1775 }
1776
1777
1778 /* TGSI_OPCODE_ROUND (CPU Only) */
1779 static void
1780 round_emit_cpu(
1781 const struct lp_build_tgsi_action * action,
1782 struct lp_build_tgsi_context * bld_base,
1783 struct lp_build_emit_data * emit_data)
1784 {
1785 emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base,
1786 emit_data->args[0]);
1787 }
1788
1789 /* TGSI_OPCODE_SET Helper (CPU Only) */
1790
1791 static void
1792 set_emit_cpu(
1793 const struct lp_build_tgsi_action * action,
1794 struct lp_build_tgsi_context * bld_base,
1795 struct lp_build_emit_data * emit_data,
1796 unsigned pipe_func)
1797 {
1798 LLVMValueRef cond;
1799
1800 if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1801 cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1802 emit_data->args[0], emit_data->args[1]);
1803 }
1804 else {
1805 cond = lp_build_cmp(&bld_base->base, pipe_func,
1806 emit_data->args[0], emit_data->args[1]);
1807
1808 }
1809 emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1810 cond,
1811 bld_base->base.one,
1812 bld_base->base.zero);
1813 }
1814
1815 /* TGSI_OPCODE_SEQ (CPU Only) */
1816
1817 static void
1818 seq_emit_cpu(
1819 const struct lp_build_tgsi_action * action,
1820 struct lp_build_tgsi_context * bld_base,
1821 struct lp_build_emit_data * emit_data)
1822 {
1823 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1824 }
1825
1826 /* TGSI_OPCODE_SGE (CPU Only) */
1827 static void
1828 sge_emit_cpu(
1829 const struct lp_build_tgsi_action * action,
1830 struct lp_build_tgsi_context * bld_base,
1831 struct lp_build_emit_data * emit_data)
1832 {
1833 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1834 }
1835
1836 /* TGSI_OPCODE_SGT (CPU Only)*/
1837
1838 static void
1839 sgt_emit_cpu(
1840 const struct lp_build_tgsi_action * action,
1841 struct lp_build_tgsi_context * bld_base,
1842 struct lp_build_emit_data * emit_data)
1843 {
1844 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
1845 }
1846
1847 /* TGSI_OPCODE_SHL (CPU Only) */
1848 static void
1849 shl_emit_cpu(
1850 const struct lp_build_tgsi_action * action,
1851 struct lp_build_tgsi_context * bld_base,
1852 struct lp_build_emit_data * emit_data)
1853 {
1854 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1855 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
1856 uint_bld->type.width - 1);
1857 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
1858 emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
1859 masked_count);
1860 }
1861
1862 /* TGSI_OPCODE_SIN (CPU Only) */
1863 static void
1864 sin_emit_cpu(
1865 const struct lp_build_tgsi_action * action,
1866 struct lp_build_tgsi_context * bld_base,
1867 struct lp_build_emit_data * emit_data)
1868 {
1869 emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base,
1870 emit_data->args[0]);
1871 }
1872
1873 /* TGSI_OPCODE_SLE (CPU Only) */
1874 static void
1875 sle_emit_cpu(
1876 const struct lp_build_tgsi_action * action,
1877 struct lp_build_tgsi_context * bld_base,
1878 struct lp_build_emit_data * emit_data)
1879 {
1880 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
1881 }
1882
1883 /* TGSI_OPCODE_SLT (CPU Only) */
1884 static void
1885 slt_emit_cpu(
1886 const struct lp_build_tgsi_action * action,
1887 struct lp_build_tgsi_context * bld_base,
1888 struct lp_build_emit_data * emit_data)
1889 {
1890 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1891 }
1892
1893 /* TGSI_OPCODE_SNE (CPU Only) */
1894
1895 static void
1896 sne_emit_cpu(
1897 const struct lp_build_tgsi_action * action,
1898 struct lp_build_tgsi_context * bld_base,
1899 struct lp_build_emit_data * emit_data)
1900 {
1901 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1902 }
1903
1904 /* TGSI_OPCODE_SSG (CPU Only) */
1905
1906 static void
1907 ssg_emit_cpu(
1908 const struct lp_build_tgsi_action * action,
1909 struct lp_build_tgsi_context * bld_base,
1910 struct lp_build_emit_data * emit_data)
1911 {
1912 emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base,
1913 emit_data->args[0]);
1914 }
1915
1916 /* TGSI_OPCODE_TRUNC (CPU Only) */
1917
1918 static void
1919 trunc_emit_cpu(
1920 const struct lp_build_tgsi_action * action,
1921 struct lp_build_tgsi_context * bld_base,
1922 struct lp_build_emit_data * emit_data)
1923 {
1924 emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base,
1925 emit_data->args[0]);
1926 }
1927
1928 /* TGSI_OPCODE_UADD (CPU Only) */
1929 static void
1930 uadd_emit_cpu(
1931 const struct lp_build_tgsi_action * action,
1932 struct lp_build_tgsi_context * bld_base,
1933 struct lp_build_emit_data * emit_data)
1934 {
1935 emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint_bld,
1936 emit_data->args[0], emit_data->args[1]);
1937 }
1938
1939 /* TGSI_OPCODE_UDIV (CPU Only) */
1940 static void
1941 udiv_emit_cpu(
1942 const struct lp_build_tgsi_action * action,
1943 struct lp_build_tgsi_context * bld_base,
1944 struct lp_build_emit_data * emit_data)
1945 {
1946
1947 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1948 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1949 PIPE_FUNC_EQUAL, emit_data->args[1],
1950 bld_base->uint_bld.zero);
1951 /* We want to make sure that we never divide/mod by zero to not
1952 * generate sigfpe. We don't want to crash just because the
1953 * shader is doing something weird. */
1954 LLVMValueRef divisor = LLVMBuildOr(builder,
1955 div_mask,
1956 emit_data->args[1], "");
1957 LLVMValueRef result = lp_build_div(&bld_base->uint_bld,
1958 emit_data->args[0], divisor);
1959 /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1960 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1961 div_mask,
1962 result, "");
1963 }
1964
1965 /* TGSI_OPCODE_UMAX (CPU Only) */
1966 static void
1967 umax_emit_cpu(
1968 const struct lp_build_tgsi_action * action,
1969 struct lp_build_tgsi_context * bld_base,
1970 struct lp_build_emit_data * emit_data)
1971 {
1972 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint_bld,
1973 emit_data->args[0], emit_data->args[1]);
1974 }
1975
1976 /* TGSI_OPCODE_UMIN (CPU Only) */
1977 static void
1978 umin_emit_cpu(
1979 const struct lp_build_tgsi_action * action,
1980 struct lp_build_tgsi_context * bld_base,
1981 struct lp_build_emit_data * emit_data)
1982 {
1983 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint_bld,
1984 emit_data->args[0], emit_data->args[1]);
1985 }
1986
1987 /* TGSI_OPCODE_UMOD (CPU Only) */
1988 static void
1989 umod_emit_cpu(
1990 const struct lp_build_tgsi_action * action,
1991 struct lp_build_tgsi_context * bld_base,
1992 struct lp_build_emit_data * emit_data)
1993 {
1994 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1995 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1996 PIPE_FUNC_EQUAL, emit_data->args[1],
1997 bld_base->uint_bld.zero);
1998 /* We want to make sure that we never divide/mod by zero to not
1999 * generate sigfpe. We don't want to crash just because the
2000 * shader is doing something weird. */
2001 LLVMValueRef divisor = LLVMBuildOr(builder,
2002 div_mask,
2003 emit_data->args[1], "");
2004 LLVMValueRef result = lp_build_mod(&bld_base->uint_bld,
2005 emit_data->args[0], divisor);
2006 /* umod by zero is guaranteed to return 0xffffffff */
2007 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
2008 div_mask,
2009 result, "");
2010 }
2011
2012 /* TGSI_OPCODE_USET Helper (CPU Only) */
2013 static void
2014 uset_emit_cpu(
2015 const struct lp_build_tgsi_action * action,
2016 struct lp_build_tgsi_context * bld_base,
2017 struct lp_build_emit_data * emit_data,
2018 unsigned pipe_func)
2019 {
2020 LLVMValueRef cond = lp_build_cmp(&bld_base->uint_bld, pipe_func,
2021 emit_data->args[0], emit_data->args[1]);
2022 emit_data->output[emit_data->chan] = cond;
2023 }
2024
2025
2026 /* TGSI_OPCODE_USEQ (CPU Only) */
2027 static void
2028 useq_emit_cpu(
2029 const struct lp_build_tgsi_action * action,
2030 struct lp_build_tgsi_context * bld_base,
2031 struct lp_build_emit_data * emit_data)
2032 {
2033 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2034 }
2035
2036 /* TGSI_OPCODE_ISGE (CPU Only) */
2037 static void
2038 usge_emit_cpu(
2039 const struct lp_build_tgsi_action * action,
2040 struct lp_build_tgsi_context * bld_base,
2041 struct lp_build_emit_data * emit_data)
2042 {
2043 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2044 }
2045
2046 /* TGSI_OPCODE_USHR (CPU Only) */
2047 static void
2048 ushr_emit_cpu(
2049 const struct lp_build_tgsi_action * action,
2050 struct lp_build_tgsi_context * bld_base,
2051 struct lp_build_emit_data * emit_data)
2052 {
2053 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2054 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2055 uint_bld->type.width - 1);
2056 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2057 emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
2058 masked_count);
2059 }
2060
2061 /* TGSI_OPCODE_ISLT (CPU Only) */
2062 static void
2063 uslt_emit_cpu(
2064 const struct lp_build_tgsi_action * action,
2065 struct lp_build_tgsi_context * bld_base,
2066 struct lp_build_emit_data * emit_data)
2067 {
2068 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2069 }
2070
2071 /* TGSI_OPCODE_USNE (CPU Only) */
2072
2073 static void
2074 usne_emit_cpu(
2075 const struct lp_build_tgsi_action * action,
2076 struct lp_build_tgsi_context * bld_base,
2077 struct lp_build_emit_data * emit_data)
2078 {
2079 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2080 }
2081
2082 /* TGSI_OPCODE_XOR */
2083 static void
2084 xor_emit_cpu(
2085 const struct lp_build_tgsi_action * action,
2086 struct lp_build_tgsi_context * bld_base,
2087 struct lp_build_emit_data * emit_data)
2088 {
2089 emit_data->output[emit_data->chan] = lp_build_xor(&bld_base->uint_bld,
2090 emit_data->args[0],
2091 emit_data->args[1]);
2092 }
2093
2094 /* TGSI_OPCODE_DABS (CPU Only) */
2095 static void
2096 dabs_emit_cpu(
2097 const struct lp_build_tgsi_action * action,
2098 struct lp_build_tgsi_context * bld_base,
2099 struct lp_build_emit_data * emit_data)
2100 {
2101 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
2102 emit_data->args[0]);
2103 }
2104
2105 /* TGSI_OPCODE_DNEG (CPU Only) */
2106 static void
2107 dneg_emit_cpu(
2108 const struct lp_build_tgsi_action * action,
2109 struct lp_build_tgsi_context * bld_base,
2110 struct lp_build_emit_data * emit_data)
2111 {
2112 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
2113 bld_base->dbl_bld.zero,
2114 emit_data->args[0]);
2115 }
2116
2117 /* TGSI_OPCODE_DSET Helper (CPU Only) */
2118 static void
2119 dset_emit_cpu(
2120 const struct lp_build_tgsi_action * action,
2121 struct lp_build_tgsi_context * bld_base,
2122 struct lp_build_emit_data * emit_data,
2123 unsigned pipe_func)
2124 {
2125 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2126 LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
2127 emit_data->args[0], emit_data->args[1]);
2128 /* arguments were 64 bit but store as 32 bit */
2129 cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2130 emit_data->output[emit_data->chan] = cond;
2131 }
2132
2133 /* TGSI_OPCODE_DSEQ (CPU Only) */
2134 static void
2135 dseq_emit_cpu(
2136 const struct lp_build_tgsi_action * action,
2137 struct lp_build_tgsi_context * bld_base,
2138 struct lp_build_emit_data * emit_data)
2139 {
2140 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2141 }
2142
2143 /* TGSI_OPCODE_DSGE (CPU Only) */
2144 static void
2145 dsge_emit_cpu(
2146 const struct lp_build_tgsi_action * action,
2147 struct lp_build_tgsi_context * bld_base,
2148 struct lp_build_emit_data * emit_data)
2149 {
2150 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2151 }
2152
2153 /* TGSI_OPCODE_DSLT (CPU Only) */
2154 static void
2155 dslt_emit_cpu(
2156 const struct lp_build_tgsi_action * action,
2157 struct lp_build_tgsi_context * bld_base,
2158 struct lp_build_emit_data * emit_data)
2159 {
2160 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2161 }
2162
2163 /* TGSI_OPCODE_DSNE (CPU Only) */
2164 static void
2165 dsne_emit_cpu(
2166 const struct lp_build_tgsi_action * action,
2167 struct lp_build_tgsi_context * bld_base,
2168 struct lp_build_emit_data * emit_data)
2169 {
2170 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2171 }
2172
2173 /* Double Reciprocal squareroot (CPU Only) */
2174 static void
2175 drecip_sqrt_emit_cpu(
2176 const struct lp_build_tgsi_action * action,
2177 struct lp_build_tgsi_context * bld_base,
2178 struct lp_build_emit_data * emit_data)
2179 {
2180 emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
2181 emit_data->args[0]);
2182 }
2183
2184 /* Double Squareroot (CPU Only) */
2185 static void
2186 dsqrt_emit_cpu(
2187 const struct lp_build_tgsi_action * action,
2188 struct lp_build_tgsi_context * bld_base,
2189 struct lp_build_emit_data * emit_data)
2190 {
2191 emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
2192 emit_data->args[0]);
2193 }
2194
2195 static void
2196 i64abs_emit_cpu(
2197 const struct lp_build_tgsi_action * action,
2198 struct lp_build_tgsi_context * bld_base,
2199 struct lp_build_emit_data * emit_data)
2200 {
2201 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int64_bld,
2202 emit_data->args[0]);
2203 }
2204
2205 static void
2206 i64ssg_emit_cpu(
2207 const struct lp_build_tgsi_action * action,
2208 struct lp_build_tgsi_context * bld_base,
2209 struct lp_build_emit_data * emit_data)
2210 {
2211 emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int64_bld,
2212 emit_data->args[0]);
2213 }
2214
2215 static void
2216 i64neg_emit_cpu(
2217 const struct lp_build_tgsi_action * action,
2218 struct lp_build_tgsi_context * bld_base,
2219 struct lp_build_emit_data * emit_data)
2220 {
2221 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int64_bld,
2222 bld_base->int64_bld.zero,
2223 emit_data->args[0]);
2224 }
2225
2226 static void
2227 u64set_emit_cpu(
2228 const struct lp_build_tgsi_action * action,
2229 struct lp_build_tgsi_context * bld_base,
2230 struct lp_build_emit_data * emit_data,
2231 unsigned pipe_func)
2232 {
2233 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2234 LLVMValueRef cond = lp_build_cmp(&bld_base->uint64_bld, pipe_func,
2235 emit_data->args[0], emit_data->args[1]);
2236 /* arguments were 64 bit but store as 32 bit */
2237 cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2238 emit_data->output[emit_data->chan] = cond;
2239 }
2240
2241 static void
2242 u64seq_emit_cpu(
2243 const struct lp_build_tgsi_action * action,
2244 struct lp_build_tgsi_context * bld_base,
2245 struct lp_build_emit_data * emit_data)
2246 {
2247 u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2248 }
2249
2250 static void
2251 u64sne_emit_cpu(
2252 const struct lp_build_tgsi_action * action,
2253 struct lp_build_tgsi_context * bld_base,
2254 struct lp_build_emit_data * emit_data)
2255 {
2256 u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2257 }
2258
2259 static void
2260 u64slt_emit_cpu(
2261 const struct lp_build_tgsi_action * action,
2262 struct lp_build_tgsi_context * bld_base,
2263 struct lp_build_emit_data * emit_data)
2264 {
2265 u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2266 }
2267
2268 static void
2269 u64sge_emit_cpu(
2270 const struct lp_build_tgsi_action * action,
2271 struct lp_build_tgsi_context * bld_base,
2272 struct lp_build_emit_data * emit_data)
2273 {
2274 u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2275 }
2276
2277 static void
2278 i64set_emit_cpu(
2279 const struct lp_build_tgsi_action * action,
2280 struct lp_build_tgsi_context * bld_base,
2281 struct lp_build_emit_data * emit_data,
2282 unsigned pipe_func)
2283 {
2284 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2285 LLVMValueRef cond = lp_build_cmp(&bld_base->int64_bld, pipe_func,
2286 emit_data->args[0], emit_data->args[1]);
2287 /* arguments were 64 bit but store as 32 bit */
2288 cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2289 emit_data->output[emit_data->chan] = cond;
2290 }
2291
2292 static void
2293 i64slt_emit_cpu(
2294 const struct lp_build_tgsi_action * action,
2295 struct lp_build_tgsi_context * bld_base,
2296 struct lp_build_emit_data * emit_data)
2297 {
2298 i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2299 }
2300
2301 static void
2302 i64sge_emit_cpu(
2303 const struct lp_build_tgsi_action * action,
2304 struct lp_build_tgsi_context * bld_base,
2305 struct lp_build_emit_data * emit_data)
2306 {
2307 i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2308 }
2309
2310 static void
2311 u64max_emit_cpu(
2312 const struct lp_build_tgsi_action * action,
2313 struct lp_build_tgsi_context * bld_base,
2314 struct lp_build_emit_data * emit_data)
2315 {
2316 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint64_bld,
2317 emit_data->args[0], emit_data->args[1]);
2318 }
2319
2320 static void
2321 u64min_emit_cpu(
2322 const struct lp_build_tgsi_action * action,
2323 struct lp_build_tgsi_context * bld_base,
2324 struct lp_build_emit_data * emit_data)
2325 {
2326 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint64_bld,
2327 emit_data->args[0], emit_data->args[1]);
2328 }
2329
2330 static void
2331 i64max_emit_cpu(
2332 const struct lp_build_tgsi_action * action,
2333 struct lp_build_tgsi_context * bld_base,
2334 struct lp_build_emit_data * emit_data)
2335 {
2336 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int64_bld,
2337 emit_data->args[0], emit_data->args[1]);
2338 }
2339
2340 static void
2341 i64min_emit_cpu(
2342 const struct lp_build_tgsi_action * action,
2343 struct lp_build_tgsi_context * bld_base,
2344 struct lp_build_emit_data * emit_data)
2345 {
2346 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int64_bld,
2347 emit_data->args[0], emit_data->args[1]);
2348 }
2349
2350 static void
2351 u64add_emit_cpu(
2352 const struct lp_build_tgsi_action * action,
2353 struct lp_build_tgsi_context * bld_base,
2354 struct lp_build_emit_data * emit_data)
2355 {
2356 emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint64_bld,
2357 emit_data->args[0], emit_data->args[1]);
2358 }
2359
2360 static void
2361 u64shl_emit_cpu(
2362 const struct lp_build_tgsi_action * action,
2363 struct lp_build_tgsi_context * bld_base,
2364 struct lp_build_emit_data * emit_data)
2365 {
2366 struct lp_build_context *uint_bld = &bld_base->uint64_bld;
2367 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2368 uint_bld->type.width - 1);
2369 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2370 emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
2371 masked_count);
2372 }
2373
2374 static void
2375 i64shr_emit_cpu(
2376 const struct lp_build_tgsi_action * action,
2377 struct lp_build_tgsi_context * bld_base,
2378 struct lp_build_emit_data * emit_data)
2379 {
2380 struct lp_build_context *int_bld = &bld_base->int64_bld;
2381 LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
2382 int_bld->type.width - 1);
2383 LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
2384 emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
2385 masked_count);
2386 }
2387
2388 static void
2389 u64shr_emit_cpu(
2390 const struct lp_build_tgsi_action * action,
2391 struct lp_build_tgsi_context * bld_base,
2392 struct lp_build_emit_data * emit_data)
2393 {
2394 struct lp_build_context *uint_bld = &bld_base->uint64_bld;
2395 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2396 uint_bld->type.width - 1);
2397 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2398 emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
2399 masked_count);
2400 }
2401 static void bfi_emit_cpu(const struct lp_build_tgsi_action *action,
2402 struct lp_build_tgsi_context *bld_base,
2403 struct lp_build_emit_data *emit_data) {
2404 /*
2405 * def bfi(base, insert, offset, bits):
2406 * if offset < 0 or bits < 0 or offset + bits > 32:
2407 * return undefined
2408 * # << defined such that mask == ~0 when bits == 32, offset == 0
2409 * mask = ((1 << bits) - 1) << offset
2410 * return ((insert << offset) & mask) | (base & ~mask)
2411 */
2412 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2413 LLVMValueRef one_shl_bits_dec_one = lp_build_sub(
2414 uint_bld, lp_build_shl(uint_bld, uint_bld->one, emit_data->args[3]),
2415 uint_bld->one);
2416 LLVMValueRef mask =
2417 lp_build_shl(uint_bld, one_shl_bits_dec_one, emit_data->args[2]);
2418 LLVMValueRef insert_shl_offset =
2419 lp_build_shl(uint_bld, emit_data->args[1], emit_data->args[2]);
2420 LLVMValueRef insert_shl_offset_and_mask =
2421 lp_build_and(uint_bld, insert_shl_offset, mask);
2422 LLVMValueRef base_and_not_mask =
2423 lp_build_and(uint_bld, emit_data->args[0], lp_build_not(uint_bld, mask));
2424
2425 emit_data->output[emit_data->chan] =
2426 lp_build_or(uint_bld, insert_shl_offset_and_mask, base_and_not_mask);
2427 }
2428
2429 static void lsb_emit_cpu(const struct lp_build_tgsi_action *action,
2430 struct lp_build_tgsi_context *bld_base,
2431 struct lp_build_emit_data *emit_data) {
2432 struct lp_build_context *uint_bld = &bld_base->int_bld;
2433
2434 LLVMValueRef result = lp_build_cttz(uint_bld, emit_data->args[0]);
2435 LLVMValueRef cond =
2436 lp_build_cmp(uint_bld, PIPE_FUNC_LESS, result,
2437 lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 32));
2438 emit_data->output[emit_data->chan] = lp_build_select(
2439 uint_bld, cond, result,
2440 lp_build_const_vec(uint_bld->gallivm, uint_bld->type, -1));
2441 }
2442
2443 static void umsb_emit_cpu(const struct lp_build_tgsi_action *action,
2444 struct lp_build_tgsi_context *bld_base,
2445 struct lp_build_emit_data *emit_data) {
2446 struct lp_build_context *uint_bld = &bld_base->int_bld;
2447 emit_data->output[emit_data->chan] = lp_build_sub(
2448 uint_bld, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 31),
2449 lp_build_ctlz(uint_bld, emit_data->args[0]));
2450 }
2451
2452 static void imsb_emit_cpu(const struct lp_build_tgsi_action *action,
2453 struct lp_build_tgsi_context *bld_base,
2454 struct lp_build_emit_data *emit_data) {
2455 struct lp_build_context *uint_bld = &bld_base->int_bld;
2456
2457 LLVMValueRef cond =
2458 lp_build_cmp(uint_bld, PIPE_FUNC_LESS, emit_data->args[0],
2459 lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 0));
2460 emit_data->args[0] = lp_build_select(
2461 uint_bld, cond, lp_build_not(uint_bld, emit_data->args[0]),
2462 emit_data->args[0]);
2463 umsb_emit_cpu(action, bld_base, emit_data);
2464 }
2465
2466 static void popc_emit_cpu(const struct lp_build_tgsi_action *action,
2467 struct lp_build_tgsi_context *bld_base,
2468 struct lp_build_emit_data *emit_data) {
2469 struct lp_build_context *uint_bld = &bld_base->int_bld;
2470 emit_data->output[emit_data->chan] =
2471 lp_build_popcount(uint_bld, emit_data->args[0]);
2472 }
2473
2474 static void ibfe_emit_cpu(const struct lp_build_tgsi_action *action,
2475 struct lp_build_tgsi_context *bld_base,
2476 struct lp_build_emit_data *emit_data) {
2477 /* def ibfe(value, offset, bits):
2478 * if offset < 0 or bits < 0 or offset + bits > 32:
2479 * return undefined
2480 * if bits == 0: return 0
2481 * # Note: >> sign-extends
2482 * return (value << (32 - offset - bits)) >> (32 - bits)
2483 */
2484 struct lp_build_context *uint_bld = &bld_base->int_bld;
2485
2486 LLVMValueRef r_32_sub_bits = lp_build_sub(
2487 uint_bld, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 32),
2488 emit_data->args[2]);
2489 LLVMValueRef temp1 =
2490 lp_build_sub(uint_bld, r_32_sub_bits, emit_data->args[1]);
2491 LLVMValueRef temp2 = lp_build_shl(uint_bld, emit_data->args[0], temp1);
2492 LLVMValueRef cond =
2493 lp_build_cmp(uint_bld, PIPE_FUNC_EQUAL, emit_data->args[2],
2494 lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 0));
2495 emit_data->output[emit_data->chan] = lp_build_select(
2496 uint_bld, cond, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 0),
2497 lp_build_shr(uint_bld, temp2, r_32_sub_bits));
2498 }
2499
2500 static void ubfe_emit_cpu(const struct lp_build_tgsi_action *action,
2501 struct lp_build_tgsi_context *bld_base,
2502 struct lp_build_emit_data *emit_data) {
2503 /* def ubfe(value, offset, bits):
2504 * if offset < 0 or bits < 0 or offset + bits > 32:
2505 * return undefined
2506 * if bits == 0: return 0
2507 * # Note: >> does not sign-extend
2508 * return (value << (32 - offset - bits)) >> (32 - bits)
2509 */
2510 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2511
2512 LLVMValueRef r_32_sub_bits = lp_build_sub(
2513 uint_bld, lp_build_const_vec(uint_bld->gallivm, uint_bld->type, 32),
2514 emit_data->args[2]);
2515 LLVMValueRef temp1 =
2516 lp_build_sub(uint_bld, r_32_sub_bits, emit_data->args[1]);
2517 LLVMValueRef temp2 = lp_build_shl(uint_bld, emit_data->args[0], temp1);
2518 emit_data->output[emit_data->chan] =
2519 lp_build_shr(uint_bld, temp2, r_32_sub_bits);
2520 }
2521
2522 static void brev_emit_cpu(const struct lp_build_tgsi_action *action,
2523 struct lp_build_tgsi_context *bld_base,
2524 struct lp_build_emit_data *emit_data) {
2525 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2526 emit_data->output[emit_data->chan] =
2527 lp_build_bitfield_reverse(uint_bld, emit_data->args[0]);
2528 }
2529
2530 void
2531 lp_set_default_actions_cpu(
2532 struct lp_build_tgsi_context * bld_base)
2533 {
2534 lp_set_default_actions(bld_base);
2535 bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
2536 bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
2537 bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
2538 bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
2539 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
2540 bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
2541 bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
2542 bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
2543 bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
2544 bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
2545 bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
2546 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
2547 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;
2548 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = fslt_emit_cpu;
2549 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = fsne_emit_cpu;
2550
2551 bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
2552 bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
2553 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu;
2554 bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu;
2555 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu;
2556 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = imin_emit_cpu;
2557 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu;
2558 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
2559 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
2560 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
2561 bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu;
2562 bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu;
2563
2564 bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
2565 bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
2566 bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit_cpu;
2567 bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
2568 bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
2569 bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu;
2570 bld_base->op_actions[TGSI_OPCODE_NOT].emit = not_emit_cpu;
2571 bld_base->op_actions[TGSI_OPCODE_OR].emit = or_emit_cpu;
2572 bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
2573 bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
2574 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
2575 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
2576 bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
2577 bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
2578 bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
2579 bld_base->op_actions[TGSI_OPCODE_SHL].emit = shl_emit_cpu;
2580 bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
2581 bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
2582 bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
2583 bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
2584 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
2585
2586 bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
2587 bld_base->sqrt_action.emit = sqrt_emit_cpu;
2588
2589 bld_base->op_actions[TGSI_OPCODE_UADD].emit = uadd_emit_cpu;
2590 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = ucmp_emit_cpu;
2591 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = udiv_emit_cpu;
2592 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = umax_emit_cpu;
2593 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = umin_emit_cpu;
2594 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = umod_emit_cpu;
2595 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = useq_emit_cpu;
2596 bld_base->op_actions[TGSI_OPCODE_USGE].emit = usge_emit_cpu;
2597 bld_base->op_actions[TGSI_OPCODE_USHR].emit = ushr_emit_cpu;
2598 bld_base->op_actions[TGSI_OPCODE_USLT].emit = uslt_emit_cpu;
2599 bld_base->op_actions[TGSI_OPCODE_USNE].emit = usne_emit_cpu;
2600
2601 bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
2602
2603 bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
2604 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
2605 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
2606 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
2607 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
2608 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
2609
2610 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
2611 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
2612
2613 bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = i64abs_emit_cpu;
2614 bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = i64ssg_emit_cpu;
2615 bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = i64neg_emit_cpu;
2616
2617 bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = u64seq_emit_cpu;
2618 bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = u64sne_emit_cpu;
2619 bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = u64slt_emit_cpu;
2620 bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = u64sge_emit_cpu;
2621 bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = i64slt_emit_cpu;
2622 bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = i64sge_emit_cpu;
2623
2624 bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = u64min_emit_cpu;
2625 bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = u64max_emit_cpu;
2626 bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = i64min_emit_cpu;
2627 bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = i64max_emit_cpu;
2628
2629 bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = u64add_emit_cpu;
2630 bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = u64mod_emit_cpu;
2631 bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = i64mod_emit_cpu;
2632 bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = u64div_emit_cpu;
2633 bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = i64div_emit_cpu;
2634
2635 bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = u64shl_emit_cpu;
2636 bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = i64shr_emit_cpu;
2637 bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = u64shr_emit_cpu;
2638
2639 bld_base->op_actions[TGSI_OPCODE_BFI].emit = bfi_emit_cpu;
2640 bld_base->op_actions[TGSI_OPCODE_POPC].emit = popc_emit_cpu;
2641 bld_base->op_actions[TGSI_OPCODE_LSB].emit = lsb_emit_cpu;
2642 bld_base->op_actions[TGSI_OPCODE_IMSB].emit = imsb_emit_cpu;
2643 bld_base->op_actions[TGSI_OPCODE_UMSB].emit = umsb_emit_cpu;
2644 bld_base->op_actions[TGSI_OPCODE_IBFE].emit = ibfe_emit_cpu;
2645 bld_base->op_actions[TGSI_OPCODE_UBFE].emit = ubfe_emit_cpu;
2646 bld_base->op_actions[TGSI_OPCODE_BREV].emit = brev_emit_cpu;
2647
2648 }