Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_action.c
1 /**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * Copyright 2007-2008 VMware, Inc.
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial portions
18 * of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 **************************************************************************/
29
30 /**
31 * @file
32 * TGSI to LLVM IR translation.
33 *
34 * @author Jose Fonseca <jfonseca@vmware.com>
35 * @author Tom Stellard <thomas.stellard@amd.com>
36 *
37 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
38 * Brian Paul, and others.
39 */
40
41
42 #include "lp_bld_tgsi_action.h"
43
44 #include "lp_bld_tgsi.h"
45 #include "lp_bld_arit.h"
46 #include "lp_bld_bitarit.h"
47 #include "lp_bld_const.h"
48 #include "lp_bld_conv.h"
49 #include "lp_bld_gather.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_pack.h"
52
53 #include "tgsi/tgsi_exec.h"
54
/* XXX: The CPU only defaults should be replaced by generic ones. In most
 * cases, the CPU defaults are just wrappers around a function in
 * lp_build_arit.c and these functions should be inlined here and the CPU
 * generic code should be removed and placed elsewhere.
 */
60
61 /* Default actions */
62
63 /* Generic fetch_arg functions */
64
65 static void scalar_unary_fetch_args(
66 struct lp_build_tgsi_context * bld_base,
67 struct lp_build_emit_data * emit_data)
68 {
69 /* src0.x */
70 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
71 emit_data->arg_count = 1;
72 emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
73 }
74
75 static void scalar_binary_fetch_args(
76 struct lp_build_tgsi_context * bld_base,
77 struct lp_build_emit_data * emit_data)
78 {
79 /* src0.x */
80 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
81 0, TGSI_CHAN_X);
82 /* src1.x */
83 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
84 1, TGSI_CHAN_X);
85 emit_data->arg_count = 2;
86 emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
87 }
88
89 /* TGSI_OPCODE_ADD */
90 static void
91 add_emit(
92 const struct lp_build_tgsi_action * action,
93 struct lp_build_tgsi_context * bld_base,
94 struct lp_build_emit_data * emit_data)
95 {
96 emit_data->output[emit_data->chan] = LLVMBuildFAdd(
97 bld_base->base.gallivm->builder,
98 emit_data->args[0], emit_data->args[1], "");
99 }
100
101 /* TGSI_OPCODE_ARR */
102 static void
103 arr_emit(
104 const struct lp_build_tgsi_action * action,
105 struct lp_build_tgsi_context * bld_base,
106 struct lp_build_emit_data * emit_data)
107 {
108 LLVMValueRef tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ROUND, emit_data->args[0]);
109 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
110 bld_base->uint_bld.vec_type, "");
111 }
112
113 /* TGSI_OPCODE_CLAMP */
114 static void
115 clamp_emit(
116 const struct lp_build_tgsi_action * action,
117 struct lp_build_tgsi_context * bld_base,
118 struct lp_build_emit_data * emit_data)
119 {
120 LLVMValueRef tmp;
121 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
122 emit_data->args[0],
123 emit_data->args[1]);
124 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
125 TGSI_OPCODE_MIN, tmp, emit_data->args[2]);
126 }
127
128 /* DP* Helper */
129
130 static void
131 dp_fetch_args(
132 struct lp_build_tgsi_context * bld_base,
133 struct lp_build_emit_data * emit_data,
134 unsigned dp_components)
135 {
136 unsigned chan, src;
137 for (src = 0; src < 2; src++) {
138 for (chan = 0; chan < dp_components; chan++) {
139 emit_data->args[(src * dp_components) + chan] =
140 lp_build_emit_fetch(bld_base, emit_data->inst, src, chan);
141 }
142 }
143 emit_data->dst_type = bld_base->base.elem_type;
144 }
145
146 /* TGSI_OPCODE_DP2 */
/**
 * Fetch arguments for a 2-component dot product: args[0..1] are src0.xy and
 * args[2..3] are src1.xy (layout produced by dp_fetch_args).
 */
static void
dp2_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   const unsigned components = 2;

   dp_fetch_args(bld_base, emit_data, components);
}
154
155 static void
156 dp2_emit(
157 const struct lp_build_tgsi_action * action,
158 struct lp_build_tgsi_context * bld_base,
159 struct lp_build_emit_data * emit_data)
160 {
161 LLVMValueRef tmp0, tmp1;
162 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
163 emit_data->args[0] /* src0.x */,
164 emit_data->args[2] /* src1.x */);
165 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
166 emit_data->args[1] /* src0.y */,
167 emit_data->args[3] /* src1.y */);
168 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
169 TGSI_OPCODE_ADD, tmp0, tmp1);
170 }
171
172 static struct lp_build_tgsi_action dp2_action = {
173 dp2_fetch_args, /* fetch_args */
174 dp2_emit /* emit */
175 };
176
177 /* TGSI_OPCODE_DP2A */
178 static void
179 dp2a_fetch_args(
180 struct lp_build_tgsi_context * bld_base,
181 struct lp_build_emit_data * emit_data)
182 {
183 dp_fetch_args(bld_base, emit_data, 2);
184 emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
185 2, TGSI_CHAN_X);
186 }
187
188 static void
189 dp2a_emit(
190 const struct lp_build_tgsi_action * action,
191 struct lp_build_tgsi_context * bld_base,
192 struct lp_build_emit_data * emit_data)
193 {
194 LLVMValueRef tmp;
195 tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
196 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
197 emit_data->args[5], tmp);
198 }
199
200 static struct lp_build_tgsi_action dp2a_action = {
201 dp2a_fetch_args, /* fetch_args */
202 dp2a_emit /* emit */
203 };
204
205 /* TGSI_OPCODE_DP3 */
/**
 * Fetch arguments for a 3-component dot product: args[0..2] are src0.xyz and
 * args[3..5] are src1.xyz (layout produced by dp_fetch_args).
 */
static void
dp3_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   const unsigned components = 3;

   dp_fetch_args(bld_base, emit_data, components);
}
213
214 static void
215 dp3_emit(
216 const struct lp_build_tgsi_action * action,
217 struct lp_build_tgsi_context * bld_base,
218 struct lp_build_emit_data * emit_data)
219 {
220 LLVMValueRef tmp0, tmp1;
221 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
222 emit_data->args[0] /* src0.x */,
223 emit_data->args[3] /* src1.x */);
224 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
225 emit_data->args[1] /* src0.y */,
226 emit_data->args[4] /* src1.y */);
227 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0);
228 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
229 emit_data->args[2] /* src0.z */,
230 emit_data->args[5] /* src1.z */);
231 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
232 TGSI_OPCODE_ADD, tmp0, tmp1);
233 }
234
235 static struct lp_build_tgsi_action dp3_action = {
236 dp3_fetch_args, /* fetch_args */
237 dp3_emit /* emit */
238 };
239
/* TGSI_OPCODE_DP4 */
241
/**
 * Fetch arguments for a 4-component dot product: args[0..3] are src0.xyzw
 * and args[4..7] are src1.xyzw (layout produced by dp_fetch_args).
 */
static void
dp4_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   const unsigned components = 4;

   dp_fetch_args(bld_base, emit_data, components);
}
249
250 static void
251 dp4_emit(
252 const struct lp_build_tgsi_action * action,
253 struct lp_build_tgsi_context * bld_base,
254 struct lp_build_emit_data * emit_data)
255 {
256 LLVMValueRef tmp0, tmp1;
257 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
258 emit_data->args[0] /* src0.x */,
259 emit_data->args[4] /* src1.x */);
260 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
261 emit_data->args[1] /* src0.y */,
262 emit_data->args[5] /* src1.y */);
263 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
264 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
265 emit_data->args[2] /* src0.z */,
266 emit_data->args[6] /* src1.z */);
267 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
268 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
269 emit_data->args[3] /* src0.w */,
270 emit_data->args[7] /* src1.w */);
271 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
272 TGSI_OPCODE_ADD, tmp0, tmp1);
273 }
274
275 static struct lp_build_tgsi_action dp4_action = {
276 dp4_fetch_args, /* fetch_args */
277 dp4_emit /* emit */
278 };
279
280 /* TGSI_OPCODE_DPH */
281 static void
282 dph_fetch_args(
283 struct lp_build_tgsi_context * bld_base,
284 struct lp_build_emit_data * emit_data)
285 {
286 dp_fetch_args(bld_base, emit_data, 4);
287 /* src0.w */
288 emit_data->args[3] = bld_base->base.one;
289 }
290
291 const struct lp_build_tgsi_action dph_action = {
292 dph_fetch_args, /* fetch_args */
293 dp4_emit /* emit */
294 };
295
296 /* TGSI_OPCODE_DST */
297 static void
298 dst_fetch_args(
299 struct lp_build_tgsi_context * bld_base,
300 struct lp_build_emit_data * emit_data)
301 {
302 /* src0.y */
303 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
304 0, TGSI_CHAN_Y);
305 /* src0.z */
306 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
307 0, TGSI_CHAN_Z);
308 /* src1.y */
309 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
310 1, TGSI_CHAN_Y);
311 /* src1.w */
312 emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
313 1, TGSI_CHAN_W);
314 }
315
316 static void
317 dst_emit(
318 const struct lp_build_tgsi_action * action,
319 struct lp_build_tgsi_context * bld_base,
320 struct lp_build_emit_data * emit_data)
321 {
322 /* dst.x */
323 emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
324
325 /* dst.y */
326 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
327 TGSI_OPCODE_MUL,
328 emit_data->args[0] /* src0.y */,
329 emit_data->args[2] /* src1.y */);
330 /* dst.z */
331 emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
332
333 /* dst.w */
334 emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
335 }
336
337 static struct lp_build_tgsi_action dst_action = {
338 dst_fetch_args, /* fetch_args */
339 dst_emit /* emit */
340 };
341
342 /* TGSI_OPCODE_END */
343 static void
344 end_emit(
345 const struct lp_build_tgsi_action * action,
346 struct lp_build_tgsi_context * bld_base,
347 struct lp_build_emit_data * emit_data)
348 {
349 bld_base->pc = -1;
350 }
351
352 /* TGSI_OPCODE_EXP */
353
354 static void
355 exp_emit(
356 const struct lp_build_tgsi_action * action,
357 struct lp_build_tgsi_context * bld_base,
358 struct lp_build_emit_data * emit_data)
359 {
360 LLVMValueRef floor_x;
361
362 /* floor( src0.x ) */
363 floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
364 emit_data->args[0]);
365
366 /* 2 ^ floor( src0.x ) */
367 emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
368 TGSI_OPCODE_EX2, floor_x);
369
370 /* src0.x - floor( src0.x ) */
371 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
372 TGSI_OPCODE_SUB, emit_data->args[0] /* src0.x */, floor_x);
373
374 /* 2 ^ src0.x */
375 emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base,
376 TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */);
377
378 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
379 }
380
381 const struct lp_build_tgsi_action exp_action = {
382 scalar_unary_fetch_args, /* fetch_args */
383 exp_emit /* emit */
384 };
385
386 /* TGSI_OPCODE_FRC */
387
388 static void
389 frc_emit(
390 const struct lp_build_tgsi_action * action,
391 struct lp_build_tgsi_context * bld_base,
392 struct lp_build_emit_data * emit_data)
393 {
394 LLVMValueRef tmp;
395 tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
396 emit_data->args[0]);
397 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
398 TGSI_OPCODE_SUB, emit_data->args[0], tmp);
399 }
400
401 /* TGSI_OPCODE_KILL_IF */
402
403 static void
404 kil_fetch_args(
405 struct lp_build_tgsi_context * bld_base,
406 struct lp_build_emit_data * emit_data)
407 {
408 /* src0.x */
409 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
410 0, TGSI_CHAN_X);
411 /* src0.y */
412 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
413 0, TGSI_CHAN_Y);
414 /* src0.z */
415 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
416 0, TGSI_CHAN_Z);
417 /* src0.w */
418 emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
419 0, TGSI_CHAN_W);
420 emit_data->arg_count = 4;
421 emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
422 }
423
424 /* TGSI_OPCODE_KILL */
425
426 static void
427 kilp_fetch_args(
428 struct lp_build_tgsi_context * bld_base,
429 struct lp_build_emit_data * emit_data)
430 {
431 emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
432 }
433
434 /* TGSI_OPCODE_LIT */
435
436 static void
437 lit_fetch_args(
438 struct lp_build_tgsi_context * bld_base,
439 struct lp_build_emit_data * emit_data)
440 {
441 /* src0.x */
442 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
443 /* src0.y */
444 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
445 /* src0.w */
446 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
447 emit_data->arg_count = 3;
448 }
449
450 static void
451 lit_emit(
452 const struct lp_build_tgsi_action * action,
453 struct lp_build_tgsi_context * bld_base,
454 struct lp_build_emit_data * emit_data)
455 {
456 LLVMValueRef tmp0, tmp1, tmp2;
457
458 /* dst.x */
459 emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
460
461 /* dst. y */
462 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
463 TGSI_OPCODE_MAX,
464 emit_data->args[0] /* src0.x */,
465 bld_base->base.zero);
466
467 /* dst.z */
468 /* XMM[1] = SrcReg[0].yyyy */
469 tmp1 = emit_data->args[1];
470 /* XMM[1] = max(XMM[1], 0) */
471 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
472 tmp1, bld_base->base.zero);
473 /* XMM[2] = SrcReg[0].wwww */
474 tmp2 = emit_data->args[2];
475 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
476 tmp1, tmp2);
477 tmp0 = emit_data->args[0];
478 emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base,
479 TGSI_OPCODE_CMP,
480 tmp0, bld_base->base.zero, tmp1);
481 /* dst.w */
482 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
483 }
484
485 static struct lp_build_tgsi_action lit_action = {
486 lit_fetch_args, /* fetch_args */
487 lit_emit /* emit */
488 };
489
490 /* TGSI_OPCODE_LOG */
491
492 static void
493 log_emit(
494 const struct lp_build_tgsi_action * action,
495 struct lp_build_tgsi_context * bld_base,
496 struct lp_build_emit_data * emit_data)
497 {
498
499 LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
500
501 /* abs( src0.x) */
502 abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
503 emit_data->args[0] /* src0.x */);
504
505 /* log( abs( src0.x ) ) */
506 log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
507 abs_x);
508
509 /* floor( log( abs( src0.x ) ) ) */
510 flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
511 log_abs_x);
512 /* dst.x */
513 emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
514
515 /* dst.y */
516 ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
517 flr_log_abs_x);
518
519 /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
520 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
521 TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x);
522
523 /* dst.x */
524 emit_data->output[TGSI_CHAN_Z] = log_abs_x;
525
526 /* dst.w */
527 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
528 }
529
530 static struct lp_build_tgsi_action log_action = {
531 scalar_unary_fetch_args, /* fetch_args */
532 log_emit /* emit */
533 };
534
535 /* TGSI_OPCODE_PK2H */
536
537 static void
538 pk2h_fetch_args(
539 struct lp_build_tgsi_context * bld_base,
540 struct lp_build_emit_data * emit_data)
541 {
542 /* src0.x */
543 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
544 0, TGSI_CHAN_X);
545 /* src0.y */
546 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
547 0, TGSI_CHAN_Y);
548 }
549
550 static void
551 pk2h_emit(
552 const struct lp_build_tgsi_action *action,
553 struct lp_build_tgsi_context *bld_base,
554 struct lp_build_emit_data *emit_data)
555 {
556 struct gallivm_state *gallivm = bld_base->base.gallivm;
557 struct lp_type f16i_t;
558 LLVMValueRef lo, hi, res;
559
560 f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
561 lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
562 hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
563 /* maybe some interleave doubling vector width would be useful... */
564 lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
565 hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
566 res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
567
568 emit_data->output[emit_data->chan] = res;
569 }
570
571 static struct lp_build_tgsi_action pk2h_action = {
572 pk2h_fetch_args, /* fetch_args */
573 pk2h_emit /* emit */
574 };
575
576 /* TGSI_OPCODE_UP2H */
577
578 static void
579 up2h_emit(
580 const struct lp_build_tgsi_action *action,
581 struct lp_build_tgsi_context *bld_base,
582 struct lp_build_emit_data *emit_data)
583 {
584 struct gallivm_state *gallivm = bld_base->base.gallivm;
585 LLVMBuilderRef builder = gallivm->builder;
586 LLVMContextRef context = gallivm->context;
587 LLVMValueRef lo, hi, res[2], arg;
588 unsigned nr = bld_base->base.type.length;
589 LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
590
591 arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
592 lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
593 hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
594 res[0] = lp_build_half_to_float(gallivm, lo);
595 res[1] = lp_build_half_to_float(gallivm, hi);
596
597 emit_data->output[0] = emit_data->output[2] = res[0];
598 emit_data->output[1] = emit_data->output[3] = res[1];
599 }
600
601 static struct lp_build_tgsi_action up2h_action = {
602 scalar_unary_fetch_args, /* fetch_args */
603 up2h_emit /* emit */
604 };
605
606 /* TGSI_OPCODE_LRP */
607
608 static void
609 lrp_emit(
610 const struct lp_build_tgsi_action * action,
611 struct lp_build_tgsi_context * bld_base,
612 struct lp_build_emit_data * emit_data)
613 {
614 struct lp_build_context *bld = &bld_base->base;
615 LLVMValueRef inv, a, b;
616
617 /* This uses the correct version: (1 - t)*a + t*b
618 *
619 * An alternative version is "a + t*(b-a)". The problem is this version
620 * doesn't return "b" for t = 1, because "a + (b-a)" isn't equal to "b"
621 * because of the floating-point rounding.
622 */
623 inv = lp_build_sub(bld, bld_base->base.one, emit_data->args[0]);
624 a = lp_build_mul(bld, emit_data->args[1], emit_data->args[0]);
625 b = lp_build_mul(bld, emit_data->args[2], inv);
626 emit_data->output[emit_data->chan] = lp_build_add(bld, a, b);
627 }
628
629 /* TGSI_OPCODE_MAD */
630
631 static void
632 mad_emit(
633 const struct lp_build_tgsi_action * action,
634 struct lp_build_tgsi_context * bld_base,
635 struct lp_build_emit_data * emit_data)
636 {
637 LLVMValueRef tmp;
638 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
639 emit_data->args[0],
640 emit_data->args[1]);
641 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
642 TGSI_OPCODE_ADD, tmp, emit_data->args[2]);
643 }
644
645 /* TGSI_OPCODE_MOV */
646
647 static void
648 mov_emit(
649 const struct lp_build_tgsi_action * action,
650 struct lp_build_tgsi_context * bld_base,
651 struct lp_build_emit_data * emit_data)
652 {
653 emit_data->output[emit_data->chan] = emit_data->args[0];
654 }
655
656 /* TGSI_OPCODE_MUL */
657 static void
658 mul_emit(
659 const struct lp_build_tgsi_action * action,
660 struct lp_build_tgsi_context * bld_base,
661 struct lp_build_emit_data * emit_data)
662 {
663 emit_data->output[emit_data->chan] = LLVMBuildFMul(
664 bld_base->base.gallivm->builder,
665 emit_data->args[0], emit_data->args[1], "");
666 }
667
/* TGSI_OPCODE_DIV */
669 static void fdiv_emit(
670 const struct lp_build_tgsi_action * action,
671 struct lp_build_tgsi_context * bld_base,
672 struct lp_build_emit_data * emit_data)
673 {
674 emit_data->output[emit_data->chan] = LLVMBuildFDiv(
675 bld_base->base.gallivm->builder,
676 emit_data->args[0], emit_data->args[1], "");
677 }
678
/* TGSI_OPCODE_RCP */
680 static void rcp_emit(
681 const struct lp_build_tgsi_action * action,
682 struct lp_build_tgsi_context * bld_base,
683 struct lp_build_emit_data * emit_data)
684 {
685 LLVMValueRef one;
686 one = lp_build_const_float(bld_base->base.gallivm, 1.0f);
687 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
688 TGSI_OPCODE_DIV, one, emit_data->args[0]);
689 }
690
691 /* TGSI_OPCODE_POW */
692
693 static void
694 pow_emit(
695 const struct lp_build_tgsi_action * action,
696 struct lp_build_tgsi_context * bld_base,
697 struct lp_build_emit_data * emit_data)
698 {
699 emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
700 emit_data->args[0], emit_data->args[1]);
701 }
702
703 static struct lp_build_tgsi_action pow_action = {
704 scalar_binary_fetch_args, /* fetch_args */
705 pow_emit /* emit */
706 };
707
708 /* TGSI_OPCODE_RSQ */
709
710 static void
711 rsq_emit(
712 const struct lp_build_tgsi_action * action,
713 struct lp_build_tgsi_context * bld_base,
714 struct lp_build_emit_data * emit_data)
715 {
716 if (bld_base->rsq_action.emit) {
717 bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
718 } else {
719 emit_data->output[emit_data->chan] = bld_base->base.undef;
720 }
721 }
722
723 const struct lp_build_tgsi_action rsq_action = {
724 scalar_unary_fetch_args, /* fetch_args */
725 rsq_emit /* emit */
726
727 };
728
729 /* TGSI_OPCODE_SQRT */
730
731 static void
732 sqrt_emit(
733 const struct lp_build_tgsi_action * action,
734 struct lp_build_tgsi_context * bld_base,
735 struct lp_build_emit_data * emit_data)
736 {
737 if (bld_base->sqrt_action.emit) {
738 bld_base->sqrt_action.emit(&bld_base->sqrt_action, bld_base, emit_data);
739 } else {
740 emit_data->output[emit_data->chan] = bld_base->base.undef;
741 }
742 }
743
744 const struct lp_build_tgsi_action sqrt_action = {
745 scalar_unary_fetch_args, /* fetch_args */
746 sqrt_emit /* emit */
747 };
748
749 /* TGSI_OPCODE_SCS */
750 static void
751 scs_emit(
752 const struct lp_build_tgsi_action * action,
753 struct lp_build_tgsi_context * bld_base,
754 struct lp_build_emit_data * emit_data)
755 {
756 /* dst.x */
757 emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
758 TGSI_OPCODE_COS, emit_data->args[0]);
759 /* dst.y */
760 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
761 TGSI_OPCODE_SIN, emit_data->args[0]);
762 /* dst.z */
763 emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
764
765 /* dst.w */
766 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
767 }
768
769 const struct lp_build_tgsi_action scs_action = {
770 scalar_unary_fetch_args, /* fetch_args */
771 scs_emit /* emit */
772 };
773
774 /* TGSI_OPCODE_SUB */
775 static void
776 sub_emit(
777 const struct lp_build_tgsi_action * action,
778 struct lp_build_tgsi_context * bld_base,
779 struct lp_build_emit_data * emit_data)
780 {
781 emit_data->output[emit_data->chan] =
782 LLVMBuildFSub(bld_base->base.gallivm->builder,
783 emit_data->args[0],
784 emit_data->args[1], "");
785 }
786
787 /* TGSI_OPCODE_F2U */
788 static void
789 f2u_emit(
790 const struct lp_build_tgsi_action * action,
791 struct lp_build_tgsi_context * bld_base,
792 struct lp_build_emit_data * emit_data)
793 {
794 emit_data->output[emit_data->chan] =
795 LLVMBuildFPToUI(bld_base->base.gallivm->builder,
796 emit_data->args[0],
797 bld_base->base.int_vec_type, "");
798 }
799
800 /* TGSI_OPCODE_U2F */
801 static void
802 u2f_emit(
803 const struct lp_build_tgsi_action * action,
804 struct lp_build_tgsi_context * bld_base,
805 struct lp_build_emit_data * emit_data)
806 {
807 emit_data->output[emit_data->chan] =
808 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
809 emit_data->args[0],
810 bld_base->base.vec_type, "");
811 }
812
813 static void
814 umad_emit(
815 const struct lp_build_tgsi_action * action,
816 struct lp_build_tgsi_context * bld_base,
817 struct lp_build_emit_data * emit_data)
818 {
819 LLVMValueRef tmp;
820 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMUL,
821 emit_data->args[0],
822 emit_data->args[1]);
823 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
824 TGSI_OPCODE_UADD, tmp, emit_data->args[2]);
825 }
826
827 /* TGSI_OPCODE_UMUL */
828 static void
829 umul_emit(
830 const struct lp_build_tgsi_action * action,
831 struct lp_build_tgsi_context * bld_base,
832 struct lp_build_emit_data * emit_data)
833 {
834 emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint_bld,
835 emit_data->args[0], emit_data->args[1]);
836 }
837
838 /* TGSI_OPCODE_IMUL_HI */
839 static void
840 imul_hi_emit(
841 const struct lp_build_tgsi_action * action,
842 struct lp_build_tgsi_context * bld_base,
843 struct lp_build_emit_data * emit_data)
844 {
845 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
846 struct lp_build_context *int_bld = &bld_base->int_bld;
847 struct lp_type type = int_bld->type;
848 LLVMValueRef src0, src1;
849 LLVMValueRef dst64;
850 LLVMTypeRef typeRef;
851
852 assert(type.width == 32);
853 type.width = 64;
854 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
855 src0 = LLVMBuildSExt(builder, emit_data->args[0], typeRef, "");
856 src1 = LLVMBuildSExt(builder, emit_data->args[1], typeRef, "");
857 dst64 = LLVMBuildMul(builder, src0, src1, "");
858 dst64 = LLVMBuildAShr(
859 builder, dst64,
860 lp_build_const_vec(bld_base->base.gallivm, type, 32), "");
861 type.width = 32;
862 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
863 emit_data->output[emit_data->chan] =
864 LLVMBuildTrunc(builder, dst64, typeRef, "");
865 }
866
867 /* TGSI_OPCODE_UMUL_HI */
868 static void
869 umul_hi_emit(
870 const struct lp_build_tgsi_action * action,
871 struct lp_build_tgsi_context * bld_base,
872 struct lp_build_emit_data * emit_data)
873 {
874 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
875 struct lp_build_context *uint_bld = &bld_base->uint_bld;
876 struct lp_type type = uint_bld->type;
877 LLVMValueRef src0, src1;
878 LLVMValueRef dst64;
879 LLVMTypeRef typeRef;
880
881 assert(type.width == 32);
882 type.width = 64;
883 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
884 src0 = LLVMBuildZExt(builder, emit_data->args[0], typeRef, "");
885 src1 = LLVMBuildZExt(builder, emit_data->args[1], typeRef, "");
886 dst64 = LLVMBuildMul(builder, src0, src1, "");
887 dst64 = LLVMBuildLShr(
888 builder, dst64,
889 lp_build_const_vec(bld_base->base.gallivm, type, 32), "");
890 type.width = 32;
891 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
892 emit_data->output[emit_data->chan] =
893 LLVMBuildTrunc(builder, dst64, typeRef, "");
894 }
895
896 /* TGSI_OPCODE_MAX */
897 static void fmax_emit(
898 const struct lp_build_tgsi_action * action,
899 struct lp_build_tgsi_context * bld_base,
900 struct lp_build_emit_data * emit_data)
901 {
902 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
903 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
904 LLVMBuildFCmp(builder, LLVMRealUGE,
905 emit_data->args[0], emit_data->args[1], ""),
906 emit_data->args[0], emit_data->args[1], "");
907 }
908
909 /* TGSI_OPCODE_MIN */
910 static void fmin_emit(
911 const struct lp_build_tgsi_action * action,
912 struct lp_build_tgsi_context * bld_base,
913 struct lp_build_emit_data * emit_data)
914 {
915 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
916 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
917 LLVMBuildFCmp(builder, LLVMRealUGE,
918 emit_data->args[0], emit_data->args[1], ""),
919 emit_data->args[1], emit_data->args[0], "");
920 }
921
922 /* TGSI_OPCODE_XPD */
923
/**
 * Fetch arguments for XPD: args[0..2] are src0.xyz and args[3..5] are
 * src1.xyz (layout produced by dp_fetch_args).
 */
static void
xpd_fetch_args(struct lp_build_tgsi_context *bld_base,
               struct lp_build_emit_data *emit_data)
{
   const unsigned components = 3;

   dp_fetch_args(bld_base, emit_data, components);
}
931
932 /**
933 * (a * b) - (c * d)
934 */
935 static LLVMValueRef
936 xpd_helper(
937 struct lp_build_tgsi_context * bld_base,
938 LLVMValueRef a,
939 LLVMValueRef b,
940 LLVMValueRef c,
941 LLVMValueRef d)
942 {
943 LLVMValueRef tmp0, tmp1;
944
945 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
946 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
947
948 return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1);
949 }
950
951 static void
952 xpd_emit(
953 const struct lp_build_tgsi_action * action,
954 struct lp_build_tgsi_context * bld_base,
955 struct lp_build_emit_data * emit_data)
956 {
957 emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
958 emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
959 emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
960
961 emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
962 emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
963 emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
964
965 emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
966 emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
967 emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
968
969 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
970 }
971
972 const struct lp_build_tgsi_action xpd_action = {
973 xpd_fetch_args, /* fetch_args */
974 xpd_emit /* emit */
975 };
976
977 /* TGSI_OPCODE_D2F */
978 static void
979 d2f_emit(
980 const struct lp_build_tgsi_action * action,
981 struct lp_build_tgsi_context * bld_base,
982 struct lp_build_emit_data * emit_data)
983 {
984 emit_data->output[emit_data->chan] =
985 LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
986 emit_data->args[0],
987 bld_base->base.vec_type, "");
988 }
989
990 /* TGSI_OPCODE_D2I */
991 static void
992 d2i_emit(
993 const struct lp_build_tgsi_action * action,
994 struct lp_build_tgsi_context * bld_base,
995 struct lp_build_emit_data * emit_data)
996 {
997 emit_data->output[emit_data->chan] =
998 LLVMBuildFPToSI(bld_base->base.gallivm->builder,
999 emit_data->args[0],
1000 bld_base->base.int_vec_type, "");
1001 }
1002
1003 /* TGSI_OPCODE_D2U */
1004 static void
1005 d2u_emit(
1006 const struct lp_build_tgsi_action * action,
1007 struct lp_build_tgsi_context * bld_base,
1008 struct lp_build_emit_data * emit_data)
1009 {
1010 emit_data->output[emit_data->chan] =
1011 LLVMBuildFPToUI(bld_base->base.gallivm->builder,
1012 emit_data->args[0],
1013 bld_base->base.int_vec_type, "");
1014 }
1015
1016 /* TGSI_OPCODE_F2D */
1017 static void
1018 f2d_emit(
1019 const struct lp_build_tgsi_action * action,
1020 struct lp_build_tgsi_context * bld_base,
1021 struct lp_build_emit_data * emit_data)
1022 {
1023 emit_data->output[emit_data->chan] =
1024 LLVMBuildFPExt(bld_base->base.gallivm->builder,
1025 emit_data->args[0],
1026 bld_base->dbl_bld.vec_type, "");
1027 }
1028
1029 /* TGSI_OPCODE_U2D */
1030 static void
1031 u2d_emit(
1032 const struct lp_build_tgsi_action * action,
1033 struct lp_build_tgsi_context * bld_base,
1034 struct lp_build_emit_data * emit_data)
1035 {
1036 emit_data->output[emit_data->chan] =
1037 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1038 emit_data->args[0],
1039 bld_base->dbl_bld.vec_type, "");
1040 }
1041
1042 /* TGSI_OPCODE_I2D */
1043 static void
1044 i2d_emit(
1045 const struct lp_build_tgsi_action * action,
1046 struct lp_build_tgsi_context * bld_base,
1047 struct lp_build_emit_data * emit_data)
1048 {
1049 emit_data->output[emit_data->chan] =
1050 LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1051 emit_data->args[0],
1052 bld_base->dbl_bld.vec_type, "");
1053 }
1054
1055 /* TGSI_OPCODE_DMAD */
1056 static void
1057 dmad_emit(
1058 const struct lp_build_tgsi_action * action,
1059 struct lp_build_tgsi_context * bld_base,
1060 struct lp_build_emit_data * emit_data)
1061 {
1062 LLVMValueRef tmp;
1063 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
1064 emit_data->args[0],
1065 emit_data->args[1]);
1066 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
1067 TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
1068 }
1069
1070 /*.TGSI_OPCODE_DRCP.*/
1071 static void drcp_emit(
1072 const struct lp_build_tgsi_action * action,
1073 struct lp_build_tgsi_context * bld_base,
1074 struct lp_build_emit_data * emit_data)
1075 {
1076 LLVMValueRef one;
1077 one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
1078 emit_data->output[emit_data->chan] = LLVMBuildFDiv(
1079 bld_base->base.gallivm->builder,
1080 one, emit_data->args[0], "");
1081 }
1082
1083 /* TGSI_OPCODE_DFRAC */
1084 static void dfrac_emit(
1085 const struct lp_build_tgsi_action * action,
1086 struct lp_build_tgsi_context * bld_base,
1087 struct lp_build_emit_data * emit_data)
1088 {
1089 LLVMValueRef tmp;
1090 tmp = lp_build_floor(&bld_base->dbl_bld,
1091 emit_data->args[0]);
1092 emit_data->output[emit_data->chan] = LLVMBuildFSub(bld_base->base.gallivm->builder,
1093 emit_data->args[0], tmp, "");
1094 }
1095
1096 void
1097 lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
1098 {
1099 bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
1100 bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
1101 bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
1102 bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
1103 bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
1104 bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
1105 bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
1106 bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
1107 bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
1108 bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
1109 bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
1110 bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
1111 bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
1112 bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
1113 bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
1114 bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
1115
1116 bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
1117 bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
1118 bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
1119 bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
1120 bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
1121 bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
1122 bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
1123 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
1124 bld_base->op_actions[TGSI_OPCODE_KILL].fetch_args = kilp_fetch_args;
1125 bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
1126 bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
1127 bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
1128
1129 bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
1130 bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
1131 bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit;
1132 bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
1133 bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
1134 bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
1135 bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
1136 bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
1137 bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
1138 bld_base->op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit;
1139 bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit;
1140 bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
1141
1142 bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
1143 bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit;
1144 bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
1145 bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
1146 bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
1147 bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit;
1148 bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit;
1149
1150 bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
1151 bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
1152
1153 bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
1154 bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
1155 bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
1156 bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
1157
1158 bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
1159 bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
1160 bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
1161
1162 bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
1163 bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
1164 bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
1165
1166 bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
1167
1168 bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
1169 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
1170
1171 }
1172
1173 /* CPU Only default actions */
1174
1175 /* These actions are CPU only, because they could potentially output SSE
1176 * intrinsics.
1177 */
1178
1179 /* TGSI_OPCODE_ABS (CPU Only)*/
1180
1181 static void
1182 abs_emit_cpu(
1183 const struct lp_build_tgsi_action * action,
1184 struct lp_build_tgsi_context * bld_base,
1185 struct lp_build_emit_data * emit_data)
1186 {
1187 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->base,
1188 emit_data->args[0]);
1189 }
1190
1191 /* TGSI_OPCODE_ADD (CPU Only) */
1192 static void
1193 add_emit_cpu(
1194 const struct lp_build_tgsi_action * action,
1195 struct lp_build_tgsi_context * bld_base,
1196 struct lp_build_emit_data * emit_data)
1197 {
1198 emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base,
1199 emit_data->args[0], emit_data->args[1]);
1200 }
1201
1202 /* TGSI_OPCODE_AND (CPU Only) */
1203 static void
1204 and_emit_cpu(
1205 const struct lp_build_tgsi_action * action,
1206 struct lp_build_tgsi_context * bld_base,
1207 struct lp_build_emit_data * emit_data)
1208 {
1209 emit_data->output[emit_data->chan] = lp_build_and(&bld_base->uint_bld,
1210 emit_data->args[0], emit_data->args[1]);
1211 }
1212
1213 /* TGSI_OPCODE_ARL (CPU Only) */
1214 static void
1215 arl_emit_cpu(
1216 const struct lp_build_tgsi_action * action,
1217 struct lp_build_tgsi_context * bld_base,
1218 struct lp_build_emit_data * emit_data)
1219 {
1220 LLVMValueRef tmp;
1221 tmp = lp_build_floor(&bld_base->base,
1222 emit_data->args[0]);
1223 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
1224 bld_base->uint_bld.vec_type, "");
1225 }
1226
1227 /* TGSI_OPCODE_ARR (CPU Only) */
1228 static void
1229 arr_emit_cpu(
1230 const struct lp_build_tgsi_action * action,
1231 struct lp_build_tgsi_context * bld_base,
1232 struct lp_build_emit_data * emit_data)
1233 {
1234 emit_data->output[emit_data->chan] = lp_build_iround(&bld_base->base, emit_data->args[0]);
1235 }
1236
1237 /* TGSI_OPCODE_CEIL (CPU Only) */
1238 static void
1239 ceil_emit_cpu(
1240 const struct lp_build_tgsi_action * action,
1241 struct lp_build_tgsi_context * bld_base,
1242 struct lp_build_emit_data * emit_data)
1243 {
1244 emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base,
1245 emit_data->args[0]);
1246 }
1247
1248 /* TGSI_OPCODE_CMP (CPU Only) */
1249 static void
1250 cmp_emit_cpu(
1251 const struct lp_build_tgsi_action * action,
1252 struct lp_build_tgsi_context * bld_base,
1253 struct lp_build_emit_data * emit_data)
1254 {
1255 LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
1256 emit_data->args[0], bld_base->base.zero);
1257 emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1258 cond, emit_data->args[1], emit_data->args[2]);
1259 }
1260
1261 /* TGSI_OPCODE_UCMP (CPU Only) */
1262 static void
1263 ucmp_emit_cpu(
1264 const struct lp_build_tgsi_action * action,
1265 struct lp_build_tgsi_context * bld_base,
1266 struct lp_build_emit_data * emit_data)
1267 {
1268 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1269 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1270 LLVMValueRef unsigned_cond =
1271 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
1272 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
1273 unsigned_cond,
1274 uint_bld->zero);
1275 emit_data->output[emit_data->chan] =
1276 lp_build_select(&bld_base->base,
1277 cond, emit_data->args[1], emit_data->args[2]);
1278 }
1279
1280 /* TGSI_OPCODE_COS (CPU Only) */
1281 static void
1282 cos_emit_cpu(
1283 const struct lp_build_tgsi_action * action,
1284 struct lp_build_tgsi_context * bld_base,
1285 struct lp_build_emit_data * emit_data)
1286 {
1287 emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base,
1288 emit_data->args[0]);
1289 }
1290
1291 /* TGSI_OPCODE_DIV (CPU Only) */
1292 static void
1293 div_emit_cpu(
1294 const struct lp_build_tgsi_action * action,
1295 struct lp_build_tgsi_context * bld_base,
1296 struct lp_build_emit_data * emit_data)
1297 {
1298 emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base,
1299 emit_data->args[0], emit_data->args[1]);
1300 }
1301
1302 /* TGSI_OPCODE_EX2 (CPU Only) */
1303 static void
1304 ex2_emit_cpu(
1305 const struct lp_build_tgsi_action * action,
1306 struct lp_build_tgsi_context * bld_base,
1307 struct lp_build_emit_data * emit_data)
1308 {
1309 emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base,
1310 emit_data->args[0]);
1311 }
1312
1313 /* TGSI_OPCODE_F2I (CPU Only) */
1314 static void
1315 f2i_emit_cpu(
1316 const struct lp_build_tgsi_action * action,
1317 struct lp_build_tgsi_context * bld_base,
1318 struct lp_build_emit_data * emit_data)
1319 {
1320 emit_data->output[emit_data->chan] = lp_build_itrunc(&bld_base->base,
1321 emit_data->args[0]);
1322 }
1323
1324 /* TGSI_OPCODE_FSET Helper (CPU Only) */
1325 static void
1326 fset_emit_cpu(
1327 const struct lp_build_tgsi_action * action,
1328 struct lp_build_tgsi_context * bld_base,
1329 struct lp_build_emit_data * emit_data,
1330 unsigned pipe_func)
1331 {
1332 LLVMValueRef cond;
1333
1334 if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1335 cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1336 emit_data->args[0], emit_data->args[1]);
1337 }
1338 else {
1339 cond = lp_build_cmp(&bld_base->base, pipe_func,
1340 emit_data->args[0], emit_data->args[1]);
1341
1342 }
1343 emit_data->output[emit_data->chan] = cond;
1344 }
1345
1346
1347 /* TGSI_OPCODE_FSEQ (CPU Only) */
1348 static void
1349 fseq_emit_cpu(
1350 const struct lp_build_tgsi_action * action,
1351 struct lp_build_tgsi_context * bld_base,
1352 struct lp_build_emit_data * emit_data)
1353 {
1354 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1355 }
1356
1357 /* TGSI_OPCODE_ISGE (CPU Only) */
1358 static void
1359 fsge_emit_cpu(
1360 const struct lp_build_tgsi_action * action,
1361 struct lp_build_tgsi_context * bld_base,
1362 struct lp_build_emit_data * emit_data)
1363 {
1364 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1365 }
1366
1367 /* TGSI_OPCODE_ISLT (CPU Only) */
1368 static void
1369 fslt_emit_cpu(
1370 const struct lp_build_tgsi_action * action,
1371 struct lp_build_tgsi_context * bld_base,
1372 struct lp_build_emit_data * emit_data)
1373 {
1374 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1375 }
1376
1377 /* TGSI_OPCODE_USNE (CPU Only) */
1378
1379 static void
1380 fsne_emit_cpu(
1381 const struct lp_build_tgsi_action * action,
1382 struct lp_build_tgsi_context * bld_base,
1383 struct lp_build_emit_data * emit_data)
1384 {
1385 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1386 }
1387
1388 /* TGSI_OPCODE_FLR (CPU Only) */
1389
1390 static void
1391 flr_emit_cpu(
1392 const struct lp_build_tgsi_action * action,
1393 struct lp_build_tgsi_context * bld_base,
1394 struct lp_build_emit_data * emit_data)
1395 {
1396 emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base,
1397 emit_data->args[0]);
1398 }
1399
1400 /* TGSI_OPCODE_I2F (CPU Only) */
1401 static void
1402 i2f_emit_cpu(
1403 const struct lp_build_tgsi_action * action,
1404 struct lp_build_tgsi_context * bld_base,
1405 struct lp_build_emit_data * emit_data)
1406 {
1407 emit_data->output[emit_data->chan] = lp_build_int_to_float(&bld_base->base,
1408 emit_data->args[0]);
1409 }
1410
1411 /* TGSI_OPCODE_IABS (CPU Only) */
1412 static void
1413 iabs_emit_cpu(
1414 const struct lp_build_tgsi_action * action,
1415 struct lp_build_tgsi_context * bld_base,
1416 struct lp_build_emit_data * emit_data)
1417 {
1418 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int_bld,
1419 emit_data->args[0]);
1420 }
1421
1422 /* TGSI_OPCODE_IDIV (CPU Only) */
1423 static void
1424 idiv_emit_cpu(
1425 const struct lp_build_tgsi_action * action,
1426 struct lp_build_tgsi_context * bld_base,
1427 struct lp_build_emit_data * emit_data)
1428 {
1429 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1430 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1431 PIPE_FUNC_EQUAL, emit_data->args[1],
1432 bld_base->uint_bld.zero);
1433 /* We want to make sure that we never divide/mod by zero to not
1434 * generate sigfpe. We don't want to crash just because the
1435 * shader is doing something weird. */
1436 LLVMValueRef divisor = LLVMBuildOr(builder,
1437 div_mask,
1438 emit_data->args[1], "");
1439 LLVMValueRef result = lp_build_div(&bld_base->int_bld,
1440 emit_data->args[0], divisor);
1441 LLVMValueRef not_div_mask = LLVMBuildNot(builder,
1442 div_mask,"");
1443 /* idiv by zero doesn't have a guaranteed return value chose 0 for now. */
1444 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1445 not_div_mask,
1446 result, "");
1447 }
1448
1449 /* TGSI_OPCODE_INEG (CPU Only) */
1450 static void
1451 ineg_emit_cpu(
1452 const struct lp_build_tgsi_action * action,
1453 struct lp_build_tgsi_context * bld_base,
1454 struct lp_build_emit_data * emit_data)
1455 {
1456 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int_bld,
1457 bld_base->int_bld.zero,
1458 emit_data->args[0]);
1459 }
1460
1461 /* TGSI_OPCODE_ISET Helper (CPU Only) */
1462 static void
1463 iset_emit_cpu(
1464 const struct lp_build_tgsi_action * action,
1465 struct lp_build_tgsi_context * bld_base,
1466 struct lp_build_emit_data * emit_data,
1467 unsigned pipe_func)
1468 {
1469 LLVMValueRef cond = lp_build_cmp(&bld_base->int_bld, pipe_func,
1470 emit_data->args[0], emit_data->args[1]);
1471 emit_data->output[emit_data->chan] = cond;
1472 }
1473
1474 /* TGSI_OPCODE_IMAX (CPU Only) */
1475 static void
1476 imax_emit_cpu(
1477 const struct lp_build_tgsi_action * action,
1478 struct lp_build_tgsi_context * bld_base,
1479 struct lp_build_emit_data * emit_data)
1480 {
1481 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int_bld,
1482 emit_data->args[0], emit_data->args[1]);
1483 }
1484
1485 /* TGSI_OPCODE_IMIN (CPU Only) */
1486 static void
1487 imin_emit_cpu(
1488 const struct lp_build_tgsi_action * action,
1489 struct lp_build_tgsi_context * bld_base,
1490 struct lp_build_emit_data * emit_data)
1491 {
1492 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int_bld,
1493 emit_data->args[0], emit_data->args[1]);
1494 }
1495
1496 /* TGSI_OPCODE_ISGE (CPU Only) */
1497 static void
1498 isge_emit_cpu(
1499 const struct lp_build_tgsi_action * action,
1500 struct lp_build_tgsi_context * bld_base,
1501 struct lp_build_emit_data * emit_data)
1502 {
1503 iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1504 }
1505
1506 /* TGSI_OPCODE_ISHR (CPU Only) */
1507 static void
1508 ishr_emit_cpu(
1509 const struct lp_build_tgsi_action * action,
1510 struct lp_build_tgsi_context * bld_base,
1511 struct lp_build_emit_data * emit_data)
1512 {
1513 struct lp_build_context *int_bld = &bld_base->int_bld;
1514 LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
1515 int_bld->type.width - 1);
1516 LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
1517 emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
1518 masked_count);
1519 }
1520
1521 /* TGSI_OPCODE_ISLT (CPU Only) */
1522 static void
1523 islt_emit_cpu(
1524 const struct lp_build_tgsi_action * action,
1525 struct lp_build_tgsi_context * bld_base,
1526 struct lp_build_emit_data * emit_data)
1527 {
1528 iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1529 }
1530
1531
1532 /* TGSI_OPCODE_ISSG (CPU Only) */
1533 static void
1534 issg_emit_cpu(
1535 const struct lp_build_tgsi_action * action,
1536 struct lp_build_tgsi_context * bld_base,
1537 struct lp_build_emit_data * emit_data)
1538 {
1539 emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int_bld,
1540 emit_data->args[0]);
1541 }
1542
1543 /* TGSI_OPCODE_LG2 (CPU Only) */
1544 static void
1545 lg2_emit_cpu(
1546 const struct lp_build_tgsi_action * action,
1547 struct lp_build_tgsi_context * bld_base,
1548 struct lp_build_emit_data * emit_data)
1549 {
1550 emit_data->output[emit_data->chan] = lp_build_log2_safe(&bld_base->base,
1551 emit_data->args[0]);
1552 }
1553
1554 /* TGSI_OPCODE_LOG (CPU Only) */
1555 static void
1556 log_emit_cpu(
1557 const struct lp_build_tgsi_action * action,
1558 struct lp_build_tgsi_context * bld_base,
1559 struct lp_build_emit_data * emit_data)
1560 {
1561 LLVMValueRef p_floor_log2;
1562 LLVMValueRef p_exp;
1563 LLVMValueRef p_log2;
1564 LLVMValueRef src0 = emit_data->args[0];
1565
1566 lp_build_log2_approx(&bld_base->base, src0,
1567 &p_exp, &p_floor_log2, &p_log2, FALSE);
1568
1569 emit_data->output[TGSI_CHAN_X] = p_floor_log2;
1570
1571 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
1572 TGSI_OPCODE_DIV,
1573 src0, p_exp);
1574 emit_data->output[TGSI_CHAN_Z] = p_log2;
1575
1576 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
1577
1578 }
1579
1580 /* TGSI_OPCODE_MAX (CPU Only) */
1581
1582 static void
1583 max_emit_cpu(
1584 const struct lp_build_tgsi_action * action,
1585 struct lp_build_tgsi_context * bld_base,
1586 struct lp_build_emit_data * emit_data)
1587 {
1588 emit_data->output[emit_data->chan] =
1589 lp_build_max_ext(&bld_base->base,
1590 emit_data->args[0], emit_data->args[1],
1591 GALLIVM_NAN_RETURN_OTHER);
1592 }
1593
1594 /* TGSI_OPCODE_MIN (CPU Only) */
1595 static void
1596 min_emit_cpu(
1597 const struct lp_build_tgsi_action * action,
1598 struct lp_build_tgsi_context * bld_base,
1599 struct lp_build_emit_data * emit_data)
1600 {
1601 emit_data->output[emit_data->chan] =
1602 lp_build_min_ext(&bld_base->base,
1603 emit_data->args[0], emit_data->args[1],
1604 GALLIVM_NAN_RETURN_OTHER);
1605 }
1606
1607 /* TGSI_OPCODE_MOD (CPU Only) */
1608 static void
1609 mod_emit_cpu(
1610 const struct lp_build_tgsi_action * action,
1611 struct lp_build_tgsi_context * bld_base,
1612 struct lp_build_emit_data * emit_data)
1613 {
1614 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1615 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1616 PIPE_FUNC_EQUAL, emit_data->args[1],
1617 bld_base->uint_bld.zero);
1618 /* We want to make sure that we never divide/mod by zero to not
1619 * generate sigfpe. We don't want to crash just because the
1620 * shader is doing something weird. */
1621 LLVMValueRef divisor = LLVMBuildOr(builder,
1622 div_mask,
1623 emit_data->args[1], "");
1624 LLVMValueRef result = lp_build_mod(&bld_base->int_bld,
1625 emit_data->args[0], divisor);
1626 /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1627 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1628 div_mask,
1629 result, "");
1630 }
1631
1632 /* TGSI_OPCODE_NOT */
1633 static void
1634 not_emit_cpu(
1635 const struct lp_build_tgsi_action * action,
1636 struct lp_build_tgsi_context * bld_base,
1637 struct lp_build_emit_data * emit_data)
1638 {
1639 emit_data->output[emit_data->chan] = lp_build_not(&bld_base->uint_bld,
1640 emit_data->args[0]);
1641 }
1642
1643 /* TGSI_OPCODE_OR (CPU Only) */
1644 static void
1645 or_emit_cpu(
1646 const struct lp_build_tgsi_action * action,
1647 struct lp_build_tgsi_context * bld_base,
1648 struct lp_build_emit_data * emit_data)
1649 {
1650 emit_data->output[emit_data->chan] = lp_build_or(&bld_base->uint_bld,
1651 emit_data->args[0], emit_data->args[1]);
1652 }
1653
1654 /* TGSI_OPCODE_POW (CPU Only) */
1655 static void
1656 pow_emit_cpu(
1657 const struct lp_build_tgsi_action * action,
1658 struct lp_build_tgsi_context * bld_base,
1659 struct lp_build_emit_data * emit_data)
1660 {
1661 emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
1662 emit_data->args[0], emit_data->args[1]);
1663 }
1664
1665
1666 /* TGSI_OPCODE_RCP (CPU Only) */
1667
1668 static void
1669 rcp_emit_cpu(
1670 const struct lp_build_tgsi_action * action,
1671 struct lp_build_tgsi_context * bld_base,
1672 struct lp_build_emit_data * emit_data)
1673 {
1674 emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base,
1675 emit_data->args[0]);
1676 }
1677
1678 /* Reciprical squareroot (CPU Only) */
1679 static void
1680 recip_sqrt_emit_cpu(
1681 const struct lp_build_tgsi_action * action,
1682 struct lp_build_tgsi_context * bld_base,
1683 struct lp_build_emit_data * emit_data)
1684 {
1685 emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base,
1686 emit_data->args[0]);
1687 }
1688
1689 static void
1690 sqrt_emit_cpu(
1691 const struct lp_build_tgsi_action * action,
1692 struct lp_build_tgsi_context * bld_base,
1693 struct lp_build_emit_data * emit_data)
1694 {
1695 emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->base,
1696 emit_data->args[0]);
1697 }
1698
1699
1700 /* TGSI_OPCODE_ROUND (CPU Only) */
1701 static void
1702 round_emit_cpu(
1703 const struct lp_build_tgsi_action * action,
1704 struct lp_build_tgsi_context * bld_base,
1705 struct lp_build_emit_data * emit_data)
1706 {
1707 emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base,
1708 emit_data->args[0]);
1709 }
1710
1711 /* TGSI_OPCODE_SET Helper (CPU Only) */
1712
1713 static void
1714 set_emit_cpu(
1715 const struct lp_build_tgsi_action * action,
1716 struct lp_build_tgsi_context * bld_base,
1717 struct lp_build_emit_data * emit_data,
1718 unsigned pipe_func)
1719 {
1720 LLVMValueRef cond;
1721
1722 if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1723 cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1724 emit_data->args[0], emit_data->args[1]);
1725 }
1726 else {
1727 cond = lp_build_cmp(&bld_base->base, pipe_func,
1728 emit_data->args[0], emit_data->args[1]);
1729
1730 }
1731 emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1732 cond,
1733 bld_base->base.one,
1734 bld_base->base.zero);
1735 }
1736
1737 /* TGSI_OPCODE_SEQ (CPU Only) */
1738
1739 static void
1740 seq_emit_cpu(
1741 const struct lp_build_tgsi_action * action,
1742 struct lp_build_tgsi_context * bld_base,
1743 struct lp_build_emit_data * emit_data)
1744 {
1745 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1746 }
1747
1748 /* TGSI_OPCODE_SGE (CPU Only) */
1749 static void
1750 sge_emit_cpu(
1751 const struct lp_build_tgsi_action * action,
1752 struct lp_build_tgsi_context * bld_base,
1753 struct lp_build_emit_data * emit_data)
1754 {
1755 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1756 }
1757
1758 /* TGSI_OPCODE_SGT (CPU Only)*/
1759
1760 static void
1761 sgt_emit_cpu(
1762 const struct lp_build_tgsi_action * action,
1763 struct lp_build_tgsi_context * bld_base,
1764 struct lp_build_emit_data * emit_data)
1765 {
1766 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
1767 }
1768
1769 /* TGSI_OPCODE_SHL (CPU Only) */
1770 static void
1771 shl_emit_cpu(
1772 const struct lp_build_tgsi_action * action,
1773 struct lp_build_tgsi_context * bld_base,
1774 struct lp_build_emit_data * emit_data)
1775 {
1776 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1777 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
1778 uint_bld->type.width - 1);
1779 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
1780 emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
1781 masked_count);
1782 }
1783
1784 /* TGSI_OPCODE_SIN (CPU Only) */
1785 static void
1786 sin_emit_cpu(
1787 const struct lp_build_tgsi_action * action,
1788 struct lp_build_tgsi_context * bld_base,
1789 struct lp_build_emit_data * emit_data)
1790 {
1791 emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base,
1792 emit_data->args[0]);
1793 }
1794
1795 /* TGSI_OPCODE_SLE (CPU Only) */
1796 static void
1797 sle_emit_cpu(
1798 const struct lp_build_tgsi_action * action,
1799 struct lp_build_tgsi_context * bld_base,
1800 struct lp_build_emit_data * emit_data)
1801 {
1802 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
1803 }
1804
1805 /* TGSI_OPCODE_SLT (CPU Only) */
1806 static void
1807 slt_emit_cpu(
1808 const struct lp_build_tgsi_action * action,
1809 struct lp_build_tgsi_context * bld_base,
1810 struct lp_build_emit_data * emit_data)
1811 {
1812 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1813 }
1814
1815 /* TGSI_OPCODE_SNE (CPU Only) */
1816
1817 static void
1818 sne_emit_cpu(
1819 const struct lp_build_tgsi_action * action,
1820 struct lp_build_tgsi_context * bld_base,
1821 struct lp_build_emit_data * emit_data)
1822 {
1823 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1824 }
1825
1826 /* TGSI_OPCODE_SSG (CPU Only) */
1827
1828 static void
1829 ssg_emit_cpu(
1830 const struct lp_build_tgsi_action * action,
1831 struct lp_build_tgsi_context * bld_base,
1832 struct lp_build_emit_data * emit_data)
1833 {
1834 emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base,
1835 emit_data->args[0]);
1836 }
1837
1838 /* TGSI_OPCODE_SUB (CPU Only) */
1839
1840 static void
1841 sub_emit_cpu(
1842 const struct lp_build_tgsi_action * action,
1843 struct lp_build_tgsi_context * bld_base,
1844 struct lp_build_emit_data * emit_data)
1845 {
1846 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base,
1847 emit_data->args[0],
1848 emit_data->args[1]);
1849 }
1850
1851 /* TGSI_OPCODE_TRUNC (CPU Only) */
1852
1853 static void
1854 trunc_emit_cpu(
1855 const struct lp_build_tgsi_action * action,
1856 struct lp_build_tgsi_context * bld_base,
1857 struct lp_build_emit_data * emit_data)
1858 {
1859 emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base,
1860 emit_data->args[0]);
1861 }
1862
1863 /* TGSI_OPCODE_UADD (CPU Only) */
1864 static void
1865 uadd_emit_cpu(
1866 const struct lp_build_tgsi_action * action,
1867 struct lp_build_tgsi_context * bld_base,
1868 struct lp_build_emit_data * emit_data)
1869 {
1870 emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint_bld,
1871 emit_data->args[0], emit_data->args[1]);
1872 }
1873
1874 /* TGSI_OPCODE_UDIV (CPU Only) */
1875 static void
1876 udiv_emit_cpu(
1877 const struct lp_build_tgsi_action * action,
1878 struct lp_build_tgsi_context * bld_base,
1879 struct lp_build_emit_data * emit_data)
1880 {
1881
1882 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1883 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1884 PIPE_FUNC_EQUAL, emit_data->args[1],
1885 bld_base->uint_bld.zero);
1886 /* We want to make sure that we never divide/mod by zero to not
1887 * generate sigfpe. We don't want to crash just because the
1888 * shader is doing something weird. */
1889 LLVMValueRef divisor = LLVMBuildOr(builder,
1890 div_mask,
1891 emit_data->args[1], "");
1892 LLVMValueRef result = lp_build_div(&bld_base->uint_bld,
1893 emit_data->args[0], divisor);
1894 /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1895 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1896 div_mask,
1897 result, "");
1898 }
1899
1900 /* TGSI_OPCODE_UMAX (CPU Only) */
1901 static void
1902 umax_emit_cpu(
1903 const struct lp_build_tgsi_action * action,
1904 struct lp_build_tgsi_context * bld_base,
1905 struct lp_build_emit_data * emit_data)
1906 {
1907 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint_bld,
1908 emit_data->args[0], emit_data->args[1]);
1909 }
1910
1911 /* TGSI_OPCODE_UMIN (CPU Only) */
1912 static void
1913 umin_emit_cpu(
1914 const struct lp_build_tgsi_action * action,
1915 struct lp_build_tgsi_context * bld_base,
1916 struct lp_build_emit_data * emit_data)
1917 {
1918 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint_bld,
1919 emit_data->args[0], emit_data->args[1]);
1920 }
1921
1922 /* TGSI_OPCODE_UMOD (CPU Only) */
1923 static void
1924 umod_emit_cpu(
1925 const struct lp_build_tgsi_action * action,
1926 struct lp_build_tgsi_context * bld_base,
1927 struct lp_build_emit_data * emit_data)
1928 {
1929 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1930 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1931 PIPE_FUNC_EQUAL, emit_data->args[1],
1932 bld_base->uint_bld.zero);
1933 /* We want to make sure that we never divide/mod by zero to not
1934 * generate sigfpe. We don't want to crash just because the
1935 * shader is doing something weird. */
1936 LLVMValueRef divisor = LLVMBuildOr(builder,
1937 div_mask,
1938 emit_data->args[1], "");
1939 LLVMValueRef result = lp_build_mod(&bld_base->uint_bld,
1940 emit_data->args[0], divisor);
1941 /* umod by zero is guaranteed to return 0xffffffff */
1942 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1943 div_mask,
1944 result, "");
1945 }
1946
1947 /* TGSI_OPCODE_USET Helper (CPU Only) */
1948 static void
1949 uset_emit_cpu(
1950 const struct lp_build_tgsi_action * action,
1951 struct lp_build_tgsi_context * bld_base,
1952 struct lp_build_emit_data * emit_data,
1953 unsigned pipe_func)
1954 {
1955 LLVMValueRef cond = lp_build_cmp(&bld_base->uint_bld, pipe_func,
1956 emit_data->args[0], emit_data->args[1]);
1957 emit_data->output[emit_data->chan] = cond;
1958 }
1959
1960
1961 /* TGSI_OPCODE_USEQ (CPU Only) */
1962 static void
1963 useq_emit_cpu(
1964 const struct lp_build_tgsi_action * action,
1965 struct lp_build_tgsi_context * bld_base,
1966 struct lp_build_emit_data * emit_data)
1967 {
1968 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1969 }
1970
1971 /* TGSI_OPCODE_ISGE (CPU Only) */
1972 static void
1973 usge_emit_cpu(
1974 const struct lp_build_tgsi_action * action,
1975 struct lp_build_tgsi_context * bld_base,
1976 struct lp_build_emit_data * emit_data)
1977 {
1978 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1979 }
1980
1981 /* TGSI_OPCODE_USHR (CPU Only) */
1982 static void
1983 ushr_emit_cpu(
1984 const struct lp_build_tgsi_action * action,
1985 struct lp_build_tgsi_context * bld_base,
1986 struct lp_build_emit_data * emit_data)
1987 {
1988 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1989 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
1990 uint_bld->type.width - 1);
1991 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
1992 emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
1993 masked_count);
1994 }
1995
1996 /* TGSI_OPCODE_ISLT (CPU Only) */
1997 static void
1998 uslt_emit_cpu(
1999 const struct lp_build_tgsi_action * action,
2000 struct lp_build_tgsi_context * bld_base,
2001 struct lp_build_emit_data * emit_data)
2002 {
2003 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2004 }
2005
2006 /* TGSI_OPCODE_USNE (CPU Only) */
2007
2008 static void
2009 usne_emit_cpu(
2010 const struct lp_build_tgsi_action * action,
2011 struct lp_build_tgsi_context * bld_base,
2012 struct lp_build_emit_data * emit_data)
2013 {
2014 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2015 }
2016
2017 /* TGSI_OPCODE_XOR */
2018 static void
2019 xor_emit_cpu(
2020 const struct lp_build_tgsi_action * action,
2021 struct lp_build_tgsi_context * bld_base,
2022 struct lp_build_emit_data * emit_data)
2023 {
2024 emit_data->output[emit_data->chan] = lp_build_xor(&bld_base->uint_bld,
2025 emit_data->args[0],
2026 emit_data->args[1]);
2027 }
2028
2029 /* TGSI_OPCODE_DABS (CPU Only) */
2030 static void
2031 dabs_emit_cpu(
2032 const struct lp_build_tgsi_action * action,
2033 struct lp_build_tgsi_context * bld_base,
2034 struct lp_build_emit_data * emit_data)
2035 {
2036 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
2037 emit_data->args[0]);
2038 }
2039
2040 /* TGSI_OPCODE_DNEG (CPU Only) */
2041 static void
2042 dneg_emit_cpu(
2043 const struct lp_build_tgsi_action * action,
2044 struct lp_build_tgsi_context * bld_base,
2045 struct lp_build_emit_data * emit_data)
2046 {
2047 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
2048 bld_base->dbl_bld.zero,
2049 emit_data->args[0]);
2050 }
2051
2052 /* TGSI_OPCODE_DSET Helper (CPU Only) */
2053 static void
2054 dset_emit_cpu(
2055 const struct lp_build_tgsi_action * action,
2056 struct lp_build_tgsi_context * bld_base,
2057 struct lp_build_emit_data * emit_data,
2058 unsigned pipe_func)
2059 {
2060 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2061 LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
2062 emit_data->args[0], emit_data->args[1]);
2063 /* arguments were 64 bit but store as 32 bit */
2064 cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2065 emit_data->output[emit_data->chan] = cond;
2066 }
2067
2068 /* TGSI_OPCODE_DSEQ (CPU Only) */
2069 static void
2070 dseq_emit_cpu(
2071 const struct lp_build_tgsi_action * action,
2072 struct lp_build_tgsi_context * bld_base,
2073 struct lp_build_emit_data * emit_data)
2074 {
2075 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2076 }
2077
2078 /* TGSI_OPCODE_DSGE (CPU Only) */
2079 static void
2080 dsge_emit_cpu(
2081 const struct lp_build_tgsi_action * action,
2082 struct lp_build_tgsi_context * bld_base,
2083 struct lp_build_emit_data * emit_data)
2084 {
2085 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2086 }
2087
2088 /* TGSI_OPCODE_DSLT (CPU Only) */
2089 static void
2090 dslt_emit_cpu(
2091 const struct lp_build_tgsi_action * action,
2092 struct lp_build_tgsi_context * bld_base,
2093 struct lp_build_emit_data * emit_data)
2094 {
2095 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2096 }
2097
2098 /* TGSI_OPCODE_DSNE (CPU Only) */
2099 static void
2100 dsne_emit_cpu(
2101 const struct lp_build_tgsi_action * action,
2102 struct lp_build_tgsi_context * bld_base,
2103 struct lp_build_emit_data * emit_data)
2104 {
2105 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2106 }
2107
2108 /* Double Reciprocal squareroot (CPU Only) */
2109 static void
2110 drecip_sqrt_emit_cpu(
2111 const struct lp_build_tgsi_action * action,
2112 struct lp_build_tgsi_context * bld_base,
2113 struct lp_build_emit_data * emit_data)
2114 {
2115 emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
2116 emit_data->args[0]);
2117 }
2118
2119 /* Double Squareroot (CPU Only) */
2120 static void
2121 dsqrt_emit_cpu(
2122 const struct lp_build_tgsi_action * action,
2123 struct lp_build_tgsi_context * bld_base,
2124 struct lp_build_emit_data * emit_data)
2125 {
2126 emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
2127 emit_data->args[0]);
2128 }
2129
2130 void
2131 lp_set_default_actions_cpu(
2132 struct lp_build_tgsi_context * bld_base)
2133 {
2134 lp_set_default_actions(bld_base);
2135 bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
2136 bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
2137 bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
2138 bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
2139 bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
2140 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
2141 bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
2142 bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
2143 bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
2144 bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
2145 bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
2146 bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
2147 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
2148 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;
2149 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = fslt_emit_cpu;
2150 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = fsne_emit_cpu;
2151
2152 bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
2153 bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
2154 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu;
2155 bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu;
2156 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu;
2157 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = imin_emit_cpu;
2158 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu;
2159 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
2160 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
2161 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
2162
2163 bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
2164 bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
2165 bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
2166 bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
2167 bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu;
2168 bld_base->op_actions[TGSI_OPCODE_NOT].emit = not_emit_cpu;
2169 bld_base->op_actions[TGSI_OPCODE_OR].emit = or_emit_cpu;
2170 bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
2171 bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
2172 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
2173 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
2174 bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
2175 bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
2176 bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
2177 bld_base->op_actions[TGSI_OPCODE_SHL].emit = shl_emit_cpu;
2178 bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
2179 bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
2180 bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
2181 bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
2182 bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
2183 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
2184
2185 bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
2186 bld_base->sqrt_action.emit = sqrt_emit_cpu;
2187
2188 bld_base->op_actions[TGSI_OPCODE_UADD].emit = uadd_emit_cpu;
2189 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = ucmp_emit_cpu;
2190 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = udiv_emit_cpu;
2191 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = umax_emit_cpu;
2192 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = umin_emit_cpu;
2193 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = umod_emit_cpu;
2194 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = useq_emit_cpu;
2195 bld_base->op_actions[TGSI_OPCODE_USGE].emit = usge_emit_cpu;
2196 bld_base->op_actions[TGSI_OPCODE_USHR].emit = ushr_emit_cpu;
2197 bld_base->op_actions[TGSI_OPCODE_USLT].emit = uslt_emit_cpu;
2198 bld_base->op_actions[TGSI_OPCODE_USNE].emit = usne_emit_cpu;
2199
2200 bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
2201
2202 bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
2203 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
2204 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
2205 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
2206 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
2207 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
2208
2209 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
2210 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
2211
2212 }