gallivm: add PK2H/UP2H support
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_action.c
1 /**************************************************************************
2 *
3 * Copyright 2011-2012 Advanced Micro Devices, Inc.
4 * Copyright 2009 VMware, Inc.
5 * Copyright 2007-2008 VMware, Inc.
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial portions
18 * of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 **************************************************************************/
29
30 /**
31 * @file
32 * TGSI to LLVM IR translation.
33 *
34 * @author Jose Fonseca <jfonseca@vmware.com>
35 * @author Tom Stellard <thomas.stellard@amd.com>
36 *
37 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
38 * Brian Paul, and others.
39 */
40
41
42 #include "lp_bld_tgsi_action.h"
43
44 #include "lp_bld_tgsi.h"
45 #include "lp_bld_arit.h"
46 #include "lp_bld_bitarit.h"
47 #include "lp_bld_const.h"
48 #include "lp_bld_conv.h"
49 #include "lp_bld_gather.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_pack.h"
52
53 #include "tgsi/tgsi_exec.h"
54
55 /* XXX: The CPU only defaults should be replaced by generic ones. In most
56 * cases, the CPU defaults are just wrappers around a function in
57 * lp_build_arit.c and these functions should be inlined here and the CPU
58 * generic code should be removed and placed elsewhere.
59 */
60
61 /* Default actions */
62
63 /* Generic fetch_arg functions */
64
65 static void scalar_unary_fetch_args(
66 struct lp_build_tgsi_context * bld_base,
67 struct lp_build_emit_data * emit_data)
68 {
69 /* src0.x */
70 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
71 emit_data->arg_count = 1;
72 emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
73 }
74
75 static void scalar_binary_fetch_args(
76 struct lp_build_tgsi_context * bld_base,
77 struct lp_build_emit_data * emit_data)
78 {
79 /* src0.x */
80 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
81 0, TGSI_CHAN_X);
82 /* src1.x */
83 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
84 1, TGSI_CHAN_X);
85 emit_data->arg_count = 2;
86 emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
87 }
88
89 /* TGSI_OPCODE_ADD */
90 static void
91 add_emit(
92 const struct lp_build_tgsi_action * action,
93 struct lp_build_tgsi_context * bld_base,
94 struct lp_build_emit_data * emit_data)
95 {
96 emit_data->output[emit_data->chan] = LLVMBuildFAdd(
97 bld_base->base.gallivm->builder,
98 emit_data->args[0], emit_data->args[1], "");
99 }
100
101 /* TGSI_OPCODE_ARR */
102 static void
103 arr_emit(
104 const struct lp_build_tgsi_action * action,
105 struct lp_build_tgsi_context * bld_base,
106 struct lp_build_emit_data * emit_data)
107 {
108 LLVMValueRef tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ROUND, emit_data->args[0]);
109 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
110 bld_base->uint_bld.vec_type, "");
111 }
112
113 /* TGSI_OPCODE_CLAMP */
114 static void
115 clamp_emit(
116 const struct lp_build_tgsi_action * action,
117 struct lp_build_tgsi_context * bld_base,
118 struct lp_build_emit_data * emit_data)
119 {
120 LLVMValueRef tmp;
121 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
122 emit_data->args[0],
123 emit_data->args[1]);
124 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
125 TGSI_OPCODE_MIN, tmp, emit_data->args[2]);
126 }
127
128 /* DP* Helper */
129
130 static void
131 dp_fetch_args(
132 struct lp_build_tgsi_context * bld_base,
133 struct lp_build_emit_data * emit_data,
134 unsigned dp_components)
135 {
136 unsigned chan, src;
137 for (src = 0; src < 2; src++) {
138 for (chan = 0; chan < dp_components; chan++) {
139 emit_data->args[(src * dp_components) + chan] =
140 lp_build_emit_fetch(bld_base, emit_data->inst, src, chan);
141 }
142 }
143 emit_data->dst_type = bld_base->base.elem_type;
144 }
145
146 /* TGSI_OPCODE_DP2 */
147 static void
148 dp2_fetch_args(
149 struct lp_build_tgsi_context * bld_base,
150 struct lp_build_emit_data * emit_data)
151 {
152 dp_fetch_args(bld_base, emit_data, 2);
153 }
154
155 static void
156 dp2_emit(
157 const struct lp_build_tgsi_action * action,
158 struct lp_build_tgsi_context * bld_base,
159 struct lp_build_emit_data * emit_data)
160 {
161 LLVMValueRef tmp0, tmp1;
162 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
163 emit_data->args[0] /* src0.x */,
164 emit_data->args[2] /* src1.x */);
165 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
166 emit_data->args[1] /* src0.y */,
167 emit_data->args[3] /* src1.y */);
168 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
169 TGSI_OPCODE_ADD, tmp0, tmp1);
170 }
171
172 static struct lp_build_tgsi_action dp2_action = {
173 dp2_fetch_args, /* fetch_args */
174 dp2_emit /* emit */
175 };
176
177 /* TGSI_OPCODE_DP2A */
178 static void
179 dp2a_fetch_args(
180 struct lp_build_tgsi_context * bld_base,
181 struct lp_build_emit_data * emit_data)
182 {
183 dp_fetch_args(bld_base, emit_data, 2);
184 emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
185 2, TGSI_CHAN_X);
186 }
187
188 static void
189 dp2a_emit(
190 const struct lp_build_tgsi_action * action,
191 struct lp_build_tgsi_context * bld_base,
192 struct lp_build_emit_data * emit_data)
193 {
194 LLVMValueRef tmp;
195 tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
196 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
197 emit_data->args[5], tmp);
198 }
199
200 static struct lp_build_tgsi_action dp2a_action = {
201 dp2a_fetch_args, /* fetch_args */
202 dp2a_emit /* emit */
203 };
204
205 /* TGSI_OPCODE_DP3 */
206 static void
207 dp3_fetch_args(
208 struct lp_build_tgsi_context * bld_base,
209 struct lp_build_emit_data * emit_data)
210 {
211 dp_fetch_args(bld_base, emit_data, 3);
212 }
213
214 static void
215 dp3_emit(
216 const struct lp_build_tgsi_action * action,
217 struct lp_build_tgsi_context * bld_base,
218 struct lp_build_emit_data * emit_data)
219 {
220 LLVMValueRef tmp0, tmp1;
221 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
222 emit_data->args[0] /* src0.x */,
223 emit_data->args[3] /* src1.x */);
224 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
225 emit_data->args[1] /* src0.y */,
226 emit_data->args[4] /* src1.y */);
227 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0);
228 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
229 emit_data->args[2] /* src0.z */,
230 emit_data->args[5] /* src1.z */);
231 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
232 TGSI_OPCODE_ADD, tmp0, tmp1);
233 }
234
235 static struct lp_build_tgsi_action dp3_action = {
236 dp3_fetch_args, /* fetch_args */
237 dp3_emit /* emit */
238 };
239
240 /* TGSI_OPCODDE_DP4 */
241
242 static void
243 dp4_fetch_args(
244 struct lp_build_tgsi_context * bld_base,
245 struct lp_build_emit_data * emit_data)
246 {
247 dp_fetch_args(bld_base, emit_data, 4);
248 }
249
250 static void
251 dp4_emit(
252 const struct lp_build_tgsi_action * action,
253 struct lp_build_tgsi_context * bld_base,
254 struct lp_build_emit_data * emit_data)
255 {
256 LLVMValueRef tmp0, tmp1;
257 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
258 emit_data->args[0] /* src0.x */,
259 emit_data->args[4] /* src1.x */);
260 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
261 emit_data->args[1] /* src0.y */,
262 emit_data->args[5] /* src1.y */);
263 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
264 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
265 emit_data->args[2] /* src0.z */,
266 emit_data->args[6] /* src1.z */);
267 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
268 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
269 emit_data->args[3] /* src0.w */,
270 emit_data->args[7] /* src1.w */);
271 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
272 TGSI_OPCODE_ADD, tmp0, tmp1);
273 }
274
275 static struct lp_build_tgsi_action dp4_action = {
276 dp4_fetch_args, /* fetch_args */
277 dp4_emit /* emit */
278 };
279
280 /* TGSI_OPCODE_DPH */
281 static void
282 dph_fetch_args(
283 struct lp_build_tgsi_context * bld_base,
284 struct lp_build_emit_data * emit_data)
285 {
286 dp_fetch_args(bld_base, emit_data, 4);
287 /* src0.w */
288 emit_data->args[3] = bld_base->base.one;
289 }
290
291 const struct lp_build_tgsi_action dph_action = {
292 dph_fetch_args, /* fetch_args */
293 dp4_emit /* emit */
294 };
295
296 /* TGSI_OPCODE_DST */
297 static void
298 dst_fetch_args(
299 struct lp_build_tgsi_context * bld_base,
300 struct lp_build_emit_data * emit_data)
301 {
302 /* src0.y */
303 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
304 0, TGSI_CHAN_Y);
305 /* src0.z */
306 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
307 0, TGSI_CHAN_Z);
308 /* src1.y */
309 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
310 1, TGSI_CHAN_Y);
311 /* src1.w */
312 emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
313 1, TGSI_CHAN_W);
314 }
315
316 static void
317 dst_emit(
318 const struct lp_build_tgsi_action * action,
319 struct lp_build_tgsi_context * bld_base,
320 struct lp_build_emit_data * emit_data)
321 {
322 /* dst.x */
323 emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
324
325 /* dst.y */
326 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
327 TGSI_OPCODE_MUL,
328 emit_data->args[0] /* src0.y */,
329 emit_data->args[2] /* src1.y */);
330 /* dst.z */
331 emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
332
333 /* dst.w */
334 emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
335 }
336
337 static struct lp_build_tgsi_action dst_action = {
338 dst_fetch_args, /* fetch_args */
339 dst_emit /* emit */
340 };
341
342 /* TGSI_OPCODE_END */
343 static void
344 end_emit(
345 const struct lp_build_tgsi_action * action,
346 struct lp_build_tgsi_context * bld_base,
347 struct lp_build_emit_data * emit_data)
348 {
349 bld_base->pc = -1;
350 }
351
352 /* TGSI_OPCODE_EXP */
353
354 static void
355 exp_emit(
356 const struct lp_build_tgsi_action * action,
357 struct lp_build_tgsi_context * bld_base,
358 struct lp_build_emit_data * emit_data)
359 {
360 LLVMValueRef floor_x;
361
362 /* floor( src0.x ) */
363 floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
364 emit_data->args[0]);
365
366 /* 2 ^ floor( src0.x ) */
367 emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
368 TGSI_OPCODE_EX2, floor_x);
369
370 /* src0.x - floor( src0.x ) */
371 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
372 TGSI_OPCODE_SUB, emit_data->args[0] /* src0.x */, floor_x);
373
374 /* 2 ^ src0.x */
375 emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base,
376 TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */);
377
378 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
379 }
380
381 const struct lp_build_tgsi_action exp_action = {
382 scalar_unary_fetch_args, /* fetch_args */
383 exp_emit /* emit */
384 };
385
386 /* TGSI_OPCODE_FRC */
387
388 static void
389 frc_emit(
390 const struct lp_build_tgsi_action * action,
391 struct lp_build_tgsi_context * bld_base,
392 struct lp_build_emit_data * emit_data)
393 {
394 LLVMValueRef tmp;
395 tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
396 emit_data->args[0]);
397 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
398 TGSI_OPCODE_SUB, emit_data->args[0], tmp);
399 }
400
401 /* TGSI_OPCODE_KILL_IF */
402
403 static void
404 kil_fetch_args(
405 struct lp_build_tgsi_context * bld_base,
406 struct lp_build_emit_data * emit_data)
407 {
408 /* src0.x */
409 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
410 0, TGSI_CHAN_X);
411 /* src0.y */
412 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
413 0, TGSI_CHAN_Y);
414 /* src0.z */
415 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
416 0, TGSI_CHAN_Z);
417 /* src0.w */
418 emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
419 0, TGSI_CHAN_W);
420 emit_data->arg_count = 4;
421 emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
422 }
423
424 /* TGSI_OPCODE_KILL */
425
426 static void
427 kilp_fetch_args(
428 struct lp_build_tgsi_context * bld_base,
429 struct lp_build_emit_data * emit_data)
430 {
431 emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
432 }
433
434 /* TGSI_OPCODE_LIT */
435
436 static void
437 lit_fetch_args(
438 struct lp_build_tgsi_context * bld_base,
439 struct lp_build_emit_data * emit_data)
440 {
441 /* src0.x */
442 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
443 /* src0.y */
444 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
445 /* src0.w */
446 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
447 emit_data->arg_count = 3;
448 }
449
450 static void
451 lit_emit(
452 const struct lp_build_tgsi_action * action,
453 struct lp_build_tgsi_context * bld_base,
454 struct lp_build_emit_data * emit_data)
455 {
456 LLVMValueRef tmp0, tmp1, tmp2;
457
458 /* dst.x */
459 emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
460
461 /* dst. y */
462 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
463 TGSI_OPCODE_MAX,
464 emit_data->args[0] /* src0.x */,
465 bld_base->base.zero);
466
467 /* dst.z */
468 /* XMM[1] = SrcReg[0].yyyy */
469 tmp1 = emit_data->args[1];
470 /* XMM[1] = max(XMM[1], 0) */
471 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
472 tmp1, bld_base->base.zero);
473 /* XMM[2] = SrcReg[0].wwww */
474 tmp2 = emit_data->args[2];
475 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
476 tmp1, tmp2);
477 tmp0 = emit_data->args[0];
478 emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base,
479 TGSI_OPCODE_CMP,
480 tmp0, bld_base->base.zero, tmp1);
481 /* dst.w */
482 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
483 }
484
485 static struct lp_build_tgsi_action lit_action = {
486 lit_fetch_args, /* fetch_args */
487 lit_emit /* emit */
488 };
489
490 /* TGSI_OPCODE_LOG */
491
492 static void
493 log_emit(
494 const struct lp_build_tgsi_action * action,
495 struct lp_build_tgsi_context * bld_base,
496 struct lp_build_emit_data * emit_data)
497 {
498
499 LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
500
501 /* abs( src0.x) */
502 abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
503 emit_data->args[0] /* src0.x */);
504
505 /* log( abs( src0.x ) ) */
506 log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
507 abs_x);
508
509 /* floor( log( abs( src0.x ) ) ) */
510 flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
511 log_abs_x);
512 /* dst.x */
513 emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
514
515 /* dst.y */
516 ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
517 flr_log_abs_x);
518
519 /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
520 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
521 TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x);
522
523 /* dst.x */
524 emit_data->output[TGSI_CHAN_Z] = log_abs_x;
525
526 /* dst.w */
527 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
528 }
529
530 static struct lp_build_tgsi_action log_action = {
531 scalar_unary_fetch_args, /* fetch_args */
532 log_emit /* emit */
533 };
534
535 /* TGSI_OPCODE_PK2H */
536
537 static void
538 pk2h_fetch_args(
539 struct lp_build_tgsi_context * bld_base,
540 struct lp_build_emit_data * emit_data)
541 {
542 /* src0.x */
543 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
544 0, TGSI_CHAN_X);
545 /* src0.y */
546 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
547 0, TGSI_CHAN_Y);
548 }
549
550 static void
551 pk2h_emit(const struct lp_build_tgsi_action *action,
552 struct lp_build_tgsi_context *bld_base,
553 struct lp_build_emit_data *emit_data)
554 {
555 struct gallivm_state *gallivm = bld_base->base.gallivm;
556 struct lp_type f16i_t;
557 LLVMValueRef lo, hi, res;
558
559 f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
560 lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
561 hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
562 /* maybe some interleave doubling vector width would be useful... */
563 lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
564 hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
565 res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
566
567 emit_data->output[emit_data->chan] = res;
568 }
569
570 static struct lp_build_tgsi_action pk2h_action = {
571 pk2h_fetch_args, /* fetch_args */
572 pk2h_emit /* emit */
573 };
574
575 /* TGSI_OPCODE_UP2H */
576
577 static void
578 up2h_emit(const struct lp_build_tgsi_action *action,
579 struct lp_build_tgsi_context *bld_base,
580 struct lp_build_emit_data *emit_data)
581 {
582 struct gallivm_state *gallivm = bld_base->base.gallivm;
583 LLVMBuilderRef builder = gallivm->builder;
584 LLVMContextRef context = gallivm->context;
585 LLVMValueRef lo, hi, res[2], arg;
586 unsigned nr = bld_base->base.type.length;
587 LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
588
589 arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
590 lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
591 hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
592 res[0] = lp_build_half_to_float(gallivm, lo);
593 res[1] = lp_build_half_to_float(gallivm, hi);
594
595 emit_data->output[0] = emit_data->output[2] = res[0];
596 emit_data->output[1] = emit_data->output[3] = res[1];
597 }
598
599 static struct lp_build_tgsi_action up2h_action = {
600 scalar_unary_fetch_args, /* fetch_args */
601 up2h_emit /* emit */
602 };
603
604 /* TGSI_OPCODE_LRP */
605
606 static void
607 lrp_emit(
608 const struct lp_build_tgsi_action * action,
609 struct lp_build_tgsi_context * bld_base,
610 struct lp_build_emit_data * emit_data)
611 {
612 struct lp_build_context *bld = &bld_base->base;
613 LLVMValueRef inv, a, b;
614
615 /* This uses the correct version: (1 - t)*a + t*b
616 *
617 * An alternative version is "a + t*(b-a)". The problem is this version
618 * doesn't return "b" for t = 1, because "a + (b-a)" isn't equal to "b"
619 * because of the floating-point rounding.
620 */
621 inv = lp_build_sub(bld, bld_base->base.one, emit_data->args[0]);
622 a = lp_build_mul(bld, emit_data->args[1], emit_data->args[0]);
623 b = lp_build_mul(bld, emit_data->args[2], inv);
624 emit_data->output[emit_data->chan] = lp_build_add(bld, a, b);
625 }
626
627 /* TGSI_OPCODE_MAD */
628
629 static void
630 mad_emit(
631 const struct lp_build_tgsi_action * action,
632 struct lp_build_tgsi_context * bld_base,
633 struct lp_build_emit_data * emit_data)
634 {
635 LLVMValueRef tmp;
636 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
637 emit_data->args[0],
638 emit_data->args[1]);
639 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
640 TGSI_OPCODE_ADD, tmp, emit_data->args[2]);
641 }
642
643 /* TGSI_OPCODE_MOV */
644
645 static void
646 mov_emit(
647 const struct lp_build_tgsi_action * action,
648 struct lp_build_tgsi_context * bld_base,
649 struct lp_build_emit_data * emit_data)
650 {
651 emit_data->output[emit_data->chan] = emit_data->args[0];
652 }
653
654 /* TGSI_OPCODE_MUL */
655 static void
656 mul_emit(
657 const struct lp_build_tgsi_action * action,
658 struct lp_build_tgsi_context * bld_base,
659 struct lp_build_emit_data * emit_data)
660 {
661 emit_data->output[emit_data->chan] = LLVMBuildFMul(
662 bld_base->base.gallivm->builder,
663 emit_data->args[0], emit_data->args[1], "");
664 }
665
666 /*.TGSI_OPCODE_DIV.*/
667 static void fdiv_emit(
668 const struct lp_build_tgsi_action * action,
669 struct lp_build_tgsi_context * bld_base,
670 struct lp_build_emit_data * emit_data)
671 {
672 emit_data->output[emit_data->chan] = LLVMBuildFDiv(
673 bld_base->base.gallivm->builder,
674 emit_data->args[0], emit_data->args[1], "");
675 }
676
677 /*.TGSI_OPCODE_RCP.*/
678 static void rcp_emit(
679 const struct lp_build_tgsi_action * action,
680 struct lp_build_tgsi_context * bld_base,
681 struct lp_build_emit_data * emit_data)
682 {
683 LLVMValueRef one;
684 one = lp_build_const_float(bld_base->base.gallivm, 1.0f);
685 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
686 TGSI_OPCODE_DIV, one, emit_data->args[0]);
687 }
688
689 /* TGSI_OPCODE_POW */
690
691 static void
692 pow_emit(
693 const struct lp_build_tgsi_action * action,
694 struct lp_build_tgsi_context * bld_base,
695 struct lp_build_emit_data * emit_data)
696 {
697 emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
698 emit_data->args[0], emit_data->args[1]);
699 }
700
701 static struct lp_build_tgsi_action pow_action = {
702 scalar_binary_fetch_args, /* fetch_args */
703 pow_emit /* emit */
704 };
705
706 /* TGSI_OPCODE_RSQ */
707
708 static void
709 rsq_emit(
710 const struct lp_build_tgsi_action * action,
711 struct lp_build_tgsi_context * bld_base,
712 struct lp_build_emit_data * emit_data)
713 {
714 if (bld_base->rsq_action.emit) {
715 bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
716 } else {
717 emit_data->output[emit_data->chan] = bld_base->base.undef;
718 }
719 }
720
721 const struct lp_build_tgsi_action rsq_action = {
722 scalar_unary_fetch_args, /* fetch_args */
723 rsq_emit /* emit */
724
725 };
726
727 /* TGSI_OPCODE_SQRT */
728
729 static void
730 sqrt_emit(
731 const struct lp_build_tgsi_action * action,
732 struct lp_build_tgsi_context * bld_base,
733 struct lp_build_emit_data * emit_data)
734 {
735 if (bld_base->sqrt_action.emit) {
736 bld_base->sqrt_action.emit(&bld_base->sqrt_action, bld_base, emit_data);
737 } else {
738 emit_data->output[emit_data->chan] = bld_base->base.undef;
739 }
740 }
741
742 const struct lp_build_tgsi_action sqrt_action = {
743 scalar_unary_fetch_args, /* fetch_args */
744 sqrt_emit /* emit */
745 };
746
747 /* TGSI_OPCODE_SCS */
748 static void
749 scs_emit(
750 const struct lp_build_tgsi_action * action,
751 struct lp_build_tgsi_context * bld_base,
752 struct lp_build_emit_data * emit_data)
753 {
754 /* dst.x */
755 emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
756 TGSI_OPCODE_COS, emit_data->args[0]);
757 /* dst.y */
758 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
759 TGSI_OPCODE_SIN, emit_data->args[0]);
760 /* dst.z */
761 emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
762
763 /* dst.w */
764 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
765 }
766
767 const struct lp_build_tgsi_action scs_action = {
768 scalar_unary_fetch_args, /* fetch_args */
769 scs_emit /* emit */
770 };
771
772 /* TGSI_OPCODE_SUB */
773 static void
774 sub_emit(
775 const struct lp_build_tgsi_action * action,
776 struct lp_build_tgsi_context * bld_base,
777 struct lp_build_emit_data * emit_data)
778 {
779 emit_data->output[emit_data->chan] =
780 LLVMBuildFSub(bld_base->base.gallivm->builder,
781 emit_data->args[0],
782 emit_data->args[1], "");
783 }
784
785 /* TGSI_OPCODE_F2U */
786 static void
787 f2u_emit(
788 const struct lp_build_tgsi_action * action,
789 struct lp_build_tgsi_context * bld_base,
790 struct lp_build_emit_data * emit_data)
791 {
792 emit_data->output[emit_data->chan] =
793 LLVMBuildFPToUI(bld_base->base.gallivm->builder,
794 emit_data->args[0],
795 bld_base->base.int_vec_type, "");
796 }
797
798 /* TGSI_OPCODE_U2F */
799 static void
800 u2f_emit(
801 const struct lp_build_tgsi_action * action,
802 struct lp_build_tgsi_context * bld_base,
803 struct lp_build_emit_data * emit_data)
804 {
805 emit_data->output[emit_data->chan] =
806 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
807 emit_data->args[0],
808 bld_base->base.vec_type, "");
809 }
810
811 static void
812 umad_emit(
813 const struct lp_build_tgsi_action * action,
814 struct lp_build_tgsi_context * bld_base,
815 struct lp_build_emit_data * emit_data)
816 {
817 LLVMValueRef tmp;
818 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMUL,
819 emit_data->args[0],
820 emit_data->args[1]);
821 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
822 TGSI_OPCODE_UADD, tmp, emit_data->args[2]);
823 }
824
825 /* TGSI_OPCODE_UMUL */
826 static void
827 umul_emit(
828 const struct lp_build_tgsi_action * action,
829 struct lp_build_tgsi_context * bld_base,
830 struct lp_build_emit_data * emit_data)
831 {
832 emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint_bld,
833 emit_data->args[0], emit_data->args[1]);
834 }
835
836 /* TGSI_OPCODE_IMUL_HI */
837 static void
838 imul_hi_emit(
839 const struct lp_build_tgsi_action * action,
840 struct lp_build_tgsi_context * bld_base,
841 struct lp_build_emit_data * emit_data)
842 {
843 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
844 struct lp_build_context *int_bld = &bld_base->int_bld;
845 struct lp_type type = int_bld->type;
846 LLVMValueRef src0, src1;
847 LLVMValueRef dst64;
848 LLVMTypeRef typeRef;
849
850 assert(type.width == 32);
851 type.width = 64;
852 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
853 src0 = LLVMBuildSExt(builder, emit_data->args[0], typeRef, "");
854 src1 = LLVMBuildSExt(builder, emit_data->args[1], typeRef, "");
855 dst64 = LLVMBuildMul(builder, src0, src1, "");
856 dst64 = LLVMBuildAShr(
857 builder, dst64,
858 lp_build_const_vec(bld_base->base.gallivm, type, 32), "");
859 type.width = 32;
860 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
861 emit_data->output[emit_data->chan] =
862 LLVMBuildTrunc(builder, dst64, typeRef, "");
863 }
864
865 /* TGSI_OPCODE_UMUL_HI */
866 static void
867 umul_hi_emit(
868 const struct lp_build_tgsi_action * action,
869 struct lp_build_tgsi_context * bld_base,
870 struct lp_build_emit_data * emit_data)
871 {
872 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
873 struct lp_build_context *uint_bld = &bld_base->uint_bld;
874 struct lp_type type = uint_bld->type;
875 LLVMValueRef src0, src1;
876 LLVMValueRef dst64;
877 LLVMTypeRef typeRef;
878
879 assert(type.width == 32);
880 type.width = 64;
881 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
882 src0 = LLVMBuildZExt(builder, emit_data->args[0], typeRef, "");
883 src1 = LLVMBuildZExt(builder, emit_data->args[1], typeRef, "");
884 dst64 = LLVMBuildMul(builder, src0, src1, "");
885 dst64 = LLVMBuildLShr(
886 builder, dst64,
887 lp_build_const_vec(bld_base->base.gallivm, type, 32), "");
888 type.width = 32;
889 typeRef = lp_build_vec_type(bld_base->base.gallivm, type);
890 emit_data->output[emit_data->chan] =
891 LLVMBuildTrunc(builder, dst64, typeRef, "");
892 }
893
894 /* TGSI_OPCODE_MAX */
895 static void fmax_emit(
896 const struct lp_build_tgsi_action * action,
897 struct lp_build_tgsi_context * bld_base,
898 struct lp_build_emit_data * emit_data)
899 {
900 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
901 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
902 LLVMBuildFCmp(builder, LLVMRealUGE,
903 emit_data->args[0], emit_data->args[1], ""),
904 emit_data->args[0], emit_data->args[1], "");
905 }
906
907 /* TGSI_OPCODE_MIN */
908 static void fmin_emit(
909 const struct lp_build_tgsi_action * action,
910 struct lp_build_tgsi_context * bld_base,
911 struct lp_build_emit_data * emit_data)
912 {
913 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
914 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
915 LLVMBuildFCmp(builder, LLVMRealUGE,
916 emit_data->args[0], emit_data->args[1], ""),
917 emit_data->args[1], emit_data->args[0], "");
918 }
919
920 /* TGSI_OPCODE_XPD */
921
922 static void
923 xpd_fetch_args(
924 struct lp_build_tgsi_context * bld_base,
925 struct lp_build_emit_data * emit_data)
926 {
927 dp_fetch_args(bld_base, emit_data, 3);
928 }
929
930 /**
931 * (a * b) - (c * d)
932 */
933 static LLVMValueRef
934 xpd_helper(
935 struct lp_build_tgsi_context * bld_base,
936 LLVMValueRef a,
937 LLVMValueRef b,
938 LLVMValueRef c,
939 LLVMValueRef d)
940 {
941 LLVMValueRef tmp0, tmp1;
942
943 tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
944 tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
945
946 return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1);
947 }
948
949 static void
950 xpd_emit(
951 const struct lp_build_tgsi_action * action,
952 struct lp_build_tgsi_context * bld_base,
953 struct lp_build_emit_data * emit_data)
954 {
955 emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
956 emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
957 emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
958
959 emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
960 emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
961 emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
962
963 emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
964 emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
965 emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
966
967 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
968 }
969
970 const struct lp_build_tgsi_action xpd_action = {
971 xpd_fetch_args, /* fetch_args */
972 xpd_emit /* emit */
973 };
974
975 /* TGSI_OPCODE_D2F */
976 static void
977 d2f_emit(
978 const struct lp_build_tgsi_action * action,
979 struct lp_build_tgsi_context * bld_base,
980 struct lp_build_emit_data * emit_data)
981 {
982 emit_data->output[emit_data->chan] =
983 LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
984 emit_data->args[0],
985 bld_base->base.vec_type, "");
986 }
987
988 /* TGSI_OPCODE_D2I */
989 static void
990 d2i_emit(
991 const struct lp_build_tgsi_action * action,
992 struct lp_build_tgsi_context * bld_base,
993 struct lp_build_emit_data * emit_data)
994 {
995 emit_data->output[emit_data->chan] =
996 LLVMBuildFPToSI(bld_base->base.gallivm->builder,
997 emit_data->args[0],
998 bld_base->base.int_vec_type, "");
999 }
1000
1001 /* TGSI_OPCODE_D2U */
1002 static void
1003 d2u_emit(
1004 const struct lp_build_tgsi_action * action,
1005 struct lp_build_tgsi_context * bld_base,
1006 struct lp_build_emit_data * emit_data)
1007 {
1008 emit_data->output[emit_data->chan] =
1009 LLVMBuildFPToUI(bld_base->base.gallivm->builder,
1010 emit_data->args[0],
1011 bld_base->base.int_vec_type, "");
1012 }
1013
1014 /* TGSI_OPCODE_F2D */
1015 static void
1016 f2d_emit(
1017 const struct lp_build_tgsi_action * action,
1018 struct lp_build_tgsi_context * bld_base,
1019 struct lp_build_emit_data * emit_data)
1020 {
1021 emit_data->output[emit_data->chan] =
1022 LLVMBuildFPExt(bld_base->base.gallivm->builder,
1023 emit_data->args[0],
1024 bld_base->dbl_bld.vec_type, "");
1025 }
1026
1027 /* TGSI_OPCODE_U2D */
1028 static void
1029 u2d_emit(
1030 const struct lp_build_tgsi_action * action,
1031 struct lp_build_tgsi_context * bld_base,
1032 struct lp_build_emit_data * emit_data)
1033 {
1034 emit_data->output[emit_data->chan] =
1035 LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1036 emit_data->args[0],
1037 bld_base->dbl_bld.vec_type, "");
1038 }
1039
1040 /* TGSI_OPCODE_I2D */
1041 static void
1042 i2d_emit(
1043 const struct lp_build_tgsi_action * action,
1044 struct lp_build_tgsi_context * bld_base,
1045 struct lp_build_emit_data * emit_data)
1046 {
1047 emit_data->output[emit_data->chan] =
1048 LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1049 emit_data->args[0],
1050 bld_base->dbl_bld.vec_type, "");
1051 }
1052
1053 /* TGSI_OPCODE_DMAD */
1054 static void
1055 dmad_emit(
1056 const struct lp_build_tgsi_action * action,
1057 struct lp_build_tgsi_context * bld_base,
1058 struct lp_build_emit_data * emit_data)
1059 {
1060 LLVMValueRef tmp;
1061 tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
1062 emit_data->args[0],
1063 emit_data->args[1]);
1064 emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
1065 TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
1066 }
1067
1068 /*.TGSI_OPCODE_DRCP.*/
1069 static void drcp_emit(
1070 const struct lp_build_tgsi_action * action,
1071 struct lp_build_tgsi_context * bld_base,
1072 struct lp_build_emit_data * emit_data)
1073 {
1074 LLVMValueRef one;
1075 one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
1076 emit_data->output[emit_data->chan] = LLVMBuildFDiv(
1077 bld_base->base.gallivm->builder,
1078 one, emit_data->args[0], "");
1079 }
1080
1081 /* TGSI_OPCODE_DFRAC */
1082 static void dfrac_emit(
1083 const struct lp_build_tgsi_action * action,
1084 struct lp_build_tgsi_context * bld_base,
1085 struct lp_build_emit_data * emit_data)
1086 {
1087 LLVMValueRef tmp;
1088 tmp = lp_build_floor(&bld_base->dbl_bld,
1089 emit_data->args[0]);
1090 emit_data->output[emit_data->chan] = LLVMBuildFSub(bld_base->base.gallivm->builder,
1091 emit_data->args[0], tmp, "");
1092 }
1093
1094 void
1095 lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
1096 {
1097 bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
1098 bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
1099 bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
1100 bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
1101 bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
1102 bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
1103 bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
1104 bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
1105 bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
1106 bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
1107 bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
1108 bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
1109 bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
1110 bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
1111 bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
1112 bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
1113
1114 bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
1115 bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
1116 bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
1117 bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
1118 bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
1119 bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
1120 bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
1121 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
1122 bld_base->op_actions[TGSI_OPCODE_KILL].fetch_args = kilp_fetch_args;
1123 bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
1124 bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
1125 bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
1126
1127 bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
1128 bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
1129 bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit;
1130 bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
1131 bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
1132 bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
1133 bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
1134 bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
1135 bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
1136 bld_base->op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit;
1137 bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit;
1138 bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
1139
1140 bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
1141 bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit;
1142 bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
1143 bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
1144 bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
1145 bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit;
1146 bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit;
1147
1148 bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
1149 bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
1150
1151 bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
1152 bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
1153 bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
1154 bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
1155
1156 bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
1157 bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
1158 bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
1159
1160 bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
1161 bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
1162 bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
1163
1164 bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
1165
1166 bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
1167 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
1168
1169 }
1170
1171 /* CPU Only default actions */
1172
1173 /* These actions are CPU only, because they could potentially output SSE
1174 * intrinsics.
1175 */
1176
1177 /* TGSI_OPCODE_ABS (CPU Only)*/
1178
1179 static void
1180 abs_emit_cpu(
1181 const struct lp_build_tgsi_action * action,
1182 struct lp_build_tgsi_context * bld_base,
1183 struct lp_build_emit_data * emit_data)
1184 {
1185 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->base,
1186 emit_data->args[0]);
1187 }
1188
1189 /* TGSI_OPCODE_ADD (CPU Only) */
1190 static void
1191 add_emit_cpu(
1192 const struct lp_build_tgsi_action * action,
1193 struct lp_build_tgsi_context * bld_base,
1194 struct lp_build_emit_data * emit_data)
1195 {
1196 emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base,
1197 emit_data->args[0], emit_data->args[1]);
1198 }
1199
1200 /* TGSI_OPCODE_AND (CPU Only) */
1201 static void
1202 and_emit_cpu(
1203 const struct lp_build_tgsi_action * action,
1204 struct lp_build_tgsi_context * bld_base,
1205 struct lp_build_emit_data * emit_data)
1206 {
1207 emit_data->output[emit_data->chan] = lp_build_and(&bld_base->uint_bld,
1208 emit_data->args[0], emit_data->args[1]);
1209 }
1210
1211 /* TGSI_OPCODE_ARL (CPU Only) */
1212 static void
1213 arl_emit_cpu(
1214 const struct lp_build_tgsi_action * action,
1215 struct lp_build_tgsi_context * bld_base,
1216 struct lp_build_emit_data * emit_data)
1217 {
1218 LLVMValueRef tmp;
1219 tmp = lp_build_floor(&bld_base->base,
1220 emit_data->args[0]);
1221 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
1222 bld_base->uint_bld.vec_type, "");
1223 }
1224
1225 /* TGSI_OPCODE_ARR (CPU Only) */
1226 static void
1227 arr_emit_cpu(
1228 const struct lp_build_tgsi_action * action,
1229 struct lp_build_tgsi_context * bld_base,
1230 struct lp_build_emit_data * emit_data)
1231 {
1232 emit_data->output[emit_data->chan] = lp_build_iround(&bld_base->base, emit_data->args[0]);
1233 }
1234
1235 /* TGSI_OPCODE_CEIL (CPU Only) */
1236 static void
1237 ceil_emit_cpu(
1238 const struct lp_build_tgsi_action * action,
1239 struct lp_build_tgsi_context * bld_base,
1240 struct lp_build_emit_data * emit_data)
1241 {
1242 emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base,
1243 emit_data->args[0]);
1244 }
1245
1246 /* TGSI_OPCODE_CMP (CPU Only) */
1247 static void
1248 cmp_emit_cpu(
1249 const struct lp_build_tgsi_action * action,
1250 struct lp_build_tgsi_context * bld_base,
1251 struct lp_build_emit_data * emit_data)
1252 {
1253 LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
1254 emit_data->args[0], bld_base->base.zero);
1255 emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1256 cond, emit_data->args[1], emit_data->args[2]);
1257 }
1258
1259 /* TGSI_OPCODE_UCMP (CPU Only) */
1260 static void
1261 ucmp_emit_cpu(
1262 const struct lp_build_tgsi_action * action,
1263 struct lp_build_tgsi_context * bld_base,
1264 struct lp_build_emit_data * emit_data)
1265 {
1266 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1267 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1268 LLVMValueRef unsigned_cond =
1269 LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
1270 LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
1271 unsigned_cond,
1272 uint_bld->zero);
1273 emit_data->output[emit_data->chan] =
1274 lp_build_select(&bld_base->base,
1275 cond, emit_data->args[1], emit_data->args[2]);
1276 }
1277
1278 /* TGSI_OPCODE_COS (CPU Only) */
1279 static void
1280 cos_emit_cpu(
1281 const struct lp_build_tgsi_action * action,
1282 struct lp_build_tgsi_context * bld_base,
1283 struct lp_build_emit_data * emit_data)
1284 {
1285 emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base,
1286 emit_data->args[0]);
1287 }
1288
1289 /* TGSI_OPCODE_DIV (CPU Only) */
1290 static void
1291 div_emit_cpu(
1292 const struct lp_build_tgsi_action * action,
1293 struct lp_build_tgsi_context * bld_base,
1294 struct lp_build_emit_data * emit_data)
1295 {
1296 emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base,
1297 emit_data->args[0], emit_data->args[1]);
1298 }
1299
1300 /* TGSI_OPCODE_EX2 (CPU Only) */
1301 static void
1302 ex2_emit_cpu(
1303 const struct lp_build_tgsi_action * action,
1304 struct lp_build_tgsi_context * bld_base,
1305 struct lp_build_emit_data * emit_data)
1306 {
1307 emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base,
1308 emit_data->args[0]);
1309 }
1310
1311 /* TGSI_OPCODE_F2I (CPU Only) */
1312 static void
1313 f2i_emit_cpu(
1314 const struct lp_build_tgsi_action * action,
1315 struct lp_build_tgsi_context * bld_base,
1316 struct lp_build_emit_data * emit_data)
1317 {
1318 emit_data->output[emit_data->chan] = lp_build_itrunc(&bld_base->base,
1319 emit_data->args[0]);
1320 }
1321
1322 /* TGSI_OPCODE_FSET Helper (CPU Only) */
1323 static void
1324 fset_emit_cpu(
1325 const struct lp_build_tgsi_action * action,
1326 struct lp_build_tgsi_context * bld_base,
1327 struct lp_build_emit_data * emit_data,
1328 unsigned pipe_func)
1329 {
1330 LLVMValueRef cond;
1331
1332 if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1333 cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1334 emit_data->args[0], emit_data->args[1]);
1335 }
1336 else {
1337 cond = lp_build_cmp(&bld_base->base, pipe_func,
1338 emit_data->args[0], emit_data->args[1]);
1339
1340 }
1341 emit_data->output[emit_data->chan] = cond;
1342 }
1343
1344
1345 /* TGSI_OPCODE_FSEQ (CPU Only) */
1346 static void
1347 fseq_emit_cpu(
1348 const struct lp_build_tgsi_action * action,
1349 struct lp_build_tgsi_context * bld_base,
1350 struct lp_build_emit_data * emit_data)
1351 {
1352 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1353 }
1354
1355 /* TGSI_OPCODE_ISGE (CPU Only) */
1356 static void
1357 fsge_emit_cpu(
1358 const struct lp_build_tgsi_action * action,
1359 struct lp_build_tgsi_context * bld_base,
1360 struct lp_build_emit_data * emit_data)
1361 {
1362 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1363 }
1364
1365 /* TGSI_OPCODE_ISLT (CPU Only) */
1366 static void
1367 fslt_emit_cpu(
1368 const struct lp_build_tgsi_action * action,
1369 struct lp_build_tgsi_context * bld_base,
1370 struct lp_build_emit_data * emit_data)
1371 {
1372 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1373 }
1374
1375 /* TGSI_OPCODE_USNE (CPU Only) */
1376
1377 static void
1378 fsne_emit_cpu(
1379 const struct lp_build_tgsi_action * action,
1380 struct lp_build_tgsi_context * bld_base,
1381 struct lp_build_emit_data * emit_data)
1382 {
1383 fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1384 }
1385
1386 /* TGSI_OPCODE_FLR (CPU Only) */
1387
1388 static void
1389 flr_emit_cpu(
1390 const struct lp_build_tgsi_action * action,
1391 struct lp_build_tgsi_context * bld_base,
1392 struct lp_build_emit_data * emit_data)
1393 {
1394 emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base,
1395 emit_data->args[0]);
1396 }
1397
1398 /* TGSI_OPCODE_I2F (CPU Only) */
1399 static void
1400 i2f_emit_cpu(
1401 const struct lp_build_tgsi_action * action,
1402 struct lp_build_tgsi_context * bld_base,
1403 struct lp_build_emit_data * emit_data)
1404 {
1405 emit_data->output[emit_data->chan] = lp_build_int_to_float(&bld_base->base,
1406 emit_data->args[0]);
1407 }
1408
1409 /* TGSI_OPCODE_IABS (CPU Only) */
1410 static void
1411 iabs_emit_cpu(
1412 const struct lp_build_tgsi_action * action,
1413 struct lp_build_tgsi_context * bld_base,
1414 struct lp_build_emit_data * emit_data)
1415 {
1416 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int_bld,
1417 emit_data->args[0]);
1418 }
1419
1420 /* TGSI_OPCODE_IDIV (CPU Only) */
1421 static void
1422 idiv_emit_cpu(
1423 const struct lp_build_tgsi_action * action,
1424 struct lp_build_tgsi_context * bld_base,
1425 struct lp_build_emit_data * emit_data)
1426 {
1427 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1428 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1429 PIPE_FUNC_EQUAL, emit_data->args[1],
1430 bld_base->uint_bld.zero);
1431 /* We want to make sure that we never divide/mod by zero to not
1432 * generate sigfpe. We don't want to crash just because the
1433 * shader is doing something weird. */
1434 LLVMValueRef divisor = LLVMBuildOr(builder,
1435 div_mask,
1436 emit_data->args[1], "");
1437 LLVMValueRef result = lp_build_div(&bld_base->int_bld,
1438 emit_data->args[0], divisor);
1439 LLVMValueRef not_div_mask = LLVMBuildNot(builder,
1440 div_mask,"");
1441 /* idiv by zero doesn't have a guaranteed return value chose 0 for now. */
1442 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1443 not_div_mask,
1444 result, "");
1445 }
1446
1447 /* TGSI_OPCODE_INEG (CPU Only) */
1448 static void
1449 ineg_emit_cpu(
1450 const struct lp_build_tgsi_action * action,
1451 struct lp_build_tgsi_context * bld_base,
1452 struct lp_build_emit_data * emit_data)
1453 {
1454 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int_bld,
1455 bld_base->int_bld.zero,
1456 emit_data->args[0]);
1457 }
1458
1459 /* TGSI_OPCODE_ISET Helper (CPU Only) */
1460 static void
1461 iset_emit_cpu(
1462 const struct lp_build_tgsi_action * action,
1463 struct lp_build_tgsi_context * bld_base,
1464 struct lp_build_emit_data * emit_data,
1465 unsigned pipe_func)
1466 {
1467 LLVMValueRef cond = lp_build_cmp(&bld_base->int_bld, pipe_func,
1468 emit_data->args[0], emit_data->args[1]);
1469 emit_data->output[emit_data->chan] = cond;
1470 }
1471
1472 /* TGSI_OPCODE_IMAX (CPU Only) */
1473 static void
1474 imax_emit_cpu(
1475 const struct lp_build_tgsi_action * action,
1476 struct lp_build_tgsi_context * bld_base,
1477 struct lp_build_emit_data * emit_data)
1478 {
1479 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int_bld,
1480 emit_data->args[0], emit_data->args[1]);
1481 }
1482
1483 /* TGSI_OPCODE_IMIN (CPU Only) */
1484 static void
1485 imin_emit_cpu(
1486 const struct lp_build_tgsi_action * action,
1487 struct lp_build_tgsi_context * bld_base,
1488 struct lp_build_emit_data * emit_data)
1489 {
1490 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int_bld,
1491 emit_data->args[0], emit_data->args[1]);
1492 }
1493
1494 /* TGSI_OPCODE_ISGE (CPU Only) */
1495 static void
1496 isge_emit_cpu(
1497 const struct lp_build_tgsi_action * action,
1498 struct lp_build_tgsi_context * bld_base,
1499 struct lp_build_emit_data * emit_data)
1500 {
1501 iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1502 }
1503
1504 /* TGSI_OPCODE_ISHR (CPU Only) */
1505 static void
1506 ishr_emit_cpu(
1507 const struct lp_build_tgsi_action * action,
1508 struct lp_build_tgsi_context * bld_base,
1509 struct lp_build_emit_data * emit_data)
1510 {
1511 struct lp_build_context *int_bld = &bld_base->int_bld;
1512 LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
1513 int_bld->type.width - 1);
1514 LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
1515 emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
1516 masked_count);
1517 }
1518
1519 /* TGSI_OPCODE_ISLT (CPU Only) */
1520 static void
1521 islt_emit_cpu(
1522 const struct lp_build_tgsi_action * action,
1523 struct lp_build_tgsi_context * bld_base,
1524 struct lp_build_emit_data * emit_data)
1525 {
1526 iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1527 }
1528
1529
1530 /* TGSI_OPCODE_ISSG (CPU Only) */
1531 static void
1532 issg_emit_cpu(
1533 const struct lp_build_tgsi_action * action,
1534 struct lp_build_tgsi_context * bld_base,
1535 struct lp_build_emit_data * emit_data)
1536 {
1537 emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int_bld,
1538 emit_data->args[0]);
1539 }
1540
1541 /* TGSI_OPCODE_LG2 (CPU Only) */
1542 static void
1543 lg2_emit_cpu(
1544 const struct lp_build_tgsi_action * action,
1545 struct lp_build_tgsi_context * bld_base,
1546 struct lp_build_emit_data * emit_data)
1547 {
1548 emit_data->output[emit_data->chan] = lp_build_log2_safe(&bld_base->base,
1549 emit_data->args[0]);
1550 }
1551
1552 /* TGSI_OPCODE_LOG (CPU Only) */
1553 static void
1554 log_emit_cpu(
1555 const struct lp_build_tgsi_action * action,
1556 struct lp_build_tgsi_context * bld_base,
1557 struct lp_build_emit_data * emit_data)
1558 {
1559 LLVMValueRef p_floor_log2;
1560 LLVMValueRef p_exp;
1561 LLVMValueRef p_log2;
1562 LLVMValueRef src0 = emit_data->args[0];
1563
1564 lp_build_log2_approx(&bld_base->base, src0,
1565 &p_exp, &p_floor_log2, &p_log2, FALSE);
1566
1567 emit_data->output[TGSI_CHAN_X] = p_floor_log2;
1568
1569 emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
1570 TGSI_OPCODE_DIV,
1571 src0, p_exp);
1572 emit_data->output[TGSI_CHAN_Z] = p_log2;
1573
1574 emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
1575
1576 }
1577
1578 /* TGSI_OPCODE_MAX (CPU Only) */
1579
1580 static void
1581 max_emit_cpu(
1582 const struct lp_build_tgsi_action * action,
1583 struct lp_build_tgsi_context * bld_base,
1584 struct lp_build_emit_data * emit_data)
1585 {
1586 emit_data->output[emit_data->chan] =
1587 lp_build_max_ext(&bld_base->base,
1588 emit_data->args[0], emit_data->args[1],
1589 GALLIVM_NAN_RETURN_OTHER);
1590 }
1591
1592 /* TGSI_OPCODE_MIN (CPU Only) */
1593 static void
1594 min_emit_cpu(
1595 const struct lp_build_tgsi_action * action,
1596 struct lp_build_tgsi_context * bld_base,
1597 struct lp_build_emit_data * emit_data)
1598 {
1599 emit_data->output[emit_data->chan] =
1600 lp_build_min_ext(&bld_base->base,
1601 emit_data->args[0], emit_data->args[1],
1602 GALLIVM_NAN_RETURN_OTHER);
1603 }
1604
1605 /* TGSI_OPCODE_MOD (CPU Only) */
1606 static void
1607 mod_emit_cpu(
1608 const struct lp_build_tgsi_action * action,
1609 struct lp_build_tgsi_context * bld_base,
1610 struct lp_build_emit_data * emit_data)
1611 {
1612 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1613 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1614 PIPE_FUNC_EQUAL, emit_data->args[1],
1615 bld_base->uint_bld.zero);
1616 /* We want to make sure that we never divide/mod by zero to not
1617 * generate sigfpe. We don't want to crash just because the
1618 * shader is doing something weird. */
1619 LLVMValueRef divisor = LLVMBuildOr(builder,
1620 div_mask,
1621 emit_data->args[1], "");
1622 LLVMValueRef result = lp_build_mod(&bld_base->int_bld,
1623 emit_data->args[0], divisor);
1624 /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1625 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1626 div_mask,
1627 result, "");
1628 }
1629
1630 /* TGSI_OPCODE_NOT */
1631 static void
1632 not_emit_cpu(
1633 const struct lp_build_tgsi_action * action,
1634 struct lp_build_tgsi_context * bld_base,
1635 struct lp_build_emit_data * emit_data)
1636 {
1637 emit_data->output[emit_data->chan] = lp_build_not(&bld_base->uint_bld,
1638 emit_data->args[0]);
1639 }
1640
1641 /* TGSI_OPCODE_OR (CPU Only) */
1642 static void
1643 or_emit_cpu(
1644 const struct lp_build_tgsi_action * action,
1645 struct lp_build_tgsi_context * bld_base,
1646 struct lp_build_emit_data * emit_data)
1647 {
1648 emit_data->output[emit_data->chan] = lp_build_or(&bld_base->uint_bld,
1649 emit_data->args[0], emit_data->args[1]);
1650 }
1651
1652 /* TGSI_OPCODE_POW (CPU Only) */
1653 static void
1654 pow_emit_cpu(
1655 const struct lp_build_tgsi_action * action,
1656 struct lp_build_tgsi_context * bld_base,
1657 struct lp_build_emit_data * emit_data)
1658 {
1659 emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
1660 emit_data->args[0], emit_data->args[1]);
1661 }
1662
1663
1664 /* TGSI_OPCODE_RCP (CPU Only) */
1665
1666 static void
1667 rcp_emit_cpu(
1668 const struct lp_build_tgsi_action * action,
1669 struct lp_build_tgsi_context * bld_base,
1670 struct lp_build_emit_data * emit_data)
1671 {
1672 emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base,
1673 emit_data->args[0]);
1674 }
1675
1676 /* Reciprical squareroot (CPU Only) */
1677 static void
1678 recip_sqrt_emit_cpu(
1679 const struct lp_build_tgsi_action * action,
1680 struct lp_build_tgsi_context * bld_base,
1681 struct lp_build_emit_data * emit_data)
1682 {
1683 emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base,
1684 emit_data->args[0]);
1685 }
1686
1687 static void
1688 sqrt_emit_cpu(
1689 const struct lp_build_tgsi_action * action,
1690 struct lp_build_tgsi_context * bld_base,
1691 struct lp_build_emit_data * emit_data)
1692 {
1693 emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->base,
1694 emit_data->args[0]);
1695 }
1696
1697
1698 /* TGSI_OPCODE_ROUND (CPU Only) */
1699 static void
1700 round_emit_cpu(
1701 const struct lp_build_tgsi_action * action,
1702 struct lp_build_tgsi_context * bld_base,
1703 struct lp_build_emit_data * emit_data)
1704 {
1705 emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base,
1706 emit_data->args[0]);
1707 }
1708
1709 /* TGSI_OPCODE_SET Helper (CPU Only) */
1710
1711 static void
1712 set_emit_cpu(
1713 const struct lp_build_tgsi_action * action,
1714 struct lp_build_tgsi_context * bld_base,
1715 struct lp_build_emit_data * emit_data,
1716 unsigned pipe_func)
1717 {
1718 LLVMValueRef cond;
1719
1720 if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1721 cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1722 emit_data->args[0], emit_data->args[1]);
1723 }
1724 else {
1725 cond = lp_build_cmp(&bld_base->base, pipe_func,
1726 emit_data->args[0], emit_data->args[1]);
1727
1728 }
1729 emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1730 cond,
1731 bld_base->base.one,
1732 bld_base->base.zero);
1733 }
1734
1735 /* TGSI_OPCODE_SEQ (CPU Only) */
1736
1737 static void
1738 seq_emit_cpu(
1739 const struct lp_build_tgsi_action * action,
1740 struct lp_build_tgsi_context * bld_base,
1741 struct lp_build_emit_data * emit_data)
1742 {
1743 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1744 }
1745
1746 /* TGSI_OPCODE_SGE (CPU Only) */
1747 static void
1748 sge_emit_cpu(
1749 const struct lp_build_tgsi_action * action,
1750 struct lp_build_tgsi_context * bld_base,
1751 struct lp_build_emit_data * emit_data)
1752 {
1753 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1754 }
1755
1756 /* TGSI_OPCODE_SGT (CPU Only)*/
1757
1758 static void
1759 sgt_emit_cpu(
1760 const struct lp_build_tgsi_action * action,
1761 struct lp_build_tgsi_context * bld_base,
1762 struct lp_build_emit_data * emit_data)
1763 {
1764 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
1765 }
1766
1767 /* TGSI_OPCODE_SHL (CPU Only) */
1768 static void
1769 shl_emit_cpu(
1770 const struct lp_build_tgsi_action * action,
1771 struct lp_build_tgsi_context * bld_base,
1772 struct lp_build_emit_data * emit_data)
1773 {
1774 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1775 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
1776 uint_bld->type.width - 1);
1777 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
1778 emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
1779 masked_count);
1780 }
1781
1782 /* TGSI_OPCODE_SIN (CPU Only) */
1783 static void
1784 sin_emit_cpu(
1785 const struct lp_build_tgsi_action * action,
1786 struct lp_build_tgsi_context * bld_base,
1787 struct lp_build_emit_data * emit_data)
1788 {
1789 emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base,
1790 emit_data->args[0]);
1791 }
1792
1793 /* TGSI_OPCODE_SLE (CPU Only) */
1794 static void
1795 sle_emit_cpu(
1796 const struct lp_build_tgsi_action * action,
1797 struct lp_build_tgsi_context * bld_base,
1798 struct lp_build_emit_data * emit_data)
1799 {
1800 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
1801 }
1802
1803 /* TGSI_OPCODE_SLT (CPU Only) */
1804 static void
1805 slt_emit_cpu(
1806 const struct lp_build_tgsi_action * action,
1807 struct lp_build_tgsi_context * bld_base,
1808 struct lp_build_emit_data * emit_data)
1809 {
1810 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1811 }
1812
1813 /* TGSI_OPCODE_SNE (CPU Only) */
1814
1815 static void
1816 sne_emit_cpu(
1817 const struct lp_build_tgsi_action * action,
1818 struct lp_build_tgsi_context * bld_base,
1819 struct lp_build_emit_data * emit_data)
1820 {
1821 set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1822 }
1823
1824 /* TGSI_OPCODE_SSG (CPU Only) */
1825
1826 static void
1827 ssg_emit_cpu(
1828 const struct lp_build_tgsi_action * action,
1829 struct lp_build_tgsi_context * bld_base,
1830 struct lp_build_emit_data * emit_data)
1831 {
1832 emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base,
1833 emit_data->args[0]);
1834 }
1835
1836 /* TGSI_OPCODE_SUB (CPU Only) */
1837
1838 static void
1839 sub_emit_cpu(
1840 const struct lp_build_tgsi_action * action,
1841 struct lp_build_tgsi_context * bld_base,
1842 struct lp_build_emit_data * emit_data)
1843 {
1844 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base,
1845 emit_data->args[0],
1846 emit_data->args[1]);
1847 }
1848
1849 /* TGSI_OPCODE_TRUNC (CPU Only) */
1850
1851 static void
1852 trunc_emit_cpu(
1853 const struct lp_build_tgsi_action * action,
1854 struct lp_build_tgsi_context * bld_base,
1855 struct lp_build_emit_data * emit_data)
1856 {
1857 emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base,
1858 emit_data->args[0]);
1859 }
1860
1861 /* TGSI_OPCODE_UADD (CPU Only) */
1862 static void
1863 uadd_emit_cpu(
1864 const struct lp_build_tgsi_action * action,
1865 struct lp_build_tgsi_context * bld_base,
1866 struct lp_build_emit_data * emit_data)
1867 {
1868 emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint_bld,
1869 emit_data->args[0], emit_data->args[1]);
1870 }
1871
1872 /* TGSI_OPCODE_UDIV (CPU Only) */
1873 static void
1874 udiv_emit_cpu(
1875 const struct lp_build_tgsi_action * action,
1876 struct lp_build_tgsi_context * bld_base,
1877 struct lp_build_emit_data * emit_data)
1878 {
1879
1880 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1881 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1882 PIPE_FUNC_EQUAL, emit_data->args[1],
1883 bld_base->uint_bld.zero);
1884 /* We want to make sure that we never divide/mod by zero to not
1885 * generate sigfpe. We don't want to crash just because the
1886 * shader is doing something weird. */
1887 LLVMValueRef divisor = LLVMBuildOr(builder,
1888 div_mask,
1889 emit_data->args[1], "");
1890 LLVMValueRef result = lp_build_div(&bld_base->uint_bld,
1891 emit_data->args[0], divisor);
1892 /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1893 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1894 div_mask,
1895 result, "");
1896 }
1897
1898 /* TGSI_OPCODE_UMAX (CPU Only) */
1899 static void
1900 umax_emit_cpu(
1901 const struct lp_build_tgsi_action * action,
1902 struct lp_build_tgsi_context * bld_base,
1903 struct lp_build_emit_data * emit_data)
1904 {
1905 emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint_bld,
1906 emit_data->args[0], emit_data->args[1]);
1907 }
1908
1909 /* TGSI_OPCODE_UMIN (CPU Only) */
1910 static void
1911 umin_emit_cpu(
1912 const struct lp_build_tgsi_action * action,
1913 struct lp_build_tgsi_context * bld_base,
1914 struct lp_build_emit_data * emit_data)
1915 {
1916 emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint_bld,
1917 emit_data->args[0], emit_data->args[1]);
1918 }
1919
1920 /* TGSI_OPCODE_UMOD (CPU Only) */
1921 static void
1922 umod_emit_cpu(
1923 const struct lp_build_tgsi_action * action,
1924 struct lp_build_tgsi_context * bld_base,
1925 struct lp_build_emit_data * emit_data)
1926 {
1927 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1928 LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1929 PIPE_FUNC_EQUAL, emit_data->args[1],
1930 bld_base->uint_bld.zero);
1931 /* We want to make sure that we never divide/mod by zero to not
1932 * generate sigfpe. We don't want to crash just because the
1933 * shader is doing something weird. */
1934 LLVMValueRef divisor = LLVMBuildOr(builder,
1935 div_mask,
1936 emit_data->args[1], "");
1937 LLVMValueRef result = lp_build_mod(&bld_base->uint_bld,
1938 emit_data->args[0], divisor);
1939 /* umod by zero is guaranteed to return 0xffffffff */
1940 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1941 div_mask,
1942 result, "");
1943 }
1944
1945 /* TGSI_OPCODE_USET Helper (CPU Only) */
1946 static void
1947 uset_emit_cpu(
1948 const struct lp_build_tgsi_action * action,
1949 struct lp_build_tgsi_context * bld_base,
1950 struct lp_build_emit_data * emit_data,
1951 unsigned pipe_func)
1952 {
1953 LLVMValueRef cond = lp_build_cmp(&bld_base->uint_bld, pipe_func,
1954 emit_data->args[0], emit_data->args[1]);
1955 emit_data->output[emit_data->chan] = cond;
1956 }
1957
1958
1959 /* TGSI_OPCODE_USEQ (CPU Only) */
1960 static void
1961 useq_emit_cpu(
1962 const struct lp_build_tgsi_action * action,
1963 struct lp_build_tgsi_context * bld_base,
1964 struct lp_build_emit_data * emit_data)
1965 {
1966 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1967 }
1968
1969 /* TGSI_OPCODE_ISGE (CPU Only) */
1970 static void
1971 usge_emit_cpu(
1972 const struct lp_build_tgsi_action * action,
1973 struct lp_build_tgsi_context * bld_base,
1974 struct lp_build_emit_data * emit_data)
1975 {
1976 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1977 }
1978
1979 /* TGSI_OPCODE_USHR (CPU Only) */
1980 static void
1981 ushr_emit_cpu(
1982 const struct lp_build_tgsi_action * action,
1983 struct lp_build_tgsi_context * bld_base,
1984 struct lp_build_emit_data * emit_data)
1985 {
1986 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1987 LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
1988 uint_bld->type.width - 1);
1989 LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
1990 emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
1991 masked_count);
1992 }
1993
1994 /* TGSI_OPCODE_ISLT (CPU Only) */
1995 static void
1996 uslt_emit_cpu(
1997 const struct lp_build_tgsi_action * action,
1998 struct lp_build_tgsi_context * bld_base,
1999 struct lp_build_emit_data * emit_data)
2000 {
2001 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2002 }
2003
2004 /* TGSI_OPCODE_USNE (CPU Only) */
2005
2006 static void
2007 usne_emit_cpu(
2008 const struct lp_build_tgsi_action * action,
2009 struct lp_build_tgsi_context * bld_base,
2010 struct lp_build_emit_data * emit_data)
2011 {
2012 uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2013 }
2014
2015 /* TGSI_OPCODE_XOR */
2016 static void
2017 xor_emit_cpu(
2018 const struct lp_build_tgsi_action * action,
2019 struct lp_build_tgsi_context * bld_base,
2020 struct lp_build_emit_data * emit_data)
2021 {
2022 emit_data->output[emit_data->chan] = lp_build_xor(&bld_base->uint_bld,
2023 emit_data->args[0],
2024 emit_data->args[1]);
2025 }
2026
2027 /* TGSI_OPCODE_DABS (CPU Only) */
2028 static void
2029 dabs_emit_cpu(
2030 const struct lp_build_tgsi_action * action,
2031 struct lp_build_tgsi_context * bld_base,
2032 struct lp_build_emit_data * emit_data)
2033 {
2034 emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
2035 emit_data->args[0]);
2036 }
2037
2038 /* TGSI_OPCODE_DNEG (CPU Only) */
2039 static void
2040 dneg_emit_cpu(
2041 const struct lp_build_tgsi_action * action,
2042 struct lp_build_tgsi_context * bld_base,
2043 struct lp_build_emit_data * emit_data)
2044 {
2045 emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
2046 bld_base->dbl_bld.zero,
2047 emit_data->args[0]);
2048 }
2049
2050 /* TGSI_OPCODE_DSET Helper (CPU Only) */
2051 static void
2052 dset_emit_cpu(
2053 const struct lp_build_tgsi_action * action,
2054 struct lp_build_tgsi_context * bld_base,
2055 struct lp_build_emit_data * emit_data,
2056 unsigned pipe_func)
2057 {
2058 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2059 LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
2060 emit_data->args[0], emit_data->args[1]);
2061 /* arguments were 64 bit but store as 32 bit */
2062 cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2063 emit_data->output[emit_data->chan] = cond;
2064 }
2065
2066 /* TGSI_OPCODE_DSEQ (CPU Only) */
2067 static void
2068 dseq_emit_cpu(
2069 const struct lp_build_tgsi_action * action,
2070 struct lp_build_tgsi_context * bld_base,
2071 struct lp_build_emit_data * emit_data)
2072 {
2073 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2074 }
2075
2076 /* TGSI_OPCODE_DSGE (CPU Only) */
2077 static void
2078 dsge_emit_cpu(
2079 const struct lp_build_tgsi_action * action,
2080 struct lp_build_tgsi_context * bld_base,
2081 struct lp_build_emit_data * emit_data)
2082 {
2083 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2084 }
2085
2086 /* TGSI_OPCODE_DSLT (CPU Only) */
2087 static void
2088 dslt_emit_cpu(
2089 const struct lp_build_tgsi_action * action,
2090 struct lp_build_tgsi_context * bld_base,
2091 struct lp_build_emit_data * emit_data)
2092 {
2093 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2094 }
2095
2096 /* TGSI_OPCODE_DSNE (CPU Only) */
2097 static void
2098 dsne_emit_cpu(
2099 const struct lp_build_tgsi_action * action,
2100 struct lp_build_tgsi_context * bld_base,
2101 struct lp_build_emit_data * emit_data)
2102 {
2103 dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2104 }
2105
2106 /* Double Reciprocal squareroot (CPU Only) */
2107 static void
2108 drecip_sqrt_emit_cpu(
2109 const struct lp_build_tgsi_action * action,
2110 struct lp_build_tgsi_context * bld_base,
2111 struct lp_build_emit_data * emit_data)
2112 {
2113 emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
2114 emit_data->args[0]);
2115 }
2116
2117 /* Double Squareroot (CPU Only) */
2118 static void
2119 dsqrt_emit_cpu(
2120 const struct lp_build_tgsi_action * action,
2121 struct lp_build_tgsi_context * bld_base,
2122 struct lp_build_emit_data * emit_data)
2123 {
2124 emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
2125 emit_data->args[0]);
2126 }
2127
2128 void
2129 lp_set_default_actions_cpu(
2130 struct lp_build_tgsi_context * bld_base)
2131 {
2132 lp_set_default_actions(bld_base);
2133 bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
2134 bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
2135 bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
2136 bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
2137 bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
2138 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
2139 bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
2140 bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
2141 bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
2142 bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
2143 bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
2144 bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
2145 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
2146 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;
2147 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = fslt_emit_cpu;
2148 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = fsne_emit_cpu;
2149
2150 bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
2151 bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
2152 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu;
2153 bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu;
2154 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu;
2155 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = imin_emit_cpu;
2156 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu;
2157 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
2158 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
2159 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
2160
2161 bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
2162 bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
2163 bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
2164 bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
2165 bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu;
2166 bld_base->op_actions[TGSI_OPCODE_NOT].emit = not_emit_cpu;
2167 bld_base->op_actions[TGSI_OPCODE_OR].emit = or_emit_cpu;
2168 bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
2169 bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
2170 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
2171 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
2172 bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
2173 bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
2174 bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
2175 bld_base->op_actions[TGSI_OPCODE_SHL].emit = shl_emit_cpu;
2176 bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
2177 bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
2178 bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
2179 bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
2180 bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
2181 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
2182
2183 bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
2184 bld_base->sqrt_action.emit = sqrt_emit_cpu;
2185
2186 bld_base->op_actions[TGSI_OPCODE_UADD].emit = uadd_emit_cpu;
2187 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = ucmp_emit_cpu;
2188 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = udiv_emit_cpu;
2189 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = umax_emit_cpu;
2190 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = umin_emit_cpu;
2191 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = umod_emit_cpu;
2192 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = useq_emit_cpu;
2193 bld_base->op_actions[TGSI_OPCODE_USGE].emit = usge_emit_cpu;
2194 bld_base->op_actions[TGSI_OPCODE_USHR].emit = ushr_emit_cpu;
2195 bld_base->op_actions[TGSI_OPCODE_USLT].emit = uslt_emit_cpu;
2196 bld_base->op_actions[TGSI_OPCODE_USNE].emit = usne_emit_cpu;
2197
2198 bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
2199
2200 bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
2201 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
2202 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
2203 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
2204 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
2205 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
2206
2207 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
2208 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
2209
2210 }