gallivm: added question/comment
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55 #include "lp_bld_debug.h"
56
57
/* Capacities of the temps[] and immediates[] register tables. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/* Loop header: step CHAN through 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/* Non-zero when channel CHAN is set in the write mask of INST's Dst[0]. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* `if' header guarding the statement that follows on the write mask. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop over the channels enabled in Dst[0]'s write mask only. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element positions inside a quad, as used by the swizzle tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* Depth of the conditional mask stack. */
#define LP_TGSI_MAX_NESTING 16
86
87 struct lp_exec_mask {
88 struct lp_build_context *bld;
89
90 boolean has_mask;
91
92 LLVMTypeRef int_vec_type;
93
94 LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
95 int cond_stack_size;
96 LLVMValueRef cond_mask;
97
98 LLVMValueRef exec_mask;
99 };
100
101 struct lp_build_tgsi_soa_context
102 {
103 struct lp_build_context base;
104
105 LLVMValueRef consts_ptr;
106 const LLVMValueRef *pos;
107 const LLVMValueRef (*inputs)[NUM_CHANNELS];
108 LLVMValueRef (*outputs)[NUM_CHANNELS];
109
110 struct lp_build_sampler_soa *sampler;
111
112 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
113 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
114
115 struct lp_build_mask_context *mask;
116 struct lp_exec_mask exec_mask;
117 };
118
119 static const unsigned char
120 swizzle_left[4] = {
121 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
122 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
123 };
124
125 static const unsigned char
126 swizzle_right[4] = {
127 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
128 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
129 };
130
131 static const unsigned char
132 swizzle_top[4] = {
133 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
134 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
135 };
136
137 static const unsigned char
138 swizzle_bottom[4] = {
139 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
140 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
141 };
142
143 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
144 {
145 mask->bld = bld;
146 mask->has_mask = FALSE;
147 mask->cond_stack_size = 0;
148
149 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
150 }
151
152 static void lp_exec_mask_update(struct lp_exec_mask *mask)
153 {
154 mask->exec_mask = mask->cond_mask;
155 if (mask->cond_stack_size > 0)
156 mask->has_mask = TRUE;
157 }
158
159 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
160 LLVMValueRef val)
161 {
162 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
163 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
164 mask->int_vec_type, "");
165
166 lp_exec_mask_update(mask);
167 }
168
169 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
170 {
171 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
172 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
173 mask->cond_mask, "");
174
175 /* means that we didn't have any mask before and that
176 * we were fully enabled */
177 if (mask->cond_stack_size <= 1) {
178 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
179 }
180
181 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
182 inv_mask,
183 prev_mask, "");
184 lp_exec_mask_update(mask);
185 }
186
187 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
188 {
189 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
190 lp_exec_mask_update(mask);
191 }
192
193 static void lp_exec_mask_store(struct lp_exec_mask *mask,
194 LLVMValueRef val,
195 LLVMValueRef dst)
196 {
197 if (mask->has_mask) {
198 LLVMValueRef real_val, dst_val;
199
200 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
201 real_val = lp_build_select(mask->bld,
202 mask->exec_mask,
203 val, dst_val);
204
205 LLVMBuildStore(mask->bld->builder, real_val, dst);
206 } else
207 LLVMBuildStore(mask->bld->builder, val, dst);
208 }
209
210
211 static LLVMValueRef
212 emit_ddx(struct lp_build_tgsi_soa_context *bld,
213 LLVMValueRef src)
214 {
215 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
216 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
217 return lp_build_sub(&bld->base, src_right, src_left);
218 }
219
220
221 static LLVMValueRef
222 emit_ddy(struct lp_build_tgsi_soa_context *bld,
223 LLVMValueRef src)
224 {
225 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
226 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
227 return lp_build_sub(&bld->base, src_top, src_bottom);
228 }
229
230
231 /**
232 * Register fetch.
233 */
234 static LLVMValueRef
235 emit_fetch(
236 struct lp_build_tgsi_soa_context *bld,
237 const struct tgsi_full_instruction *inst,
238 unsigned index,
239 const unsigned chan_index )
240 {
241 const struct tgsi_full_src_register *reg = &inst->Src[index];
242 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
243 LLVMValueRef res;
244
245 switch (swizzle) {
246 case TGSI_SWIZZLE_X:
247 case TGSI_SWIZZLE_Y:
248 case TGSI_SWIZZLE_Z:
249 case TGSI_SWIZZLE_W:
250
251 switch (reg->Register.File) {
252 case TGSI_FILE_CONSTANT: {
253 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
254 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
255 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
256 res = lp_build_broadcast_scalar(&bld->base, scalar);
257 break;
258 }
259
260 case TGSI_FILE_IMMEDIATE:
261 res = bld->immediates[reg->Register.Index][swizzle];
262 assert(res);
263 break;
264
265 case TGSI_FILE_INPUT:
266 res = bld->inputs[reg->Register.Index][swizzle];
267 assert(res);
268 break;
269
270 case TGSI_FILE_TEMPORARY:
271 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
272 if(!res)
273 return bld->base.undef;
274 break;
275
276 default:
277 assert( 0 );
278 return bld->base.undef;
279 }
280 break;
281
282 default:
283 assert( 0 );
284 return bld->base.undef;
285 }
286
287 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
288 case TGSI_UTIL_SIGN_CLEAR:
289 res = lp_build_abs( &bld->base, res );
290 break;
291
292 case TGSI_UTIL_SIGN_SET:
293 /* TODO: Use bitwese OR for floating point */
294 res = lp_build_abs( &bld->base, res );
295 res = LLVMBuildNeg( bld->base.builder, res, "" );
296 break;
297
298 case TGSI_UTIL_SIGN_TOGGLE:
299 res = LLVMBuildNeg( bld->base.builder, res, "" );
300 break;
301
302 case TGSI_UTIL_SIGN_KEEP:
303 break;
304 }
305
306 return res;
307 }
308
309
310 /**
311 * Register fetch with derivatives.
312 */
313 static void
314 emit_fetch_deriv(
315 struct lp_build_tgsi_soa_context *bld,
316 const struct tgsi_full_instruction *inst,
317 unsigned index,
318 const unsigned chan_index,
319 LLVMValueRef *res,
320 LLVMValueRef *ddx,
321 LLVMValueRef *ddy)
322 {
323 LLVMValueRef src;
324
325 src = emit_fetch(bld, inst, index, chan_index);
326
327 if(res)
328 *res = src;
329
330 /* TODO: use interpolation coeffs for inputs */
331
332 if(ddx)
333 *ddx = emit_ddx(bld, src);
334
335 if(ddy)
336 *ddy = emit_ddy(bld, src);
337 }
338
339
340 /**
341 * Register store.
342 */
343 static void
344 emit_store(
345 struct lp_build_tgsi_soa_context *bld,
346 const struct tgsi_full_instruction *inst,
347 unsigned index,
348 unsigned chan_index,
349 LLVMValueRef value)
350 {
351 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
352
353 switch( inst->Instruction.Saturate ) {
354 case TGSI_SAT_NONE:
355 break;
356
357 case TGSI_SAT_ZERO_ONE:
358 value = lp_build_max(&bld->base, value, bld->base.zero);
359 value = lp_build_min(&bld->base, value, bld->base.one);
360 break;
361
362 case TGSI_SAT_MINUS_PLUS_ONE:
363 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
364 value = lp_build_min(&bld->base, value, bld->base.one);
365 break;
366
367 default:
368 assert(0);
369 }
370
371 switch( reg->Register.File ) {
372 case TGSI_FILE_OUTPUT:
373 lp_exec_mask_store(&bld->exec_mask, value,
374 bld->outputs[reg->Register.Index][chan_index]);
375 break;
376
377 case TGSI_FILE_TEMPORARY:
378 lp_exec_mask_store(&bld->exec_mask, value,
379 bld->temps[reg->Register.Index][chan_index]);
380 break;
381
382 case TGSI_FILE_ADDRESS:
383 /* FIXME */
384 assert(0);
385 break;
386
387 default:
388 assert( 0 );
389 }
390 }
391
392
393 /**
394 * High-level instruction translators.
395 */
396
397
398 static void
399 emit_tex( struct lp_build_tgsi_soa_context *bld,
400 const struct tgsi_full_instruction *inst,
401 boolean apply_lodbias,
402 boolean projected,
403 LLVMValueRef *texel)
404 {
405 const uint unit = inst->Src[1].Register.Index;
406 LLVMValueRef lodbias;
407 LLVMValueRef oow = NULL;
408 LLVMValueRef coords[3];
409 unsigned num_coords;
410 unsigned i;
411
412 switch (inst->Texture.Texture) {
413 case TGSI_TEXTURE_1D:
414 num_coords = 1;
415 break;
416 case TGSI_TEXTURE_2D:
417 case TGSI_TEXTURE_RECT:
418 num_coords = 2;
419 break;
420 case TGSI_TEXTURE_SHADOW1D:
421 case TGSI_TEXTURE_SHADOW2D:
422 case TGSI_TEXTURE_SHADOWRECT:
423 case TGSI_TEXTURE_3D:
424 case TGSI_TEXTURE_CUBE:
425 num_coords = 3;
426 break;
427 default:
428 assert(0);
429 return;
430 }
431
432 if(apply_lodbias)
433 lodbias = emit_fetch( bld, inst, 0, 3 );
434 else
435 lodbias = bld->base.zero;
436
437 if (projected) {
438 oow = emit_fetch( bld, inst, 0, 3 );
439 oow = lp_build_rcp(&bld->base, oow);
440 }
441
442 for (i = 0; i < num_coords; i++) {
443 coords[i] = emit_fetch( bld, inst, 0, i );
444 if (projected)
445 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
446 }
447 for (i = num_coords; i < 3; i++) {
448 coords[i] = bld->base.undef;
449 }
450
451 bld->sampler->emit_fetch_texel(bld->sampler,
452 bld->base.builder,
453 bld->base.type,
454 unit, num_coords, coords, lodbias,
455 texel);
456 }
457
458
459 static void
460 emit_kil(
461 struct lp_build_tgsi_soa_context *bld,
462 const struct tgsi_full_instruction *inst )
463 {
464 const struct tgsi_full_src_register *reg = &inst->Src[0];
465 LLVMValueRef terms[NUM_CHANNELS];
466 LLVMValueRef mask;
467 unsigned chan_index;
468
469 memset(&terms, 0, sizeof terms);
470
471 FOR_EACH_CHANNEL( chan_index ) {
472 unsigned swizzle;
473
474 /* Unswizzle channel */
475 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
476
477 /* Check if the component has not been already tested. */
478 assert(swizzle < NUM_CHANNELS);
479 if( !terms[swizzle] )
480 /* TODO: change the comparison operator instead of setting the sign */
481 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
482 }
483
484 mask = NULL;
485 FOR_EACH_CHANNEL( chan_index ) {
486 if(terms[chan_index]) {
487 LLVMValueRef chan_mask;
488
489 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
490
491 if(mask)
492 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
493 else
494 mask = chan_mask;
495 }
496 }
497
498 if(mask)
499 lp_build_mask_update(bld->mask, mask);
500 }
501
502
503 /**
504 * Check if inst src/dest regs use indirect addressing into temporary
505 * register file.
506 */
507 static boolean
508 indirect_temp_reference(const struct tgsi_full_instruction *inst)
509 {
510 uint i;
511 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
512 const struct tgsi_full_src_register *reg = &inst->Src[i];
513 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
514 reg->Register.Indirect)
515 return TRUE;
516 }
517 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
518 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
519 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
520 reg->Register.Indirect)
521 return TRUE;
522 }
523 return FALSE;
524 }
525
526 static int
527 emit_declaration(
528 struct lp_build_tgsi_soa_context *bld,
529 const struct tgsi_full_declaration *decl)
530 {
531 unsigned first = decl->Range.First;
532 unsigned last = decl->Range.Last;
533 unsigned idx, i;
534
535 for (idx = first; idx <= last; ++idx) {
536 boolean ok;
537
538 switch (decl->Declaration.File) {
539 case TGSI_FILE_TEMPORARY:
540 for (i = 0; i < NUM_CHANNELS; i++)
541 bld->temps[idx][i] = lp_build_alloca(&bld->base);
542 ok = TRUE;
543 break;
544
545 case TGSI_FILE_OUTPUT:
546 for (i = 0; i < NUM_CHANNELS; i++)
547 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
548 ok = TRUE;
549 break;
550
551 default:
552 /* don't need to declare other vars */
553 ok = TRUE;
554 }
555
556 if (!ok)
557 return FALSE;
558 }
559
560 return TRUE;
561 }
562
563 static int
564 emit_instruction(
565 struct lp_build_tgsi_soa_context *bld,
566 const struct tgsi_full_instruction *inst,
567 const struct tgsi_opcode_info *info)
568 {
569 unsigned chan_index;
570 LLVMValueRef src0, src1, src2;
571 LLVMValueRef tmp0, tmp1, tmp2;
572 LLVMValueRef tmp3 = NULL;
573 LLVMValueRef tmp4 = NULL;
574 LLVMValueRef tmp5 = NULL;
575 LLVMValueRef tmp6 = NULL;
576 LLVMValueRef tmp7 = NULL;
577 LLVMValueRef res;
578 LLVMValueRef dst0[NUM_CHANNELS];
579
580 /* we can't handle indirect addressing into temp register file yet */
581 if (indirect_temp_reference(inst))
582 return FALSE;
583
584 assert(info->num_dst <= 1);
585 if(info->num_dst) {
586 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
587 dst0[chan_index] = bld->base.undef;
588 }
589 }
590
591 switch (inst->Instruction.Opcode) {
592 #if 0
593 case TGSI_OPCODE_ARL:
594 /* FIXME */
595 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
596 tmp0 = emit_fetch( bld, inst, 0, chan_index );
597 emit_flr(bld, 0, 0);
598 emit_f2it( bld, 0 );
599 dst0[chan_index] = tmp0;
600 }
601 break;
602 #endif
603
604 case TGSI_OPCODE_MOV:
605 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
606 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
607 }
608 break;
609
610 case TGSI_OPCODE_LIT:
611 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
612 dst0[CHAN_X] = bld->base.one;
613 }
614 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
615 src0 = emit_fetch( bld, inst, 0, CHAN_X );
616 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
617 }
618 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
619 /* XMM[1] = SrcReg[0].yyyy */
620 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
621 /* XMM[1] = max(XMM[1], 0) */
622 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
623 /* XMM[2] = SrcReg[0].wwww */
624 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
625 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
626 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
627 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
628 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
629 }
630 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
631 dst0[CHAN_W] = bld->base.one;
632 }
633 break;
634
635 case TGSI_OPCODE_RCP:
636 /* TGSI_OPCODE_RECIP */
637 src0 = emit_fetch( bld, inst, 0, CHAN_X );
638 res = lp_build_rcp(&bld->base, src0);
639 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
640 dst0[chan_index] = res;
641 }
642 break;
643
644 case TGSI_OPCODE_RSQ:
645 /* TGSI_OPCODE_RECIPSQRT */
646 src0 = emit_fetch( bld, inst, 0, CHAN_X );
647 src0 = lp_build_abs(&bld->base, src0);
648 res = lp_build_rsqrt(&bld->base, src0);
649 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
650 dst0[chan_index] = res;
651 }
652 break;
653
654 case TGSI_OPCODE_EXP:
655 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
656 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
657 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
658 LLVMValueRef *p_exp2_int_part = NULL;
659 LLVMValueRef *p_frac_part = NULL;
660 LLVMValueRef *p_exp2 = NULL;
661
662 src0 = emit_fetch( bld, inst, 0, CHAN_X );
663
664 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
665 p_exp2_int_part = &tmp0;
666 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
667 p_frac_part = &tmp1;
668 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
669 p_exp2 = &tmp2;
670
671 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
672
673 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
674 dst0[CHAN_X] = tmp0;
675 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
676 dst0[CHAN_Y] = tmp1;
677 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
678 dst0[CHAN_Z] = tmp2;
679 }
680 /* dst.w = 1.0 */
681 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
682 dst0[CHAN_W] = bld->base.one;
683 }
684 break;
685
686 case TGSI_OPCODE_LOG:
687 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
688 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
689 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
690 LLVMValueRef *p_floor_log2 = NULL;
691 LLVMValueRef *p_exp = NULL;
692 LLVMValueRef *p_log2 = NULL;
693
694 src0 = emit_fetch( bld, inst, 0, CHAN_X );
695 src0 = lp_build_abs( &bld->base, src0 );
696
697 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
698 p_floor_log2 = &tmp0;
699 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
700 p_exp = &tmp1;
701 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
702 p_log2 = &tmp2;
703
704 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
705
706 /* dst.x = floor(lg2(abs(src.x))) */
707 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
708 dst0[CHAN_X] = tmp0;
709 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
710 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
711 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
712 }
713 /* dst.z = lg2(abs(src.x)) */
714 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
715 dst0[CHAN_Z] = tmp2;
716 }
717 /* dst.w = 1.0 */
718 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
719 dst0[CHAN_W] = bld->base.one;
720 }
721 break;
722
723 case TGSI_OPCODE_MUL:
724 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
725 src0 = emit_fetch( bld, inst, 0, chan_index );
726 src1 = emit_fetch( bld, inst, 1, chan_index );
727 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
728 }
729 break;
730
731 case TGSI_OPCODE_ADD:
732 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
733 src0 = emit_fetch( bld, inst, 0, chan_index );
734 src1 = emit_fetch( bld, inst, 1, chan_index );
735 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
736 }
737 break;
738
739 case TGSI_OPCODE_DP3:
740 /* TGSI_OPCODE_DOT3 */
741 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
742 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
743 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
744 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
745 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
746 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
747 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
748 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
749 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
750 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
751 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
752 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
753 dst0[chan_index] = tmp0;
754 }
755 break;
756
757 case TGSI_OPCODE_DP4:
758 /* TGSI_OPCODE_DOT4 */
759 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
760 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
761 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
762 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
763 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
764 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
765 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
766 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
767 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
768 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
769 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
770 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
771 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
772 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
773 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
774 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
775 dst0[chan_index] = tmp0;
776 }
777 break;
778
779 case TGSI_OPCODE_DST:
780 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
781 dst0[CHAN_X] = bld->base.one;
782 }
783 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
784 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
785 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
786 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
787 }
788 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
789 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
790 }
791 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
792 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
793 }
794 break;
795
796 case TGSI_OPCODE_MIN:
797 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
798 src0 = emit_fetch( bld, inst, 0, chan_index );
799 src1 = emit_fetch( bld, inst, 1, chan_index );
800 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
801 }
802 break;
803
804 case TGSI_OPCODE_MAX:
805 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
806 src0 = emit_fetch( bld, inst, 0, chan_index );
807 src1 = emit_fetch( bld, inst, 1, chan_index );
808 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
809 }
810 break;
811
812 case TGSI_OPCODE_SLT:
813 /* TGSI_OPCODE_SETLT */
814 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
815 src0 = emit_fetch( bld, inst, 0, chan_index );
816 src1 = emit_fetch( bld, inst, 1, chan_index );
817 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
818 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
819 }
820 break;
821
822 case TGSI_OPCODE_SGE:
823 /* TGSI_OPCODE_SETGE */
824 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
825 src0 = emit_fetch( bld, inst, 0, chan_index );
826 src1 = emit_fetch( bld, inst, 1, chan_index );
827 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
828 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
829 }
830 break;
831
832 case TGSI_OPCODE_MAD:
833 /* TGSI_OPCODE_MADD */
834 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
835 tmp0 = emit_fetch( bld, inst, 0, chan_index );
836 tmp1 = emit_fetch( bld, inst, 1, chan_index );
837 tmp2 = emit_fetch( bld, inst, 2, chan_index );
838 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
839 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
840 dst0[chan_index] = tmp0;
841 }
842 break;
843
844 case TGSI_OPCODE_SUB:
845 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
846 tmp0 = emit_fetch( bld, inst, 0, chan_index );
847 tmp1 = emit_fetch( bld, inst, 1, chan_index );
848 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
849 }
850 break;
851
852 case TGSI_OPCODE_LRP:
853 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
854 src0 = emit_fetch( bld, inst, 0, chan_index );
855 src1 = emit_fetch( bld, inst, 1, chan_index );
856 src2 = emit_fetch( bld, inst, 2, chan_index );
857 tmp0 = lp_build_sub( &bld->base, src1, src2 );
858 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
859 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
860 }
861 break;
862
863 case TGSI_OPCODE_CND:
864 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
865 src0 = emit_fetch( bld, inst, 0, chan_index );
866 src1 = emit_fetch( bld, inst, 1, chan_index );
867 src2 = emit_fetch( bld, inst, 2, chan_index );
868 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
869 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
870 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
871 }
872 break;
873
874 case TGSI_OPCODE_DP2A:
875 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
876 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
877 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
878 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
879 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
880 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
881 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
882 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
883 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
884 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
885 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
886 }
887 break;
888
889 case TGSI_OPCODE_FRC:
890 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
891 src0 = emit_fetch( bld, inst, 0, chan_index );
892 tmp0 = lp_build_floor(&bld->base, src0);
893 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
894 dst0[chan_index] = tmp0;
895 }
896 break;
897
898 case TGSI_OPCODE_CLAMP:
899 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
900 tmp0 = emit_fetch( bld, inst, 0, chan_index );
901 src1 = emit_fetch( bld, inst, 1, chan_index );
902 src2 = emit_fetch( bld, inst, 2, chan_index );
903 tmp0 = lp_build_max(&bld->base, tmp0, src1);
904 tmp0 = lp_build_min(&bld->base, tmp0, src2);
905 dst0[chan_index] = tmp0;
906 }
907 break;
908
909 case TGSI_OPCODE_FLR:
910 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
911 tmp0 = emit_fetch( bld, inst, 0, chan_index );
912 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
913 }
914 break;
915
916 case TGSI_OPCODE_ROUND:
917 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
918 tmp0 = emit_fetch( bld, inst, 0, chan_index );
919 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
920 }
921 break;
922
923 case TGSI_OPCODE_EX2: {
924 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
925 tmp0 = lp_build_exp2( &bld->base, tmp0);
926 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
927 dst0[chan_index] = tmp0;
928 }
929 break;
930 }
931
932 case TGSI_OPCODE_LG2:
933 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
934 tmp0 = lp_build_log2( &bld->base, tmp0);
935 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
936 dst0[chan_index] = tmp0;
937 }
938 break;
939
940 case TGSI_OPCODE_POW:
941 src0 = emit_fetch( bld, inst, 0, CHAN_X );
942 src1 = emit_fetch( bld, inst, 1, CHAN_X );
943 res = lp_build_pow( &bld->base, src0, src1 );
944 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
945 dst0[chan_index] = res;
946 }
947 break;
948
949 case TGSI_OPCODE_XPD:
950 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
951 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
952 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
953 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
954 }
955 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
956 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
957 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
958 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
959 }
960 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
961 tmp2 = tmp0;
962 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
963 tmp5 = tmp3;
964 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
965 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
966 dst0[CHAN_X] = tmp2;
967 }
968 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
969 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
970 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
971 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
972 }
973 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
974 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
975 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
976 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
977 dst0[CHAN_Y] = tmp3;
978 }
979 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
980 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
981 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
982 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
983 dst0[CHAN_Z] = tmp5;
984 }
985 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
986 dst0[CHAN_W] = bld->base.one;
987 }
988 break;
989
990 case TGSI_OPCODE_ABS:
991 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
992 tmp0 = emit_fetch( bld, inst, 0, chan_index );
993 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
994 }
995 break;
996
997 case TGSI_OPCODE_RCC:
998 /* deprecated? */
999 assert(0);
1000 return 0;
1001
1002 case TGSI_OPCODE_DPH:
1003 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1004 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1005 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1006 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1007 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1008 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1009 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1010 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1011 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1012 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1013 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1014 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1015 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1016 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1017 dst0[chan_index] = tmp0;
1018 }
1019 break;
1020
1021 case TGSI_OPCODE_COS:
1022 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1023 tmp0 = lp_build_cos( &bld->base, tmp0 );
1024 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1025 dst0[chan_index] = tmp0;
1026 }
1027 break;
1028
1029 case TGSI_OPCODE_DDX:
1030 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1031 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1032 }
1033 break;
1034
1035 case TGSI_OPCODE_DDY:
1036 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1037 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1038 }
1039 break;
1040
1041 case TGSI_OPCODE_KILP:
1042 /* predicated kill */
1043 /* FIXME */
1044 return 0;
1045 break;
1046
1047 case TGSI_OPCODE_KIL:
1048 /* conditional kill */
1049 emit_kil( bld, inst );
1050 break;
1051
1052 case TGSI_OPCODE_PK2H:
1053 return 0;
1054 break;
1055
1056 case TGSI_OPCODE_PK2US:
1057 return 0;
1058 break;
1059
1060 case TGSI_OPCODE_PK4B:
1061 return 0;
1062 break;
1063
1064 case TGSI_OPCODE_PK4UB:
1065 return 0;
1066 break;
1067
1068 case TGSI_OPCODE_RFL:
1069 return 0;
1070 break;
1071
1072 case TGSI_OPCODE_SEQ:
1073 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1074 src0 = emit_fetch( bld, inst, 0, chan_index );
1075 src1 = emit_fetch( bld, inst, 1, chan_index );
1076 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1077 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1078 }
1079 break;
1080
1081 case TGSI_OPCODE_SFL:
1082 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1083 dst0[chan_index] = bld->base.zero;
1084 }
1085 break;
1086
1087 case TGSI_OPCODE_SGT:
1088 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1089 src0 = emit_fetch( bld, inst, 0, chan_index );
1090 src1 = emit_fetch( bld, inst, 1, chan_index );
1091 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1092 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1093 }
1094 break;
1095
1096 case TGSI_OPCODE_SIN:
1097 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1098 tmp0 = lp_build_sin( &bld->base, tmp0 );
1099 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1100 dst0[chan_index] = tmp0;
1101 }
1102 break;
1103
1104 case TGSI_OPCODE_SLE:
1105 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1106 src0 = emit_fetch( bld, inst, 0, chan_index );
1107 src1 = emit_fetch( bld, inst, 1, chan_index );
1108 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1109 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1110 }
1111 break;
1112
1113 case TGSI_OPCODE_SNE:
1114 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1115 src0 = emit_fetch( bld, inst, 0, chan_index );
1116 src1 = emit_fetch( bld, inst, 1, chan_index );
1117 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1118 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1119 }
1120 break;
1121
1122 case TGSI_OPCODE_STR:
1123 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1124 dst0[chan_index] = bld->base.one;
1125 }
1126 break;
1127
1128 case TGSI_OPCODE_TEX:
1129 /* XXX what about dst0 writemask? */
1130 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1131 break;
1132
1133 case TGSI_OPCODE_TXD:
1134 /* FIXME */
1135 return 0;
1136 break;
1137
1138 case TGSI_OPCODE_UP2H:
1139 /* deprecated */
1140 assert (0);
1141 return 0;
1142 break;
1143
1144 case TGSI_OPCODE_UP2US:
1145 /* deprecated */
1146 assert(0);
1147 return 0;
1148 break;
1149
1150 case TGSI_OPCODE_UP4B:
1151 /* deprecated */
1152 assert(0);
1153 return 0;
1154 break;
1155
1156 case TGSI_OPCODE_UP4UB:
1157 /* deprecated */
1158 assert(0);
1159 return 0;
1160 break;
1161
1162 case TGSI_OPCODE_X2D:
1163 /* deprecated? */
1164 assert(0);
1165 return 0;
1166 break;
1167
1168 case TGSI_OPCODE_ARA:
1169 /* deprecated */
1170 assert(0);
1171 return 0;
1172 break;
1173
1174 #if 0
1175 case TGSI_OPCODE_ARR:
1176 /* FIXME */
1177 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1178 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1179 emit_rnd( bld, 0, 0 );
1180 emit_f2it( bld, 0 );
1181 dst0[chan_index] = tmp0;
1182 }
1183 break;
1184 #endif
1185
1186 case TGSI_OPCODE_BRA:
1187 /* deprecated */
1188 assert(0);
1189 return 0;
1190 break;
1191
1192 case TGSI_OPCODE_CAL:
1193 /* FIXME */
1194 return 0;
1195 break;
1196
1197 case TGSI_OPCODE_RET:
1198 /* FIXME */
1199 return 0;
1200 break;
1201
1202 case TGSI_OPCODE_END:
1203 break;
1204
1205 case TGSI_OPCODE_SSG:
1206 /* TGSI_OPCODE_SGN */
1207 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1208 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1209 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1210 }
1211 break;
1212
1213 case TGSI_OPCODE_CMP:
1214 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1215 src0 = emit_fetch( bld, inst, 0, chan_index );
1216 src1 = emit_fetch( bld, inst, 1, chan_index );
1217 src2 = emit_fetch( bld, inst, 2, chan_index );
1218 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1219 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1220 }
1221 break;
1222
1223 case TGSI_OPCODE_SCS:
1224 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1225 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1226 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1227 }
1228 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1229 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1230 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1231 }
1232 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1233 dst0[CHAN_Z] = bld->base.zero;
1234 }
1235 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1236 dst0[CHAN_W] = bld->base.one;
1237 }
1238 break;
1239
1240 case TGSI_OPCODE_TXB:
1241 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1242 break;
1243
1244 case TGSI_OPCODE_NRM:
1245 /* fall-through */
1246 case TGSI_OPCODE_NRM4:
1247 /* 3 or 4-component normalization */
1248 {
1249 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1250
1251 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1252 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1253 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1254 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1255
1256 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1257
1258 /* xmm4 = src.x */
1259 /* xmm0 = src.x * src.x */
1260 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1261 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1262 tmp4 = tmp0;
1263 }
1264 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1265
1266 /* xmm5 = src.y */
1267 /* xmm0 = xmm0 + src.y * src.y */
1268 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1269 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1270 tmp5 = tmp1;
1271 }
1272 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1273 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1274
1275 /* xmm6 = src.z */
1276 /* xmm0 = xmm0 + src.z * src.z */
1277 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1278 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1279 tmp6 = tmp1;
1280 }
1281 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1282 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1283
1284 if (dims == 4) {
1285 /* xmm7 = src.w */
1286 /* xmm0 = xmm0 + src.w * src.w */
1287 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1288 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1289 tmp7 = tmp1;
1290 }
1291 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1292 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1293 }
1294
1295 /* xmm1 = 1 / sqrt(xmm0) */
1296 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1297
1298 /* dst.x = xmm1 * src.x */
1299 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1300 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1301 }
1302
1303 /* dst.y = xmm1 * src.y */
1304 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1305 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1306 }
1307
1308 /* dst.z = xmm1 * src.z */
1309 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1310 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1311 }
1312
1313 /* dst.w = xmm1 * src.w */
1314 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1315 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1316 }
1317 }
1318
1319 /* dst.w = 1.0 */
1320 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1321 dst0[CHAN_W] = bld->base.one;
1322 }
1323 }
1324 break;
1325
1326 case TGSI_OPCODE_DIV:
1327 /* deprecated */
1328 assert( 0 );
1329 return 0;
1330 break;
1331
1332 case TGSI_OPCODE_DP2:
1333 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1334 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1335 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1336 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1337 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1338 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1339 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1340 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1341 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1342 }
1343 break;
1344
1345 case TGSI_OPCODE_TXL:
1346 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1347 break;
1348
1349 case TGSI_OPCODE_TXP:
1350 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1351 break;
1352
1353 case TGSI_OPCODE_BRK:
1354 /* FIXME */
1355 return 0;
1356 break;
1357
1358 case TGSI_OPCODE_IF:
1359 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1360 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1361 break;
1362
1363 case TGSI_OPCODE_BGNFOR:
1364 /* deprecated */
1365 assert(0);
1366 return 0;
1367 break;
1368
1369 case TGSI_OPCODE_REP:
1370 /* deprecated */
1371 assert(0);
1372 return 0;
1373 break;
1374
1375 case TGSI_OPCODE_ELSE:
1376 lp_exec_mask_cond_invert(&bld->exec_mask);
1377 break;
1378
1379 case TGSI_OPCODE_ENDIF:
1380 lp_exec_mask_cond_pop(&bld->exec_mask);
1381 break;
1382
1383 case TGSI_OPCODE_ENDFOR:
1384 /* deprecated */
1385 assert(0);
1386 return 0;
1387 break;
1388
1389 case TGSI_OPCODE_ENDREP:
1390 /* deprecated */
1391 assert(0);
1392 return 0;
1393 break;
1394
1395 case TGSI_OPCODE_PUSHA:
1396 /* deprecated? */
1397 assert(0);
1398 return 0;
1399 break;
1400
1401 case TGSI_OPCODE_POPA:
1402 /* deprecated? */
1403 assert(0);
1404 return 0;
1405 break;
1406
1407 case TGSI_OPCODE_CEIL:
1408 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1409 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1410 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1411 }
1412 break;
1413
1414 case TGSI_OPCODE_I2F:
1415 /* deprecated? */
1416 assert(0);
1417 return 0;
1418 break;
1419
1420 case TGSI_OPCODE_NOT:
1421 /* deprecated? */
1422 assert(0);
1423 return 0;
1424 break;
1425
1426 case TGSI_OPCODE_TRUNC:
1427 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1428 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1429 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1430 }
1431 break;
1432
1433 case TGSI_OPCODE_SHL:
1434 /* deprecated? */
1435 assert(0);
1436 return 0;
1437 break;
1438
1439 case TGSI_OPCODE_ISHR:
1440 /* deprecated? */
1441 assert(0);
1442 return 0;
1443 break;
1444
1445 case TGSI_OPCODE_AND:
1446 /* deprecated? */
1447 assert(0);
1448 return 0;
1449 break;
1450
1451 case TGSI_OPCODE_OR:
1452 /* deprecated? */
1453 assert(0);
1454 return 0;
1455 break;
1456
1457 case TGSI_OPCODE_MOD:
1458 /* deprecated? */
1459 assert(0);
1460 return 0;
1461 break;
1462
1463 case TGSI_OPCODE_XOR:
1464 /* deprecated? */
1465 assert(0);
1466 return 0;
1467 break;
1468
1469 case TGSI_OPCODE_SAD:
1470 /* deprecated? */
1471 assert(0);
1472 return 0;
1473 break;
1474
1475 case TGSI_OPCODE_TXF:
1476 /* deprecated? */
1477 assert(0);
1478 return 0;
1479 break;
1480
1481 case TGSI_OPCODE_TXQ:
1482 /* deprecated? */
1483 assert(0);
1484 return 0;
1485 break;
1486
1487 case TGSI_OPCODE_CONT:
1488 /* FIXME */
1489 return 0;
1490 break;
1491
1492 case TGSI_OPCODE_EMIT:
1493 return 0;
1494 break;
1495
1496 case TGSI_OPCODE_ENDPRIM:
1497 return 0;
1498 break;
1499
1500 case TGSI_OPCODE_NOP:
1501 break;
1502
1503 default:
1504 return 0;
1505 }
1506
1507 if(info->num_dst) {
1508 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1509 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1510 }
1511 }
1512
1513 return 1;
1514 }
1515
1516
1517 void
1518 lp_build_tgsi_soa(LLVMBuilderRef builder,
1519 const struct tgsi_token *tokens,
1520 struct lp_type type,
1521 struct lp_build_mask_context *mask,
1522 LLVMValueRef consts_ptr,
1523 const LLVMValueRef *pos,
1524 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1525 LLVMValueRef (*outputs)[NUM_CHANNELS],
1526 struct lp_build_sampler_soa *sampler)
1527 {
1528 struct lp_build_tgsi_soa_context bld;
1529 struct tgsi_parse_context parse;
1530 uint num_immediates = 0;
1531 unsigned i;
1532
1533 /* Setup build context */
1534 memset(&bld, 0, sizeof bld);
1535 lp_build_context_init(&bld.base, builder, type);
1536 bld.mask = mask;
1537 bld.pos = pos;
1538 bld.inputs = inputs;
1539 bld.outputs = outputs;
1540 bld.consts_ptr = consts_ptr;
1541 bld.sampler = sampler;
1542
1543 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1544
1545 tgsi_parse_init( &parse, tokens );
1546
1547 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1548 tgsi_parse_token( &parse );
1549
1550 switch( parse.FullToken.Token.Type ) {
1551 case TGSI_TOKEN_TYPE_DECLARATION:
1552 /* Inputs already interpolated */
1553 {
1554 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1555 _debug_printf("warning: failed to define LLVM variable\n");
1556 }
1557 break;
1558
1559 case TGSI_TOKEN_TYPE_INSTRUCTION:
1560 {
1561 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1562 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1563 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1564 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1565 info ? info->mnemonic : "<invalid>");
1566 }
1567
1568 break;
1569
1570 case TGSI_TOKEN_TYPE_IMMEDIATE:
1571 /* simply copy the immediate values into the next immediates[] slot */
1572 {
1573 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1574 assert(size <= 4);
1575 assert(num_immediates < LP_MAX_IMMEDIATES);
1576 for( i = 0; i < size; ++i )
1577 bld.immediates[num_immediates][i] =
1578 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1579 for( i = size; i < 4; ++i )
1580 bld.immediates[num_immediates][i] = bld.base.undef;
1581 num_immediates++;
1582 }
1583 break;
1584
1585 case TGSI_TOKEN_TYPE_PROPERTY:
1586 break;
1587
1588 default:
1589 assert( 0 );
1590 }
1591 }
1592
1593 tgsi_parse_free( &parse );
1594 }
1595