Merge commit 'origin/gallium-winsys-handle-rebased'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55 #include "lp_bld_debug.h"
56
57
/* Number of register slots in the temps[] and immediates[] arrays of
 * struct lp_build_tgsi_soa_context. */
58 #define LP_MAX_TEMPS 256
59 #define LP_MAX_IMMEDIATES 256
60
61
/* Loop header iterating CHAN over 0 .. NUM_CHANNELS-1; the statement or
 * block that follows the macro is the loop body. */
62 #define FOR_EACH_CHANNEL( CHAN )\
63 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
64
/* Non-zero when bit CHAN is set in the write mask of INST's first
 * destination register. */
65 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
66 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
67
/* "if" header form of IS_DST0_CHANNEL_ENABLED; the statement or block that
 * follows the macro is the guarded body. */
68 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
69 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
70
/* Iterates CHAN over all channels and runs the following statement or block
 * only for those enabled in the first destination's write mask. */
71 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
72 FOR_EACH_CHANNEL( CHAN )\
73 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
74
/* Channel indices. */
75 #define CHAN_X 0
76 #define CHAN_Y 1
77 #define CHAN_Z 2
78 #define CHAN_W 3
79
/* Element indices of the four pixels of a quad; used as entries of the
 * swizzle_* tables. */
80 #define QUAD_TOP_LEFT 0
81 #define QUAD_TOP_RIGHT 1
82 #define QUAD_BOTTOM_LEFT 2
83 #define QUAD_BOTTOM_RIGHT 3
84
/* Capacity of lp_exec_mask::cond_stack. */
85 #define LP_TGSI_MAX_NESTING 16
86
/**
 * Per-lane execution mask state, maintained as a stack of condition masks
 * by the lp_exec_mask_cond_push/invert/pop helpers and consumed by
 * lp_exec_mask_store().
 */
87 struct lp_exec_mask {
/* Build context whose builder is used to emit the mask IR. */
88 struct lp_build_context *bld;
89
/* When TRUE, lp_exec_mask_store() blends the new value with the
 * destination's current contents using exec_mask. */
90 boolean has_mask;
91
/* Integer vector type derived from bld->type; condition values are bitcast
 * to it before being used as masks. */
92 LLVMTypeRef int_vec_type;
93
/* Saved cond_mask values, one per currently open condition. */
94 LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
/* Number of entries currently held in cond_stack. */
95 int cond_stack_size;
/* Mask of the innermost open condition. */
96 LLVMValueRef cond_mask;
97
/* Mask applied to stores; lp_exec_mask_update() copies cond_mask here. */
98 LLVMValueRef exec_mask;
99 };
100
/**
 * State shared by the emit_* helpers while translating one shader.
 */
101 struct lp_build_tgsi_soa_context
102 {
/* Build context (builder, vector type, common constants such as zero, one
 * and undef) used for all emitted arithmetic. */
103 struct lp_build_context base;
104
/* Base pointer indexed (Register.Index*4 + swizzle) when fetching from
 * TGSI_FILE_CONSTANT. */
105 LLVMValueRef consts_ptr;
/* NOTE(review): not referenced by the helpers visible in this part of the
 * file; presumably the fragment position values -- confirm. */
106 const LLVMValueRef *pos;
/* Per-register, per-channel values returned for TGSI_FILE_INPUT fetches. */
107 const LLVMValueRef (*inputs)[NUM_CHANNELS];
/* Per-register, per-channel storage pointers for TGSI_FILE_OUTPUT; filled
 * in by emit_declaration() and written through by emit_store(). */
108 LLVMValueRef (*outputs)[NUM_CHANNELS];
109
/* Sampler code generator; emit_tex() calls its emit_fetch_texel hook. */
110 struct lp_build_sampler_soa *sampler;
111
/* Per-register, per-channel values returned for TGSI_FILE_IMMEDIATE
 * fetches. */
112 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
/* Per-register, per-channel storage pointers for TGSI_FILE_TEMPORARY;
 * allocated by emit_declaration(), loaded by emit_fetch() and stored by
 * emit_store(). */
113 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
114
/* Mask context updated by emit_kil() through lp_build_mask_update(). */
115 struct lp_build_mask_context *mask;
/* Condition-driven execution mask applied to register stores. */
116 struct lp_exec_mask exec_mask;
117 };
118
/*
 * Swizzle tables passed to lp_build_swizzle1_aos() by emit_ddx()/emit_ddy().
 * Entry i names the quad element (QUAD_*) whose value is placed at
 * position i.
 */

/* Every position takes the left element of its own row. */
119 static const unsigned char
120 swizzle_left[4] = {
121 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
122 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
123 };
124
/* Every position takes the right element of its own row. */
125 static const unsigned char
126 swizzle_right[4] = {
127 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
128 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
129 };
130
/* Every position takes the top element of its own column. */
131 static const unsigned char
132 swizzle_top[4] = {
133 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
134 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
135 };
136
/* Every position takes the bottom element of its own column. */
137 static const unsigned char
138 swizzle_bottom[4] = {
139 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
140 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
141 };
142
143 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
144 {
145 mask->bld = bld;
146 mask->has_mask = FALSE;
147 mask->cond_stack_size = 0;
148
149 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
150 }
151
152 static void lp_exec_mask_update(struct lp_exec_mask *mask)
153 {
154 mask->exec_mask = mask->cond_mask;
155 if (mask->cond_stack_size > 0)
156 mask->has_mask = TRUE;
157 }
158
159 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
160 LLVMValueRef val)
161 {
162 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
163 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
164 mask->int_vec_type, "");
165
166 lp_exec_mask_update(mask);
167 }
168
169 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
170 {
171 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
172 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
173 mask->cond_mask, "");
174
175 /* means that we didn't have any mask before and that
176 * we were fully enabled */
177 if (mask->cond_stack_size <= 1) {
178 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
179 }
180
181 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
182 inv_mask,
183 prev_mask, "");
184 lp_exec_mask_update(mask);
185 }
186
187 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
188 {
189 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
190 lp_exec_mask_update(mask);
191 }
192
193 static void lp_exec_mask_store(struct lp_exec_mask *mask,
194 LLVMValueRef val,
195 LLVMValueRef dst)
196 {
197 if (mask->has_mask) {
198 LLVMValueRef real_val, dst_val;
199
200 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
201 real_val = lp_build_select(mask->bld,
202 mask->exec_mask,
203 val, dst_val);
204
205 LLVMBuildStore(mask->bld->builder, real_val, dst);
206 } else
207 LLVMBuildStore(mask->bld->builder, val, dst);
208 }
209
210
211 static LLVMValueRef
212 emit_ddx(struct lp_build_tgsi_soa_context *bld,
213 LLVMValueRef src)
214 {
215 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
216 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
217 return lp_build_sub(&bld->base, src_right, src_left);
218 }
219
220
221 static LLVMValueRef
222 emit_ddy(struct lp_build_tgsi_soa_context *bld,
223 LLVMValueRef src)
224 {
225 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
226 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
227 return lp_build_sub(&bld->base, src_top, src_bottom);
228 }
229
230
231 /**
232 * Register fetch.
233 */
234 static LLVMValueRef
235 emit_fetch(
236 struct lp_build_tgsi_soa_context *bld,
237 const struct tgsi_full_instruction *inst,
238 unsigned index,
239 const unsigned chan_index )
240 {
241 const struct tgsi_full_src_register *reg = &inst->Src[index];
242 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
243 LLVMValueRef res;
244
245 switch (swizzle) {
246 case TGSI_SWIZZLE_X:
247 case TGSI_SWIZZLE_Y:
248 case TGSI_SWIZZLE_Z:
249 case TGSI_SWIZZLE_W:
250
251 switch (reg->Register.File) {
252 case TGSI_FILE_CONSTANT: {
253 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
254 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
255 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
256 res = lp_build_broadcast_scalar(&bld->base, scalar);
257 break;
258 }
259
260 case TGSI_FILE_IMMEDIATE:
261 res = bld->immediates[reg->Register.Index][swizzle];
262 assert(res);
263 break;
264
265 case TGSI_FILE_INPUT:
266 res = bld->inputs[reg->Register.Index][swizzle];
267 assert(res);
268 break;
269
270 case TGSI_FILE_TEMPORARY:
271 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
272 if(!res)
273 return bld->base.undef;
274 break;
275
276 default:
277 assert( 0 );
278 return bld->base.undef;
279 }
280 break;
281
282 default:
283 assert( 0 );
284 return bld->base.undef;
285 }
286
287 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
288 case TGSI_UTIL_SIGN_CLEAR:
289 res = lp_build_abs( &bld->base, res );
290 break;
291
292 case TGSI_UTIL_SIGN_SET:
293 /* TODO: Use bitwese OR for floating point */
294 res = lp_build_abs( &bld->base, res );
295 res = LLVMBuildNeg( bld->base.builder, res, "" );
296 break;
297
298 case TGSI_UTIL_SIGN_TOGGLE:
299 res = LLVMBuildNeg( bld->base.builder, res, "" );
300 break;
301
302 case TGSI_UTIL_SIGN_KEEP:
303 break;
304 }
305
306 return res;
307 }
308
309
310 /**
311 * Register fetch with derivatives.
312 */
313 static void
314 emit_fetch_deriv(
315 struct lp_build_tgsi_soa_context *bld,
316 const struct tgsi_full_instruction *inst,
317 unsigned index,
318 const unsigned chan_index,
319 LLVMValueRef *res,
320 LLVMValueRef *ddx,
321 LLVMValueRef *ddy)
322 {
323 LLVMValueRef src;
324
325 src = emit_fetch(bld, inst, index, chan_index);
326
327 if(res)
328 *res = src;
329
330 /* TODO: use interpolation coeffs for inputs */
331
332 if(ddx)
333 *ddx = emit_ddx(bld, src);
334
335 if(ddy)
336 *ddy = emit_ddy(bld, src);
337 }
338
339
340 /**
341 * Register store.
342 */
343 static void
344 emit_store(
345 struct lp_build_tgsi_soa_context *bld,
346 const struct tgsi_full_instruction *inst,
347 unsigned index,
348 unsigned chan_index,
349 LLVMValueRef value)
350 {
351 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
352
353 switch( inst->Instruction.Saturate ) {
354 case TGSI_SAT_NONE:
355 break;
356
357 case TGSI_SAT_ZERO_ONE:
358 value = lp_build_max(&bld->base, value, bld->base.zero);
359 value = lp_build_min(&bld->base, value, bld->base.one);
360 break;
361
362 case TGSI_SAT_MINUS_PLUS_ONE:
363 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
364 value = lp_build_min(&bld->base, value, bld->base.one);
365 break;
366
367 default:
368 assert(0);
369 }
370
371 switch( reg->Register.File ) {
372 case TGSI_FILE_OUTPUT:
373 lp_exec_mask_store(&bld->exec_mask, value,
374 bld->outputs[reg->Register.Index][chan_index]);
375 break;
376
377 case TGSI_FILE_TEMPORARY:
378 lp_exec_mask_store(&bld->exec_mask, value,
379 bld->temps[reg->Register.Index][chan_index]);
380 break;
381
382 case TGSI_FILE_ADDRESS:
383 /* FIXME */
384 assert(0);
385 break;
386
387 case TGSI_FILE_PREDICATE:
388 /* FIXME */
389 assert(0);
390 break;
391
392 default:
393 assert( 0 );
394 }
395 }
396
397
398 /**
399 * High-level instruction translators.
400 */
401
402
403 static void
404 emit_tex( struct lp_build_tgsi_soa_context *bld,
405 const struct tgsi_full_instruction *inst,
406 boolean apply_lodbias,
407 boolean projected,
408 LLVMValueRef *texel)
409 {
410 const uint unit = inst->Src[1].Register.Index;
411 LLVMValueRef lodbias;
412 LLVMValueRef oow = NULL;
413 LLVMValueRef coords[3];
414 unsigned num_coords;
415 unsigned i;
416
417 switch (inst->Texture.Texture) {
418 case TGSI_TEXTURE_1D:
419 num_coords = 1;
420 break;
421 case TGSI_TEXTURE_2D:
422 case TGSI_TEXTURE_RECT:
423 num_coords = 2;
424 break;
425 case TGSI_TEXTURE_SHADOW1D:
426 case TGSI_TEXTURE_SHADOW2D:
427 case TGSI_TEXTURE_SHADOWRECT:
428 case TGSI_TEXTURE_3D:
429 case TGSI_TEXTURE_CUBE:
430 num_coords = 3;
431 break;
432 default:
433 assert(0);
434 return;
435 }
436
437 if(apply_lodbias)
438 lodbias = emit_fetch( bld, inst, 0, 3 );
439 else
440 lodbias = bld->base.zero;
441
442 if (projected) {
443 oow = emit_fetch( bld, inst, 0, 3 );
444 oow = lp_build_rcp(&bld->base, oow);
445 }
446
447 for (i = 0; i < num_coords; i++) {
448 coords[i] = emit_fetch( bld, inst, 0, i );
449 if (projected)
450 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
451 }
452 for (i = num_coords; i < 3; i++) {
453 coords[i] = bld->base.undef;
454 }
455
456 bld->sampler->emit_fetch_texel(bld->sampler,
457 bld->base.builder,
458 bld->base.type,
459 unit, num_coords, coords, lodbias,
460 texel);
461 }
462
463
464 static void
465 emit_kil(
466 struct lp_build_tgsi_soa_context *bld,
467 const struct tgsi_full_instruction *inst )
468 {
469 const struct tgsi_full_src_register *reg = &inst->Src[0];
470 LLVMValueRef terms[NUM_CHANNELS];
471 LLVMValueRef mask;
472 unsigned chan_index;
473
474 memset(&terms, 0, sizeof terms);
475
476 FOR_EACH_CHANNEL( chan_index ) {
477 unsigned swizzle;
478
479 /* Unswizzle channel */
480 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
481
482 /* Check if the component has not been already tested. */
483 assert(swizzle < NUM_CHANNELS);
484 if( !terms[swizzle] )
485 /* TODO: change the comparison operator instead of setting the sign */
486 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
487 }
488
489 mask = NULL;
490 FOR_EACH_CHANNEL( chan_index ) {
491 if(terms[chan_index]) {
492 LLVMValueRef chan_mask;
493
494 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
495
496 if(mask)
497 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
498 else
499 mask = chan_mask;
500 }
501 }
502
503 if(mask)
504 lp_build_mask_update(bld->mask, mask);
505 }
506
507
508 /**
509 * Check if inst src/dest regs use indirect addressing into temporary
510 * register file.
511 */
512 static boolean
513 indirect_temp_reference(const struct tgsi_full_instruction *inst)
514 {
515 uint i;
516 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
517 const struct tgsi_full_src_register *reg = &inst->Src[i];
518 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
519 reg->Register.Indirect)
520 return TRUE;
521 }
522 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
523 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
524 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
525 reg->Register.Indirect)
526 return TRUE;
527 }
528 return FALSE;
529 }
530
531 static int
532 emit_declaration(
533 struct lp_build_tgsi_soa_context *bld,
534 const struct tgsi_full_declaration *decl)
535 {
536 unsigned first = decl->Range.First;
537 unsigned last = decl->Range.Last;
538 unsigned idx, i;
539
540 for (idx = first; idx <= last; ++idx) {
541 boolean ok;
542
543 switch (decl->Declaration.File) {
544 case TGSI_FILE_TEMPORARY:
545 for (i = 0; i < NUM_CHANNELS; i++)
546 bld->temps[idx][i] = lp_build_alloca(&bld->base);
547 ok = TRUE;
548 break;
549
550 case TGSI_FILE_OUTPUT:
551 for (i = 0; i < NUM_CHANNELS; i++)
552 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
553 ok = TRUE;
554 break;
555
556 default:
557 /* don't need to declare other vars */
558 ok = TRUE;
559 }
560
561 if (!ok)
562 return FALSE;
563 }
564
565 return TRUE;
566 }
567
568 static int
569 emit_instruction(
570 struct lp_build_tgsi_soa_context *bld,
571 const struct tgsi_full_instruction *inst,
572 const struct tgsi_opcode_info *info)
573 {
574 unsigned chan_index;
575 LLVMValueRef src0, src1, src2;
576 LLVMValueRef tmp0, tmp1, tmp2;
577 LLVMValueRef tmp3 = NULL;
578 LLVMValueRef tmp4 = NULL;
579 LLVMValueRef tmp5 = NULL;
580 LLVMValueRef tmp6 = NULL;
581 LLVMValueRef tmp7 = NULL;
582 LLVMValueRef res;
583 LLVMValueRef dst0[NUM_CHANNELS];
584
585 /* we can't handle indirect addressing into temp register file yet */
586 if (indirect_temp_reference(inst))
587 return FALSE;
588
589 /*
590 * Stores and write masks are handled in a general fashion after the long
591 * instruction opcode switch statement.
592 *
593 * Although not strictly necessary, we avoid generating instructions for
594 * channels which won't be stored, in cases where that's easy. For some
595 * complex instructions, like texture sampling, it is more convenient to
596 * assume a full writemask and then let LLVM optimization passes eliminate
597 * redundant code.
598 */
599
600 assert(info->num_dst <= 1);
601 if(info->num_dst) {
602 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
603 dst0[chan_index] = bld->base.undef;
604 }
605 }
606
607 switch (inst->Instruction.Opcode) {
608 #if 0
609 case TGSI_OPCODE_ARL:
610 /* FIXME */
611 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
612 tmp0 = emit_fetch( bld, inst, 0, chan_index );
613 emit_flr(bld, 0, 0);
614 emit_f2it( bld, 0 );
615 dst0[chan_index] = tmp0;
616 }
617 break;
618 #endif
619
620 case TGSI_OPCODE_MOV:
621 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
622 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
623 }
624 break;
625
626 case TGSI_OPCODE_LIT:
627 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
628 dst0[CHAN_X] = bld->base.one;
629 }
630 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
631 src0 = emit_fetch( bld, inst, 0, CHAN_X );
632 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
633 }
634 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
635 /* XMM[1] = SrcReg[0].yyyy */
636 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
637 /* XMM[1] = max(XMM[1], 0) */
638 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
639 /* XMM[2] = SrcReg[0].wwww */
640 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
641 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
642 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
643 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
644 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
645 }
646 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
647 dst0[CHAN_W] = bld->base.one;
648 }
649 break;
650
651 case TGSI_OPCODE_RCP:
652 /* TGSI_OPCODE_RECIP */
653 src0 = emit_fetch( bld, inst, 0, CHAN_X );
654 res = lp_build_rcp(&bld->base, src0);
655 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
656 dst0[chan_index] = res;
657 }
658 break;
659
660 case TGSI_OPCODE_RSQ:
661 /* TGSI_OPCODE_RECIPSQRT */
662 src0 = emit_fetch( bld, inst, 0, CHAN_X );
663 src0 = lp_build_abs(&bld->base, src0);
664 res = lp_build_rsqrt(&bld->base, src0);
665 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
666 dst0[chan_index] = res;
667 }
668 break;
669
670 case TGSI_OPCODE_EXP:
671 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
672 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
673 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
674 LLVMValueRef *p_exp2_int_part = NULL;
675 LLVMValueRef *p_frac_part = NULL;
676 LLVMValueRef *p_exp2 = NULL;
677
678 src0 = emit_fetch( bld, inst, 0, CHAN_X );
679
680 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
681 p_exp2_int_part = &tmp0;
682 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
683 p_frac_part = &tmp1;
684 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
685 p_exp2 = &tmp2;
686
687 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
688
689 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
690 dst0[CHAN_X] = tmp0;
691 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
692 dst0[CHAN_Y] = tmp1;
693 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
694 dst0[CHAN_Z] = tmp2;
695 }
696 /* dst.w = 1.0 */
697 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
698 dst0[CHAN_W] = bld->base.one;
699 }
700 break;
701
702 case TGSI_OPCODE_LOG:
703 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
704 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
705 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
706 LLVMValueRef *p_floor_log2 = NULL;
707 LLVMValueRef *p_exp = NULL;
708 LLVMValueRef *p_log2 = NULL;
709
710 src0 = emit_fetch( bld, inst, 0, CHAN_X );
711 src0 = lp_build_abs( &bld->base, src0 );
712
713 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
714 p_floor_log2 = &tmp0;
715 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
716 p_exp = &tmp1;
717 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
718 p_log2 = &tmp2;
719
720 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
721
722 /* dst.x = floor(lg2(abs(src.x))) */
723 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
724 dst0[CHAN_X] = tmp0;
725 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
726 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
727 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
728 }
729 /* dst.z = lg2(abs(src.x)) */
730 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
731 dst0[CHAN_Z] = tmp2;
732 }
733 /* dst.w = 1.0 */
734 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
735 dst0[CHAN_W] = bld->base.one;
736 }
737 break;
738
739 case TGSI_OPCODE_MUL:
740 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
741 src0 = emit_fetch( bld, inst, 0, chan_index );
742 src1 = emit_fetch( bld, inst, 1, chan_index );
743 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
744 }
745 break;
746
747 case TGSI_OPCODE_ADD:
748 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
749 src0 = emit_fetch( bld, inst, 0, chan_index );
750 src1 = emit_fetch( bld, inst, 1, chan_index );
751 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
752 }
753 break;
754
755 case TGSI_OPCODE_DP3:
756 /* TGSI_OPCODE_DOT3 */
757 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
758 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
759 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
760 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
761 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
762 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
763 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
764 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
765 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
766 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
767 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
768 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
769 dst0[chan_index] = tmp0;
770 }
771 break;
772
773 case TGSI_OPCODE_DP4:
774 /* TGSI_OPCODE_DOT4 */
775 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
776 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
777 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
778 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
779 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
780 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
781 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
782 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
783 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
784 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
785 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
786 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
787 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
788 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
789 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
790 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
791 dst0[chan_index] = tmp0;
792 }
793 break;
794
795 case TGSI_OPCODE_DST:
796 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
797 dst0[CHAN_X] = bld->base.one;
798 }
799 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
800 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
801 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
802 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
803 }
804 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
805 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
806 }
807 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
808 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
809 }
810 break;
811
812 case TGSI_OPCODE_MIN:
813 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
814 src0 = emit_fetch( bld, inst, 0, chan_index );
815 src1 = emit_fetch( bld, inst, 1, chan_index );
816 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
817 }
818 break;
819
820 case TGSI_OPCODE_MAX:
821 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
822 src0 = emit_fetch( bld, inst, 0, chan_index );
823 src1 = emit_fetch( bld, inst, 1, chan_index );
824 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
825 }
826 break;
827
828 case TGSI_OPCODE_SLT:
829 /* TGSI_OPCODE_SETLT */
830 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
831 src0 = emit_fetch( bld, inst, 0, chan_index );
832 src1 = emit_fetch( bld, inst, 1, chan_index );
833 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
834 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
835 }
836 break;
837
838 case TGSI_OPCODE_SGE:
839 /* TGSI_OPCODE_SETGE */
840 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
841 src0 = emit_fetch( bld, inst, 0, chan_index );
842 src1 = emit_fetch( bld, inst, 1, chan_index );
843 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
844 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
845 }
846 break;
847
848 case TGSI_OPCODE_MAD:
849 /* TGSI_OPCODE_MADD */
850 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
851 tmp0 = emit_fetch( bld, inst, 0, chan_index );
852 tmp1 = emit_fetch( bld, inst, 1, chan_index );
853 tmp2 = emit_fetch( bld, inst, 2, chan_index );
854 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
855 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
856 dst0[chan_index] = tmp0;
857 }
858 break;
859
860 case TGSI_OPCODE_SUB:
861 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
862 tmp0 = emit_fetch( bld, inst, 0, chan_index );
863 tmp1 = emit_fetch( bld, inst, 1, chan_index );
864 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
865 }
866 break;
867
868 case TGSI_OPCODE_LRP:
869 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
870 src0 = emit_fetch( bld, inst, 0, chan_index );
871 src1 = emit_fetch( bld, inst, 1, chan_index );
872 src2 = emit_fetch( bld, inst, 2, chan_index );
873 tmp0 = lp_build_sub( &bld->base, src1, src2 );
874 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
875 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
876 }
877 break;
878
879 case TGSI_OPCODE_CND:
880 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
881 src0 = emit_fetch( bld, inst, 0, chan_index );
882 src1 = emit_fetch( bld, inst, 1, chan_index );
883 src2 = emit_fetch( bld, inst, 2, chan_index );
884 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
885 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
886 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
887 }
888 break;
889
890 case TGSI_OPCODE_DP2A:
891 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
892 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
893 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
894 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
895 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
896 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
897 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
898 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
899 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
900 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
901 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
902 }
903 break;
904
905 case TGSI_OPCODE_FRC:
906 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
907 src0 = emit_fetch( bld, inst, 0, chan_index );
908 tmp0 = lp_build_floor(&bld->base, src0);
909 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
910 dst0[chan_index] = tmp0;
911 }
912 break;
913
914 case TGSI_OPCODE_CLAMP:
915 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
916 tmp0 = emit_fetch( bld, inst, 0, chan_index );
917 src1 = emit_fetch( bld, inst, 1, chan_index );
918 src2 = emit_fetch( bld, inst, 2, chan_index );
919 tmp0 = lp_build_max(&bld->base, tmp0, src1);
920 tmp0 = lp_build_min(&bld->base, tmp0, src2);
921 dst0[chan_index] = tmp0;
922 }
923 break;
924
925 case TGSI_OPCODE_FLR:
926 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
927 tmp0 = emit_fetch( bld, inst, 0, chan_index );
928 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
929 }
930 break;
931
932 case TGSI_OPCODE_ROUND:
933 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
934 tmp0 = emit_fetch( bld, inst, 0, chan_index );
935 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
936 }
937 break;
938
939 case TGSI_OPCODE_EX2: {
940 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
941 tmp0 = lp_build_exp2( &bld->base, tmp0);
942 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
943 dst0[chan_index] = tmp0;
944 }
945 break;
946 }
947
948 case TGSI_OPCODE_LG2:
949 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
950 tmp0 = lp_build_log2( &bld->base, tmp0);
951 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
952 dst0[chan_index] = tmp0;
953 }
954 break;
955
956 case TGSI_OPCODE_POW:
957 src0 = emit_fetch( bld, inst, 0, CHAN_X );
958 src1 = emit_fetch( bld, inst, 1, CHAN_X );
959 res = lp_build_pow( &bld->base, src0, src1 );
960 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
961 dst0[chan_index] = res;
962 }
963 break;
964
965 case TGSI_OPCODE_XPD:
966 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
967 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
968 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
969 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
970 }
971 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
972 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
973 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
974 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
975 }
976 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
977 tmp2 = tmp0;
978 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
979 tmp5 = tmp3;
980 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
981 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
982 dst0[CHAN_X] = tmp2;
983 }
984 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
985 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
986 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
987 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
988 }
989 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
990 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
991 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
992 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
993 dst0[CHAN_Y] = tmp3;
994 }
995 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
996 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
997 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
998 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
999 dst0[CHAN_Z] = tmp5;
1000 }
1001 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1002 dst0[CHAN_W] = bld->base.one;
1003 }
1004 break;
1005
1006 case TGSI_OPCODE_ABS:
1007 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1008 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1009 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1010 }
1011 break;
1012
1013 case TGSI_OPCODE_RCC:
1014 /* deprecated? */
1015 assert(0);
1016 return 0;
1017
1018 case TGSI_OPCODE_DPH:
1019 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1020 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1021 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1022 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1023 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1024 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1025 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1026 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1027 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1028 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1029 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1030 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1031 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1032 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1033 dst0[chan_index] = tmp0;
1034 }
1035 break;
1036
1037 case TGSI_OPCODE_COS:
1038 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1039 tmp0 = lp_build_cos( &bld->base, tmp0 );
1040 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1041 dst0[chan_index] = tmp0;
1042 }
1043 break;
1044
1045 case TGSI_OPCODE_DDX:
1046 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1047 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1048 }
1049 break;
1050
1051 case TGSI_OPCODE_DDY:
1052 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1053 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1054 }
1055 break;
1056
1057 case TGSI_OPCODE_KILP:
1058 /* predicated kill */
1059 /* FIXME */
1060 return 0;
1061 break;
1062
1063 case TGSI_OPCODE_KIL:
1064 /* conditional kill */
1065 emit_kil( bld, inst );
1066 break;
1067
1068 case TGSI_OPCODE_PK2H:
1069 return 0;
1070 break;
1071
1072 case TGSI_OPCODE_PK2US:
1073 return 0;
1074 break;
1075
1076 case TGSI_OPCODE_PK4B:
1077 return 0;
1078 break;
1079
1080 case TGSI_OPCODE_PK4UB:
1081 return 0;
1082 break;
1083
1084 case TGSI_OPCODE_RFL:
1085 return 0;
1086 break;
1087
1088 case TGSI_OPCODE_SEQ:
1089 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1090 src0 = emit_fetch( bld, inst, 0, chan_index );
1091 src1 = emit_fetch( bld, inst, 1, chan_index );
1092 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1093 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1094 }
1095 break;
1096
1097 case TGSI_OPCODE_SFL:
1098 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1099 dst0[chan_index] = bld->base.zero;
1100 }
1101 break;
1102
1103 case TGSI_OPCODE_SGT:
1104 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1105 src0 = emit_fetch( bld, inst, 0, chan_index );
1106 src1 = emit_fetch( bld, inst, 1, chan_index );
1107 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1108 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1109 }
1110 break;
1111
1112 case TGSI_OPCODE_SIN:
1113 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1114 tmp0 = lp_build_sin( &bld->base, tmp0 );
1115 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1116 dst0[chan_index] = tmp0;
1117 }
1118 break;
1119
1120 case TGSI_OPCODE_SLE:
1121 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1122 src0 = emit_fetch( bld, inst, 0, chan_index );
1123 src1 = emit_fetch( bld, inst, 1, chan_index );
1124 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1125 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1126 }
1127 break;
1128
1129 case TGSI_OPCODE_SNE:
1130 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1131 src0 = emit_fetch( bld, inst, 0, chan_index );
1132 src1 = emit_fetch( bld, inst, 1, chan_index );
1133 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1134 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1135 }
1136 break;
1137
1138 case TGSI_OPCODE_STR:
1139 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1140 dst0[chan_index] = bld->base.one;
1141 }
1142 break;
1143
1144 case TGSI_OPCODE_TEX:
1145 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1146 break;
1147
1148 case TGSI_OPCODE_TXD:
1149 /* FIXME */
1150 return 0;
1151 break;
1152
1153 case TGSI_OPCODE_UP2H:
1154 /* deprecated */
1155 assert (0);
1156 return 0;
1157 break;
1158
1159 case TGSI_OPCODE_UP2US:
1160 /* deprecated */
1161 assert(0);
1162 return 0;
1163 break;
1164
1165 case TGSI_OPCODE_UP4B:
1166 /* deprecated */
1167 assert(0);
1168 return 0;
1169 break;
1170
1171 case TGSI_OPCODE_UP4UB:
1172 /* deprecated */
1173 assert(0);
1174 return 0;
1175 break;
1176
1177 case TGSI_OPCODE_X2D:
1178 /* deprecated? */
1179 assert(0);
1180 return 0;
1181 break;
1182
1183 case TGSI_OPCODE_ARA:
1184 /* deprecated */
1185 assert(0);
1186 return 0;
1187 break;
1188
1189 #if 0
1190 case TGSI_OPCODE_ARR:
1191 /* FIXME */
1192 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1193 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1194 emit_rnd( bld, 0, 0 );
1195 emit_f2it( bld, 0 );
1196 dst0[chan_index] = tmp0;
1197 }
1198 break;
1199 #endif
1200
1201 case TGSI_OPCODE_BRA:
1202 /* deprecated */
1203 assert(0);
1204 return 0;
1205 break;
1206
1207 case TGSI_OPCODE_CAL:
1208 /* FIXME */
1209 return 0;
1210 break;
1211
1212 case TGSI_OPCODE_RET:
1213 /* FIXME */
1214 return 0;
1215 break;
1216
1217 case TGSI_OPCODE_END:
1218 break;
1219
1220 case TGSI_OPCODE_SSG:
1221 /* TGSI_OPCODE_SGN */
1222 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1223 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1224 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1225 }
1226 break;
1227
1228 case TGSI_OPCODE_CMP:
1229 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1230 src0 = emit_fetch( bld, inst, 0, chan_index );
1231 src1 = emit_fetch( bld, inst, 1, chan_index );
1232 src2 = emit_fetch( bld, inst, 2, chan_index );
1233 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1234 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1235 }
1236 break;
1237
1238 case TGSI_OPCODE_SCS:
1239 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1240 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1241 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1242 }
1243 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1244 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1245 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1246 }
1247 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1248 dst0[CHAN_Z] = bld->base.zero;
1249 }
1250 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1251 dst0[CHAN_W] = bld->base.one;
1252 }
1253 break;
1254
1255 case TGSI_OPCODE_TXB:
1256 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1257 break;
1258
1259 case TGSI_OPCODE_NRM:
1260 /* fall-through */
1261 case TGSI_OPCODE_NRM4:
1262 /* 3 or 4-component normalization */
1263 {
1264 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1265
1266 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1267 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1268 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1269 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1270
1271 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1272
1273 /* xmm4 = src.x */
1274 /* xmm0 = src.x * src.x */
1275 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1276 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1277 tmp4 = tmp0;
1278 }
1279 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1280
1281 /* xmm5 = src.y */
1282 /* xmm0 = xmm0 + src.y * src.y */
1283 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1284 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1285 tmp5 = tmp1;
1286 }
1287 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1288 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1289
1290 /* xmm6 = src.z */
1291 /* xmm0 = xmm0 + src.z * src.z */
1292 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1293 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1294 tmp6 = tmp1;
1295 }
1296 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1297 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1298
1299 if (dims == 4) {
1300 /* xmm7 = src.w */
1301 /* xmm0 = xmm0 + src.w * src.w */
1302 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1303 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1304 tmp7 = tmp1;
1305 }
1306 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1307 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1308 }
1309
1310 /* xmm1 = 1 / sqrt(xmm0) */
1311 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1312
1313 /* dst.x = xmm1 * src.x */
1314 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1315 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1316 }
1317
1318 /* dst.y = xmm1 * src.y */
1319 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1320 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1321 }
1322
1323 /* dst.z = xmm1 * src.z */
1324 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1325 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1326 }
1327
1328 /* dst.w = xmm1 * src.w */
1329 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1330 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1331 }
1332 }
1333
1334 /* dst.w = 1.0 */
1335 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1336 dst0[CHAN_W] = bld->base.one;
1337 }
1338 }
1339 break;
1340
1341 case TGSI_OPCODE_DIV:
1342 /* deprecated */
1343 assert( 0 );
1344 return 0;
1345 break;
1346
1347 case TGSI_OPCODE_DP2:
1348 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1349 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1350 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1351 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1352 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1353 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1354 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1355 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1356 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1357 }
1358 break;
1359
1360 case TGSI_OPCODE_TXL:
1361 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1362 break;
1363
1364 case TGSI_OPCODE_TXP:
1365 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1366 break;
1367
1368 case TGSI_OPCODE_BRK:
1369 /* FIXME */
1370 return 0;
1371 break;
1372
1373 case TGSI_OPCODE_IF:
1374 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1375 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1376 break;
1377
1378 case TGSI_OPCODE_BGNFOR:
1379 /* deprecated */
1380 assert(0);
1381 return 0;
1382 break;
1383
1384 case TGSI_OPCODE_REP:
1385 /* deprecated */
1386 assert(0);
1387 return 0;
1388 break;
1389
1390 case TGSI_OPCODE_ELSE:
1391 lp_exec_mask_cond_invert(&bld->exec_mask);
1392 break;
1393
1394 case TGSI_OPCODE_ENDIF:
1395 lp_exec_mask_cond_pop(&bld->exec_mask);
1396 break;
1397
1398 case TGSI_OPCODE_ENDFOR:
1399 /* deprecated */
1400 assert(0);
1401 return 0;
1402 break;
1403
1404 case TGSI_OPCODE_ENDREP:
1405 /* deprecated */
1406 assert(0);
1407 return 0;
1408 break;
1409
1410 case TGSI_OPCODE_PUSHA:
1411 /* deprecated? */
1412 assert(0);
1413 return 0;
1414 break;
1415
1416 case TGSI_OPCODE_POPA:
1417 /* deprecated? */
1418 assert(0);
1419 return 0;
1420 break;
1421
1422 case TGSI_OPCODE_CEIL:
1423 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1424 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1425 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1426 }
1427 break;
1428
1429 case TGSI_OPCODE_I2F:
1430 /* deprecated? */
1431 assert(0);
1432 return 0;
1433 break;
1434
1435 case TGSI_OPCODE_NOT:
1436 /* deprecated? */
1437 assert(0);
1438 return 0;
1439 break;
1440
1441 case TGSI_OPCODE_TRUNC:
1442 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1443 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1444 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1445 }
1446 break;
1447
1448 case TGSI_OPCODE_SHL:
1449 /* deprecated? */
1450 assert(0);
1451 return 0;
1452 break;
1453
1454 case TGSI_OPCODE_ISHR:
1455 /* deprecated? */
1456 assert(0);
1457 return 0;
1458 break;
1459
1460 case TGSI_OPCODE_AND:
1461 /* deprecated? */
1462 assert(0);
1463 return 0;
1464 break;
1465
1466 case TGSI_OPCODE_OR:
1467 /* deprecated? */
1468 assert(0);
1469 return 0;
1470 break;
1471
1472 case TGSI_OPCODE_MOD:
1473 /* deprecated? */
1474 assert(0);
1475 return 0;
1476 break;
1477
1478 case TGSI_OPCODE_XOR:
1479 /* deprecated? */
1480 assert(0);
1481 return 0;
1482 break;
1483
1484 case TGSI_OPCODE_SAD:
1485 /* deprecated? */
1486 assert(0);
1487 return 0;
1488 break;
1489
1490 case TGSI_OPCODE_TXF:
1491 /* deprecated? */
1492 assert(0);
1493 return 0;
1494 break;
1495
1496 case TGSI_OPCODE_TXQ:
1497 /* deprecated? */
1498 assert(0);
1499 return 0;
1500 break;
1501
1502 case TGSI_OPCODE_CONT:
1503 /* FIXME */
1504 return 0;
1505 break;
1506
1507 case TGSI_OPCODE_EMIT:
1508 return 0;
1509 break;
1510
1511 case TGSI_OPCODE_ENDPRIM:
1512 return 0;
1513 break;
1514
1515 case TGSI_OPCODE_NOP:
1516 break;
1517
1518 default:
1519 return 0;
1520 }
1521
1522 if(info->num_dst) {
1523 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1524 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1525 }
1526 }
1527
1528 return 1;
1529 }
1530
1531
1532 void
1533 lp_build_tgsi_soa(LLVMBuilderRef builder,
1534 const struct tgsi_token *tokens,
1535 struct lp_type type,
1536 struct lp_build_mask_context *mask,
1537 LLVMValueRef consts_ptr,
1538 const LLVMValueRef *pos,
1539 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1540 LLVMValueRef (*outputs)[NUM_CHANNELS],
1541 struct lp_build_sampler_soa *sampler)
1542 {
1543 struct lp_build_tgsi_soa_context bld;
1544 struct tgsi_parse_context parse;
1545 uint num_immediates = 0;
1546 unsigned i;
1547
1548 /* Setup build context */
1549 memset(&bld, 0, sizeof bld);
1550 lp_build_context_init(&bld.base, builder, type);
1551 bld.mask = mask;
1552 bld.pos = pos;
1553 bld.inputs = inputs;
1554 bld.outputs = outputs;
1555 bld.consts_ptr = consts_ptr;
1556 bld.sampler = sampler;
1557
1558 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1559
1560 tgsi_parse_init( &parse, tokens );
1561
1562 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1563 tgsi_parse_token( &parse );
1564
1565 switch( parse.FullToken.Token.Type ) {
1566 case TGSI_TOKEN_TYPE_DECLARATION:
1567 /* Inputs already interpolated */
1568 {
1569 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1570 _debug_printf("warning: failed to define LLVM variable\n");
1571 }
1572 break;
1573
1574 case TGSI_TOKEN_TYPE_INSTRUCTION:
1575 {
1576 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1577 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1578 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1579 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1580 info ? info->mnemonic : "<invalid>");
1581 }
1582
1583 break;
1584
1585 case TGSI_TOKEN_TYPE_IMMEDIATE:
1586 /* simply copy the immediate values into the next immediates[] slot */
1587 {
1588 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1589 assert(size <= 4);
1590 assert(num_immediates < LP_MAX_IMMEDIATES);
1591 for( i = 0; i < size; ++i )
1592 bld.immediates[num_immediates][i] =
1593 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1594 for( i = size; i < 4; ++i )
1595 bld.immediates[num_immediates][i] = bld.base.undef;
1596 num_immediates++;
1597 }
1598 break;
1599
1600 case TGSI_TOKEN_TYPE_PROPERTY:
1601 break;
1602
1603 default:
1604 assert( 0 );
1605 }
1606 }
1607
1608 tgsi_parse_free( &parse );
1609 }
1610