Merge branch '7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55 #include "lp_bld_debug.h"
56
57
/*
 * Capacities of the fixed-size register tables kept in
 * struct lp_build_tgsi_soa_context (first dimension of the temps[] and
 * immediates[] arrays respectively).
 */
#define LP_MAX_TEMPS       256
#define LP_MAX_IMMEDIATES  256
60
61
/* Loop head: steps the lvalue CHAN through 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/*
 * Non-zero when bit CHAN is set in the write mask of the first destination
 * register of INST.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/*
 * Bare `if` head: the statement (or braced block) following the macro only
 * runs when channel CHAN of the first destination of INST is write-enabled.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/*
 * Loop head: runs the following statement once for every channel that is
 * write-enabled in the first destination register of INST.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++) \
      if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
74
/* Channel indices of a four-component register. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3

/* Element index of each pixel inside a 2x2 quad. */
#define QUAD_TOP_LEFT      0
#define QUAD_TOP_RIGHT     1
#define QUAD_BOTTOM_LEFT   2
#define QUAD_BOTTOM_RIGHT  3

/* Number of entries in the condition stack of struct lp_exec_mask. */
#define LP_TGSI_MAX_NESTING  16
86
87 struct lp_exec_mask {
88 struct lp_build_context *bld;
89
90 boolean has_mask;
91
92 LLVMTypeRef int_vec_type;
93
94 LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
95 int cond_stack_size;
96 LLVMValueRef cond_mask;
97
98 LLVMValueRef exec_mask;
99 };
100
101 struct lp_build_tgsi_soa_context
102 {
103 struct lp_build_context base;
104
105 LLVMValueRef consts_ptr;
106 const LLVMValueRef *pos;
107 const LLVMValueRef (*inputs)[NUM_CHANNELS];
108 LLVMValueRef (*outputs)[NUM_CHANNELS];
109
110 struct lp_build_sampler_soa *sampler;
111
112 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
113 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
114
115 struct lp_build_mask_context *mask;
116 struct lp_exec_mask exec_mask;
117 };
118
119 static const unsigned char
120 swizzle_left[4] = {
121 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
122 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
123 };
124
125 static const unsigned char
126 swizzle_right[4] = {
127 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
128 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
129 };
130
131 static const unsigned char
132 swizzle_top[4] = {
133 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
134 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
135 };
136
137 static const unsigned char
138 swizzle_bottom[4] = {
139 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
140 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
141 };
142
143 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
144 {
145 mask->bld = bld;
146 mask->has_mask = FALSE;
147 mask->cond_stack_size = 0;
148
149 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
150 }
151
152 static void lp_exec_mask_update(struct lp_exec_mask *mask)
153 {
154 mask->exec_mask = mask->cond_mask;
155 mask->has_mask = (mask->cond_stack_size > 0);
156 }
157
158 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
159 LLVMValueRef val)
160 {
161 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
162 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
163 mask->int_vec_type, "");
164
165 lp_exec_mask_update(mask);
166 }
167
168 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
169 {
170 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
171 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
172 mask->cond_mask, "");
173
174 /* means that we didn't have any mask before and that
175 * we were fully enabled */
176 if (mask->cond_stack_size <= 1) {
177 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
178 }
179
180 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
181 inv_mask,
182 prev_mask, "");
183 lp_exec_mask_update(mask);
184 }
185
186 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
187 {
188 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
189 lp_exec_mask_update(mask);
190 }
191
192 static void lp_exec_mask_store(struct lp_exec_mask *mask,
193 LLVMValueRef val,
194 LLVMValueRef dst)
195 {
196 if (mask->has_mask) {
197 LLVMValueRef real_val, dst_val;
198
199 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
200 real_val = lp_build_select(mask->bld,
201 mask->exec_mask,
202 val, dst_val);
203
204 LLVMBuildStore(mask->bld->builder, real_val, dst);
205 } else
206 LLVMBuildStore(mask->bld->builder, val, dst);
207 }
208
209
210 static LLVMValueRef
211 emit_ddx(struct lp_build_tgsi_soa_context *bld,
212 LLVMValueRef src)
213 {
214 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
215 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
216 return lp_build_sub(&bld->base, src_right, src_left);
217 }
218
219
220 static LLVMValueRef
221 emit_ddy(struct lp_build_tgsi_soa_context *bld,
222 LLVMValueRef src)
223 {
224 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
225 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
226 return lp_build_sub(&bld->base, src_top, src_bottom);
227 }
228
229
230 /**
231 * Register fetch.
232 */
233 static LLVMValueRef
234 emit_fetch(
235 struct lp_build_tgsi_soa_context *bld,
236 const struct tgsi_full_instruction *inst,
237 unsigned index,
238 const unsigned chan_index )
239 {
240 const struct tgsi_full_src_register *reg = &inst->Src[index];
241 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
242 LLVMValueRef res;
243
244 switch (swizzle) {
245 case TGSI_SWIZZLE_X:
246 case TGSI_SWIZZLE_Y:
247 case TGSI_SWIZZLE_Z:
248 case TGSI_SWIZZLE_W:
249
250 switch (reg->Register.File) {
251 case TGSI_FILE_CONSTANT: {
252 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
253 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
254 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
255 res = lp_build_broadcast_scalar(&bld->base, scalar);
256 break;
257 }
258
259 case TGSI_FILE_IMMEDIATE:
260 res = bld->immediates[reg->Register.Index][swizzle];
261 assert(res);
262 break;
263
264 case TGSI_FILE_INPUT:
265 res = bld->inputs[reg->Register.Index][swizzle];
266 assert(res);
267 break;
268
269 case TGSI_FILE_TEMPORARY:
270 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
271 if(!res)
272 return bld->base.undef;
273 break;
274
275 default:
276 assert( 0 );
277 return bld->base.undef;
278 }
279 break;
280
281 default:
282 assert( 0 );
283 return bld->base.undef;
284 }
285
286 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
287 case TGSI_UTIL_SIGN_CLEAR:
288 res = lp_build_abs( &bld->base, res );
289 break;
290
291 case TGSI_UTIL_SIGN_SET:
292 /* TODO: Use bitwese OR for floating point */
293 res = lp_build_abs( &bld->base, res );
294 res = LLVMBuildNeg( bld->base.builder, res, "" );
295 break;
296
297 case TGSI_UTIL_SIGN_TOGGLE:
298 res = LLVMBuildNeg( bld->base.builder, res, "" );
299 break;
300
301 case TGSI_UTIL_SIGN_KEEP:
302 break;
303 }
304
305 return res;
306 }
307
308
309 /**
310 * Register fetch with derivatives.
311 */
312 static void
313 emit_fetch_deriv(
314 struct lp_build_tgsi_soa_context *bld,
315 const struct tgsi_full_instruction *inst,
316 unsigned index,
317 const unsigned chan_index,
318 LLVMValueRef *res,
319 LLVMValueRef *ddx,
320 LLVMValueRef *ddy)
321 {
322 LLVMValueRef src;
323
324 src = emit_fetch(bld, inst, index, chan_index);
325
326 if(res)
327 *res = src;
328
329 /* TODO: use interpolation coeffs for inputs */
330
331 if(ddx)
332 *ddx = emit_ddx(bld, src);
333
334 if(ddy)
335 *ddy = emit_ddy(bld, src);
336 }
337
338
339 /**
340 * Register store.
341 */
342 static void
343 emit_store(
344 struct lp_build_tgsi_soa_context *bld,
345 const struct tgsi_full_instruction *inst,
346 unsigned index,
347 unsigned chan_index,
348 LLVMValueRef value)
349 {
350 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
351
352 switch( inst->Instruction.Saturate ) {
353 case TGSI_SAT_NONE:
354 break;
355
356 case TGSI_SAT_ZERO_ONE:
357 value = lp_build_max(&bld->base, value, bld->base.zero);
358 value = lp_build_min(&bld->base, value, bld->base.one);
359 break;
360
361 case TGSI_SAT_MINUS_PLUS_ONE:
362 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
363 value = lp_build_min(&bld->base, value, bld->base.one);
364 break;
365
366 default:
367 assert(0);
368 }
369
370 switch( reg->Register.File ) {
371 case TGSI_FILE_OUTPUT:
372 lp_exec_mask_store(&bld->exec_mask, value,
373 bld->outputs[reg->Register.Index][chan_index]);
374 break;
375
376 case TGSI_FILE_TEMPORARY:
377 lp_exec_mask_store(&bld->exec_mask, value,
378 bld->temps[reg->Register.Index][chan_index]);
379 break;
380
381 case TGSI_FILE_ADDRESS:
382 /* FIXME */
383 assert(0);
384 break;
385
386 case TGSI_FILE_PREDICATE:
387 /* FIXME */
388 assert(0);
389 break;
390
391 default:
392 assert( 0 );
393 }
394 }
395
396
397 /**
398 * High-level instruction translators.
399 */
400
401
402 static void
403 emit_tex( struct lp_build_tgsi_soa_context *bld,
404 const struct tgsi_full_instruction *inst,
405 boolean apply_lodbias,
406 boolean projected,
407 LLVMValueRef *texel)
408 {
409 const uint unit = inst->Src[1].Register.Index;
410 LLVMValueRef lodbias;
411 LLVMValueRef oow = NULL;
412 LLVMValueRef coords[3];
413 unsigned num_coords;
414 unsigned i;
415
416 switch (inst->Texture.Texture) {
417 case TGSI_TEXTURE_1D:
418 num_coords = 1;
419 break;
420 case TGSI_TEXTURE_2D:
421 case TGSI_TEXTURE_RECT:
422 num_coords = 2;
423 break;
424 case TGSI_TEXTURE_SHADOW1D:
425 case TGSI_TEXTURE_SHADOW2D:
426 case TGSI_TEXTURE_SHADOWRECT:
427 case TGSI_TEXTURE_3D:
428 case TGSI_TEXTURE_CUBE:
429 num_coords = 3;
430 break;
431 default:
432 assert(0);
433 return;
434 }
435
436 if(apply_lodbias)
437 lodbias = emit_fetch( bld, inst, 0, 3 );
438 else
439 lodbias = bld->base.zero;
440
441 if (projected) {
442 oow = emit_fetch( bld, inst, 0, 3 );
443 oow = lp_build_rcp(&bld->base, oow);
444 }
445
446 for (i = 0; i < num_coords; i++) {
447 coords[i] = emit_fetch( bld, inst, 0, i );
448 if (projected)
449 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
450 }
451 for (i = num_coords; i < 3; i++) {
452 coords[i] = bld->base.undef;
453 }
454
455 bld->sampler->emit_fetch_texel(bld->sampler,
456 bld->base.builder,
457 bld->base.type,
458 unit, num_coords, coords, lodbias,
459 texel);
460 }
461
462
463 static void
464 emit_kil(
465 struct lp_build_tgsi_soa_context *bld,
466 const struct tgsi_full_instruction *inst )
467 {
468 const struct tgsi_full_src_register *reg = &inst->Src[0];
469 LLVMValueRef terms[NUM_CHANNELS];
470 LLVMValueRef mask;
471 unsigned chan_index;
472
473 memset(&terms, 0, sizeof terms);
474
475 FOR_EACH_CHANNEL( chan_index ) {
476 unsigned swizzle;
477
478 /* Unswizzle channel */
479 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
480
481 /* Check if the component has not been already tested. */
482 assert(swizzle < NUM_CHANNELS);
483 if( !terms[swizzle] )
484 /* TODO: change the comparison operator instead of setting the sign */
485 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
486 }
487
488 mask = NULL;
489 FOR_EACH_CHANNEL( chan_index ) {
490 if(terms[chan_index]) {
491 LLVMValueRef chan_mask;
492
493 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
494
495 if(mask)
496 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
497 else
498 mask = chan_mask;
499 }
500 }
501
502 if(mask)
503 lp_build_mask_update(bld->mask, mask);
504 }
505
506
507 /**
508 * Check if inst src/dest regs use indirect addressing into temporary
509 * register file.
510 */
511 static boolean
512 indirect_temp_reference(const struct tgsi_full_instruction *inst)
513 {
514 uint i;
515 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
516 const struct tgsi_full_src_register *reg = &inst->Src[i];
517 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
518 reg->Register.Indirect)
519 return TRUE;
520 }
521 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
522 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
523 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
524 reg->Register.Indirect)
525 return TRUE;
526 }
527 return FALSE;
528 }
529
530 static int
531 emit_declaration(
532 struct lp_build_tgsi_soa_context *bld,
533 const struct tgsi_full_declaration *decl)
534 {
535 unsigned first = decl->Range.First;
536 unsigned last = decl->Range.Last;
537 unsigned idx, i;
538
539 for (idx = first; idx <= last; ++idx) {
540 boolean ok;
541
542 switch (decl->Declaration.File) {
543 case TGSI_FILE_TEMPORARY:
544 for (i = 0; i < NUM_CHANNELS; i++)
545 bld->temps[idx][i] = lp_build_alloca(&bld->base);
546 ok = TRUE;
547 break;
548
549 case TGSI_FILE_OUTPUT:
550 for (i = 0; i < NUM_CHANNELS; i++)
551 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
552 ok = TRUE;
553 break;
554
555 default:
556 /* don't need to declare other vars */
557 ok = TRUE;
558 }
559
560 if (!ok)
561 return FALSE;
562 }
563
564 return TRUE;
565 }
566
567 static int
568 emit_instruction(
569 struct lp_build_tgsi_soa_context *bld,
570 const struct tgsi_full_instruction *inst,
571 const struct tgsi_opcode_info *info)
572 {
573 unsigned chan_index;
574 LLVMValueRef src0, src1, src2;
575 LLVMValueRef tmp0, tmp1, tmp2;
576 LLVMValueRef tmp3 = NULL;
577 LLVMValueRef tmp4 = NULL;
578 LLVMValueRef tmp5 = NULL;
579 LLVMValueRef tmp6 = NULL;
580 LLVMValueRef tmp7 = NULL;
581 LLVMValueRef res;
582 LLVMValueRef dst0[NUM_CHANNELS];
583
584 /* we can't handle indirect addressing into temp register file yet */
585 if (indirect_temp_reference(inst))
586 return FALSE;
587
588 /*
589 * Stores and write masks are handled in a general fashion after the long
590 * instruction opcode switch statement.
591 *
592 * Although not strictly necessary, we avoid generating instructions for
593 * channels which won't be stored, in cases where that's easy. For some
594 * complex instructions, like texture sampling, it is more convenient to
595 * assume a full writemask and then let LLVM optimization passes eliminate
596 * redundant code.
597 */
598
599 assert(info->num_dst <= 1);
600 if(info->num_dst) {
601 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
602 dst0[chan_index] = bld->base.undef;
603 }
604 }
605
606 switch (inst->Instruction.Opcode) {
607 #if 0
608 case TGSI_OPCODE_ARL:
609 /* FIXME */
610 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
611 tmp0 = emit_fetch( bld, inst, 0, chan_index );
612 emit_flr(bld, 0, 0);
613 emit_f2it( bld, 0 );
614 dst0[chan_index] = tmp0;
615 }
616 break;
617 #endif
618
619 case TGSI_OPCODE_MOV:
620 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
621 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
622 }
623 break;
624
625 case TGSI_OPCODE_LIT:
626 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
627 dst0[CHAN_X] = bld->base.one;
628 }
629 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
630 src0 = emit_fetch( bld, inst, 0, CHAN_X );
631 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
632 }
633 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
634 /* XMM[1] = SrcReg[0].yyyy */
635 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
636 /* XMM[1] = max(XMM[1], 0) */
637 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
638 /* XMM[2] = SrcReg[0].wwww */
639 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
640 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
641 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
642 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
643 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
644 }
645 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
646 dst0[CHAN_W] = bld->base.one;
647 }
648 break;
649
650 case TGSI_OPCODE_RCP:
651 /* TGSI_OPCODE_RECIP */
652 src0 = emit_fetch( bld, inst, 0, CHAN_X );
653 res = lp_build_rcp(&bld->base, src0);
654 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
655 dst0[chan_index] = res;
656 }
657 break;
658
659 case TGSI_OPCODE_RSQ:
660 /* TGSI_OPCODE_RECIPSQRT */
661 src0 = emit_fetch( bld, inst, 0, CHAN_X );
662 src0 = lp_build_abs(&bld->base, src0);
663 res = lp_build_rsqrt(&bld->base, src0);
664 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
665 dst0[chan_index] = res;
666 }
667 break;
668
669 case TGSI_OPCODE_EXP:
670 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
671 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
672 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
673 LLVMValueRef *p_exp2_int_part = NULL;
674 LLVMValueRef *p_frac_part = NULL;
675 LLVMValueRef *p_exp2 = NULL;
676
677 src0 = emit_fetch( bld, inst, 0, CHAN_X );
678
679 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
680 p_exp2_int_part = &tmp0;
681 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
682 p_frac_part = &tmp1;
683 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
684 p_exp2 = &tmp2;
685
686 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
687
688 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
689 dst0[CHAN_X] = tmp0;
690 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
691 dst0[CHAN_Y] = tmp1;
692 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
693 dst0[CHAN_Z] = tmp2;
694 }
695 /* dst.w = 1.0 */
696 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
697 dst0[CHAN_W] = bld->base.one;
698 }
699 break;
700
701 case TGSI_OPCODE_LOG:
702 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
703 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
704 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
705 LLVMValueRef *p_floor_log2 = NULL;
706 LLVMValueRef *p_exp = NULL;
707 LLVMValueRef *p_log2 = NULL;
708
709 src0 = emit_fetch( bld, inst, 0, CHAN_X );
710 src0 = lp_build_abs( &bld->base, src0 );
711
712 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
713 p_floor_log2 = &tmp0;
714 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
715 p_exp = &tmp1;
716 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
717 p_log2 = &tmp2;
718
719 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
720
721 /* dst.x = floor(lg2(abs(src.x))) */
722 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
723 dst0[CHAN_X] = tmp0;
724 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
725 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
726 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
727 }
728 /* dst.z = lg2(abs(src.x)) */
729 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
730 dst0[CHAN_Z] = tmp2;
731 }
732 /* dst.w = 1.0 */
733 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
734 dst0[CHAN_W] = bld->base.one;
735 }
736 break;
737
738 case TGSI_OPCODE_MUL:
739 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
740 src0 = emit_fetch( bld, inst, 0, chan_index );
741 src1 = emit_fetch( bld, inst, 1, chan_index );
742 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
743 }
744 break;
745
746 case TGSI_OPCODE_ADD:
747 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
748 src0 = emit_fetch( bld, inst, 0, chan_index );
749 src1 = emit_fetch( bld, inst, 1, chan_index );
750 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
751 }
752 break;
753
754 case TGSI_OPCODE_DP3:
755 /* TGSI_OPCODE_DOT3 */
756 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
757 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
758 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
759 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
760 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
761 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
762 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
763 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
764 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
765 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
766 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
767 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
768 dst0[chan_index] = tmp0;
769 }
770 break;
771
772 case TGSI_OPCODE_DP4:
773 /* TGSI_OPCODE_DOT4 */
774 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
775 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
776 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
777 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
778 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
779 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
780 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
781 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
782 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
783 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
784 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
785 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
786 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
787 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
788 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
789 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
790 dst0[chan_index] = tmp0;
791 }
792 break;
793
794 case TGSI_OPCODE_DST:
795 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
796 dst0[CHAN_X] = bld->base.one;
797 }
798 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
799 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
800 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
801 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
802 }
803 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
804 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
805 }
806 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
807 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
808 }
809 break;
810
811 case TGSI_OPCODE_MIN:
812 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
813 src0 = emit_fetch( bld, inst, 0, chan_index );
814 src1 = emit_fetch( bld, inst, 1, chan_index );
815 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
816 }
817 break;
818
819 case TGSI_OPCODE_MAX:
820 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
821 src0 = emit_fetch( bld, inst, 0, chan_index );
822 src1 = emit_fetch( bld, inst, 1, chan_index );
823 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
824 }
825 break;
826
827 case TGSI_OPCODE_SLT:
828 /* TGSI_OPCODE_SETLT */
829 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
830 src0 = emit_fetch( bld, inst, 0, chan_index );
831 src1 = emit_fetch( bld, inst, 1, chan_index );
832 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
833 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
834 }
835 break;
836
837 case TGSI_OPCODE_SGE:
838 /* TGSI_OPCODE_SETGE */
839 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
840 src0 = emit_fetch( bld, inst, 0, chan_index );
841 src1 = emit_fetch( bld, inst, 1, chan_index );
842 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
843 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
844 }
845 break;
846
847 case TGSI_OPCODE_MAD:
848 /* TGSI_OPCODE_MADD */
849 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
850 tmp0 = emit_fetch( bld, inst, 0, chan_index );
851 tmp1 = emit_fetch( bld, inst, 1, chan_index );
852 tmp2 = emit_fetch( bld, inst, 2, chan_index );
853 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
854 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
855 dst0[chan_index] = tmp0;
856 }
857 break;
858
859 case TGSI_OPCODE_SUB:
860 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
861 tmp0 = emit_fetch( bld, inst, 0, chan_index );
862 tmp1 = emit_fetch( bld, inst, 1, chan_index );
863 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
864 }
865 break;
866
867 case TGSI_OPCODE_LRP:
868 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
869 src0 = emit_fetch( bld, inst, 0, chan_index );
870 src1 = emit_fetch( bld, inst, 1, chan_index );
871 src2 = emit_fetch( bld, inst, 2, chan_index );
872 tmp0 = lp_build_sub( &bld->base, src1, src2 );
873 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
874 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
875 }
876 break;
877
878 case TGSI_OPCODE_CND:
879 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
880 src0 = emit_fetch( bld, inst, 0, chan_index );
881 src1 = emit_fetch( bld, inst, 1, chan_index );
882 src2 = emit_fetch( bld, inst, 2, chan_index );
883 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
884 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
885 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
886 }
887 break;
888
889 case TGSI_OPCODE_DP2A:
890 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
891 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
892 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
893 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
894 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
895 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
896 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
897 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
898 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
899 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
900 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
901 }
902 break;
903
904 case TGSI_OPCODE_FRC:
905 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
906 src0 = emit_fetch( bld, inst, 0, chan_index );
907 tmp0 = lp_build_floor(&bld->base, src0);
908 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
909 dst0[chan_index] = tmp0;
910 }
911 break;
912
913 case TGSI_OPCODE_CLAMP:
914 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
915 tmp0 = emit_fetch( bld, inst, 0, chan_index );
916 src1 = emit_fetch( bld, inst, 1, chan_index );
917 src2 = emit_fetch( bld, inst, 2, chan_index );
918 tmp0 = lp_build_max(&bld->base, tmp0, src1);
919 tmp0 = lp_build_min(&bld->base, tmp0, src2);
920 dst0[chan_index] = tmp0;
921 }
922 break;
923
924 case TGSI_OPCODE_FLR:
925 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
926 tmp0 = emit_fetch( bld, inst, 0, chan_index );
927 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
928 }
929 break;
930
931 case TGSI_OPCODE_ROUND:
932 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
933 tmp0 = emit_fetch( bld, inst, 0, chan_index );
934 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
935 }
936 break;
937
938 case TGSI_OPCODE_EX2: {
939 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
940 tmp0 = lp_build_exp2( &bld->base, tmp0);
941 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
942 dst0[chan_index] = tmp0;
943 }
944 break;
945 }
946
947 case TGSI_OPCODE_LG2:
948 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
949 tmp0 = lp_build_log2( &bld->base, tmp0);
950 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
951 dst0[chan_index] = tmp0;
952 }
953 break;
954
955 case TGSI_OPCODE_POW:
956 src0 = emit_fetch( bld, inst, 0, CHAN_X );
957 src1 = emit_fetch( bld, inst, 1, CHAN_X );
958 res = lp_build_pow( &bld->base, src0, src1 );
959 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
960 dst0[chan_index] = res;
961 }
962 break;
963
964 case TGSI_OPCODE_XPD:
965 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
966 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
967 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
968 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
969 }
970 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
971 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
972 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
973 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
974 }
975 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
976 tmp2 = tmp0;
977 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
978 tmp5 = tmp3;
979 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
980 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
981 dst0[CHAN_X] = tmp2;
982 }
983 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
984 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
985 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
986 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
987 }
988 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
989 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
990 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
991 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
992 dst0[CHAN_Y] = tmp3;
993 }
994 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
995 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
996 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
997 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
998 dst0[CHAN_Z] = tmp5;
999 }
1000 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1001 dst0[CHAN_W] = bld->base.one;
1002 }
1003 break;
1004
1005 case TGSI_OPCODE_ABS:
1006 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1007 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1008 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1009 }
1010 break;
1011
1012 case TGSI_OPCODE_RCC:
1013 /* deprecated? */
1014 assert(0);
1015 return 0;
1016
1017 case TGSI_OPCODE_DPH:
1018 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1019 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1020 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1021 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1022 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1023 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1024 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1025 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1026 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1027 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1028 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1029 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1030 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1031 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1032 dst0[chan_index] = tmp0;
1033 }
1034 break;
1035
1036 case TGSI_OPCODE_COS:
1037 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1038 tmp0 = lp_build_cos( &bld->base, tmp0 );
1039 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1040 dst0[chan_index] = tmp0;
1041 }
1042 break;
1043
1044 case TGSI_OPCODE_DDX:
1045 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1046 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1047 }
1048 break;
1049
1050 case TGSI_OPCODE_DDY:
1051 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1052 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1053 }
1054 break;
1055
1056 case TGSI_OPCODE_KILP:
1057 /* predicated kill */
1058 /* FIXME */
1059 return 0;
1060 break;
1061
1062 case TGSI_OPCODE_KIL:
1063 /* conditional kill */
1064 emit_kil( bld, inst );
1065 break;
1066
1067 case TGSI_OPCODE_PK2H:
1068 return 0;
1069 break;
1070
1071 case TGSI_OPCODE_PK2US:
1072 return 0;
1073 break;
1074
1075 case TGSI_OPCODE_PK4B:
1076 return 0;
1077 break;
1078
1079 case TGSI_OPCODE_PK4UB:
1080 return 0;
1081 break;
1082
1083 case TGSI_OPCODE_RFL:
1084 return 0;
1085 break;
1086
1087 case TGSI_OPCODE_SEQ:
1088 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1089 src0 = emit_fetch( bld, inst, 0, chan_index );
1090 src1 = emit_fetch( bld, inst, 1, chan_index );
1091 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1092 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1093 }
1094 break;
1095
1096 case TGSI_OPCODE_SFL:
1097 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1098 dst0[chan_index] = bld->base.zero;
1099 }
1100 break;
1101
1102 case TGSI_OPCODE_SGT:
1103 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1104 src0 = emit_fetch( bld, inst, 0, chan_index );
1105 src1 = emit_fetch( bld, inst, 1, chan_index );
1106 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1107 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1108 }
1109 break;
1110
1111 case TGSI_OPCODE_SIN:
1112 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1113 tmp0 = lp_build_sin( &bld->base, tmp0 );
1114 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1115 dst0[chan_index] = tmp0;
1116 }
1117 break;
1118
1119 case TGSI_OPCODE_SLE:
1120 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1121 src0 = emit_fetch( bld, inst, 0, chan_index );
1122 src1 = emit_fetch( bld, inst, 1, chan_index );
1123 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1124 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1125 }
1126 break;
1127
1128 case TGSI_OPCODE_SNE:
1129 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1130 src0 = emit_fetch( bld, inst, 0, chan_index );
1131 src1 = emit_fetch( bld, inst, 1, chan_index );
1132 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1133 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1134 }
1135 break;
1136
1137 case TGSI_OPCODE_STR:
1138 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1139 dst0[chan_index] = bld->base.one;
1140 }
1141 break;
1142
1143 case TGSI_OPCODE_TEX:
1144 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1145 break;
1146
1147 case TGSI_OPCODE_TXD:
1148 /* FIXME */
1149 return 0;
1150 break;
1151
1152 case TGSI_OPCODE_UP2H:
1153 /* deprecated */
1154 assert (0);
1155 return 0;
1156 break;
1157
1158 case TGSI_OPCODE_UP2US:
1159 /* deprecated */
1160 assert(0);
1161 return 0;
1162 break;
1163
1164 case TGSI_OPCODE_UP4B:
1165 /* deprecated */
1166 assert(0);
1167 return 0;
1168 break;
1169
1170 case TGSI_OPCODE_UP4UB:
1171 /* deprecated */
1172 assert(0);
1173 return 0;
1174 break;
1175
1176 case TGSI_OPCODE_X2D:
1177 /* deprecated? */
1178 assert(0);
1179 return 0;
1180 break;
1181
1182 case TGSI_OPCODE_ARA:
1183 /* deprecated */
1184 assert(0);
1185 return 0;
1186 break;
1187
1188 #if 0
1189 case TGSI_OPCODE_ARR:
1190 /* FIXME */
1191 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1192 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1193 emit_rnd( bld, 0, 0 );
1194 emit_f2it( bld, 0 );
1195 dst0[chan_index] = tmp0;
1196 }
1197 break;
1198 #endif
1199
1200 case TGSI_OPCODE_BRA:
1201 /* deprecated */
1202 assert(0);
1203 return 0;
1204 break;
1205
1206 case TGSI_OPCODE_CAL:
1207 /* FIXME */
1208 return 0;
1209 break;
1210
1211 case TGSI_OPCODE_RET:
1212 /* FIXME */
1213 return 0;
1214 break;
1215
1216 case TGSI_OPCODE_END:
1217 break;
1218
1219 case TGSI_OPCODE_SSG:
1220 /* TGSI_OPCODE_SGN */
1221 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1222 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1223 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1224 }
1225 break;
1226
1227 case TGSI_OPCODE_CMP:
1228 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1229 src0 = emit_fetch( bld, inst, 0, chan_index );
1230 src1 = emit_fetch( bld, inst, 1, chan_index );
1231 src2 = emit_fetch( bld, inst, 2, chan_index );
1232 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1233 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1234 }
1235 break;
1236
1237 case TGSI_OPCODE_SCS:
1238 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1239 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1240 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1241 }
1242 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1243 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1244 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1245 }
1246 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1247 dst0[CHAN_Z] = bld->base.zero;
1248 }
1249 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1250 dst0[CHAN_W] = bld->base.one;
1251 }
1252 break;
1253
1254 case TGSI_OPCODE_TXB:
1255 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1256 break;
1257
1258 case TGSI_OPCODE_NRM:
1259 /* fall-through */
1260 case TGSI_OPCODE_NRM4:
1261 /* 3 or 4-component normalization */
1262 {
1263 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1264
1265 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1266 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1267 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1268 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1269
1270 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1271
1272 /* xmm4 = src.x */
1273 /* xmm0 = src.x * src.x */
1274 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1275 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1276 tmp4 = tmp0;
1277 }
1278 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1279
1280 /* xmm5 = src.y */
1281 /* xmm0 = xmm0 + src.y * src.y */
1282 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1283 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1284 tmp5 = tmp1;
1285 }
1286 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1287 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1288
1289 /* xmm6 = src.z */
1290 /* xmm0 = xmm0 + src.z * src.z */
1291 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1292 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1293 tmp6 = tmp1;
1294 }
1295 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1296 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1297
1298 if (dims == 4) {
1299 /* xmm7 = src.w */
1300 /* xmm0 = xmm0 + src.w * src.w */
1301 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1302 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1303 tmp7 = tmp1;
1304 }
1305 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1306 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1307 }
1308
1309 /* xmm1 = 1 / sqrt(xmm0) */
1310 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1311
1312 /* dst.x = xmm1 * src.x */
1313 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1314 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1315 }
1316
1317 /* dst.y = xmm1 * src.y */
1318 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1319 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1320 }
1321
1322 /* dst.z = xmm1 * src.z */
1323 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1324 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1325 }
1326
1327 /* dst.w = xmm1 * src.w */
1328 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1329 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1330 }
1331 }
1332
1333 /* dst.w = 1.0 */
1334 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1335 dst0[CHAN_W] = bld->base.one;
1336 }
1337 }
1338 break;
1339
1340 case TGSI_OPCODE_DIV:
1341 /* deprecated */
1342 assert( 0 );
1343 return 0;
1344 break;
1345
1346 case TGSI_OPCODE_DP2:
1347 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1348 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1349 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1350 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1351 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1352 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1353 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1354 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1355 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1356 }
1357 break;
1358
1359 case TGSI_OPCODE_TXL:
1360 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1361 break;
1362
1363 case TGSI_OPCODE_TXP:
1364 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1365 break;
1366
1367 case TGSI_OPCODE_BRK:
1368 /* FIXME */
1369 return 0;
1370 break;
1371
1372 case TGSI_OPCODE_IF:
1373 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1374 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1375 break;
1376
1377 case TGSI_OPCODE_BGNFOR:
1378 /* deprecated */
1379 assert(0);
1380 return 0;
1381 break;
1382
1383 case TGSI_OPCODE_REP:
1384 /* deprecated */
1385 assert(0);
1386 return 0;
1387 break;
1388
1389 case TGSI_OPCODE_ELSE:
1390 lp_exec_mask_cond_invert(&bld->exec_mask);
1391 break;
1392
1393 case TGSI_OPCODE_ENDIF:
1394 lp_exec_mask_cond_pop(&bld->exec_mask);
1395 break;
1396
1397 case TGSI_OPCODE_ENDFOR:
1398 /* deprecated */
1399 assert(0);
1400 return 0;
1401 break;
1402
1403 case TGSI_OPCODE_ENDREP:
1404 /* deprecated */
1405 assert(0);
1406 return 0;
1407 break;
1408
1409 case TGSI_OPCODE_PUSHA:
1410 /* deprecated? */
1411 assert(0);
1412 return 0;
1413 break;
1414
1415 case TGSI_OPCODE_POPA:
1416 /* deprecated? */
1417 assert(0);
1418 return 0;
1419 break;
1420
1421 case TGSI_OPCODE_CEIL:
1422 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1423 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1424 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1425 }
1426 break;
1427
1428 case TGSI_OPCODE_I2F:
1429 /* deprecated? */
1430 assert(0);
1431 return 0;
1432 break;
1433
1434 case TGSI_OPCODE_NOT:
1435 /* deprecated? */
1436 assert(0);
1437 return 0;
1438 break;
1439
1440 case TGSI_OPCODE_TRUNC:
1441 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1442 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1443 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1444 }
1445 break;
1446
1447 case TGSI_OPCODE_SHL:
1448 /* deprecated? */
1449 assert(0);
1450 return 0;
1451 break;
1452
1453 case TGSI_OPCODE_ISHR:
1454 /* deprecated? */
1455 assert(0);
1456 return 0;
1457 break;
1458
1459 case TGSI_OPCODE_AND:
1460 /* deprecated? */
1461 assert(0);
1462 return 0;
1463 break;
1464
1465 case TGSI_OPCODE_OR:
1466 /* deprecated? */
1467 assert(0);
1468 return 0;
1469 break;
1470
1471 case TGSI_OPCODE_MOD:
1472 /* deprecated? */
1473 assert(0);
1474 return 0;
1475 break;
1476
1477 case TGSI_OPCODE_XOR:
1478 /* deprecated? */
1479 assert(0);
1480 return 0;
1481 break;
1482
1483 case TGSI_OPCODE_SAD:
1484 /* deprecated? */
1485 assert(0);
1486 return 0;
1487 break;
1488
1489 case TGSI_OPCODE_TXF:
1490 /* deprecated? */
1491 assert(0);
1492 return 0;
1493 break;
1494
1495 case TGSI_OPCODE_TXQ:
1496 /* deprecated? */
1497 assert(0);
1498 return 0;
1499 break;
1500
1501 case TGSI_OPCODE_CONT:
1502 /* FIXME */
1503 return 0;
1504 break;
1505
1506 case TGSI_OPCODE_EMIT:
1507 return 0;
1508 break;
1509
1510 case TGSI_OPCODE_ENDPRIM:
1511 return 0;
1512 break;
1513
1514 case TGSI_OPCODE_NOP:
1515 break;
1516
1517 default:
1518 return 0;
1519 }
1520
1521 if(info->num_dst) {
1522 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1523 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1524 }
1525 }
1526
1527 return 1;
1528 }
1529
1530
1531 void
1532 lp_build_tgsi_soa(LLVMBuilderRef builder,
1533 const struct tgsi_token *tokens,
1534 struct lp_type type,
1535 struct lp_build_mask_context *mask,
1536 LLVMValueRef consts_ptr,
1537 const LLVMValueRef *pos,
1538 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1539 LLVMValueRef (*outputs)[NUM_CHANNELS],
1540 struct lp_build_sampler_soa *sampler)
1541 {
1542 struct lp_build_tgsi_soa_context bld;
1543 struct tgsi_parse_context parse;
1544 uint num_immediates = 0;
1545 unsigned i;
1546
1547 /* Setup build context */
1548 memset(&bld, 0, sizeof bld);
1549 lp_build_context_init(&bld.base, builder, type);
1550 bld.mask = mask;
1551 bld.pos = pos;
1552 bld.inputs = inputs;
1553 bld.outputs = outputs;
1554 bld.consts_ptr = consts_ptr;
1555 bld.sampler = sampler;
1556
1557 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1558
1559 tgsi_parse_init( &parse, tokens );
1560
1561 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1562 tgsi_parse_token( &parse );
1563
1564 switch( parse.FullToken.Token.Type ) {
1565 case TGSI_TOKEN_TYPE_DECLARATION:
1566 /* Inputs already interpolated */
1567 {
1568 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1569 _debug_printf("warning: failed to define LLVM variable\n");
1570 }
1571 break;
1572
1573 case TGSI_TOKEN_TYPE_INSTRUCTION:
1574 {
1575 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1576 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1577 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1578 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1579 info ? info->mnemonic : "<invalid>");
1580 }
1581
1582 break;
1583
1584 case TGSI_TOKEN_TYPE_IMMEDIATE:
1585 /* simply copy the immediate values into the next immediates[] slot */
1586 {
1587 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1588 assert(size <= 4);
1589 assert(num_immediates < LP_MAX_IMMEDIATES);
1590 for( i = 0; i < size; ++i )
1591 bld.immediates[num_immediates][i] =
1592 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1593 for( i = size; i < 4; ++i )
1594 bld.immediates[num_immediates][i] = bld.base.undef;
1595 num_immediates++;
1596 }
1597 break;
1598
1599 case TGSI_TOKEN_TYPE_PROPERTY:
1600 break;
1601
1602 default:
1603 assert( 0 );
1604 }
1605 }
1606
1607 tgsi_parse_free( &parse );
1608 }
1609