Merge branch 'gallium-no-rhw-position'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55 #include "lp_bld_debug.h"
56
57
/* Capacities of the temporary and immediate register tables. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/* Loop header: steps CHAN through 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/*
 * Statement prefix guarding the following statement/block on the write mask.
 * Expands to a bare `if`, so never follow the guarded statement with `else`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop header visiting only the channels enabled in the first destination. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element indices used by the derivative swizzle tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* Depth of the conditional mask stack in struct lp_exec_mask. */
#define LP_TGSI_MAX_NESTING 16
86
87 struct lp_exec_mask {
88 struct lp_build_context *bld;
89
90 boolean has_mask;
91
92 LLVMTypeRef int_vec_type;
93
94 LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
95 int cond_stack_size;
96 LLVMValueRef cond_mask;
97
98 LLVMValueRef exec_mask;
99
100 LLVMValueRef inv_mask;
101 };
102
103 struct lp_build_tgsi_soa_context
104 {
105 struct lp_build_context base;
106
107 LLVMValueRef consts_ptr;
108 const LLVMValueRef *pos;
109 const LLVMValueRef (*inputs)[NUM_CHANNELS];
110 LLVMValueRef (*outputs)[NUM_CHANNELS];
111
112 struct lp_build_sampler_soa *sampler;
113
114 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
115 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
116
117 struct lp_build_mask_context *mask;
118 struct lp_exec_mask exec_mask;
119 };
120
121 static const unsigned char
122 swizzle_left[4] = {
123 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
124 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
125 };
126
127 static const unsigned char
128 swizzle_right[4] = {
129 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
130 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
131 };
132
133 static const unsigned char
134 swizzle_top[4] = {
135 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
136 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
137 };
138
139 static const unsigned char
140 swizzle_bottom[4] = {
141 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
142 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
143 };
144
145 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
146 {
147 mask->bld = bld;
148 mask->has_mask = FALSE;
149 mask->cond_stack_size = 0;
150
151 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
152 mask->inv_mask =
153 LLVMConstSub(LLVMConstNull(mask->int_vec_type),
154 LLVMConstAllOnes(mask->int_vec_type));
155 }
156
157 static void lp_exec_mask_update(struct lp_exec_mask *mask)
158 {
159 mask->exec_mask = mask->cond_mask;
160 if (mask->cond_stack_size > 0)
161 mask->has_mask = TRUE;
162 }
163
164 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
165 LLVMValueRef val)
166 {
167 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
168 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
169 mask->int_vec_type, "");
170
171 lp_exec_mask_update(mask);
172 }
173
174 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
175 {
176 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
177 LLVMValueRef inv_mask = LLVMBuildXor(mask->bld->builder,
178 mask->cond_mask,
179 mask->inv_mask, "");
180 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
181 inv_mask,
182 prev_mask, "");
183 lp_exec_mask_update(mask);
184 }
185
186 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
187 {
188 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
189 lp_exec_mask_update(mask);
190 }
191
192 static void lp_exec_mask_store(struct lp_exec_mask *mask,
193 LLVMValueRef val,
194 LLVMValueRef dst)
195 {
196 if (mask->has_mask) {
197 LLVMValueRef real_val, dst_val;
198
199 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
200 real_val = lp_build_select(mask->bld,
201 mask->exec_mask,
202 val, dst_val);
203
204 LLVMBuildStore(mask->bld->builder, real_val, dst);
205 } else
206 LLVMBuildStore(mask->bld->builder, val, dst);
207 }
208
209
210 static LLVMValueRef
211 emit_ddx(struct lp_build_tgsi_soa_context *bld,
212 LLVMValueRef src)
213 {
214 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
215 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
216 return lp_build_sub(&bld->base, src_right, src_left);
217 }
218
219
220 static LLVMValueRef
221 emit_ddy(struct lp_build_tgsi_soa_context *bld,
222 LLVMValueRef src)
223 {
224 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
225 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
226 return lp_build_sub(&bld->base, src_top, src_bottom);
227 }
228
229
230 /**
231 * Register fetch.
232 */
233 static LLVMValueRef
234 emit_fetch(
235 struct lp_build_tgsi_soa_context *bld,
236 const struct tgsi_full_instruction *inst,
237 unsigned index,
238 const unsigned chan_index )
239 {
240 const struct tgsi_full_src_register *reg = &inst->Src[index];
241 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
242 LLVMValueRef res;
243
244 switch (swizzle) {
245 case TGSI_SWIZZLE_X:
246 case TGSI_SWIZZLE_Y:
247 case TGSI_SWIZZLE_Z:
248 case TGSI_SWIZZLE_W:
249
250 switch (reg->Register.File) {
251 case TGSI_FILE_CONSTANT: {
252 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
253 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
254 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
255 res = lp_build_broadcast_scalar(&bld->base, scalar);
256 break;
257 }
258
259 case TGSI_FILE_IMMEDIATE:
260 res = bld->immediates[reg->Register.Index][swizzle];
261 assert(res);
262 break;
263
264 case TGSI_FILE_INPUT:
265 res = bld->inputs[reg->Register.Index][swizzle];
266 assert(res);
267 break;
268
269 case TGSI_FILE_TEMPORARY:
270 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
271 if(!res)
272 return bld->base.undef;
273 break;
274
275 default:
276 assert( 0 );
277 return bld->base.undef;
278 }
279 break;
280
281 default:
282 assert( 0 );
283 return bld->base.undef;
284 }
285
286 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
287 case TGSI_UTIL_SIGN_CLEAR:
288 res = lp_build_abs( &bld->base, res );
289 break;
290
291 case TGSI_UTIL_SIGN_SET:
292 /* TODO: Use bitwese OR for floating point */
293 res = lp_build_abs( &bld->base, res );
294 res = LLVMBuildNeg( bld->base.builder, res, "" );
295 break;
296
297 case TGSI_UTIL_SIGN_TOGGLE:
298 res = LLVMBuildNeg( bld->base.builder, res, "" );
299 break;
300
301 case TGSI_UTIL_SIGN_KEEP:
302 break;
303 }
304
305 return res;
306 }
307
308
309 /**
310 * Register fetch with derivatives.
311 */
312 static void
313 emit_fetch_deriv(
314 struct lp_build_tgsi_soa_context *bld,
315 const struct tgsi_full_instruction *inst,
316 unsigned index,
317 const unsigned chan_index,
318 LLVMValueRef *res,
319 LLVMValueRef *ddx,
320 LLVMValueRef *ddy)
321 {
322 LLVMValueRef src;
323
324 src = emit_fetch(bld, inst, index, chan_index);
325
326 if(res)
327 *res = src;
328
329 /* TODO: use interpolation coeffs for inputs */
330
331 if(ddx)
332 *ddx = emit_ddx(bld, src);
333
334 if(ddy)
335 *ddy = emit_ddy(bld, src);
336 }
337
338
339 /**
340 * Register store.
341 */
342 static void
343 emit_store(
344 struct lp_build_tgsi_soa_context *bld,
345 const struct tgsi_full_instruction *inst,
346 unsigned index,
347 unsigned chan_index,
348 LLVMValueRef value)
349 {
350 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
351
352 switch( inst->Instruction.Saturate ) {
353 case TGSI_SAT_NONE:
354 break;
355
356 case TGSI_SAT_ZERO_ONE:
357 value = lp_build_max(&bld->base, value, bld->base.zero);
358 value = lp_build_min(&bld->base, value, bld->base.one);
359 break;
360
361 case TGSI_SAT_MINUS_PLUS_ONE:
362 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
363 value = lp_build_min(&bld->base, value, bld->base.one);
364 break;
365
366 default:
367 assert(0);
368 }
369
370 switch( reg->Register.File ) {
371 case TGSI_FILE_OUTPUT:
372 lp_exec_mask_store(&bld->exec_mask, value,
373 bld->outputs[reg->Register.Index][chan_index]);
374 break;
375
376 case TGSI_FILE_TEMPORARY:
377 lp_exec_mask_store(&bld->exec_mask, value,
378 bld->temps[reg->Register.Index][chan_index]);
379 break;
380
381 case TGSI_FILE_ADDRESS:
382 /* FIXME */
383 assert(0);
384 break;
385
386 default:
387 assert( 0 );
388 }
389 }
390
391
392 /**
393 * High-level instruction translators.
394 */
395
396
397 static void
398 emit_tex( struct lp_build_tgsi_soa_context *bld,
399 const struct tgsi_full_instruction *inst,
400 boolean apply_lodbias,
401 boolean projected,
402 LLVMValueRef *texel)
403 {
404 const uint unit = inst->Src[1].Register.Index;
405 LLVMValueRef lodbias;
406 LLVMValueRef oow = NULL;
407 LLVMValueRef coords[3];
408 unsigned num_coords;
409 unsigned i;
410
411 switch (inst->Texture.Texture) {
412 case TGSI_TEXTURE_1D:
413 num_coords = 1;
414 break;
415 case TGSI_TEXTURE_2D:
416 case TGSI_TEXTURE_RECT:
417 num_coords = 2;
418 break;
419 case TGSI_TEXTURE_SHADOW1D:
420 case TGSI_TEXTURE_SHADOW2D:
421 case TGSI_TEXTURE_SHADOWRECT:
422 case TGSI_TEXTURE_3D:
423 case TGSI_TEXTURE_CUBE:
424 num_coords = 3;
425 break;
426 default:
427 assert(0);
428 return;
429 }
430
431 if(apply_lodbias)
432 lodbias = emit_fetch( bld, inst, 0, 3 );
433 else
434 lodbias = bld->base.zero;
435
436 if (projected) {
437 oow = emit_fetch( bld, inst, 0, 3 );
438 oow = lp_build_rcp(&bld->base, oow);
439 }
440
441 for (i = 0; i < num_coords; i++) {
442 coords[i] = emit_fetch( bld, inst, 0, i );
443 if (projected)
444 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
445 }
446 for (i = num_coords; i < 3; i++) {
447 coords[i] = bld->base.undef;
448 }
449
450 bld->sampler->emit_fetch_texel(bld->sampler,
451 bld->base.builder,
452 bld->base.type,
453 unit, num_coords, coords, lodbias,
454 texel);
455 }
456
457
458 static void
459 emit_kil(
460 struct lp_build_tgsi_soa_context *bld,
461 const struct tgsi_full_instruction *inst )
462 {
463 const struct tgsi_full_src_register *reg = &inst->Src[0];
464 LLVMValueRef terms[NUM_CHANNELS];
465 LLVMValueRef mask;
466 unsigned chan_index;
467
468 memset(&terms, 0, sizeof terms);
469
470 FOR_EACH_CHANNEL( chan_index ) {
471 unsigned swizzle;
472
473 /* Unswizzle channel */
474 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
475
476 /* Check if the component has not been already tested. */
477 assert(swizzle < NUM_CHANNELS);
478 if( !terms[swizzle] )
479 /* TODO: change the comparison operator instead of setting the sign */
480 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
481 }
482
483 mask = NULL;
484 FOR_EACH_CHANNEL( chan_index ) {
485 if(terms[chan_index]) {
486 LLVMValueRef chan_mask;
487
488 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
489
490 if(mask)
491 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
492 else
493 mask = chan_mask;
494 }
495 }
496
497 if(mask)
498 lp_build_mask_update(bld->mask, mask);
499 }
500
501
502 /**
503 * Check if inst src/dest regs use indirect addressing into temporary
504 * register file.
505 */
506 static boolean
507 indirect_temp_reference(const struct tgsi_full_instruction *inst)
508 {
509 uint i;
510 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
511 const struct tgsi_full_src_register *reg = &inst->Src[i];
512 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
513 reg->Register.Indirect)
514 return TRUE;
515 }
516 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
517 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
518 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
519 reg->Register.Indirect)
520 return TRUE;
521 }
522 return FALSE;
523 }
524
525 static int
526 emit_declaration(
527 struct lp_build_tgsi_soa_context *bld,
528 const struct tgsi_full_declaration *decl)
529 {
530 unsigned first = decl->Range.First;
531 unsigned last = decl->Range.Last;
532 unsigned idx, i;
533
534 for (idx = first; idx <= last; ++idx) {
535 boolean ok;
536
537 switch (decl->Declaration.File) {
538 case TGSI_FILE_TEMPORARY:
539 for (i = 0; i < NUM_CHANNELS; i++)
540 bld->temps[idx][i] = lp_build_alloca(&bld->base);
541 ok = TRUE;
542 break;
543
544 case TGSI_FILE_OUTPUT:
545 for (i = 0; i < NUM_CHANNELS; i++)
546 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
547 ok = TRUE;
548 break;
549
550 default:
551 /* don't need to declare other vars */
552 ok = TRUE;
553 }
554
555 if (!ok)
556 return FALSE;
557 }
558
559 return TRUE;
560 }
561
562 static int
563 emit_instruction(
564 struct lp_build_tgsi_soa_context *bld,
565 const struct tgsi_full_instruction *inst,
566 const struct tgsi_opcode_info *info)
567 {
568 unsigned chan_index;
569 LLVMValueRef src0, src1, src2;
570 LLVMValueRef tmp0, tmp1, tmp2;
571 LLVMValueRef tmp3 = NULL;
572 LLVMValueRef tmp4 = NULL;
573 LLVMValueRef tmp5 = NULL;
574 LLVMValueRef tmp6 = NULL;
575 LLVMValueRef tmp7 = NULL;
576 LLVMValueRef res;
577 LLVMValueRef dst0[NUM_CHANNELS];
578
579 /* we can't handle indirect addressing into temp register file yet */
580 if (indirect_temp_reference(inst))
581 return FALSE;
582
583 assert(info->num_dst <= 1);
584 if(info->num_dst) {
585 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
586 dst0[chan_index] = bld->base.undef;
587 }
588 }
589
590 switch (inst->Instruction.Opcode) {
591 #if 0
592 case TGSI_OPCODE_ARL:
593 /* FIXME */
594 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
595 tmp0 = emit_fetch( bld, inst, 0, chan_index );
596 emit_flr(bld, 0, 0);
597 emit_f2it( bld, 0 );
598 dst0[chan_index] = tmp0;
599 }
600 break;
601 #endif
602
603 case TGSI_OPCODE_MOV:
604 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
605 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
606 }
607 break;
608
609 case TGSI_OPCODE_LIT:
610 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
611 dst0[CHAN_X] = bld->base.one;
612 }
613 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
614 src0 = emit_fetch( bld, inst, 0, CHAN_X );
615 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
616 }
617 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
618 /* XMM[1] = SrcReg[0].yyyy */
619 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
620 /* XMM[1] = max(XMM[1], 0) */
621 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
622 /* XMM[2] = SrcReg[0].wwww */
623 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
624 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
625 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
626 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
627 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
628 }
629 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
630 dst0[CHAN_W] = bld->base.one;
631 }
632 break;
633
634 case TGSI_OPCODE_RCP:
635 /* TGSI_OPCODE_RECIP */
636 src0 = emit_fetch( bld, inst, 0, CHAN_X );
637 res = lp_build_rcp(&bld->base, src0);
638 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
639 dst0[chan_index] = res;
640 }
641 break;
642
643 case TGSI_OPCODE_RSQ:
644 /* TGSI_OPCODE_RECIPSQRT */
645 src0 = emit_fetch( bld, inst, 0, CHAN_X );
646 src0 = lp_build_abs(&bld->base, src0);
647 res = lp_build_rsqrt(&bld->base, src0);
648 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
649 dst0[chan_index] = res;
650 }
651 break;
652
653 case TGSI_OPCODE_EXP:
654 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
655 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
656 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
657 LLVMValueRef *p_exp2_int_part = NULL;
658 LLVMValueRef *p_frac_part = NULL;
659 LLVMValueRef *p_exp2 = NULL;
660
661 src0 = emit_fetch( bld, inst, 0, CHAN_X );
662
663 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
664 p_exp2_int_part = &tmp0;
665 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
666 p_frac_part = &tmp1;
667 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
668 p_exp2 = &tmp2;
669
670 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
671
672 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
673 dst0[CHAN_X] = tmp0;
674 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
675 dst0[CHAN_Y] = tmp1;
676 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
677 dst0[CHAN_Z] = tmp2;
678 }
679 /* dst.w = 1.0 */
680 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
681 dst0[CHAN_W] = bld->base.one;
682 }
683 break;
684
685 case TGSI_OPCODE_LOG:
686 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
687 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
688 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
689 LLVMValueRef *p_floor_log2 = NULL;
690 LLVMValueRef *p_exp = NULL;
691 LLVMValueRef *p_log2 = NULL;
692
693 src0 = emit_fetch( bld, inst, 0, CHAN_X );
694 src0 = lp_build_abs( &bld->base, src0 );
695
696 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
697 p_floor_log2 = &tmp0;
698 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
699 p_exp = &tmp1;
700 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
701 p_log2 = &tmp2;
702
703 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
704
705 /* dst.x = floor(lg2(abs(src.x))) */
706 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
707 dst0[CHAN_X] = tmp0;
708 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
709 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
710 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
711 }
712 /* dst.z = lg2(abs(src.x)) */
713 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
714 dst0[CHAN_Z] = tmp2;
715 }
716 /* dst.w = 1.0 */
717 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
718 dst0[CHAN_W] = bld->base.one;
719 }
720 break;
721
722 case TGSI_OPCODE_MUL:
723 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
724 src0 = emit_fetch( bld, inst, 0, chan_index );
725 src1 = emit_fetch( bld, inst, 1, chan_index );
726 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
727 }
728 break;
729
730 case TGSI_OPCODE_ADD:
731 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
732 src0 = emit_fetch( bld, inst, 0, chan_index );
733 src1 = emit_fetch( bld, inst, 1, chan_index );
734 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
735 }
736 break;
737
738 case TGSI_OPCODE_DP3:
739 /* TGSI_OPCODE_DOT3 */
740 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
741 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
742 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
743 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
744 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
745 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
746 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
747 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
748 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
749 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
750 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
751 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
752 dst0[chan_index] = tmp0;
753 }
754 break;
755
756 case TGSI_OPCODE_DP4:
757 /* TGSI_OPCODE_DOT4 */
758 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
759 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
760 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
761 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
762 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
763 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
764 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
765 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
766 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
767 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
768 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
769 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
770 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
771 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
772 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
773 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
774 dst0[chan_index] = tmp0;
775 }
776 break;
777
778 case TGSI_OPCODE_DST:
779 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
780 dst0[CHAN_X] = bld->base.one;
781 }
782 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
783 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
784 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
785 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
786 }
787 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
788 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
789 }
790 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
791 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
792 }
793 break;
794
795 case TGSI_OPCODE_MIN:
796 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
797 src0 = emit_fetch( bld, inst, 0, chan_index );
798 src1 = emit_fetch( bld, inst, 1, chan_index );
799 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
800 }
801 break;
802
803 case TGSI_OPCODE_MAX:
804 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
805 src0 = emit_fetch( bld, inst, 0, chan_index );
806 src1 = emit_fetch( bld, inst, 1, chan_index );
807 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
808 }
809 break;
810
811 case TGSI_OPCODE_SLT:
812 /* TGSI_OPCODE_SETLT */
813 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
814 src0 = emit_fetch( bld, inst, 0, chan_index );
815 src1 = emit_fetch( bld, inst, 1, chan_index );
816 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
817 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
818 }
819 break;
820
821 case TGSI_OPCODE_SGE:
822 /* TGSI_OPCODE_SETGE */
823 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
824 src0 = emit_fetch( bld, inst, 0, chan_index );
825 src1 = emit_fetch( bld, inst, 1, chan_index );
826 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
827 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
828 }
829 break;
830
831 case TGSI_OPCODE_MAD:
832 /* TGSI_OPCODE_MADD */
833 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
834 tmp0 = emit_fetch( bld, inst, 0, chan_index );
835 tmp1 = emit_fetch( bld, inst, 1, chan_index );
836 tmp2 = emit_fetch( bld, inst, 2, chan_index );
837 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
838 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
839 dst0[chan_index] = tmp0;
840 }
841 break;
842
843 case TGSI_OPCODE_SUB:
844 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
845 tmp0 = emit_fetch( bld, inst, 0, chan_index );
846 tmp1 = emit_fetch( bld, inst, 1, chan_index );
847 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
848 }
849 break;
850
851 case TGSI_OPCODE_LRP:
852 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
853 src0 = emit_fetch( bld, inst, 0, chan_index );
854 src1 = emit_fetch( bld, inst, 1, chan_index );
855 src2 = emit_fetch( bld, inst, 2, chan_index );
856 tmp0 = lp_build_sub( &bld->base, src1, src2 );
857 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
858 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
859 }
860 break;
861
862 case TGSI_OPCODE_CND:
863 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
864 src0 = emit_fetch( bld, inst, 0, chan_index );
865 src1 = emit_fetch( bld, inst, 1, chan_index );
866 src2 = emit_fetch( bld, inst, 2, chan_index );
867 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
868 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
869 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
870 }
871 break;
872
873 case TGSI_OPCODE_DP2A:
874 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
875 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
876 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
877 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
878 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
879 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
880 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
881 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
882 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
883 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
884 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
885 }
886 break;
887
888 case TGSI_OPCODE_FRC:
889 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
890 src0 = emit_fetch( bld, inst, 0, chan_index );
891 tmp0 = lp_build_floor(&bld->base, src0);
892 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
893 dst0[chan_index] = tmp0;
894 }
895 break;
896
897 case TGSI_OPCODE_CLAMP:
898 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
899 tmp0 = emit_fetch( bld, inst, 0, chan_index );
900 src1 = emit_fetch( bld, inst, 1, chan_index );
901 src2 = emit_fetch( bld, inst, 2, chan_index );
902 tmp0 = lp_build_max(&bld->base, tmp0, src1);
903 tmp0 = lp_build_min(&bld->base, tmp0, src2);
904 dst0[chan_index] = tmp0;
905 }
906 break;
907
908 case TGSI_OPCODE_FLR:
909 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
910 tmp0 = emit_fetch( bld, inst, 0, chan_index );
911 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
912 }
913 break;
914
915 case TGSI_OPCODE_ROUND:
916 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
917 tmp0 = emit_fetch( bld, inst, 0, chan_index );
918 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
919 }
920 break;
921
922 case TGSI_OPCODE_EX2: {
923 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
924 tmp0 = lp_build_exp2( &bld->base, tmp0);
925 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
926 dst0[chan_index] = tmp0;
927 }
928 break;
929 }
930
931 case TGSI_OPCODE_LG2:
932 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
933 tmp0 = lp_build_log2( &bld->base, tmp0);
934 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
935 dst0[chan_index] = tmp0;
936 }
937 break;
938
939 case TGSI_OPCODE_POW:
940 src0 = emit_fetch( bld, inst, 0, CHAN_X );
941 src1 = emit_fetch( bld, inst, 1, CHAN_X );
942 res = lp_build_pow( &bld->base, src0, src1 );
943 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
944 dst0[chan_index] = res;
945 }
946 break;
947
948 case TGSI_OPCODE_XPD:
949 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
950 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
951 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
952 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
953 }
954 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
955 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
956 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
957 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
958 }
959 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
960 tmp2 = tmp0;
961 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
962 tmp5 = tmp3;
963 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
964 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
965 dst0[CHAN_X] = tmp2;
966 }
967 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
968 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
969 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
970 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
971 }
972 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
973 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
974 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
975 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
976 dst0[CHAN_Y] = tmp3;
977 }
978 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
979 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
980 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
981 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
982 dst0[CHAN_Z] = tmp5;
983 }
984 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
985 dst0[CHAN_W] = bld->base.one;
986 }
987 break;
988
989 case TGSI_OPCODE_ABS:
990 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
991 tmp0 = emit_fetch( bld, inst, 0, chan_index );
992 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
993 }
994 break;
995
996 case TGSI_OPCODE_RCC:
997 /* deprecated? */
998 assert(0);
999 return 0;
1000
1001 case TGSI_OPCODE_DPH:
1002 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1003 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1004 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1005 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1006 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1007 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1008 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1009 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1010 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1011 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1012 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1013 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1014 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1015 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1016 dst0[chan_index] = tmp0;
1017 }
1018 break;
1019
1020 case TGSI_OPCODE_COS:
1021 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1022 tmp0 = lp_build_cos( &bld->base, tmp0 );
1023 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1024 dst0[chan_index] = tmp0;
1025 }
1026 break;
1027
1028 case TGSI_OPCODE_DDX:
1029 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1030 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1031 }
1032 break;
1033
1034 case TGSI_OPCODE_DDY:
1035 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1036 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1037 }
1038 break;
1039
1040 case TGSI_OPCODE_KILP:
1041 /* predicated kill */
1042 /* FIXME */
1043 return 0;
1044 break;
1045
1046 case TGSI_OPCODE_KIL:
1047 /* conditional kill */
1048 emit_kil( bld, inst );
1049 break;
1050
1051 case TGSI_OPCODE_PK2H:
1052 return 0;
1053 break;
1054
1055 case TGSI_OPCODE_PK2US:
1056 return 0;
1057 break;
1058
1059 case TGSI_OPCODE_PK4B:
1060 return 0;
1061 break;
1062
1063 case TGSI_OPCODE_PK4UB:
1064 return 0;
1065 break;
1066
1067 case TGSI_OPCODE_RFL:
1068 return 0;
1069 break;
1070
1071 case TGSI_OPCODE_SEQ:
1072 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1073 src0 = emit_fetch( bld, inst, 0, chan_index );
1074 src1 = emit_fetch( bld, inst, 1, chan_index );
1075 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1076 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1077 }
1078 break;
1079
1080 case TGSI_OPCODE_SFL:
1081 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1082 dst0[chan_index] = bld->base.zero;
1083 }
1084 break;
1085
1086 case TGSI_OPCODE_SGT:
1087 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1088 src0 = emit_fetch( bld, inst, 0, chan_index );
1089 src1 = emit_fetch( bld, inst, 1, chan_index );
1090 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1091 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1092 }
1093 break;
1094
1095 case TGSI_OPCODE_SIN:
1096 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1097 tmp0 = lp_build_sin( &bld->base, tmp0 );
1098 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1099 dst0[chan_index] = tmp0;
1100 }
1101 break;
1102
1103 case TGSI_OPCODE_SLE:
1104 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1105 src0 = emit_fetch( bld, inst, 0, chan_index );
1106 src1 = emit_fetch( bld, inst, 1, chan_index );
1107 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1108 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1109 }
1110 break;
1111
1112 case TGSI_OPCODE_SNE:
1113 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1114 src0 = emit_fetch( bld, inst, 0, chan_index );
1115 src1 = emit_fetch( bld, inst, 1, chan_index );
1116 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1117 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1118 }
1119 break;
1120
1121 case TGSI_OPCODE_STR:
1122 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1123 dst0[chan_index] = bld->base.one;
1124 }
1125 break;
1126
1127 case TGSI_OPCODE_TEX:
1128 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1129 break;
1130
1131 case TGSI_OPCODE_TXD:
1132 /* FIXME */
1133 return 0;
1134 break;
1135
1136 case TGSI_OPCODE_UP2H:
1137 /* deprecated */
1138 assert (0);
1139 return 0;
1140 break;
1141
1142 case TGSI_OPCODE_UP2US:
1143 /* deprecated */
1144 assert(0);
1145 return 0;
1146 break;
1147
1148 case TGSI_OPCODE_UP4B:
1149 /* deprecated */
1150 assert(0);
1151 return 0;
1152 break;
1153
1154 case TGSI_OPCODE_UP4UB:
1155 /* deprecated */
1156 assert(0);
1157 return 0;
1158 break;
1159
1160 case TGSI_OPCODE_X2D:
1161 /* deprecated? */
1162 assert(0);
1163 return 0;
1164 break;
1165
1166 case TGSI_OPCODE_ARA:
1167 /* deprecated */
1168 assert(0);
1169 return 0;
1170 break;
1171
1172 #if 0
1173 case TGSI_OPCODE_ARR:
1174 /* FIXME */
1175 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1176 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1177 emit_rnd( bld, 0, 0 );
1178 emit_f2it( bld, 0 );
1179 dst0[chan_index] = tmp0;
1180 }
1181 break;
1182 #endif
1183
1184 case TGSI_OPCODE_BRA:
1185 /* deprecated */
1186 assert(0);
1187 return 0;
1188 break;
1189
1190 case TGSI_OPCODE_CAL:
1191 /* FIXME */
1192 return 0;
1193 break;
1194
1195 case TGSI_OPCODE_RET:
1196 /* FIXME */
1197 return 0;
1198 break;
1199
1200 case TGSI_OPCODE_END:
1201 break;
1202
1203 case TGSI_OPCODE_SSG:
1204 /* TGSI_OPCODE_SGN */
1205 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1206 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1207 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1208 }
1209 break;
1210
1211 case TGSI_OPCODE_CMP:
1212 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1213 src0 = emit_fetch( bld, inst, 0, chan_index );
1214 src1 = emit_fetch( bld, inst, 1, chan_index );
1215 src2 = emit_fetch( bld, inst, 2, chan_index );
1216 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1217 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1218 }
1219 break;
1220
1221 case TGSI_OPCODE_SCS:
1222 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1223 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1224 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1225 }
1226 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1227 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1228 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1229 }
1230 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1231 dst0[CHAN_Z] = bld->base.zero;
1232 }
1233 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1234 dst0[CHAN_W] = bld->base.one;
1235 }
1236 break;
1237
1238 case TGSI_OPCODE_TXB:
1239 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1240 break;
1241
1242 case TGSI_OPCODE_NRM:
1243 /* fall-through */
1244 case TGSI_OPCODE_NRM4:
1245 /* 3 or 4-component normalization */
1246 {
1247 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1248
1249 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1250 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1251 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1252 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1253
1254 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1255
1256 /* xmm4 = src.x */
1257 /* xmm0 = src.x * src.x */
1258 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1259 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1260 tmp4 = tmp0;
1261 }
1262 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1263
1264 /* xmm5 = src.y */
1265 /* xmm0 = xmm0 + src.y * src.y */
1266 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1267 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1268 tmp5 = tmp1;
1269 }
1270 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1271 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1272
1273 /* xmm6 = src.z */
1274 /* xmm0 = xmm0 + src.z * src.z */
1275 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1276 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1277 tmp6 = tmp1;
1278 }
1279 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1280 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1281
1282 if (dims == 4) {
1283 /* xmm7 = src.w */
1284 /* xmm0 = xmm0 + src.w * src.w */
1285 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1286 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1287 tmp7 = tmp1;
1288 }
1289 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1290 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1291 }
1292
1293 /* xmm1 = 1 / sqrt(xmm0) */
1294 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1295
1296 /* dst.x = xmm1 * src.x */
1297 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1298 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1299 }
1300
1301 /* dst.y = xmm1 * src.y */
1302 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1303 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1304 }
1305
1306 /* dst.z = xmm1 * src.z */
1307 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1308 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1309 }
1310
1311 /* dst.w = xmm1 * src.w */
1312 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1313 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1314 }
1315 }
1316
1317 /* dst.w = 1.0 */
1318 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1319 dst0[CHAN_W] = bld->base.one;
1320 }
1321 }
1322 break;
1323
1324 case TGSI_OPCODE_DIV:
1325 /* deprecated */
1326 assert( 0 );
1327 return 0;
1328 break;
1329
1330 case TGSI_OPCODE_DP2:
1331 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1332 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1333 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1334 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1335 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1336 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1337 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1338 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1339 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1340 }
1341 break;
1342
1343 case TGSI_OPCODE_TXL:
1344 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1345 break;
1346
1347 case TGSI_OPCODE_TXP:
1348 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1349 break;
1350
1351 case TGSI_OPCODE_BRK:
1352 /* FIXME */
1353 return 0;
1354 break;
1355
1356 case TGSI_OPCODE_IF:
1357 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1358 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1359 break;
1360
1361 case TGSI_OPCODE_BGNFOR:
1362 /* deprecated */
1363 assert(0);
1364 return 0;
1365 break;
1366
1367 case TGSI_OPCODE_REP:
1368 /* deprecated */
1369 assert(0);
1370 return 0;
1371 break;
1372
1373 case TGSI_OPCODE_ELSE:
1374 lp_exec_mask_cond_invert(&bld->exec_mask);
1375 break;
1376
1377 case TGSI_OPCODE_ENDIF:
1378 lp_exec_mask_cond_pop(&bld->exec_mask);
1379 break;
1380
1381 case TGSI_OPCODE_ENDFOR:
1382 /* deprecated */
1383 assert(0);
1384 return 0;
1385 break;
1386
1387 case TGSI_OPCODE_ENDREP:
1388 /* deprecated */
1389 assert(0);
1390 return 0;
1391 break;
1392
1393 case TGSI_OPCODE_PUSHA:
1394 /* deprecated? */
1395 assert(0);
1396 return 0;
1397 break;
1398
1399 case TGSI_OPCODE_POPA:
1400 /* deprecated? */
1401 assert(0);
1402 return 0;
1403 break;
1404
1405 case TGSI_OPCODE_CEIL:
1406 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1407 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1408 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1409 }
1410 break;
1411
1412 case TGSI_OPCODE_I2F:
1413 /* deprecated? */
1414 assert(0);
1415 return 0;
1416 break;
1417
1418 case TGSI_OPCODE_NOT:
1419 /* deprecated? */
1420 assert(0);
1421 return 0;
1422 break;
1423
1424 case TGSI_OPCODE_TRUNC:
1425 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1426 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1427 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1428 }
1429 break;
1430
1431 case TGSI_OPCODE_SHL:
1432 /* deprecated? */
1433 assert(0);
1434 return 0;
1435 break;
1436
1437 case TGSI_OPCODE_ISHR:
1438 /* deprecated? */
1439 assert(0);
1440 return 0;
1441 break;
1442
1443 case TGSI_OPCODE_AND:
1444 /* deprecated? */
1445 assert(0);
1446 return 0;
1447 break;
1448
1449 case TGSI_OPCODE_OR:
1450 /* deprecated? */
1451 assert(0);
1452 return 0;
1453 break;
1454
1455 case TGSI_OPCODE_MOD:
1456 /* deprecated? */
1457 assert(0);
1458 return 0;
1459 break;
1460
1461 case TGSI_OPCODE_XOR:
1462 /* deprecated? */
1463 assert(0);
1464 return 0;
1465 break;
1466
1467 case TGSI_OPCODE_SAD:
1468 /* deprecated? */
1469 assert(0);
1470 return 0;
1471 break;
1472
1473 case TGSI_OPCODE_TXF:
1474 /* deprecated? */
1475 assert(0);
1476 return 0;
1477 break;
1478
1479 case TGSI_OPCODE_TXQ:
1480 /* deprecated? */
1481 assert(0);
1482 return 0;
1483 break;
1484
1485 case TGSI_OPCODE_CONT:
1486 /* FIXME */
1487 return 0;
1488 break;
1489
1490 case TGSI_OPCODE_EMIT:
1491 return 0;
1492 break;
1493
1494 case TGSI_OPCODE_ENDPRIM:
1495 return 0;
1496 break;
1497
1498 case TGSI_OPCODE_NOP:
1499 break;
1500
1501 default:
1502 return 0;
1503 }
1504
1505 if(info->num_dst) {
1506 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1507 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1508 }
1509 }
1510
1511 return 1;
1512 }
1513
1514
1515 void
1516 lp_build_tgsi_soa(LLVMBuilderRef builder,
1517 const struct tgsi_token *tokens,
1518 struct lp_type type,
1519 struct lp_build_mask_context *mask,
1520 LLVMValueRef consts_ptr,
1521 const LLVMValueRef *pos,
1522 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1523 LLVMValueRef (*outputs)[NUM_CHANNELS],
1524 struct lp_build_sampler_soa *sampler)
1525 {
1526 struct lp_build_tgsi_soa_context bld;
1527 struct tgsi_parse_context parse;
1528 uint num_immediates = 0;
1529 unsigned i;
1530
1531 /* Setup build context */
1532 memset(&bld, 0, sizeof bld);
1533 lp_build_context_init(&bld.base, builder, type);
1534 bld.mask = mask;
1535 bld.pos = pos;
1536 bld.inputs = inputs;
1537 bld.outputs = outputs;
1538 bld.consts_ptr = consts_ptr;
1539 bld.sampler = sampler;
1540
1541 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1542
1543 tgsi_parse_init( &parse, tokens );
1544
1545 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1546 tgsi_parse_token( &parse );
1547
1548 switch( parse.FullToken.Token.Type ) {
1549 case TGSI_TOKEN_TYPE_DECLARATION:
1550 /* Inputs already interpolated */
1551 {
1552 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1553 _debug_printf("warning: failed to define LLVM variable\n");
1554 }
1555 break;
1556
1557 case TGSI_TOKEN_TYPE_INSTRUCTION:
1558 {
1559 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1560 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1561 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1562 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1563 info ? info->mnemonic : "<invalid>");
1564 }
1565
1566 break;
1567
1568 case TGSI_TOKEN_TYPE_IMMEDIATE:
1569 /* simply copy the immediate values into the next immediates[] slot */
1570 {
1571 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1572 assert(size <= 4);
1573 assert(num_immediates < LP_MAX_IMMEDIATES);
1574 for( i = 0; i < size; ++i )
1575 bld.immediates[num_immediates][i] =
1576 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1577 for( i = size; i < 4; ++i )
1578 bld.immediates[num_immediates][i] = bld.base.undef;
1579 num_immediates++;
1580 }
1581 break;
1582
1583 case TGSI_TOKEN_TYPE_PROPERTY:
1584 break;
1585
1586 default:
1587 assert( 0 );
1588 }
1589 }
1590
1591 tgsi_parse_free( &parse );
1592 }
1593