Merge branch 'gallium-dynamicstencilref'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55
56
/** Number of rows in the per-shader temporary register table. */
#define LP_MAX_TEMPS 256
/** Number of rows in the per-shader immediate table. */
#define LP_MAX_IMMEDIATES 256


/** Loop header that walks CHAN over 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/** Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/**
 * Bare `if` header guarding the statement that follows on CHAN being
 * write-enabled.  Because it expands to an unbraced `if`, a following
 * `else` would attach to it.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Loop over all channels, running the body only for write-enabled ones. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Positions inside a 2x2 quad, used by the derivative swizzle tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
85 struct lp_build_tgsi_soa_context
86 {
87 struct lp_build_context base;
88
89 LLVMValueRef consts_ptr;
90 const LLVMValueRef *pos;
91 const LLVMValueRef (*inputs)[NUM_CHANNELS];
92 LLVMValueRef (*outputs)[NUM_CHANNELS];
93
94 struct lp_build_sampler_soa *sampler;
95
96 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
97 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
98
99 struct lp_build_mask_context *mask;
100 };
101
102
103 static const unsigned char
104 swizzle_left[4] = {
105 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
106 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
107 };
108
109 static const unsigned char
110 swizzle_right[4] = {
111 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
112 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
113 };
114
115 static const unsigned char
116 swizzle_top[4] = {
117 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
118 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
119 };
120
121 static const unsigned char
122 swizzle_bottom[4] = {
123 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
124 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
125 };
126
127
128 static LLVMValueRef
129 emit_ddx(struct lp_build_tgsi_soa_context *bld,
130 LLVMValueRef src)
131 {
132 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
133 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
134 return lp_build_sub(&bld->base, src_right, src_left);
135 }
136
137
138 static LLVMValueRef
139 emit_ddy(struct lp_build_tgsi_soa_context *bld,
140 LLVMValueRef src)
141 {
142 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
143 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
144 return lp_build_sub(&bld->base, src_top, src_bottom);
145 }
146
147
148 /**
149 * Register fetch.
150 */
151 static LLVMValueRef
152 emit_fetch(
153 struct lp_build_tgsi_soa_context *bld,
154 const struct tgsi_full_instruction *inst,
155 unsigned index,
156 const unsigned chan_index )
157 {
158 const struct tgsi_full_src_register *reg = &inst->Src[index];
159 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
160 LLVMValueRef res;
161
162 switch (swizzle) {
163 case TGSI_SWIZZLE_X:
164 case TGSI_SWIZZLE_Y:
165 case TGSI_SWIZZLE_Z:
166 case TGSI_SWIZZLE_W:
167
168 switch (reg->Register.File) {
169 case TGSI_FILE_CONSTANT: {
170 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
171 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
172 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
173 res = lp_build_broadcast_scalar(&bld->base, scalar);
174 break;
175 }
176
177 case TGSI_FILE_IMMEDIATE:
178 res = bld->immediates[reg->Register.Index][swizzle];
179 assert(res);
180 break;
181
182 case TGSI_FILE_INPUT:
183 res = bld->inputs[reg->Register.Index][swizzle];
184 assert(res);
185 break;
186
187 case TGSI_FILE_TEMPORARY:
188 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
189 if(!res)
190 return bld->base.undef;
191 break;
192
193 default:
194 assert( 0 );
195 return bld->base.undef;
196 }
197 break;
198
199 default:
200 assert( 0 );
201 return bld->base.undef;
202 }
203
204 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
205 case TGSI_UTIL_SIGN_CLEAR:
206 res = lp_build_abs( &bld->base, res );
207 break;
208
209 case TGSI_UTIL_SIGN_SET:
210 /* TODO: Use bitwese OR for floating point */
211 res = lp_build_abs( &bld->base, res );
212 res = LLVMBuildNeg( bld->base.builder, res, "" );
213 break;
214
215 case TGSI_UTIL_SIGN_TOGGLE:
216 res = LLVMBuildNeg( bld->base.builder, res, "" );
217 break;
218
219 case TGSI_UTIL_SIGN_KEEP:
220 break;
221 }
222
223 return res;
224 }
225
226
227 /**
228 * Register fetch with derivatives.
229 */
230 static void
231 emit_fetch_deriv(
232 struct lp_build_tgsi_soa_context *bld,
233 const struct tgsi_full_instruction *inst,
234 unsigned index,
235 const unsigned chan_index,
236 LLVMValueRef *res,
237 LLVMValueRef *ddx,
238 LLVMValueRef *ddy)
239 {
240 LLVMValueRef src;
241
242 src = emit_fetch(bld, inst, index, chan_index);
243
244 if(res)
245 *res = src;
246
247 /* TODO: use interpolation coeffs for inputs */
248
249 if(ddx)
250 *ddx = emit_ddx(bld, src);
251
252 if(ddy)
253 *ddy = emit_ddy(bld, src);
254 }
255
256
257 /**
258 * Register store.
259 */
260 static void
261 emit_store(
262 struct lp_build_tgsi_soa_context *bld,
263 const struct tgsi_full_instruction *inst,
264 unsigned index,
265 unsigned chan_index,
266 LLVMValueRef value)
267 {
268 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
269
270 switch( inst->Instruction.Saturate ) {
271 case TGSI_SAT_NONE:
272 break;
273
274 case TGSI_SAT_ZERO_ONE:
275 value = lp_build_max(&bld->base, value, bld->base.zero);
276 value = lp_build_min(&bld->base, value, bld->base.one);
277 break;
278
279 case TGSI_SAT_MINUS_PLUS_ONE:
280 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
281 value = lp_build_min(&bld->base, value, bld->base.one);
282 break;
283
284 default:
285 assert(0);
286 }
287
288 switch( reg->Register.File ) {
289 case TGSI_FILE_OUTPUT:
290 LLVMBuildStore(bld->base.builder, value,
291 bld->outputs[reg->Register.Index][chan_index]);
292 break;
293
294 case TGSI_FILE_TEMPORARY:
295 LLVMBuildStore(bld->base.builder, value,
296 bld->temps[reg->Register.Index][chan_index]);
297 break;
298
299 case TGSI_FILE_ADDRESS:
300 /* FIXME */
301 assert(0);
302 break;
303
304 default:
305 assert( 0 );
306 }
307 }
308
309
310 /**
311 * High-level instruction translators.
312 */
313
314
315 static void
316 emit_tex( struct lp_build_tgsi_soa_context *bld,
317 const struct tgsi_full_instruction *inst,
318 boolean apply_lodbias,
319 boolean projected,
320 LLVMValueRef *texel)
321 {
322 const uint unit = inst->Src[1].Register.Index;
323 LLVMValueRef lodbias;
324 LLVMValueRef oow = NULL;
325 LLVMValueRef coords[3];
326 unsigned num_coords;
327 unsigned i;
328
329 switch (inst->Texture.Texture) {
330 case TGSI_TEXTURE_1D:
331 num_coords = 1;
332 break;
333 case TGSI_TEXTURE_2D:
334 case TGSI_TEXTURE_RECT:
335 num_coords = 2;
336 break;
337 case TGSI_TEXTURE_SHADOW1D:
338 case TGSI_TEXTURE_SHADOW2D:
339 case TGSI_TEXTURE_SHADOWRECT:
340 case TGSI_TEXTURE_3D:
341 case TGSI_TEXTURE_CUBE:
342 num_coords = 3;
343 break;
344 default:
345 assert(0);
346 return;
347 }
348
349 if(apply_lodbias)
350 lodbias = emit_fetch( bld, inst, 0, 3 );
351 else
352 lodbias = bld->base.zero;
353
354 if (projected) {
355 oow = emit_fetch( bld, inst, 0, 3 );
356 oow = lp_build_rcp(&bld->base, oow);
357 }
358
359 for (i = 0; i < num_coords; i++) {
360 coords[i] = emit_fetch( bld, inst, 0, i );
361 if (projected)
362 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
363 }
364 for (i = num_coords; i < 3; i++) {
365 coords[i] = bld->base.undef;
366 }
367
368 bld->sampler->emit_fetch_texel(bld->sampler,
369 bld->base.builder,
370 bld->base.type,
371 unit, num_coords, coords, lodbias,
372 texel);
373 }
374
375
376 static void
377 emit_kil(
378 struct lp_build_tgsi_soa_context *bld,
379 const struct tgsi_full_instruction *inst )
380 {
381 const struct tgsi_full_src_register *reg = &inst->Src[0];
382 LLVMValueRef terms[NUM_CHANNELS];
383 LLVMValueRef mask;
384 unsigned chan_index;
385
386 memset(&terms, 0, sizeof terms);
387
388 FOR_EACH_CHANNEL( chan_index ) {
389 unsigned swizzle;
390
391 /* Unswizzle channel */
392 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
393
394 /* Check if the component has not been already tested. */
395 assert(swizzle < NUM_CHANNELS);
396 if( !terms[swizzle] )
397 /* TODO: change the comparison operator instead of setting the sign */
398 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
399 }
400
401 mask = NULL;
402 FOR_EACH_CHANNEL( chan_index ) {
403 if(terms[chan_index]) {
404 LLVMValueRef chan_mask;
405
406 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
407
408 if(mask)
409 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
410 else
411 mask = chan_mask;
412 }
413 }
414
415 if(mask)
416 lp_build_mask_update(bld->mask, mask);
417 }
418
419
420 /**
421 * Check if inst src/dest regs use indirect addressing into temporary
422 * register file.
423 */
424 static boolean
425 indirect_temp_reference(const struct tgsi_full_instruction *inst)
426 {
427 uint i;
428 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
429 const struct tgsi_full_src_register *reg = &inst->Src[i];
430 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
431 reg->Register.Indirect)
432 return TRUE;
433 }
434 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
435 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
436 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
437 reg->Register.Indirect)
438 return TRUE;
439 }
440 return FALSE;
441 }
442
443 static int
444 emit_declaration(
445 struct lp_build_tgsi_soa_context *bld,
446 const struct tgsi_full_declaration *decl)
447 {
448 unsigned first = decl->Range.First;
449 unsigned last = decl->Range.Last;
450 unsigned idx, i;
451
452 for (idx = first; idx <= last; ++idx) {
453 boolean ok;
454
455 switch (decl->Declaration.File) {
456 case TGSI_FILE_TEMPORARY:
457 for (i = 0; i < NUM_CHANNELS; i++)
458 bld->temps[idx][i] = lp_build_alloca(&bld->base);
459 ok = TRUE;
460 break;
461
462 case TGSI_FILE_OUTPUT:
463 for (i = 0; i < NUM_CHANNELS; i++)
464 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
465 ok = TRUE;
466 break;
467
468 default:
469 /* don't need to declare other vars */
470 ok = TRUE;
471 }
472
473 if (!ok)
474 return FALSE;
475 }
476
477 return TRUE;
478 }
479
480 static int
481 emit_instruction(
482 struct lp_build_tgsi_soa_context *bld,
483 const struct tgsi_full_instruction *inst,
484 const struct tgsi_opcode_info *info)
485 {
486 unsigned chan_index;
487 LLVMValueRef src0, src1, src2;
488 LLVMValueRef tmp0, tmp1, tmp2;
489 LLVMValueRef tmp3 = NULL;
490 LLVMValueRef tmp4 = NULL;
491 LLVMValueRef tmp5 = NULL;
492 LLVMValueRef tmp6 = NULL;
493 LLVMValueRef tmp7 = NULL;
494 LLVMValueRef res;
495 LLVMValueRef dst0[NUM_CHANNELS];
496
497 /* we can't handle indirect addressing into temp register file yet */
498 if (indirect_temp_reference(inst))
499 return FALSE;
500
501 assert(info->num_dst <= 1);
502 if(info->num_dst) {
503 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
504 dst0[chan_index] = bld->base.undef;
505 }
506 }
507
508 switch (inst->Instruction.Opcode) {
509 #if 0
510 case TGSI_OPCODE_ARL:
511 /* FIXME */
512 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
513 tmp0 = emit_fetch( bld, inst, 0, chan_index );
514 emit_flr(bld, 0, 0);
515 emit_f2it( bld, 0 );
516 dst0[chan_index] = tmp0;
517 }
518 break;
519 #endif
520
521 case TGSI_OPCODE_MOV:
522 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
523 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
524 }
525 break;
526
527 case TGSI_OPCODE_LIT:
528 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
529 dst0[CHAN_X] = bld->base.one;
530 }
531 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
532 src0 = emit_fetch( bld, inst, 0, CHAN_X );
533 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
534 }
535 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
536 /* XMM[1] = SrcReg[0].yyyy */
537 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
538 /* XMM[1] = max(XMM[1], 0) */
539 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
540 /* XMM[2] = SrcReg[0].wwww */
541 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
542 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
543 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
544 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
545 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
546 }
547 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
548 dst0[CHAN_W] = bld->base.one;
549 }
550 break;
551
552 case TGSI_OPCODE_RCP:
553 /* TGSI_OPCODE_RECIP */
554 src0 = emit_fetch( bld, inst, 0, CHAN_X );
555 res = lp_build_rcp(&bld->base, src0);
556 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
557 dst0[chan_index] = res;
558 }
559 break;
560
561 case TGSI_OPCODE_RSQ:
562 /* TGSI_OPCODE_RECIPSQRT */
563 src0 = emit_fetch( bld, inst, 0, CHAN_X );
564 src0 = lp_build_abs(&bld->base, src0);
565 res = lp_build_rsqrt(&bld->base, src0);
566 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
567 dst0[chan_index] = res;
568 }
569 break;
570
571 case TGSI_OPCODE_EXP:
572 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
573 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
574 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
575 LLVMValueRef *p_exp2_int_part = NULL;
576 LLVMValueRef *p_frac_part = NULL;
577 LLVMValueRef *p_exp2 = NULL;
578
579 src0 = emit_fetch( bld, inst, 0, CHAN_X );
580
581 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
582 p_exp2_int_part = &tmp0;
583 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
584 p_frac_part = &tmp1;
585 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
586 p_exp2 = &tmp2;
587
588 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
589
590 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
591 dst0[CHAN_X] = tmp0;
592 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
593 dst0[CHAN_Y] = tmp1;
594 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
595 dst0[CHAN_Z] = tmp2;
596 }
597 /* dst.w = 1.0 */
598 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
599 dst0[CHAN_W] = bld->base.one;
600 }
601 break;
602
603 case TGSI_OPCODE_LOG:
604 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
605 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
606 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
607 LLVMValueRef *p_floor_log2 = NULL;
608 LLVMValueRef *p_exp = NULL;
609 LLVMValueRef *p_log2 = NULL;
610
611 src0 = emit_fetch( bld, inst, 0, CHAN_X );
612 src0 = lp_build_abs( &bld->base, src0 );
613
614 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
615 p_floor_log2 = &tmp0;
616 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
617 p_exp = &tmp1;
618 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
619 p_log2 = &tmp2;
620
621 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
622
623 /* dst.x = floor(lg2(abs(src.x))) */
624 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
625 dst0[CHAN_X] = tmp0;
626 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
627 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
628 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
629 }
630 /* dst.z = lg2(abs(src.x)) */
631 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
632 dst0[CHAN_Z] = tmp2;
633 }
634 /* dst.w = 1.0 */
635 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
636 dst0[CHAN_W] = bld->base.one;
637 }
638 break;
639
640 case TGSI_OPCODE_MUL:
641 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
642 src0 = emit_fetch( bld, inst, 0, chan_index );
643 src1 = emit_fetch( bld, inst, 1, chan_index );
644 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
645 }
646 break;
647
648 case TGSI_OPCODE_ADD:
649 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
650 src0 = emit_fetch( bld, inst, 0, chan_index );
651 src1 = emit_fetch( bld, inst, 1, chan_index );
652 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
653 }
654 break;
655
656 case TGSI_OPCODE_DP3:
657 /* TGSI_OPCODE_DOT3 */
658 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
659 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
660 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
661 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
662 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
663 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
664 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
665 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
666 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
667 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
668 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
669 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
670 dst0[chan_index] = tmp0;
671 }
672 break;
673
674 case TGSI_OPCODE_DP4:
675 /* TGSI_OPCODE_DOT4 */
676 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
677 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
678 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
679 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
680 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
681 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
682 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
683 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
684 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
685 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
686 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
687 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
688 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
689 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
690 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
691 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
692 dst0[chan_index] = tmp0;
693 }
694 break;
695
696 case TGSI_OPCODE_DST:
697 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
698 dst0[CHAN_X] = bld->base.one;
699 }
700 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
701 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
702 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
703 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
704 }
705 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
706 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
707 }
708 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
709 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
710 }
711 break;
712
713 case TGSI_OPCODE_MIN:
714 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
715 src0 = emit_fetch( bld, inst, 0, chan_index );
716 src1 = emit_fetch( bld, inst, 1, chan_index );
717 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
718 }
719 break;
720
721 case TGSI_OPCODE_MAX:
722 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
723 src0 = emit_fetch( bld, inst, 0, chan_index );
724 src1 = emit_fetch( bld, inst, 1, chan_index );
725 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
726 }
727 break;
728
729 case TGSI_OPCODE_SLT:
730 /* TGSI_OPCODE_SETLT */
731 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
732 src0 = emit_fetch( bld, inst, 0, chan_index );
733 src1 = emit_fetch( bld, inst, 1, chan_index );
734 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
735 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
736 }
737 break;
738
739 case TGSI_OPCODE_SGE:
740 /* TGSI_OPCODE_SETGE */
741 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
742 src0 = emit_fetch( bld, inst, 0, chan_index );
743 src1 = emit_fetch( bld, inst, 1, chan_index );
744 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
745 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
746 }
747 break;
748
749 case TGSI_OPCODE_MAD:
750 /* TGSI_OPCODE_MADD */
751 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
752 tmp0 = emit_fetch( bld, inst, 0, chan_index );
753 tmp1 = emit_fetch( bld, inst, 1, chan_index );
754 tmp2 = emit_fetch( bld, inst, 2, chan_index );
755 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
756 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
757 dst0[chan_index] = tmp0;
758 }
759 break;
760
761 case TGSI_OPCODE_SUB:
762 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
763 tmp0 = emit_fetch( bld, inst, 0, chan_index );
764 tmp1 = emit_fetch( bld, inst, 1, chan_index );
765 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
766 }
767 break;
768
769 case TGSI_OPCODE_LRP:
770 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
771 src0 = emit_fetch( bld, inst, 0, chan_index );
772 src1 = emit_fetch( bld, inst, 1, chan_index );
773 src2 = emit_fetch( bld, inst, 2, chan_index );
774 tmp0 = lp_build_sub( &bld->base, src1, src2 );
775 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
776 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
777 }
778 break;
779
780 case TGSI_OPCODE_CND:
781 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
782 src0 = emit_fetch( bld, inst, 0, chan_index );
783 src1 = emit_fetch( bld, inst, 1, chan_index );
784 src2 = emit_fetch( bld, inst, 2, chan_index );
785 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
786 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
787 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
788 }
789 break;
790
791 case TGSI_OPCODE_DP2A:
792 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
793 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
794 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
795 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
796 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
797 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
798 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
799 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
800 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
801 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
802 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
803 }
804 break;
805
806 case TGSI_OPCODE_FRC:
807 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
808 src0 = emit_fetch( bld, inst, 0, chan_index );
809 tmp0 = lp_build_floor(&bld->base, src0);
810 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
811 dst0[chan_index] = tmp0;
812 }
813 break;
814
815 case TGSI_OPCODE_CLAMP:
816 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
817 tmp0 = emit_fetch( bld, inst, 0, chan_index );
818 src1 = emit_fetch( bld, inst, 1, chan_index );
819 src2 = emit_fetch( bld, inst, 2, chan_index );
820 tmp0 = lp_build_max(&bld->base, tmp0, src1);
821 tmp0 = lp_build_min(&bld->base, tmp0, src2);
822 dst0[chan_index] = tmp0;
823 }
824 break;
825
826 case TGSI_OPCODE_FLR:
827 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
828 tmp0 = emit_fetch( bld, inst, 0, chan_index );
829 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
830 }
831 break;
832
833 case TGSI_OPCODE_ROUND:
834 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
835 tmp0 = emit_fetch( bld, inst, 0, chan_index );
836 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
837 }
838 break;
839
840 case TGSI_OPCODE_EX2: {
841 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
842 tmp0 = lp_build_exp2( &bld->base, tmp0);
843 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
844 dst0[chan_index] = tmp0;
845 }
846 break;
847 }
848
849 case TGSI_OPCODE_LG2:
850 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
851 tmp0 = lp_build_log2( &bld->base, tmp0);
852 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
853 dst0[chan_index] = tmp0;
854 }
855 break;
856
857 case TGSI_OPCODE_POW:
858 src0 = emit_fetch( bld, inst, 0, CHAN_X );
859 src1 = emit_fetch( bld, inst, 1, CHAN_X );
860 res = lp_build_pow( &bld->base, src0, src1 );
861 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
862 dst0[chan_index] = res;
863 }
864 break;
865
866 case TGSI_OPCODE_XPD:
867 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
868 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
869 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
870 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
871 }
872 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
873 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
874 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
875 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
876 }
877 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
878 tmp2 = tmp0;
879 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
880 tmp5 = tmp3;
881 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
882 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
883 dst0[CHAN_X] = tmp2;
884 }
885 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
886 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
887 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
888 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
889 }
890 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
891 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
892 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
893 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
894 dst0[CHAN_Y] = tmp3;
895 }
896 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
897 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
898 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
899 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
900 dst0[CHAN_Z] = tmp5;
901 }
902 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
903 dst0[CHAN_W] = bld->base.one;
904 }
905 break;
906
907 case TGSI_OPCODE_ABS:
908 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
909 tmp0 = emit_fetch( bld, inst, 0, chan_index );
910 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
911 }
912 break;
913
914 case TGSI_OPCODE_RCC:
915 /* deprecated? */
916 assert(0);
917 return 0;
918
919 case TGSI_OPCODE_DPH:
920 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
921 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
922 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
923 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
924 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
925 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
926 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
927 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
928 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
929 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
930 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
931 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
932 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
933 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
934 dst0[chan_index] = tmp0;
935 }
936 break;
937
938 case TGSI_OPCODE_COS:
939 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
940 tmp0 = lp_build_cos( &bld->base, tmp0 );
941 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
942 dst0[chan_index] = tmp0;
943 }
944 break;
945
946 case TGSI_OPCODE_DDX:
947 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
948 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
949 }
950 break;
951
952 case TGSI_OPCODE_DDY:
953 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
954 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
955 }
956 break;
957
958 case TGSI_OPCODE_KILP:
959 /* predicated kill */
960 /* FIXME */
961 return 0;
962 break;
963
964 case TGSI_OPCODE_KIL:
965 /* conditional kill */
966 emit_kil( bld, inst );
967 break;
968
969 case TGSI_OPCODE_PK2H:
970 return 0;
971 break;
972
973 case TGSI_OPCODE_PK2US:
974 return 0;
975 break;
976
977 case TGSI_OPCODE_PK4B:
978 return 0;
979 break;
980
981 case TGSI_OPCODE_PK4UB:
982 return 0;
983 break;
984
985 case TGSI_OPCODE_RFL:
986 return 0;
987 break;
988
989 case TGSI_OPCODE_SEQ:
990 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
991 src0 = emit_fetch( bld, inst, 0, chan_index );
992 src1 = emit_fetch( bld, inst, 1, chan_index );
993 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
994 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
995 }
996 break;
997
998 case TGSI_OPCODE_SFL:
999 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1000 dst0[chan_index] = bld->base.zero;
1001 }
1002 break;
1003
1004 case TGSI_OPCODE_SGT:
1005 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1006 src0 = emit_fetch( bld, inst, 0, chan_index );
1007 src1 = emit_fetch( bld, inst, 1, chan_index );
1008 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1009 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1010 }
1011 break;
1012
1013 case TGSI_OPCODE_SIN:
1014 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1015 tmp0 = lp_build_sin( &bld->base, tmp0 );
1016 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1017 dst0[chan_index] = tmp0;
1018 }
1019 break;
1020
1021 case TGSI_OPCODE_SLE:
1022 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1023 src0 = emit_fetch( bld, inst, 0, chan_index );
1024 src1 = emit_fetch( bld, inst, 1, chan_index );
1025 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1026 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1027 }
1028 break;
1029
1030 case TGSI_OPCODE_SNE:
1031 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1032 src0 = emit_fetch( bld, inst, 0, chan_index );
1033 src1 = emit_fetch( bld, inst, 1, chan_index );
1034 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1035 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1036 }
1037 break;
1038
1039 case TGSI_OPCODE_STR:
1040 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1041 dst0[chan_index] = bld->base.one;
1042 }
1043 break;
1044
1045 case TGSI_OPCODE_TEX:
1046 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1047 break;
1048
1049 case TGSI_OPCODE_TXD:
1050 /* FIXME */
1051 return 0;
1052 break;
1053
1054 case TGSI_OPCODE_UP2H:
1055 /* deprecated */
1056 assert (0);
1057 return 0;
1058 break;
1059
1060 case TGSI_OPCODE_UP2US:
1061 /* deprecated */
1062 assert(0);
1063 return 0;
1064 break;
1065
1066 case TGSI_OPCODE_UP4B:
1067 /* deprecated */
1068 assert(0);
1069 return 0;
1070 break;
1071
1072 case TGSI_OPCODE_UP4UB:
1073 /* deprecated */
1074 assert(0);
1075 return 0;
1076 break;
1077
1078 case TGSI_OPCODE_X2D:
1079 /* deprecated? */
1080 assert(0);
1081 return 0;
1082 break;
1083
1084 case TGSI_OPCODE_ARA:
1085 /* deprecated */
1086 assert(0);
1087 return 0;
1088 break;
1089
1090 #if 0
1091 case TGSI_OPCODE_ARR:
1092 /* FIXME */
1093 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1094 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1095 emit_rnd( bld, 0, 0 );
1096 emit_f2it( bld, 0 );
1097 dst0[chan_index] = tmp0;
1098 }
1099 break;
1100 #endif
1101
1102 case TGSI_OPCODE_BRA:
1103 /* deprecated */
1104 assert(0);
1105 return 0;
1106 break;
1107
1108 case TGSI_OPCODE_CAL:
1109 /* FIXME */
1110 return 0;
1111 break;
1112
1113 case TGSI_OPCODE_RET:
1114 /* FIXME */
1115 return 0;
1116 break;
1117
1118 case TGSI_OPCODE_END:
1119 break;
1120
1121 case TGSI_OPCODE_SSG:
1122 /* TGSI_OPCODE_SGN */
1123 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1124 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1125 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1126 }
1127 break;
1128
1129 case TGSI_OPCODE_CMP:
1130 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1131 src0 = emit_fetch( bld, inst, 0, chan_index );
1132 src1 = emit_fetch( bld, inst, 1, chan_index );
1133 src2 = emit_fetch( bld, inst, 2, chan_index );
1134 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1135 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1136 }
1137 break;
1138
1139 case TGSI_OPCODE_SCS:
1140 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1141 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1142 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1143 }
1144 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1145 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1146 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1147 }
1148 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1149 dst0[CHAN_Z] = bld->base.zero;
1150 }
1151 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1152 dst0[CHAN_W] = bld->base.one;
1153 }
1154 break;
1155
1156 case TGSI_OPCODE_TXB:
1157 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1158 break;
1159
1160 case TGSI_OPCODE_NRM:
1161 /* fall-through */
1162 case TGSI_OPCODE_NRM4:
1163 /* 3 or 4-component normalization */
1164 {
1165 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1166
1167 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1168 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1169 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1170 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1171
1172 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1173
1174 /* xmm4 = src.x */
1175 /* xmm0 = src.x * src.x */
1176 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1177 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1178 tmp4 = tmp0;
1179 }
1180 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1181
1182 /* xmm5 = src.y */
1183 /* xmm0 = xmm0 + src.y * src.y */
1184 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1185 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1186 tmp5 = tmp1;
1187 }
1188 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1189 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1190
1191 /* xmm6 = src.z */
1192 /* xmm0 = xmm0 + src.z * src.z */
1193 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1194 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1195 tmp6 = tmp1;
1196 }
1197 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1198 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1199
1200 if (dims == 4) {
1201 /* xmm7 = src.w */
1202 /* xmm0 = xmm0 + src.w * src.w */
1203 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1204 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1205 tmp7 = tmp1;
1206 }
1207 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1208 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1209 }
1210
1211 /* xmm1 = 1 / sqrt(xmm0) */
1212 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1213
1214 /* dst.x = xmm1 * src.x */
1215 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1216 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1217 }
1218
1219 /* dst.y = xmm1 * src.y */
1220 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1221 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1222 }
1223
1224 /* dst.z = xmm1 * src.z */
1225 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1226 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1227 }
1228
1229 /* dst.w = xmm1 * src.w */
1230 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1231 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1232 }
1233 }
1234
1235 /* dst.w = 1.0 */
1236 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1237 dst0[CHAN_W] = bld->base.one;
1238 }
1239 }
1240 break;
1241
1242 case TGSI_OPCODE_DIV:
1243 /* deprecated */
1244 assert( 0 );
1245 return 0;
1246 break;
1247
1248 case TGSI_OPCODE_DP2:
1249 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1250 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1251 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1252 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1253 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1254 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1255 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1256 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1257 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1258 }
1259 break;
1260
1261 case TGSI_OPCODE_TXL:
1262 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1263 break;
1264
1265 case TGSI_OPCODE_TXP:
1266 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1267 break;
1268
1269 case TGSI_OPCODE_BRK:
1270 /* FIXME */
1271 return 0;
1272 break;
1273
1274 case TGSI_OPCODE_IF:
1275 /* FIXME */
1276 return 0;
1277 break;
1278
1279 case TGSI_OPCODE_BGNFOR:
1280 /* deprecated */
1281 assert(0);
1282 return 0;
1283 break;
1284
1285 case TGSI_OPCODE_REP:
1286 /* deprecated */
1287 assert(0);
1288 return 0;
1289 break;
1290
1291 case TGSI_OPCODE_ELSE:
1292 /* FIXME */
1293 return 0;
1294 break;
1295
1296 case TGSI_OPCODE_ENDIF:
1297 /* FIXME */
1298 return 0;
1299 break;
1300
1301 case TGSI_OPCODE_ENDFOR:
1302 /* deprecated */
1303 assert(0);
1304 return 0;
1305 break;
1306
1307 case TGSI_OPCODE_ENDREP:
1308 /* deprecated */
1309 assert(0);
1310 return 0;
1311 break;
1312
1313 case TGSI_OPCODE_PUSHA:
1314 /* deprecated? */
1315 assert(0);
1316 return 0;
1317 break;
1318
1319 case TGSI_OPCODE_POPA:
1320 /* deprecated? */
1321 assert(0);
1322 return 0;
1323 break;
1324
1325 case TGSI_OPCODE_CEIL:
1326 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1327 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1328 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1329 }
1330 break;
1331
1332 case TGSI_OPCODE_I2F:
1333 /* deprecated? */
1334 assert(0);
1335 return 0;
1336 break;
1337
1338 case TGSI_OPCODE_NOT:
1339 /* deprecated? */
1340 assert(0);
1341 return 0;
1342 break;
1343
1344 case TGSI_OPCODE_TRUNC:
1345 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1346 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1347 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1348 }
1349 break;
1350
1351 case TGSI_OPCODE_SHL:
1352 /* deprecated? */
1353 assert(0);
1354 return 0;
1355 break;
1356
1357 case TGSI_OPCODE_ISHR:
1358 /* deprecated? */
1359 assert(0);
1360 return 0;
1361 break;
1362
1363 case TGSI_OPCODE_AND:
1364 /* deprecated? */
1365 assert(0);
1366 return 0;
1367 break;
1368
1369 case TGSI_OPCODE_OR:
1370 /* deprecated? */
1371 assert(0);
1372 return 0;
1373 break;
1374
1375 case TGSI_OPCODE_MOD:
1376 /* deprecated? */
1377 assert(0);
1378 return 0;
1379 break;
1380
1381 case TGSI_OPCODE_XOR:
1382 /* deprecated? */
1383 assert(0);
1384 return 0;
1385 break;
1386
1387 case TGSI_OPCODE_SAD:
1388 /* deprecated? */
1389 assert(0);
1390 return 0;
1391 break;
1392
1393 case TGSI_OPCODE_TXF:
1394 /* deprecated? */
1395 assert(0);
1396 return 0;
1397 break;
1398
1399 case TGSI_OPCODE_TXQ:
1400 /* deprecated? */
1401 assert(0);
1402 return 0;
1403 break;
1404
1405 case TGSI_OPCODE_CONT:
1406 /* FIXME */
1407 return 0;
1408 break;
1409
1410 case TGSI_OPCODE_EMIT:
1411 return 0;
1412 break;
1413
1414 case TGSI_OPCODE_ENDPRIM:
1415 return 0;
1416 break;
1417
1418 case TGSI_OPCODE_NOP:
1419 break;
1420
1421 default:
1422 return 0;
1423 }
1424
1425 if(info->num_dst) {
1426 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1427 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1428 }
1429 }
1430
1431 return 1;
1432 }
1433
1434
1435 void
1436 lp_build_tgsi_soa(LLVMBuilderRef builder,
1437 const struct tgsi_token *tokens,
1438 struct lp_type type,
1439 struct lp_build_mask_context *mask,
1440 LLVMValueRef consts_ptr,
1441 const LLVMValueRef *pos,
1442 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1443 LLVMValueRef (*outputs)[NUM_CHANNELS],
1444 struct lp_build_sampler_soa *sampler)
1445 {
1446 struct lp_build_tgsi_soa_context bld;
1447 struct tgsi_parse_context parse;
1448 uint num_immediates = 0;
1449 unsigned i;
1450
1451 /* Setup build context */
1452 memset(&bld, 0, sizeof bld);
1453 lp_build_context_init(&bld.base, builder, type);
1454 bld.mask = mask;
1455 bld.pos = pos;
1456 bld.inputs = inputs;
1457 bld.outputs = outputs;
1458 bld.consts_ptr = consts_ptr;
1459 bld.sampler = sampler;
1460
1461 tgsi_parse_init( &parse, tokens );
1462
1463 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1464 tgsi_parse_token( &parse );
1465
1466 switch( parse.FullToken.Token.Type ) {
1467 case TGSI_TOKEN_TYPE_DECLARATION:
1468 /* Inputs already interpolated */
1469 {
1470 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1471 _debug_printf("warning: failed to define LLVM variable\n");
1472 }
1473 break;
1474
1475 case TGSI_TOKEN_TYPE_INSTRUCTION:
1476 {
1477 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1478 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1479 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1480 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1481 info ? info->mnemonic : "<invalid>");
1482 }
1483
1484 break;
1485
1486 case TGSI_TOKEN_TYPE_IMMEDIATE:
1487 /* simply copy the immediate values into the next immediates[] slot */
1488 {
1489 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1490 assert(size <= 4);
1491 assert(num_immediates < LP_MAX_IMMEDIATES);
1492 for( i = 0; i < size; ++i )
1493 bld.immediates[num_immediates][i] =
1494 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1495 for( i = size; i < 4; ++i )
1496 bld.immediates[num_immediates][i] = bld.base.undef;
1497 num_immediates++;
1498 }
1499 break;
1500
1501 case TGSI_TOKEN_TYPE_PROPERTY:
1502 break;
1503
1504 default:
1505 assert( 0 );
1506 }
1507 }
1508
1509 tgsi_parse_free( &parse );
1510 }
1511