mesa: Add "shader/" path to #include statements in shader parser/lexer sources
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/* Capacities of the temps[] and immediates[] tables in the translation context. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/**
 * Loop header that steps CHAN over 0 .. NUM_CHANNELS-1.
 * CHAN must be a modifiable lvalue; it is parenthesized so that
 * expressions such as *p are incremented as a whole.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/**
 * Non-zero when bit CHAN is set in the write mask of the first
 * destination register of INST.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/**
 * Guards the statement that follows on IS_DST0_CHANNEL_ENABLED().
 * NOTE: expands to a bare `if`, so an `else` written after the guarded
 * statement attaches to this hidden `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header that runs the following statement once for every channel
 * enabled in the first destination's write mask.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element indices used by the swizzle_* tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
85
86
87 struct lp_build_tgsi_soa_context
88 {
89 struct lp_build_context base;
90
91 LLVMValueRef consts_ptr;
92 const LLVMValueRef *pos;
93 const LLVMValueRef (*inputs)[NUM_CHANNELS];
94 LLVMValueRef (*outputs)[NUM_CHANNELS];
95
96 struct lp_build_sampler_soa *sampler;
97
98 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
99 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
100
101 struct lp_build_mask_context *mask;
102 };
103
104
105 static const unsigned char
106 swizzle_left[4] = {
107 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
108 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
109 };
110
111 static const unsigned char
112 swizzle_right[4] = {
113 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
114 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
115 };
116
117 static const unsigned char
118 swizzle_top[4] = {
119 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
120 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
121 };
122
123 static const unsigned char
124 swizzle_bottom[4] = {
125 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
126 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
127 };
128
129
130 static LLVMValueRef
131 emit_ddx(struct lp_build_tgsi_soa_context *bld,
132 LLVMValueRef src)
133 {
134 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
135 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
136 return lp_build_sub(&bld->base, src_right, src_left);
137 }
138
139
140 static LLVMValueRef
141 emit_ddy(struct lp_build_tgsi_soa_context *bld,
142 LLVMValueRef src)
143 {
144 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
145 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
146 return lp_build_sub(&bld->base, src_top, src_bottom);
147 }
148
149
150 /**
151 * Register fetch.
152 */
153 static LLVMValueRef
154 emit_fetch(
155 struct lp_build_tgsi_soa_context *bld,
156 const struct tgsi_full_instruction *inst,
157 unsigned index,
158 const unsigned chan_index )
159 {
160 const struct tgsi_full_src_register *reg = &inst->Src[index];
161 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
162 LLVMValueRef res;
163
164 switch (swizzle) {
165 case TGSI_SWIZZLE_X:
166 case TGSI_SWIZZLE_Y:
167 case TGSI_SWIZZLE_Z:
168 case TGSI_SWIZZLE_W:
169
170 switch (reg->Register.File) {
171 case TGSI_FILE_CONSTANT: {
172 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
173 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
174 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
175 res = lp_build_broadcast_scalar(&bld->base, scalar);
176 break;
177 }
178
179 case TGSI_FILE_IMMEDIATE:
180 res = bld->immediates[reg->Register.Index][swizzle];
181 assert(res);
182 break;
183
184 case TGSI_FILE_INPUT:
185 res = bld->inputs[reg->Register.Index][swizzle];
186 assert(res);
187 break;
188
189 case TGSI_FILE_TEMPORARY:
190 res = bld->temps[reg->Register.Index][swizzle];
191 if(!res)
192 return bld->base.undef;
193 break;
194
195 default:
196 assert( 0 );
197 return bld->base.undef;
198 }
199 break;
200
201 default:
202 assert( 0 );
203 return bld->base.undef;
204 }
205
206 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
207 case TGSI_UTIL_SIGN_CLEAR:
208 res = lp_build_abs( &bld->base, res );
209 break;
210
211 case TGSI_UTIL_SIGN_SET:
212 /* TODO: Use bitwese OR for floating point */
213 res = lp_build_abs( &bld->base, res );
214 res = LLVMBuildNeg( bld->base.builder, res, "" );
215 break;
216
217 case TGSI_UTIL_SIGN_TOGGLE:
218 res = LLVMBuildNeg( bld->base.builder, res, "" );
219 break;
220
221 case TGSI_UTIL_SIGN_KEEP:
222 break;
223 }
224
225 return res;
226 }
227
228
229 /**
230 * Register fetch with derivatives.
231 */
232 static void
233 emit_fetch_deriv(
234 struct lp_build_tgsi_soa_context *bld,
235 const struct tgsi_full_instruction *inst,
236 unsigned index,
237 const unsigned chan_index,
238 LLVMValueRef *res,
239 LLVMValueRef *ddx,
240 LLVMValueRef *ddy)
241 {
242 LLVMValueRef src;
243
244 src = emit_fetch(bld, inst, index, chan_index);
245
246 if(res)
247 *res = src;
248
249 /* TODO: use interpolation coeffs for inputs */
250
251 if(ddx)
252 *ddx = emit_ddx(bld, src);
253
254 if(ddy)
255 *ddy = emit_ddy(bld, src);
256 }
257
258
259 /**
260 * Register store.
261 */
262 static void
263 emit_store(
264 struct lp_build_tgsi_soa_context *bld,
265 const struct tgsi_full_instruction *inst,
266 unsigned index,
267 unsigned chan_index,
268 LLVMValueRef value)
269 {
270 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
271
272 switch( inst->Instruction.Saturate ) {
273 case TGSI_SAT_NONE:
274 break;
275
276 case TGSI_SAT_ZERO_ONE:
277 value = lp_build_max(&bld->base, value, bld->base.zero);
278 value = lp_build_min(&bld->base, value, bld->base.one);
279 break;
280
281 case TGSI_SAT_MINUS_PLUS_ONE:
282 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
283 value = lp_build_min(&bld->base, value, bld->base.one);
284 break;
285
286 default:
287 assert(0);
288 }
289
290 switch( reg->Register.File ) {
291 case TGSI_FILE_OUTPUT:
292 bld->outputs[reg->Register.Index][chan_index] = value;
293 break;
294
295 case TGSI_FILE_TEMPORARY:
296 bld->temps[reg->Register.Index][chan_index] = value;
297 break;
298
299 case TGSI_FILE_ADDRESS:
300 /* FIXME */
301 assert(0);
302 break;
303
304 default:
305 assert( 0 );
306 }
307 }
308
309
310 /**
311 * High-level instruction translators.
312 */
313
314
315 static void
316 emit_tex( struct lp_build_tgsi_soa_context *bld,
317 const struct tgsi_full_instruction *inst,
318 boolean apply_lodbias,
319 boolean projected,
320 LLVMValueRef *texel)
321 {
322 const uint unit = inst->Src[1].Register.Index;
323 LLVMValueRef lodbias;
324 LLVMValueRef oow = NULL;
325 LLVMValueRef coords[3];
326 unsigned num_coords;
327 unsigned i;
328
329 switch (inst->Texture.Texture) {
330 case TGSI_TEXTURE_1D:
331 num_coords = 1;
332 break;
333 case TGSI_TEXTURE_2D:
334 case TGSI_TEXTURE_RECT:
335 num_coords = 2;
336 break;
337 case TGSI_TEXTURE_SHADOW1D:
338 case TGSI_TEXTURE_SHADOW2D:
339 case TGSI_TEXTURE_SHADOWRECT:
340 case TGSI_TEXTURE_3D:
341 case TGSI_TEXTURE_CUBE:
342 num_coords = 3;
343 break;
344 default:
345 assert(0);
346 return;
347 }
348
349 if(apply_lodbias)
350 lodbias = emit_fetch( bld, inst, 0, 3 );
351 else
352 lodbias = bld->base.zero;
353
354 if (projected) {
355 oow = emit_fetch( bld, inst, 0, 3 );
356 oow = lp_build_rcp(&bld->base, oow);
357 }
358
359 for (i = 0; i < num_coords; i++) {
360 coords[i] = emit_fetch( bld, inst, 0, i );
361 if (projected)
362 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
363 }
364 for (i = num_coords; i < 3; i++) {
365 coords[i] = bld->base.undef;
366 }
367
368 bld->sampler->emit_fetch_texel(bld->sampler,
369 bld->base.builder,
370 bld->base.type,
371 unit, num_coords, coords, lodbias,
372 texel);
373 }
374
375
376 static void
377 emit_kil(
378 struct lp_build_tgsi_soa_context *bld,
379 const struct tgsi_full_instruction *inst )
380 {
381 const struct tgsi_full_src_register *reg = &inst->Src[0];
382 LLVMValueRef terms[NUM_CHANNELS];
383 LLVMValueRef mask;
384 unsigned chan_index;
385
386 memset(&terms, 0, sizeof terms);
387
388 FOR_EACH_CHANNEL( chan_index ) {
389 unsigned swizzle;
390
391 /* Unswizzle channel */
392 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
393
394 /* Check if the component has not been already tested. */
395 assert(swizzle < NUM_CHANNELS);
396 if( !terms[swizzle] )
397 /* TODO: change the comparison operator instead of setting the sign */
398 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
399 }
400
401 mask = NULL;
402 FOR_EACH_CHANNEL( chan_index ) {
403 if(terms[chan_index]) {
404 LLVMValueRef chan_mask;
405
406 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
407
408 if(mask)
409 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
410 else
411 mask = chan_mask;
412 }
413 }
414
415 if(mask)
416 lp_build_mask_update(bld->mask, mask);
417 }
418
419
420 /**
421 * Check if inst src/dest regs use indirect addressing into temporary
422 * register file.
423 */
424 static boolean
425 indirect_temp_reference(const struct tgsi_full_instruction *inst)
426 {
427 uint i;
428 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
429 const struct tgsi_full_src_register *reg = &inst->Src[i];
430 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
431 reg->Register.Indirect)
432 return TRUE;
433 }
434 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
435 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
436 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
437 reg->Register.Indirect)
438 return TRUE;
439 }
440 return FALSE;
441 }
442
443
444 static int
445 emit_instruction(
446 struct lp_build_tgsi_soa_context *bld,
447 const struct tgsi_full_instruction *inst,
448 const struct tgsi_opcode_info *info)
449 {
450 unsigned chan_index;
451 LLVMValueRef src0, src1, src2;
452 LLVMValueRef tmp0, tmp1, tmp2;
453 LLVMValueRef tmp3 = NULL;
454 LLVMValueRef tmp4 = NULL;
455 LLVMValueRef tmp5 = NULL;
456 LLVMValueRef tmp6 = NULL;
457 LLVMValueRef tmp7 = NULL;
458 LLVMValueRef res;
459 LLVMValueRef dst0[NUM_CHANNELS];
460
461 /* we can't handle indirect addressing into temp register file yet */
462 if (indirect_temp_reference(inst))
463 return FALSE;
464
465 assert(info->num_dst <= 1);
466 if(info->num_dst) {
467 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
468 dst0[chan_index] = bld->base.undef;
469 }
470 }
471
472 switch (inst->Instruction.Opcode) {
473 #if 0
474 case TGSI_OPCODE_ARL:
475 /* FIXME */
476 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
477 tmp0 = emit_fetch( bld, inst, 0, chan_index );
478 emit_flr(bld, 0, 0);
479 emit_f2it( bld, 0 );
480 dst0[chan_index] = tmp0;
481 }
482 break;
483 #endif
484
485 case TGSI_OPCODE_MOV:
486 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
487 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
488 }
489 break;
490
491 case TGSI_OPCODE_LIT:
492 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
493 dst0[CHAN_X] = bld->base.one;
494 }
495 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
496 src0 = emit_fetch( bld, inst, 0, CHAN_X );
497 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
498 }
499 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
500 /* XMM[1] = SrcReg[0].yyyy */
501 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
502 /* XMM[1] = max(XMM[1], 0) */
503 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
504 /* XMM[2] = SrcReg[0].wwww */
505 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
506 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
507 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
508 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
509 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
510 }
511 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
512 dst0[CHAN_W] = bld->base.one;
513 }
514 break;
515
516 case TGSI_OPCODE_RCP:
517 /* TGSI_OPCODE_RECIP */
518 src0 = emit_fetch( bld, inst, 0, CHAN_X );
519 res = lp_build_rcp(&bld->base, src0);
520 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
521 dst0[chan_index] = res;
522 }
523 break;
524
525 case TGSI_OPCODE_RSQ:
526 /* TGSI_OPCODE_RECIPSQRT */
527 src0 = emit_fetch( bld, inst, 0, CHAN_X );
528 src0 = lp_build_abs(&bld->base, src0);
529 res = lp_build_rsqrt(&bld->base, src0);
530 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
531 dst0[chan_index] = res;
532 }
533 break;
534
535 case TGSI_OPCODE_EXP:
536 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
537 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
538 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
539 LLVMValueRef *p_exp2_int_part = NULL;
540 LLVMValueRef *p_frac_part = NULL;
541 LLVMValueRef *p_exp2 = NULL;
542
543 src0 = emit_fetch( bld, inst, 0, CHAN_X );
544
545 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
546 p_exp2_int_part = &tmp0;
547 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
548 p_frac_part = &tmp1;
549 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
550 p_exp2 = &tmp2;
551
552 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
553
554 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
555 dst0[CHAN_X] = tmp0;
556 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
557 dst0[CHAN_Y] = tmp1;
558 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
559 dst0[CHAN_Z] = tmp2;
560 }
561 /* dst.w = 1.0 */
562 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
563 dst0[CHAN_W] = bld->base.one;
564 }
565 break;
566
567 case TGSI_OPCODE_LOG:
568 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
569 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
570 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
571 LLVMValueRef *p_floor_log2 = NULL;
572 LLVMValueRef *p_exp = NULL;
573 LLVMValueRef *p_log2 = NULL;
574
575 src0 = emit_fetch( bld, inst, 0, CHAN_X );
576 src0 = lp_build_abs( &bld->base, src0 );
577
578 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
579 p_floor_log2 = &tmp0;
580 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
581 p_exp = &tmp1;
582 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
583 p_log2 = &tmp2;
584
585 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
586
587 /* dst.x = floor(lg2(abs(src.x))) */
588 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
589 dst0[CHAN_X] = tmp0;
590 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
591 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
592 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
593 }
594 /* dst.z = lg2(abs(src.x)) */
595 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
596 dst0[CHAN_Z] = tmp2;
597 }
598 /* dst.w = 1.0 */
599 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
600 dst0[CHAN_W] = bld->base.one;
601 }
602 break;
603
604 case TGSI_OPCODE_MUL:
605 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
606 src0 = emit_fetch( bld, inst, 0, chan_index );
607 src1 = emit_fetch( bld, inst, 1, chan_index );
608 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
609 }
610 break;
611
612 case TGSI_OPCODE_ADD:
613 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
614 src0 = emit_fetch( bld, inst, 0, chan_index );
615 src1 = emit_fetch( bld, inst, 1, chan_index );
616 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
617 }
618 break;
619
620 case TGSI_OPCODE_DP3:
621 /* TGSI_OPCODE_DOT3 */
622 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
623 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
624 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
625 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
626 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
627 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
628 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
629 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
630 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
631 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
632 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
633 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
634 dst0[chan_index] = tmp0;
635 }
636 break;
637
638 case TGSI_OPCODE_DP4:
639 /* TGSI_OPCODE_DOT4 */
640 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
641 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
642 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
643 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
644 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
645 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
646 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
647 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
648 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
649 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
650 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
651 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
652 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
653 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
654 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
655 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
656 dst0[chan_index] = tmp0;
657 }
658 break;
659
660 case TGSI_OPCODE_DST:
661 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
662 dst0[CHAN_X] = bld->base.one;
663 }
664 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
665 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
666 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
667 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
668 }
669 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
670 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
671 }
672 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
673 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
674 }
675 break;
676
677 case TGSI_OPCODE_MIN:
678 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
679 src0 = emit_fetch( bld, inst, 0, chan_index );
680 src1 = emit_fetch( bld, inst, 1, chan_index );
681 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
682 }
683 break;
684
685 case TGSI_OPCODE_MAX:
686 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
687 src0 = emit_fetch( bld, inst, 0, chan_index );
688 src1 = emit_fetch( bld, inst, 1, chan_index );
689 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
690 }
691 break;
692
693 case TGSI_OPCODE_SLT:
694 /* TGSI_OPCODE_SETLT */
695 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
696 src0 = emit_fetch( bld, inst, 0, chan_index );
697 src1 = emit_fetch( bld, inst, 1, chan_index );
698 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
699 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
700 }
701 break;
702
703 case TGSI_OPCODE_SGE:
704 /* TGSI_OPCODE_SETGE */
705 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
706 src0 = emit_fetch( bld, inst, 0, chan_index );
707 src1 = emit_fetch( bld, inst, 1, chan_index );
708 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
709 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
710 }
711 break;
712
713 case TGSI_OPCODE_MAD:
714 /* TGSI_OPCODE_MADD */
715 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
716 tmp0 = emit_fetch( bld, inst, 0, chan_index );
717 tmp1 = emit_fetch( bld, inst, 1, chan_index );
718 tmp2 = emit_fetch( bld, inst, 2, chan_index );
719 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
720 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
721 dst0[chan_index] = tmp0;
722 }
723 break;
724
725 case TGSI_OPCODE_SUB:
726 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
727 tmp0 = emit_fetch( bld, inst, 0, chan_index );
728 tmp1 = emit_fetch( bld, inst, 1, chan_index );
729 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
730 }
731 break;
732
733 case TGSI_OPCODE_LRP:
734 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
735 src0 = emit_fetch( bld, inst, 0, chan_index );
736 src1 = emit_fetch( bld, inst, 1, chan_index );
737 src2 = emit_fetch( bld, inst, 2, chan_index );
738 tmp0 = lp_build_sub( &bld->base, src1, src2 );
739 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
740 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
741 }
742 break;
743
744 case TGSI_OPCODE_CND:
745 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
746 src0 = emit_fetch( bld, inst, 0, chan_index );
747 src1 = emit_fetch( bld, inst, 1, chan_index );
748 src2 = emit_fetch( bld, inst, 2, chan_index );
749 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
750 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
751 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
752 }
753 break;
754
755 case TGSI_OPCODE_DP2A:
756 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
757 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
758 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
759 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
760 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
761 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
762 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
763 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
764 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
765 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
766 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
767 }
768 break;
769
770 case TGSI_OPCODE_FRC:
771 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
772 src0 = emit_fetch( bld, inst, 0, chan_index );
773 tmp0 = lp_build_floor(&bld->base, src0);
774 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
775 dst0[chan_index] = tmp0;
776 }
777 break;
778
779 case TGSI_OPCODE_CLAMP:
780 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
781 tmp0 = emit_fetch( bld, inst, 0, chan_index );
782 src1 = emit_fetch( bld, inst, 1, chan_index );
783 src2 = emit_fetch( bld, inst, 2, chan_index );
784 tmp0 = lp_build_max(&bld->base, tmp0, src1);
785 tmp0 = lp_build_min(&bld->base, tmp0, src2);
786 dst0[chan_index] = tmp0;
787 }
788 break;
789
790 case TGSI_OPCODE_FLR:
791 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
792 tmp0 = emit_fetch( bld, inst, 0, chan_index );
793 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
794 }
795 break;
796
797 case TGSI_OPCODE_ROUND:
798 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
799 tmp0 = emit_fetch( bld, inst, 0, chan_index );
800 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
801 }
802 break;
803
804 case TGSI_OPCODE_EX2: {
805 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
806 tmp0 = lp_build_exp2( &bld->base, tmp0);
807 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
808 dst0[chan_index] = tmp0;
809 }
810 break;
811 }
812
813 case TGSI_OPCODE_LG2:
814 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
815 tmp0 = lp_build_log2( &bld->base, tmp0);
816 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
817 dst0[chan_index] = tmp0;
818 }
819 break;
820
821 case TGSI_OPCODE_POW:
822 src0 = emit_fetch( bld, inst, 0, CHAN_X );
823 src1 = emit_fetch( bld, inst, 1, CHAN_X );
824 res = lp_build_pow( &bld->base, src0, src1 );
825 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
826 dst0[chan_index] = res;
827 }
828 break;
829
830 case TGSI_OPCODE_XPD:
831 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
832 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
833 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
834 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
835 }
836 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
837 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
838 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
839 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
840 }
841 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
842 tmp2 = tmp0;
843 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
844 tmp5 = tmp3;
845 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
846 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
847 dst0[CHAN_X] = tmp2;
848 }
849 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
850 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
851 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
852 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
853 }
854 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
855 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
856 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
857 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
858 dst0[CHAN_Y] = tmp3;
859 }
860 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
861 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
862 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
863 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
864 dst0[CHAN_Z] = tmp5;
865 }
866 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
867 dst0[CHAN_W] = bld->base.one;
868 }
869 break;
870
871 case TGSI_OPCODE_ABS:
872 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
873 tmp0 = emit_fetch( bld, inst, 0, chan_index );
874 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
875 }
876 break;
877
878 case TGSI_OPCODE_RCC:
879 /* deprecated? */
880 assert(0);
881 return 0;
882
883 case TGSI_OPCODE_DPH:
884 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
885 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
886 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
887 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
888 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
889 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
890 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
891 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
892 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
893 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
894 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
895 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
896 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
897 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
898 dst0[chan_index] = tmp0;
899 }
900 break;
901
902 case TGSI_OPCODE_COS:
903 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
904 tmp0 = lp_build_cos( &bld->base, tmp0 );
905 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
906 dst0[chan_index] = tmp0;
907 }
908 break;
909
910 case TGSI_OPCODE_DDX:
911 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
912 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
913 }
914 break;
915
916 case TGSI_OPCODE_DDY:
917 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
918 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
919 }
920 break;
921
922 case TGSI_OPCODE_KILP:
923 /* predicated kill */
924 /* FIXME */
925 return 0;
926 break;
927
928 case TGSI_OPCODE_KIL:
929 /* conditional kill */
930 emit_kil( bld, inst );
931 break;
932
933 case TGSI_OPCODE_PK2H:
934 return 0;
935 break;
936
937 case TGSI_OPCODE_PK2US:
938 return 0;
939 break;
940
941 case TGSI_OPCODE_PK4B:
942 return 0;
943 break;
944
945 case TGSI_OPCODE_PK4UB:
946 return 0;
947 break;
948
949 case TGSI_OPCODE_RFL:
950 return 0;
951 break;
952
953 case TGSI_OPCODE_SEQ:
954 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
955 src0 = emit_fetch( bld, inst, 0, chan_index );
956 src1 = emit_fetch( bld, inst, 1, chan_index );
957 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
958 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
959 }
960 break;
961
962 case TGSI_OPCODE_SFL:
963 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
964 dst0[chan_index] = bld->base.zero;
965 }
966 break;
967
968 case TGSI_OPCODE_SGT:
969 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
970 src0 = emit_fetch( bld, inst, 0, chan_index );
971 src1 = emit_fetch( bld, inst, 1, chan_index );
972 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
973 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
974 }
975 break;
976
977 case TGSI_OPCODE_SIN:
978 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
979 tmp0 = lp_build_sin( &bld->base, tmp0 );
980 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
981 dst0[chan_index] = tmp0;
982 }
983 break;
984
985 case TGSI_OPCODE_SLE:
986 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
987 src0 = emit_fetch( bld, inst, 0, chan_index );
988 src1 = emit_fetch( bld, inst, 1, chan_index );
989 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
990 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
991 }
992 break;
993
994 case TGSI_OPCODE_SNE:
995 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
996 src0 = emit_fetch( bld, inst, 0, chan_index );
997 src1 = emit_fetch( bld, inst, 1, chan_index );
998 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
999 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1000 }
1001 break;
1002
1003 case TGSI_OPCODE_STR:
1004 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1005 dst0[chan_index] = bld->base.one;
1006 }
1007 break;
1008
1009 case TGSI_OPCODE_TEX:
1010 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1011 break;
1012
1013 case TGSI_OPCODE_TXD:
1014 /* FIXME */
1015 return 0;
1016 break;
1017
1018 case TGSI_OPCODE_UP2H:
1019 /* deprecated */
1020 assert (0);
1021 return 0;
1022 break;
1023
1024 case TGSI_OPCODE_UP2US:
1025 /* deprecated */
1026 assert(0);
1027 return 0;
1028 break;
1029
1030 case TGSI_OPCODE_UP4B:
1031 /* deprecated */
1032 assert(0);
1033 return 0;
1034 break;
1035
1036 case TGSI_OPCODE_UP4UB:
1037 /* deprecated */
1038 assert(0);
1039 return 0;
1040 break;
1041
1042 case TGSI_OPCODE_X2D:
1043 /* deprecated? */
1044 assert(0);
1045 return 0;
1046 break;
1047
1048 case TGSI_OPCODE_ARA:
1049 /* deprecated */
1050 assert(0);
1051 return 0;
1052 break;
1053
1054 #if 0
1055 case TGSI_OPCODE_ARR:
1056 /* FIXME */
1057 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1058 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1059 emit_rnd( bld, 0, 0 );
1060 emit_f2it( bld, 0 );
1061 dst0[chan_index] = tmp0;
1062 }
1063 break;
1064 #endif
1065
1066 case TGSI_OPCODE_BRA:
1067 /* deprecated */
1068 assert(0);
1069 return 0;
1070 break;
1071
1072 case TGSI_OPCODE_CAL:
1073 /* FIXME */
1074 return 0;
1075 break;
1076
1077 case TGSI_OPCODE_RET:
1078 /* FIXME */
1079 return 0;
1080 break;
1081
1082 case TGSI_OPCODE_END:
1083 break;
1084
1085 case TGSI_OPCODE_SSG:
1086 /* TGSI_OPCODE_SGN */
1087 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1088 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1089 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1090 }
1091 break;
1092
1093 case TGSI_OPCODE_CMP:
1094 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1095 src0 = emit_fetch( bld, inst, 0, chan_index );
1096 src1 = emit_fetch( bld, inst, 1, chan_index );
1097 src2 = emit_fetch( bld, inst, 2, chan_index );
1098 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1099 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1100 }
1101 break;
1102
1103 case TGSI_OPCODE_SCS:
1104 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1105 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1106 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1107 }
1108 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1109 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1110 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1111 }
1112 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1113 dst0[CHAN_Z] = bld->base.zero;
1114 }
1115 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1116 dst0[CHAN_W] = bld->base.one;
1117 }
1118 break;
1119
1120 case TGSI_OPCODE_TXB:
1121 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1122 break;
1123
1124 case TGSI_OPCODE_NRM:
1125 /* fall-through */
1126 case TGSI_OPCODE_NRM4:
1127 /* 3 or 4-component normalization */
1128 {
1129 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1130
1131 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1132 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1133 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1134 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1135
1136 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1137
1138 /* xmm4 = src.x */
1139 /* xmm0 = src.x * src.x */
1140 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1141 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1142 tmp4 = tmp0;
1143 }
1144 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1145
1146 /* xmm5 = src.y */
1147 /* xmm0 = xmm0 + src.y * src.y */
1148 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1149 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1150 tmp5 = tmp1;
1151 }
1152 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1153 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1154
1155 /* xmm6 = src.z */
1156 /* xmm0 = xmm0 + src.z * src.z */
1157 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1158 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1159 tmp6 = tmp1;
1160 }
1161 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1162 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1163
1164 if (dims == 4) {
1165 /* xmm7 = src.w */
1166 /* xmm0 = xmm0 + src.w * src.w */
1167 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1168 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1169 tmp7 = tmp1;
1170 }
1171 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1172 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1173 }
1174
1175 /* xmm1 = 1 / sqrt(xmm0) */
1176 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1177
1178 /* dst.x = xmm1 * src.x */
1179 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1180 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1181 }
1182
1183 /* dst.y = xmm1 * src.y */
1184 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1185 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1186 }
1187
1188 /* dst.z = xmm1 * src.z */
1189 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1190 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1191 }
1192
1193 /* dst.w = xmm1 * src.w */
1194 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1195 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1196 }
1197 }
1198
1199 /* dst.w = 1.0 */
1200 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1201 dst0[CHAN_W] = bld->base.one;
1202 }
1203 }
1204 break;
1205
1206 case TGSI_OPCODE_DIV:
1207 /* deprecated */
1208 assert( 0 );
1209 return 0;
1210 break;
1211
1212 case TGSI_OPCODE_DP2:
1213 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1214 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1215 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1216 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1217 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1218 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1219 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1220 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1221 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1222 }
1223 break;
1224
1225 case TGSI_OPCODE_TXL:
1226 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1227 break;
1228
1229 case TGSI_OPCODE_TXP:
1230 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1231 break;
1232
1233 case TGSI_OPCODE_BRK:
1234 /* FIXME */
1235 return 0;
1236 break;
1237
1238 case TGSI_OPCODE_IF:
1239 /* FIXME */
1240 return 0;
1241 break;
1242
1243 case TGSI_OPCODE_BGNFOR:
1244 /* deprecated */
1245 assert(0);
1246 return 0;
1247 break;
1248
1249 case TGSI_OPCODE_REP:
1250 /* deprecated */
1251 assert(0);
1252 return 0;
1253 break;
1254
1255 case TGSI_OPCODE_ELSE:
1256 /* FIXME */
1257 return 0;
1258 break;
1259
1260 case TGSI_OPCODE_ENDIF:
1261 /* FIXME */
1262 return 0;
1263 break;
1264
1265 case TGSI_OPCODE_ENDFOR:
1266 /* deprecated */
1267 assert(0);
1268 return 0;
1269 break;
1270
1271 case TGSI_OPCODE_ENDREP:
1272 /* deprecated */
1273 assert(0);
1274 return 0;
1275 break;
1276
1277 case TGSI_OPCODE_PUSHA:
1278 /* deprecated? */
1279 assert(0);
1280 return 0;
1281 break;
1282
1283 case TGSI_OPCODE_POPA:
1284 /* deprecated? */
1285 assert(0);
1286 return 0;
1287 break;
1288
1289 case TGSI_OPCODE_CEIL:
1290 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1291 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1292 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1293 }
1294 break;
1295
1296 case TGSI_OPCODE_I2F:
1297 /* deprecated? */
1298 assert(0);
1299 return 0;
1300 break;
1301
1302 case TGSI_OPCODE_NOT:
1303 /* deprecated? */
1304 assert(0);
1305 return 0;
1306 break;
1307
1308 case TGSI_OPCODE_TRUNC:
1309 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1310 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1311 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1312 }
1313 break;
1314
1315 case TGSI_OPCODE_SHL:
1316 /* deprecated? */
1317 assert(0);
1318 return 0;
1319 break;
1320
1321 case TGSI_OPCODE_ISHR:
1322 /* deprecated? */
1323 assert(0);
1324 return 0;
1325 break;
1326
1327 case TGSI_OPCODE_AND:
1328 /* deprecated? */
1329 assert(0);
1330 return 0;
1331 break;
1332
1333 case TGSI_OPCODE_OR:
1334 /* deprecated? */
1335 assert(0);
1336 return 0;
1337 break;
1338
1339 case TGSI_OPCODE_MOD:
1340 /* deprecated? */
1341 assert(0);
1342 return 0;
1343 break;
1344
1345 case TGSI_OPCODE_XOR:
1346 /* deprecated? */
1347 assert(0);
1348 return 0;
1349 break;
1350
1351 case TGSI_OPCODE_SAD:
1352 /* deprecated? */
1353 assert(0);
1354 return 0;
1355 break;
1356
1357 case TGSI_OPCODE_TXF:
1358 /* deprecated? */
1359 assert(0);
1360 return 0;
1361 break;
1362
1363 case TGSI_OPCODE_TXQ:
1364 /* deprecated? */
1365 assert(0);
1366 return 0;
1367 break;
1368
1369 case TGSI_OPCODE_CONT:
1370 /* deprecated? */
1371 assert(0);
1372 return 0;
1373 break;
1374
1375 case TGSI_OPCODE_EMIT:
1376 return 0;
1377 break;
1378
1379 case TGSI_OPCODE_ENDPRIM:
1380 return 0;
1381 break;
1382
1383 case TGSI_OPCODE_NOP:
1384 break;
1385
1386 default:
1387 return 0;
1388 }
1389
1390 if(info->num_dst) {
1391 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1392 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1393 }
1394 }
1395
1396 return 1;
1397 }
1398
1399
1400 void
1401 lp_build_tgsi_soa(LLVMBuilderRef builder,
1402 const struct tgsi_token *tokens,
1403 struct lp_type type,
1404 struct lp_build_mask_context *mask,
1405 LLVMValueRef consts_ptr,
1406 const LLVMValueRef *pos,
1407 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1408 LLVMValueRef (*outputs)[NUM_CHANNELS],
1409 struct lp_build_sampler_soa *sampler)
1410 {
1411 struct lp_build_tgsi_soa_context bld;
1412 struct tgsi_parse_context parse;
1413 uint num_immediates = 0;
1414 unsigned i;
1415
1416 /* Setup build context */
1417 memset(&bld, 0, sizeof bld);
1418 lp_build_context_init(&bld.base, builder, type);
1419 bld.mask = mask;
1420 bld.pos = pos;
1421 bld.inputs = inputs;
1422 bld.outputs = outputs;
1423 bld.consts_ptr = consts_ptr;
1424 bld.sampler = sampler;
1425
1426 tgsi_parse_init( &parse, tokens );
1427
1428 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1429 tgsi_parse_token( &parse );
1430
1431 switch( parse.FullToken.Token.Type ) {
1432 case TGSI_TOKEN_TYPE_DECLARATION:
1433 /* Inputs already interpolated */
1434 break;
1435
1436 case TGSI_TOKEN_TYPE_INSTRUCTION:
1437 {
1438 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1439 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1440 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1441 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1442 info ? info->mnemonic : "<invalid>");
1443 }
1444
1445 break;
1446
1447 case TGSI_TOKEN_TYPE_IMMEDIATE:
1448 /* simply copy the immediate values into the next immediates[] slot */
1449 {
1450 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1451 assert(size <= 4);
1452 assert(num_immediates < LP_MAX_IMMEDIATES);
1453 for( i = 0; i < size; ++i )
1454 bld.immediates[num_immediates][i] =
1455 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1456 for( i = size; i < 4; ++i )
1457 bld.immediates[num_immediates][i] = bld.base.undef;
1458 num_immediates++;
1459 }
1460 break;
1461
1462 default:
1463 assert( 0 );
1464 }
1465 }
1466
1467 tgsi_parse_free( &parse );
1468 }
1469