Merge branch 'mesa_7_7_branch'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55
56
/* Number of rows in the per-shader temporary / immediate register tables. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/** Loop header: step CHAN through every channel index below NUM_CHANNELS. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/** Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/**
 * Statement prefix: run the following statement only if CHAN is enabled.
 * Expands to a bare `if`, so an `else` placed after the guarded statement
 * binds to this `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Loop header: visit only the channels enabled in INST's first destination. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Positions of the four pixels inside a 2x2 quad. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
85 struct lp_build_tgsi_soa_context
86 {
87 struct lp_build_context base;
88
89 LLVMValueRef consts_ptr;
90 const LLVMValueRef *pos;
91 const LLVMValueRef (*inputs)[NUM_CHANNELS];
92 LLVMValueRef (*outputs)[NUM_CHANNELS];
93
94 struct lp_build_sampler_soa *sampler;
95
96 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
97 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
98
99 struct lp_build_mask_context *mask;
100 };
101
102
103 static const unsigned char
104 swizzle_left[4] = {
105 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
106 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
107 };
108
109 static const unsigned char
110 swizzle_right[4] = {
111 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
112 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
113 };
114
115 static const unsigned char
116 swizzle_top[4] = {
117 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
118 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
119 };
120
121 static const unsigned char
122 swizzle_bottom[4] = {
123 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
124 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
125 };
126
127
128 static LLVMValueRef
129 emit_ddx(struct lp_build_tgsi_soa_context *bld,
130 LLVMValueRef src)
131 {
132 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
133 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
134 return lp_build_sub(&bld->base, src_right, src_left);
135 }
136
137
138 static LLVMValueRef
139 emit_ddy(struct lp_build_tgsi_soa_context *bld,
140 LLVMValueRef src)
141 {
142 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
143 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
144 return lp_build_sub(&bld->base, src_top, src_bottom);
145 }
146
147
148 /**
149 * Register fetch.
150 */
151 static LLVMValueRef
152 emit_fetch(
153 struct lp_build_tgsi_soa_context *bld,
154 const struct tgsi_full_instruction *inst,
155 unsigned index,
156 const unsigned chan_index )
157 {
158 const struct tgsi_full_src_register *reg = &inst->Src[index];
159 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
160 LLVMValueRef res;
161
162 switch (swizzle) {
163 case TGSI_SWIZZLE_X:
164 case TGSI_SWIZZLE_Y:
165 case TGSI_SWIZZLE_Z:
166 case TGSI_SWIZZLE_W:
167
168 switch (reg->Register.File) {
169 case TGSI_FILE_CONSTANT: {
170 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
171 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
172 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
173 res = lp_build_broadcast_scalar(&bld->base, scalar);
174 break;
175 }
176
177 case TGSI_FILE_IMMEDIATE:
178 res = bld->immediates[reg->Register.Index][swizzle];
179 assert(res);
180 break;
181
182 case TGSI_FILE_INPUT:
183 res = bld->inputs[reg->Register.Index][swizzle];
184 assert(res);
185 break;
186
187 case TGSI_FILE_TEMPORARY:
188 res = bld->temps[reg->Register.Index][swizzle];
189 if(!res)
190 return bld->base.undef;
191 break;
192
193 default:
194 assert( 0 );
195 return bld->base.undef;
196 }
197 break;
198
199 default:
200 assert( 0 );
201 return bld->base.undef;
202 }
203
204 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
205 case TGSI_UTIL_SIGN_CLEAR:
206 res = lp_build_abs( &bld->base, res );
207 break;
208
209 case TGSI_UTIL_SIGN_SET:
210 /* TODO: Use bitwese OR for floating point */
211 res = lp_build_abs( &bld->base, res );
212 res = LLVMBuildNeg( bld->base.builder, res, "" );
213 break;
214
215 case TGSI_UTIL_SIGN_TOGGLE:
216 res = LLVMBuildNeg( bld->base.builder, res, "" );
217 break;
218
219 case TGSI_UTIL_SIGN_KEEP:
220 break;
221 }
222
223 return res;
224 }
225
226
227 /**
228 * Register fetch with derivatives.
229 */
230 static void
231 emit_fetch_deriv(
232 struct lp_build_tgsi_soa_context *bld,
233 const struct tgsi_full_instruction *inst,
234 unsigned index,
235 const unsigned chan_index,
236 LLVMValueRef *res,
237 LLVMValueRef *ddx,
238 LLVMValueRef *ddy)
239 {
240 LLVMValueRef src;
241
242 src = emit_fetch(bld, inst, index, chan_index);
243
244 if(res)
245 *res = src;
246
247 /* TODO: use interpolation coeffs for inputs */
248
249 if(ddx)
250 *ddx = emit_ddx(bld, src);
251
252 if(ddy)
253 *ddy = emit_ddy(bld, src);
254 }
255
256
257 /**
258 * Register store.
259 */
260 static void
261 emit_store(
262 struct lp_build_tgsi_soa_context *bld,
263 const struct tgsi_full_instruction *inst,
264 unsigned index,
265 unsigned chan_index,
266 LLVMValueRef value)
267 {
268 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
269
270 switch( inst->Instruction.Saturate ) {
271 case TGSI_SAT_NONE:
272 break;
273
274 case TGSI_SAT_ZERO_ONE:
275 value = lp_build_max(&bld->base, value, bld->base.zero);
276 value = lp_build_min(&bld->base, value, bld->base.one);
277 break;
278
279 case TGSI_SAT_MINUS_PLUS_ONE:
280 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
281 value = lp_build_min(&bld->base, value, bld->base.one);
282 break;
283
284 default:
285 assert(0);
286 }
287
288 switch( reg->Register.File ) {
289 case TGSI_FILE_OUTPUT:
290 bld->outputs[reg->Register.Index][chan_index] = value;
291 break;
292
293 case TGSI_FILE_TEMPORARY:
294 bld->temps[reg->Register.Index][chan_index] = value;
295 break;
296
297 case TGSI_FILE_ADDRESS:
298 /* FIXME */
299 assert(0);
300 break;
301
302 default:
303 assert( 0 );
304 }
305 }
306
307
308 /**
309 * High-level instruction translators.
310 */
311
312
313 static void
314 emit_tex( struct lp_build_tgsi_soa_context *bld,
315 const struct tgsi_full_instruction *inst,
316 boolean apply_lodbias,
317 boolean projected,
318 LLVMValueRef *texel)
319 {
320 const uint unit = inst->Src[1].Register.Index;
321 LLVMValueRef lodbias;
322 LLVMValueRef oow = NULL;
323 LLVMValueRef coords[3];
324 unsigned num_coords;
325 unsigned i;
326
327 switch (inst->Texture.Texture) {
328 case TGSI_TEXTURE_1D:
329 num_coords = 1;
330 break;
331 case TGSI_TEXTURE_2D:
332 case TGSI_TEXTURE_RECT:
333 num_coords = 2;
334 break;
335 case TGSI_TEXTURE_SHADOW1D:
336 case TGSI_TEXTURE_SHADOW2D:
337 case TGSI_TEXTURE_SHADOWRECT:
338 case TGSI_TEXTURE_3D:
339 case TGSI_TEXTURE_CUBE:
340 num_coords = 3;
341 break;
342 default:
343 assert(0);
344 return;
345 }
346
347 if(apply_lodbias)
348 lodbias = emit_fetch( bld, inst, 0, 3 );
349 else
350 lodbias = bld->base.zero;
351
352 if (projected) {
353 oow = emit_fetch( bld, inst, 0, 3 );
354 oow = lp_build_rcp(&bld->base, oow);
355 }
356
357 for (i = 0; i < num_coords; i++) {
358 coords[i] = emit_fetch( bld, inst, 0, i );
359 if (projected)
360 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
361 }
362 for (i = num_coords; i < 3; i++) {
363 coords[i] = bld->base.undef;
364 }
365
366 bld->sampler->emit_fetch_texel(bld->sampler,
367 bld->base.builder,
368 bld->base.type,
369 unit, num_coords, coords, lodbias,
370 texel);
371 }
372
373
374 static void
375 emit_kil(
376 struct lp_build_tgsi_soa_context *bld,
377 const struct tgsi_full_instruction *inst )
378 {
379 const struct tgsi_full_src_register *reg = &inst->Src[0];
380 LLVMValueRef terms[NUM_CHANNELS];
381 LLVMValueRef mask;
382 unsigned chan_index;
383
384 memset(&terms, 0, sizeof terms);
385
386 FOR_EACH_CHANNEL( chan_index ) {
387 unsigned swizzle;
388
389 /* Unswizzle channel */
390 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
391
392 /* Check if the component has not been already tested. */
393 assert(swizzle < NUM_CHANNELS);
394 if( !terms[swizzle] )
395 /* TODO: change the comparison operator instead of setting the sign */
396 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
397 }
398
399 mask = NULL;
400 FOR_EACH_CHANNEL( chan_index ) {
401 if(terms[chan_index]) {
402 LLVMValueRef chan_mask;
403
404 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
405
406 if(mask)
407 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
408 else
409 mask = chan_mask;
410 }
411 }
412
413 if(mask)
414 lp_build_mask_update(bld->mask, mask);
415 }
416
417
418 /**
419 * Check if inst src/dest regs use indirect addressing into temporary
420 * register file.
421 */
422 static boolean
423 indirect_temp_reference(const struct tgsi_full_instruction *inst)
424 {
425 uint i;
426 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
427 const struct tgsi_full_src_register *reg = &inst->Src[i];
428 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
429 reg->Register.Indirect)
430 return TRUE;
431 }
432 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
433 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
434 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
435 reg->Register.Indirect)
436 return TRUE;
437 }
438 return FALSE;
439 }
440
441
442 static int
443 emit_instruction(
444 struct lp_build_tgsi_soa_context *bld,
445 const struct tgsi_full_instruction *inst,
446 const struct tgsi_opcode_info *info)
447 {
448 unsigned chan_index;
449 LLVMValueRef src0, src1, src2;
450 LLVMValueRef tmp0, tmp1, tmp2;
451 LLVMValueRef tmp3 = NULL;
452 LLVMValueRef tmp4 = NULL;
453 LLVMValueRef tmp5 = NULL;
454 LLVMValueRef tmp6 = NULL;
455 LLVMValueRef tmp7 = NULL;
456 LLVMValueRef res;
457 LLVMValueRef dst0[NUM_CHANNELS];
458
459 /* we can't handle indirect addressing into temp register file yet */
460 if (indirect_temp_reference(inst))
461 return FALSE;
462
463 assert(info->num_dst <= 1);
464 if(info->num_dst) {
465 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
466 dst0[chan_index] = bld->base.undef;
467 }
468 }
469
470 switch (inst->Instruction.Opcode) {
471 #if 0
472 case TGSI_OPCODE_ARL:
473 /* FIXME */
474 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
475 tmp0 = emit_fetch( bld, inst, 0, chan_index );
476 emit_flr(bld, 0, 0);
477 emit_f2it( bld, 0 );
478 dst0[chan_index] = tmp0;
479 }
480 break;
481 #endif
482
483 case TGSI_OPCODE_MOV:
484 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
485 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
486 }
487 break;
488
489 case TGSI_OPCODE_LIT:
490 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
491 dst0[CHAN_X] = bld->base.one;
492 }
493 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
494 src0 = emit_fetch( bld, inst, 0, CHAN_X );
495 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
496 }
497 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
498 /* XMM[1] = SrcReg[0].yyyy */
499 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
500 /* XMM[1] = max(XMM[1], 0) */
501 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
502 /* XMM[2] = SrcReg[0].wwww */
503 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
504 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
505 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
506 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
507 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
508 }
509 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
510 dst0[CHAN_W] = bld->base.one;
511 }
512 break;
513
514 case TGSI_OPCODE_RCP:
515 /* TGSI_OPCODE_RECIP */
516 src0 = emit_fetch( bld, inst, 0, CHAN_X );
517 res = lp_build_rcp(&bld->base, src0);
518 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
519 dst0[chan_index] = res;
520 }
521 break;
522
523 case TGSI_OPCODE_RSQ:
524 /* TGSI_OPCODE_RECIPSQRT */
525 src0 = emit_fetch( bld, inst, 0, CHAN_X );
526 src0 = lp_build_abs(&bld->base, src0);
527 res = lp_build_rsqrt(&bld->base, src0);
528 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
529 dst0[chan_index] = res;
530 }
531 break;
532
533 case TGSI_OPCODE_EXP:
534 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
535 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
536 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
537 LLVMValueRef *p_exp2_int_part = NULL;
538 LLVMValueRef *p_frac_part = NULL;
539 LLVMValueRef *p_exp2 = NULL;
540
541 src0 = emit_fetch( bld, inst, 0, CHAN_X );
542
543 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
544 p_exp2_int_part = &tmp0;
545 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
546 p_frac_part = &tmp1;
547 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
548 p_exp2 = &tmp2;
549
550 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
551
552 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
553 dst0[CHAN_X] = tmp0;
554 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
555 dst0[CHAN_Y] = tmp1;
556 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
557 dst0[CHAN_Z] = tmp2;
558 }
559 /* dst.w = 1.0 */
560 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
561 dst0[CHAN_W] = bld->base.one;
562 }
563 break;
564
565 case TGSI_OPCODE_LOG:
566 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
567 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
568 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
569 LLVMValueRef *p_floor_log2 = NULL;
570 LLVMValueRef *p_exp = NULL;
571 LLVMValueRef *p_log2 = NULL;
572
573 src0 = emit_fetch( bld, inst, 0, CHAN_X );
574 src0 = lp_build_abs( &bld->base, src0 );
575
576 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
577 p_floor_log2 = &tmp0;
578 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
579 p_exp = &tmp1;
580 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
581 p_log2 = &tmp2;
582
583 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
584
585 /* dst.x = floor(lg2(abs(src.x))) */
586 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
587 dst0[CHAN_X] = tmp0;
588 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
589 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
590 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
591 }
592 /* dst.z = lg2(abs(src.x)) */
593 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
594 dst0[CHAN_Z] = tmp2;
595 }
596 /* dst.w = 1.0 */
597 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
598 dst0[CHAN_W] = bld->base.one;
599 }
600 break;
601
602 case TGSI_OPCODE_MUL:
603 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
604 src0 = emit_fetch( bld, inst, 0, chan_index );
605 src1 = emit_fetch( bld, inst, 1, chan_index );
606 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
607 }
608 break;
609
610 case TGSI_OPCODE_ADD:
611 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
612 src0 = emit_fetch( bld, inst, 0, chan_index );
613 src1 = emit_fetch( bld, inst, 1, chan_index );
614 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
615 }
616 break;
617
618 case TGSI_OPCODE_DP3:
619 /* TGSI_OPCODE_DOT3 */
620 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
621 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
622 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
623 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
624 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
625 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
626 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
627 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
628 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
629 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
630 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
631 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
632 dst0[chan_index] = tmp0;
633 }
634 break;
635
636 case TGSI_OPCODE_DP4:
637 /* TGSI_OPCODE_DOT4 */
638 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
639 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
640 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
641 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
642 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
643 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
644 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
645 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
646 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
647 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
648 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
649 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
650 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
651 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
652 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
653 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
654 dst0[chan_index] = tmp0;
655 }
656 break;
657
658 case TGSI_OPCODE_DST:
659 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
660 dst0[CHAN_X] = bld->base.one;
661 }
662 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
663 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
664 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
665 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
666 }
667 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
668 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
669 }
670 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
671 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
672 }
673 break;
674
675 case TGSI_OPCODE_MIN:
676 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
677 src0 = emit_fetch( bld, inst, 0, chan_index );
678 src1 = emit_fetch( bld, inst, 1, chan_index );
679 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
680 }
681 break;
682
683 case TGSI_OPCODE_MAX:
684 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
685 src0 = emit_fetch( bld, inst, 0, chan_index );
686 src1 = emit_fetch( bld, inst, 1, chan_index );
687 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
688 }
689 break;
690
691 case TGSI_OPCODE_SLT:
692 /* TGSI_OPCODE_SETLT */
693 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
694 src0 = emit_fetch( bld, inst, 0, chan_index );
695 src1 = emit_fetch( bld, inst, 1, chan_index );
696 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
697 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
698 }
699 break;
700
701 case TGSI_OPCODE_SGE:
702 /* TGSI_OPCODE_SETGE */
703 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
704 src0 = emit_fetch( bld, inst, 0, chan_index );
705 src1 = emit_fetch( bld, inst, 1, chan_index );
706 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
707 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
708 }
709 break;
710
711 case TGSI_OPCODE_MAD:
712 /* TGSI_OPCODE_MADD */
713 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
714 tmp0 = emit_fetch( bld, inst, 0, chan_index );
715 tmp1 = emit_fetch( bld, inst, 1, chan_index );
716 tmp2 = emit_fetch( bld, inst, 2, chan_index );
717 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
718 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
719 dst0[chan_index] = tmp0;
720 }
721 break;
722
723 case TGSI_OPCODE_SUB:
724 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
725 tmp0 = emit_fetch( bld, inst, 0, chan_index );
726 tmp1 = emit_fetch( bld, inst, 1, chan_index );
727 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
728 }
729 break;
730
731 case TGSI_OPCODE_LRP:
732 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
733 src0 = emit_fetch( bld, inst, 0, chan_index );
734 src1 = emit_fetch( bld, inst, 1, chan_index );
735 src2 = emit_fetch( bld, inst, 2, chan_index );
736 tmp0 = lp_build_sub( &bld->base, src1, src2 );
737 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
738 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
739 }
740 break;
741
742 case TGSI_OPCODE_CND:
743 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
744 src0 = emit_fetch( bld, inst, 0, chan_index );
745 src1 = emit_fetch( bld, inst, 1, chan_index );
746 src2 = emit_fetch( bld, inst, 2, chan_index );
747 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
748 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
749 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
750 }
751 break;
752
753 case TGSI_OPCODE_DP2A:
754 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
755 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
756 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
757 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
758 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
759 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
760 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
761 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
762 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
763 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
764 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
765 }
766 break;
767
768 case TGSI_OPCODE_FRC:
769 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
770 src0 = emit_fetch( bld, inst, 0, chan_index );
771 tmp0 = lp_build_floor(&bld->base, src0);
772 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
773 dst0[chan_index] = tmp0;
774 }
775 break;
776
777 case TGSI_OPCODE_CLAMP:
778 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
779 tmp0 = emit_fetch( bld, inst, 0, chan_index );
780 src1 = emit_fetch( bld, inst, 1, chan_index );
781 src2 = emit_fetch( bld, inst, 2, chan_index );
782 tmp0 = lp_build_max(&bld->base, tmp0, src1);
783 tmp0 = lp_build_min(&bld->base, tmp0, src2);
784 dst0[chan_index] = tmp0;
785 }
786 break;
787
788 case TGSI_OPCODE_FLR:
789 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
790 tmp0 = emit_fetch( bld, inst, 0, chan_index );
791 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
792 }
793 break;
794
795 case TGSI_OPCODE_ROUND:
796 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
797 tmp0 = emit_fetch( bld, inst, 0, chan_index );
798 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
799 }
800 break;
801
802 case TGSI_OPCODE_EX2: {
803 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
804 tmp0 = lp_build_exp2( &bld->base, tmp0);
805 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
806 dst0[chan_index] = tmp0;
807 }
808 break;
809 }
810
811 case TGSI_OPCODE_LG2:
812 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
813 tmp0 = lp_build_log2( &bld->base, tmp0);
814 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
815 dst0[chan_index] = tmp0;
816 }
817 break;
818
819 case TGSI_OPCODE_POW:
820 src0 = emit_fetch( bld, inst, 0, CHAN_X );
821 src1 = emit_fetch( bld, inst, 1, CHAN_X );
822 res = lp_build_pow( &bld->base, src0, src1 );
823 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
824 dst0[chan_index] = res;
825 }
826 break;
827
828 case TGSI_OPCODE_XPD:
829 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
830 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
831 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
832 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
833 }
834 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
835 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
836 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
837 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
838 }
839 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
840 tmp2 = tmp0;
841 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
842 tmp5 = tmp3;
843 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
844 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
845 dst0[CHAN_X] = tmp2;
846 }
847 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
848 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
849 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
850 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
851 }
852 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
853 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
854 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
855 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
856 dst0[CHAN_Y] = tmp3;
857 }
858 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
859 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
860 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
861 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
862 dst0[CHAN_Z] = tmp5;
863 }
864 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
865 dst0[CHAN_W] = bld->base.one;
866 }
867 break;
868
869 case TGSI_OPCODE_ABS:
870 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
871 tmp0 = emit_fetch( bld, inst, 0, chan_index );
872 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
873 }
874 break;
875
876 case TGSI_OPCODE_RCC:
877 /* deprecated? */
878 assert(0);
879 return 0;
880
881 case TGSI_OPCODE_DPH:
882 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
883 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
884 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
885 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
886 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
887 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
888 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
889 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
890 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
891 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
892 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
893 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
894 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
895 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
896 dst0[chan_index] = tmp0;
897 }
898 break;
899
900 case TGSI_OPCODE_COS:
901 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
902 tmp0 = lp_build_cos( &bld->base, tmp0 );
903 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
904 dst0[chan_index] = tmp0;
905 }
906 break;
907
908 case TGSI_OPCODE_DDX:
909 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
910 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
911 }
912 break;
913
914 case TGSI_OPCODE_DDY:
915 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
916 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
917 }
918 break;
919
920 case TGSI_OPCODE_KILP:
921 /* predicated kill */
922 /* FIXME */
923 return 0;
924 break;
925
926 case TGSI_OPCODE_KIL:
927 /* conditional kill */
928 emit_kil( bld, inst );
929 break;
930
931 case TGSI_OPCODE_PK2H:
932 return 0;
933 break;
934
935 case TGSI_OPCODE_PK2US:
936 return 0;
937 break;
938
939 case TGSI_OPCODE_PK4B:
940 return 0;
941 break;
942
943 case TGSI_OPCODE_PK4UB:
944 return 0;
945 break;
946
947 case TGSI_OPCODE_RFL:
948 return 0;
949 break;
950
951 case TGSI_OPCODE_SEQ:
952 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
953 src0 = emit_fetch( bld, inst, 0, chan_index );
954 src1 = emit_fetch( bld, inst, 1, chan_index );
955 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
956 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
957 }
958 break;
959
960 case TGSI_OPCODE_SFL:
961 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
962 dst0[chan_index] = bld->base.zero;
963 }
964 break;
965
966 case TGSI_OPCODE_SGT:
967 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
968 src0 = emit_fetch( bld, inst, 0, chan_index );
969 src1 = emit_fetch( bld, inst, 1, chan_index );
970 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
971 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
972 }
973 break;
974
975 case TGSI_OPCODE_SIN:
976 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
977 tmp0 = lp_build_sin( &bld->base, tmp0 );
978 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
979 dst0[chan_index] = tmp0;
980 }
981 break;
982
983 case TGSI_OPCODE_SLE:
984 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
985 src0 = emit_fetch( bld, inst, 0, chan_index );
986 src1 = emit_fetch( bld, inst, 1, chan_index );
987 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
988 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
989 }
990 break;
991
992 case TGSI_OPCODE_SNE:
993 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
994 src0 = emit_fetch( bld, inst, 0, chan_index );
995 src1 = emit_fetch( bld, inst, 1, chan_index );
996 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
997 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
998 }
999 break;
1000
1001 case TGSI_OPCODE_STR:
1002 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1003 dst0[chan_index] = bld->base.one;
1004 }
1005 break;
1006
1007 case TGSI_OPCODE_TEX:
1008 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1009 break;
1010
1011 case TGSI_OPCODE_TXD:
1012 /* FIXME */
1013 return 0;
1014 break;
1015
1016 case TGSI_OPCODE_UP2H:
1017 /* deprecated */
1018 assert (0);
1019 return 0;
1020 break;
1021
1022 case TGSI_OPCODE_UP2US:
1023 /* deprecated */
1024 assert(0);
1025 return 0;
1026 break;
1027
1028 case TGSI_OPCODE_UP4B:
1029 /* deprecated */
1030 assert(0);
1031 return 0;
1032 break;
1033
1034 case TGSI_OPCODE_UP4UB:
1035 /* deprecated */
1036 assert(0);
1037 return 0;
1038 break;
1039
1040 case TGSI_OPCODE_X2D:
1041 /* deprecated? */
1042 assert(0);
1043 return 0;
1044 break;
1045
1046 case TGSI_OPCODE_ARA:
1047 /* deprecated */
1048 assert(0);
1049 return 0;
1050 break;
1051
1052 #if 0
1053 case TGSI_OPCODE_ARR:
1054 /* FIXME */
1055 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1056 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1057 emit_rnd( bld, 0, 0 );
1058 emit_f2it( bld, 0 );
1059 dst0[chan_index] = tmp0;
1060 }
1061 break;
1062 #endif
1063
1064 case TGSI_OPCODE_BRA:
1065 /* deprecated */
1066 assert(0);
1067 return 0;
1068 break;
1069
1070 case TGSI_OPCODE_CAL:
1071 /* FIXME */
1072 return 0;
1073 break;
1074
1075 case TGSI_OPCODE_RET:
1076 /* FIXME */
1077 return 0;
1078 break;
1079
1080 case TGSI_OPCODE_END:
1081 break;
1082
1083 case TGSI_OPCODE_SSG:
1084 /* TGSI_OPCODE_SGN */
1085 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1086 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1087 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1088 }
1089 break;
1090
1091 case TGSI_OPCODE_CMP:
1092 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1093 src0 = emit_fetch( bld, inst, 0, chan_index );
1094 src1 = emit_fetch( bld, inst, 1, chan_index );
1095 src2 = emit_fetch( bld, inst, 2, chan_index );
1096 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1097 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1098 }
1099 break;
1100
1101 case TGSI_OPCODE_SCS:
1102 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1103 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1104 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1105 }
1106 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1107 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1108 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1109 }
1110 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1111 dst0[CHAN_Z] = bld->base.zero;
1112 }
1113 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1114 dst0[CHAN_W] = bld->base.one;
1115 }
1116 break;
1117
1118 case TGSI_OPCODE_TXB:
1119 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1120 break;
1121
1122 case TGSI_OPCODE_NRM:
1123 /* fall-through */
1124 case TGSI_OPCODE_NRM4:
1125 /* 3 or 4-component normalization */
1126 {
1127 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1128
1129 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1130 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1131 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1132 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1133
1134 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1135
1136 /* xmm4 = src.x */
1137 /* xmm0 = src.x * src.x */
1138 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1139 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1140 tmp4 = tmp0;
1141 }
1142 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1143
1144 /* xmm5 = src.y */
1145 /* xmm0 = xmm0 + src.y * src.y */
1146 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1147 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1148 tmp5 = tmp1;
1149 }
1150 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1151 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1152
1153 /* xmm6 = src.z */
1154 /* xmm0 = xmm0 + src.z * src.z */
1155 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1156 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1157 tmp6 = tmp1;
1158 }
1159 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1160 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1161
1162 if (dims == 4) {
1163 /* xmm7 = src.w */
1164 /* xmm0 = xmm0 + src.w * src.w */
1165 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1166 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1167 tmp7 = tmp1;
1168 }
1169 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1170 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1171 }
1172
1173 /* xmm1 = 1 / sqrt(xmm0) */
1174 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1175
1176 /* dst.x = xmm1 * src.x */
1177 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1178 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1179 }
1180
1181 /* dst.y = xmm1 * src.y */
1182 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1183 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1184 }
1185
1186 /* dst.z = xmm1 * src.z */
1187 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1188 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1189 }
1190
1191 /* dst.w = xmm1 * src.w */
1192 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1193 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1194 }
1195 }
1196
1197 /* dst.w = 1.0 */
1198 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1199 dst0[CHAN_W] = bld->base.one;
1200 }
1201 }
1202 break;
1203
1204 case TGSI_OPCODE_DIV:
1205 /* deprecated */
1206 assert( 0 );
1207 return 0;
1208 break;
1209
1210 case TGSI_OPCODE_DP2:
1211 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1212 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1213 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1214 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1215 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1216 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1217 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1218 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1219 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1220 }
1221 break;
1222
1223 case TGSI_OPCODE_TXL:
1224 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1225 break;
1226
1227 case TGSI_OPCODE_TXP:
1228 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1229 break;
1230
1231 case TGSI_OPCODE_BRK:
1232 /* FIXME */
1233 return 0;
1234 break;
1235
1236 case TGSI_OPCODE_IF:
1237 /* FIXME */
1238 return 0;
1239 break;
1240
1241 case TGSI_OPCODE_BGNFOR:
1242 /* deprecated */
1243 assert(0);
1244 return 0;
1245 break;
1246
1247 case TGSI_OPCODE_REP:
1248 /* deprecated */
1249 assert(0);
1250 return 0;
1251 break;
1252
1253 case TGSI_OPCODE_ELSE:
1254 /* FIXME */
1255 return 0;
1256 break;
1257
1258 case TGSI_OPCODE_ENDIF:
1259 /* FIXME */
1260 return 0;
1261 break;
1262
1263 case TGSI_OPCODE_ENDFOR:
1264 /* deprecated */
1265 assert(0);
1266 return 0;
1267 break;
1268
1269 case TGSI_OPCODE_ENDREP:
1270 /* deprecated */
1271 assert(0);
1272 return 0;
1273 break;
1274
1275 case TGSI_OPCODE_PUSHA:
1276 /* deprecated? */
1277 assert(0);
1278 return 0;
1279 break;
1280
1281 case TGSI_OPCODE_POPA:
1282 /* deprecated? */
1283 assert(0);
1284 return 0;
1285 break;
1286
1287 case TGSI_OPCODE_CEIL:
1288 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1289 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1290 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1291 }
1292 break;
1293
1294 case TGSI_OPCODE_I2F:
1295 /* deprecated? */
1296 assert(0);
1297 return 0;
1298 break;
1299
1300 case TGSI_OPCODE_NOT:
1301 /* deprecated? */
1302 assert(0);
1303 return 0;
1304 break;
1305
1306 case TGSI_OPCODE_TRUNC:
1307 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1308 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1309 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1310 }
1311 break;
1312
1313 case TGSI_OPCODE_SHL:
1314 /* deprecated? */
1315 assert(0);
1316 return 0;
1317 break;
1318
1319 case TGSI_OPCODE_ISHR:
1320 /* deprecated? */
1321 assert(0);
1322 return 0;
1323 break;
1324
1325 case TGSI_OPCODE_AND:
1326 /* deprecated? */
1327 assert(0);
1328 return 0;
1329 break;
1330
1331 case TGSI_OPCODE_OR:
1332 /* deprecated? */
1333 assert(0);
1334 return 0;
1335 break;
1336
1337 case TGSI_OPCODE_MOD:
1338 /* deprecated? */
1339 assert(0);
1340 return 0;
1341 break;
1342
1343 case TGSI_OPCODE_XOR:
1344 /* deprecated? */
1345 assert(0);
1346 return 0;
1347 break;
1348
1349 case TGSI_OPCODE_SAD:
1350 /* deprecated? */
1351 assert(0);
1352 return 0;
1353 break;
1354
1355 case TGSI_OPCODE_TXF:
1356 /* deprecated? */
1357 assert(0);
1358 return 0;
1359 break;
1360
1361 case TGSI_OPCODE_TXQ:
1362 /* deprecated? */
1363 assert(0);
1364 return 0;
1365 break;
1366
1367 case TGSI_OPCODE_CONT:
1368 /* deprecated? */
1369 assert(0);
1370 return 0;
1371 break;
1372
1373 case TGSI_OPCODE_EMIT:
1374 return 0;
1375 break;
1376
1377 case TGSI_OPCODE_ENDPRIM:
1378 return 0;
1379 break;
1380
1381 case TGSI_OPCODE_NOP:
1382 break;
1383
1384 default:
1385 return 0;
1386 }
1387
1388 if(info->num_dst) {
1389 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1390 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1391 }
1392 }
1393
1394 return 1;
1395 }
1396
1397
1398 void
1399 lp_build_tgsi_soa(LLVMBuilderRef builder,
1400 const struct tgsi_token *tokens,
1401 struct lp_type type,
1402 struct lp_build_mask_context *mask,
1403 LLVMValueRef consts_ptr,
1404 const LLVMValueRef *pos,
1405 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1406 LLVMValueRef (*outputs)[NUM_CHANNELS],
1407 struct lp_build_sampler_soa *sampler)
1408 {
1409 struct lp_build_tgsi_soa_context bld;
1410 struct tgsi_parse_context parse;
1411 uint num_immediates = 0;
1412 unsigned i;
1413
1414 /* Setup build context */
1415 memset(&bld, 0, sizeof bld);
1416 lp_build_context_init(&bld.base, builder, type);
1417 bld.mask = mask;
1418 bld.pos = pos;
1419 bld.inputs = inputs;
1420 bld.outputs = outputs;
1421 bld.consts_ptr = consts_ptr;
1422 bld.sampler = sampler;
1423
1424 tgsi_parse_init( &parse, tokens );
1425
1426 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1427 tgsi_parse_token( &parse );
1428
1429 switch( parse.FullToken.Token.Type ) {
1430 case TGSI_TOKEN_TYPE_DECLARATION:
1431 /* Inputs already interpolated */
1432 break;
1433
1434 case TGSI_TOKEN_TYPE_INSTRUCTION:
1435 {
1436 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1437 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1438 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1439 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1440 info ? info->mnemonic : "<invalid>");
1441 }
1442
1443 break;
1444
1445 case TGSI_TOKEN_TYPE_IMMEDIATE:
1446 /* simply copy the immediate values into the next immediates[] slot */
1447 {
1448 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1449 assert(size <= 4);
1450 assert(num_immediates < LP_MAX_IMMEDIATES);
1451 for( i = 0; i < size; ++i )
1452 bld.immediates[num_immediates][i] =
1453 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1454 for( i = size; i < 4; ++i )
1455 bld.immediates[num_immediates][i] = bld.base.undef;
1456 num_immediates++;
1457 }
1458 break;
1459
1460 default:
1461 assert( 0 );
1462 }
1463 }
1464
1465 tgsi_parse_free( &parse );
1466 }
1467