mesa: remove a line of dead code
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/** Number of rows in the per-shader temporary register table. */
#define LP_MAX_TEMPS 256
/** Number of rows in the per-shader immediate table. */
#define LP_MAX_IMMEDIATES 256


/**
 * Loop header stepping CHAN through 0 .. NUM_CHANNELS-1.
 * CHAN must be an assignable integer variable supplied by the caller.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/**
 * Non-zero when bit CHAN is set in the write mask of the first
 * destination register of INST (a pointer to the instruction).
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/**
 * "if" header guarding the following statement on channel CHAN being
 * write-enabled.  Expands to a bare "if", so beware of a trailing "else".
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header running the following statement once for every channel
 * that is write-enabled in the first destination register of INST.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element indices of the four quad positions, used by the swizzle tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
85
86
87 struct lp_build_tgsi_soa_context
88 {
89 struct lp_build_context base;
90
91 LLVMValueRef consts_ptr;
92 const LLVMValueRef *pos;
93 const LLVMValueRef (*inputs)[NUM_CHANNELS];
94 LLVMValueRef (*outputs)[NUM_CHANNELS];
95
96 struct lp_build_sampler_soa *sampler;
97
98 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
99 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
100
101 struct lp_build_mask_context *mask;
102 };
103
104
105 static const unsigned char
106 swizzle_left[4] = {
107 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
108 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
109 };
110
111 static const unsigned char
112 swizzle_right[4] = {
113 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
114 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
115 };
116
117 static const unsigned char
118 swizzle_top[4] = {
119 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
120 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
121 };
122
123 static const unsigned char
124 swizzle_bottom[4] = {
125 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
126 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
127 };
128
129
130 static LLVMValueRef
131 emit_ddx(struct lp_build_tgsi_soa_context *bld,
132 LLVMValueRef src)
133 {
134 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
135 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
136 return lp_build_sub(&bld->base, src_right, src_left);
137 }
138
139
140 static LLVMValueRef
141 emit_ddy(struct lp_build_tgsi_soa_context *bld,
142 LLVMValueRef src)
143 {
144 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
145 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
146 return lp_build_sub(&bld->base, src_top, src_bottom);
147 }
148
149
150 /**
151 * Register fetch.
152 */
153 static LLVMValueRef
154 emit_fetch(
155 struct lp_build_tgsi_soa_context *bld,
156 const struct tgsi_full_instruction *inst,
157 unsigned index,
158 const unsigned chan_index )
159 {
160 const struct tgsi_full_src_register *reg = &inst->Src[index];
161 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
162 LLVMValueRef res;
163
164 switch (swizzle) {
165 case TGSI_SWIZZLE_X:
166 case TGSI_SWIZZLE_Y:
167 case TGSI_SWIZZLE_Z:
168 case TGSI_SWIZZLE_W:
169
170 switch (reg->Register.File) {
171 case TGSI_FILE_CONSTANT: {
172 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
173 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
174 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
175 res = lp_build_broadcast_scalar(&bld->base, scalar);
176 break;
177 }
178
179 case TGSI_FILE_IMMEDIATE:
180 res = bld->immediates[reg->Register.Index][swizzle];
181 assert(res);
182 break;
183
184 case TGSI_FILE_INPUT:
185 res = bld->inputs[reg->Register.Index][swizzle];
186 assert(res);
187 break;
188
189 case TGSI_FILE_TEMPORARY:
190 res = bld->temps[reg->Register.Index][swizzle];
191 if(!res)
192 return bld->base.undef;
193 break;
194
195 default:
196 assert( 0 );
197 return bld->base.undef;
198 }
199 break;
200
201 default:
202 assert( 0 );
203 return bld->base.undef;
204 }
205
206 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
207 case TGSI_UTIL_SIGN_CLEAR:
208 res = lp_build_abs( &bld->base, res );
209 break;
210
211 case TGSI_UTIL_SIGN_SET:
212 /* TODO: Use bitwese OR for floating point */
213 res = lp_build_abs( &bld->base, res );
214 res = LLVMBuildNeg( bld->base.builder, res, "" );
215 break;
216
217 case TGSI_UTIL_SIGN_TOGGLE:
218 res = LLVMBuildNeg( bld->base.builder, res, "" );
219 break;
220
221 case TGSI_UTIL_SIGN_KEEP:
222 break;
223 }
224
225 return res;
226 }
227
228
229 /**
230 * Register fetch with derivatives.
231 */
232 static void
233 emit_fetch_deriv(
234 struct lp_build_tgsi_soa_context *bld,
235 const struct tgsi_full_instruction *inst,
236 unsigned index,
237 const unsigned chan_index,
238 LLVMValueRef *res,
239 LLVMValueRef *ddx,
240 LLVMValueRef *ddy)
241 {
242 LLVMValueRef src;
243
244 src = emit_fetch(bld, inst, index, chan_index);
245
246 if(res)
247 *res = src;
248
249 /* TODO: use interpolation coeffs for inputs */
250
251 if(ddx)
252 *ddx = emit_ddx(bld, src);
253
254 if(ddy)
255 *ddy = emit_ddy(bld, src);
256 }
257
258
259 /**
260 * Register store.
261 */
262 static void
263 emit_store(
264 struct lp_build_tgsi_soa_context *bld,
265 const struct tgsi_full_instruction *inst,
266 unsigned index,
267 unsigned chan_index,
268 LLVMValueRef value)
269 {
270 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
271
272 switch( inst->Instruction.Saturate ) {
273 case TGSI_SAT_NONE:
274 break;
275
276 case TGSI_SAT_ZERO_ONE:
277 value = lp_build_max(&bld->base, value, bld->base.zero);
278 value = lp_build_min(&bld->base, value, bld->base.one);
279 break;
280
281 case TGSI_SAT_MINUS_PLUS_ONE:
282 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
283 value = lp_build_min(&bld->base, value, bld->base.one);
284 break;
285
286 default:
287 assert(0);
288 }
289
290 switch( reg->Register.File ) {
291 case TGSI_FILE_OUTPUT:
292 bld->outputs[reg->Register.Index][chan_index] = value;
293 break;
294
295 case TGSI_FILE_TEMPORARY:
296 bld->temps[reg->Register.Index][chan_index] = value;
297 break;
298
299 case TGSI_FILE_ADDRESS:
300 /* FIXME */
301 assert(0);
302 break;
303
304 default:
305 assert( 0 );
306 }
307 }
308
309
310 /**
311 * High-level instruction translators.
312 */
313
314
315 static void
316 emit_tex( struct lp_build_tgsi_soa_context *bld,
317 const struct tgsi_full_instruction *inst,
318 boolean apply_lodbias,
319 boolean projected,
320 LLVMValueRef *texel)
321 {
322 const uint unit = inst->Src[1].Register.Index;
323 LLVMValueRef lodbias;
324 LLVMValueRef oow;
325 LLVMValueRef coords[3];
326 unsigned num_coords;
327 unsigned i;
328
329 switch (inst->Texture.Texture) {
330 case TGSI_TEXTURE_1D:
331 num_coords = 1;
332 break;
333 case TGSI_TEXTURE_2D:
334 case TGSI_TEXTURE_RECT:
335 num_coords = 2;
336 break;
337 case TGSI_TEXTURE_SHADOW1D:
338 case TGSI_TEXTURE_SHADOW2D:
339 case TGSI_TEXTURE_SHADOWRECT:
340 case TGSI_TEXTURE_3D:
341 case TGSI_TEXTURE_CUBE:
342 num_coords = 3;
343 break;
344 default:
345 assert(0);
346 return;
347 }
348
349 if(apply_lodbias)
350 lodbias = emit_fetch( bld, inst, 0, 3 );
351 else
352 lodbias = bld->base.zero;
353
354 if (projected) {
355 oow = emit_fetch( bld, inst, 0, 3 );
356 oow = lp_build_rcp(&bld->base, oow);
357 }
358
359 for (i = 0; i < num_coords; i++) {
360 coords[i] = emit_fetch( bld, inst, 0, i );
361 if (projected)
362 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
363 }
364
365 bld->sampler->emit_fetch_texel(bld->sampler,
366 bld->base.builder,
367 bld->base.type,
368 unit, num_coords, coords, lodbias,
369 texel);
370 }
371
372
373 static void
374 emit_kil(
375 struct lp_build_tgsi_soa_context *bld,
376 const struct tgsi_full_instruction *inst )
377 {
378 const struct tgsi_full_src_register *reg = &inst->Src[0];
379 LLVMValueRef terms[NUM_CHANNELS];
380 LLVMValueRef mask;
381 unsigned chan_index;
382
383 memset(&terms, 0, sizeof terms);
384
385 FOR_EACH_CHANNEL( chan_index ) {
386 unsigned swizzle;
387
388 /* Unswizzle channel */
389 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
390
391 /* Check if the component has not been already tested. */
392 assert(swizzle < NUM_CHANNELS);
393 if( !terms[swizzle] )
394 /* TODO: change the comparison operator instead of setting the sign */
395 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
396 }
397
398 mask = NULL;
399 FOR_EACH_CHANNEL( chan_index ) {
400 if(terms[chan_index]) {
401 LLVMValueRef chan_mask;
402
403 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
404
405 if(mask)
406 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
407 else
408 mask = chan_mask;
409 }
410 }
411
412 if(mask)
413 lp_build_mask_update(bld->mask, mask);
414 }
415
416
417 /**
418 * Check if inst src/dest regs use indirect addressing into temporary
419 * register file.
420 */
421 static boolean
422 indirect_temp_reference(const struct tgsi_full_instruction *inst)
423 {
424 uint i;
425 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
426 const struct tgsi_full_src_register *reg = &inst->Src[i];
427 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
428 reg->Register.Indirect)
429 return TRUE;
430 }
431 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
432 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
433 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
434 reg->Register.Indirect)
435 return TRUE;
436 }
437 return FALSE;
438 }
439
440
441 static int
442 emit_instruction(
443 struct lp_build_tgsi_soa_context *bld,
444 const struct tgsi_full_instruction *inst,
445 const struct tgsi_opcode_info *info)
446 {
447 unsigned chan_index;
448 LLVMValueRef src0, src1, src2;
449 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
450 LLVMValueRef res;
451 LLVMValueRef dst0[NUM_CHANNELS];
452
453 /* we can't handle indirect addressing into temp register file yet */
454 if (indirect_temp_reference(inst))
455 return FALSE;
456
457 assert(info->num_dst <= 1);
458 if(info->num_dst) {
459 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
460 dst0[chan_index] = bld->base.undef;
461 }
462 }
463
464 switch (inst->Instruction.Opcode) {
465 #if 0
466 case TGSI_OPCODE_ARL:
467 /* FIXME */
468 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
469 tmp0 = emit_fetch( bld, inst, 0, chan_index );
470 emit_flr(bld, 0, 0);
471 emit_f2it( bld, 0 );
472 dst0[chan_index] = tmp0;
473 }
474 break;
475 #endif
476
477 case TGSI_OPCODE_MOV:
478 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
479 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
480 }
481 break;
482
483 case TGSI_OPCODE_LIT:
484 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
485 dst0[CHAN_X] = bld->base.one;
486 }
487 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
488 src0 = emit_fetch( bld, inst, 0, CHAN_X );
489 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
490 }
491 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
492 /* XMM[1] = SrcReg[0].yyyy */
493 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
494 /* XMM[1] = max(XMM[1], 0) */
495 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
496 /* XMM[2] = SrcReg[0].wwww */
497 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
498 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
499 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
500 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
501 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
502 }
503 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
504 dst0[CHAN_W] = bld->base.one;
505 }
506 break;
507
508 case TGSI_OPCODE_RCP:
509 /* TGSI_OPCODE_RECIP */
510 src0 = emit_fetch( bld, inst, 0, CHAN_X );
511 res = lp_build_rcp(&bld->base, src0);
512 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
513 dst0[chan_index] = res;
514 }
515 break;
516
517 case TGSI_OPCODE_RSQ:
518 /* TGSI_OPCODE_RECIPSQRT */
519 src0 = emit_fetch( bld, inst, 0, CHAN_X );
520 src0 = lp_build_abs(&bld->base, src0);
521 res = lp_build_rsqrt(&bld->base, src0);
522 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
523 dst0[chan_index] = res;
524 }
525 break;
526
527 case TGSI_OPCODE_EXP:
528 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
529 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
530 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
531 LLVMValueRef *p_exp2_int_part = NULL;
532 LLVMValueRef *p_frac_part = NULL;
533 LLVMValueRef *p_exp2 = NULL;
534
535 src0 = emit_fetch( bld, inst, 0, CHAN_X );
536
537 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
538 p_exp2_int_part = &tmp0;
539 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
540 p_frac_part = &tmp1;
541 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
542 p_exp2 = &tmp2;
543
544 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
545
546 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
547 dst0[CHAN_X] = tmp0;
548 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
549 dst0[CHAN_Y] = tmp1;
550 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
551 dst0[CHAN_Z] = tmp2;
552 }
553 /* dst.w = 1.0 */
554 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
555 dst0[CHAN_W] = bld->base.one;
556 }
557 break;
558
559 case TGSI_OPCODE_LOG:
560 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
561 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
562 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
563 LLVMValueRef *p_floor_log2 = NULL;
564 LLVMValueRef *p_exp = NULL;
565 LLVMValueRef *p_log2 = NULL;
566
567 src0 = emit_fetch( bld, inst, 0, CHAN_X );
568 src0 = lp_build_abs( &bld->base, src0 );
569
570 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
571 p_floor_log2 = &tmp0;
572 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
573 p_exp = &tmp1;
574 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
575 p_log2 = &tmp2;
576
577 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
578
579 /* dst.x = floor(lg2(abs(src.x))) */
580 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
581 dst0[CHAN_X] = tmp0;
582 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
583 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
584 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
585 }
586 /* dst.z = lg2(abs(src.x)) */
587 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
588 dst0[CHAN_Z] = tmp2;
589 }
590 /* dst.w = 1.0 */
591 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
592 dst0[CHAN_W] = bld->base.one;
593 }
594 break;
595
596 case TGSI_OPCODE_MUL:
597 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
598 src0 = emit_fetch( bld, inst, 0, chan_index );
599 src1 = emit_fetch( bld, inst, 1, chan_index );
600 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
601 }
602 break;
603
604 case TGSI_OPCODE_ADD:
605 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
606 src0 = emit_fetch( bld, inst, 0, chan_index );
607 src1 = emit_fetch( bld, inst, 1, chan_index );
608 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
609 }
610 break;
611
612 case TGSI_OPCODE_DP3:
613 /* TGSI_OPCODE_DOT3 */
614 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
615 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
616 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
617 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
618 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
619 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
620 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
621 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
622 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
623 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
624 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
625 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
626 dst0[chan_index] = tmp0;
627 }
628 break;
629
630 case TGSI_OPCODE_DP4:
631 /* TGSI_OPCODE_DOT4 */
632 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
633 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
634 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
635 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
636 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
637 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
638 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
639 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
640 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
641 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
642 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
643 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
644 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
645 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
646 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
647 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
648 dst0[chan_index] = tmp0;
649 }
650 break;
651
652 case TGSI_OPCODE_DST:
653 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
654 dst0[CHAN_X] = bld->base.one;
655 }
656 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
657 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
658 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
659 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
660 }
661 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
662 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
663 }
664 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
665 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
666 }
667 break;
668
669 case TGSI_OPCODE_MIN:
670 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
671 src0 = emit_fetch( bld, inst, 0, chan_index );
672 src1 = emit_fetch( bld, inst, 1, chan_index );
673 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
674 }
675 break;
676
677 case TGSI_OPCODE_MAX:
678 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
679 src0 = emit_fetch( bld, inst, 0, chan_index );
680 src1 = emit_fetch( bld, inst, 1, chan_index );
681 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
682 }
683 break;
684
685 case TGSI_OPCODE_SLT:
686 /* TGSI_OPCODE_SETLT */
687 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
688 src0 = emit_fetch( bld, inst, 0, chan_index );
689 src1 = emit_fetch( bld, inst, 1, chan_index );
690 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
691 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
692 }
693 break;
694
695 case TGSI_OPCODE_SGE:
696 /* TGSI_OPCODE_SETGE */
697 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
698 src0 = emit_fetch( bld, inst, 0, chan_index );
699 src1 = emit_fetch( bld, inst, 1, chan_index );
700 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
701 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
702 }
703 break;
704
705 case TGSI_OPCODE_MAD:
706 /* TGSI_OPCODE_MADD */
707 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
708 tmp0 = emit_fetch( bld, inst, 0, chan_index );
709 tmp1 = emit_fetch( bld, inst, 1, chan_index );
710 tmp2 = emit_fetch( bld, inst, 2, chan_index );
711 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
712 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
713 dst0[chan_index] = tmp0;
714 }
715 break;
716
717 case TGSI_OPCODE_SUB:
718 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
719 tmp0 = emit_fetch( bld, inst, 0, chan_index );
720 tmp1 = emit_fetch( bld, inst, 1, chan_index );
721 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
722 }
723 break;
724
725 case TGSI_OPCODE_LRP:
726 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
727 src0 = emit_fetch( bld, inst, 0, chan_index );
728 src1 = emit_fetch( bld, inst, 1, chan_index );
729 src2 = emit_fetch( bld, inst, 2, chan_index );
730 tmp0 = lp_build_sub( &bld->base, src1, src2 );
731 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
732 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
733 }
734 break;
735
736 case TGSI_OPCODE_CND:
737 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
738 src0 = emit_fetch( bld, inst, 0, chan_index );
739 src1 = emit_fetch( bld, inst, 1, chan_index );
740 src2 = emit_fetch( bld, inst, 2, chan_index );
741 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
742 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
743 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
744 }
745 break;
746
747 case TGSI_OPCODE_DP2A:
748 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
749 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
750 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
751 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
752 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
753 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
754 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
755 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
756 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
757 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
758 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
759 }
760 break;
761
762 case TGSI_OPCODE_FRC:
763 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
764 src0 = emit_fetch( bld, inst, 0, chan_index );
765 tmp0 = lp_build_floor(&bld->base, src0);
766 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
767 dst0[chan_index] = tmp0;
768 }
769 break;
770
771 case TGSI_OPCODE_CLAMP:
772 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
773 tmp0 = emit_fetch( bld, inst, 0, chan_index );
774 src1 = emit_fetch( bld, inst, 1, chan_index );
775 src2 = emit_fetch( bld, inst, 2, chan_index );
776 tmp0 = lp_build_max(&bld->base, tmp0, src1);
777 tmp0 = lp_build_min(&bld->base, tmp0, src2);
778 dst0[chan_index] = tmp0;
779 }
780 break;
781
782 case TGSI_OPCODE_FLR:
783 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
784 tmp0 = emit_fetch( bld, inst, 0, chan_index );
785 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
786 }
787 break;
788
789 case TGSI_OPCODE_ROUND:
790 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
791 tmp0 = emit_fetch( bld, inst, 0, chan_index );
792 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
793 }
794 break;
795
796 case TGSI_OPCODE_EX2: {
797 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
798 tmp0 = lp_build_exp2( &bld->base, tmp0);
799 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
800 dst0[chan_index] = tmp0;
801 }
802 break;
803 }
804
805 case TGSI_OPCODE_LG2:
806 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
807 tmp0 = lp_build_log2( &bld->base, tmp0);
808 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
809 dst0[chan_index] = tmp0;
810 }
811 break;
812
813 case TGSI_OPCODE_POW:
814 src0 = emit_fetch( bld, inst, 0, CHAN_X );
815 src1 = emit_fetch( bld, inst, 1, CHAN_X );
816 res = lp_build_pow( &bld->base, src0, src1 );
817 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
818 dst0[chan_index] = res;
819 }
820 break;
821
822 case TGSI_OPCODE_XPD:
823 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
824 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
825 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
826 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
827 }
828 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
829 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
830 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
831 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
832 }
833 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
834 tmp2 = tmp0;
835 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
836 tmp5 = tmp3;
837 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
838 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
839 dst0[CHAN_X] = tmp2;
840 }
841 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
842 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
843 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
844 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
845 }
846 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
847 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
848 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
849 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
850 dst0[CHAN_Y] = tmp3;
851 }
852 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
853 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
854 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
855 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
856 dst0[CHAN_Z] = tmp5;
857 }
858 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
859 dst0[CHAN_W] = bld->base.one;
860 }
861 break;
862
863 case TGSI_OPCODE_ABS:
864 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
865 tmp0 = emit_fetch( bld, inst, 0, chan_index );
866 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
867 }
868 break;
869
870 case TGSI_OPCODE_RCC:
871 /* deprecated? */
872 assert(0);
873 return 0;
874
875 case TGSI_OPCODE_DPH:
876 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
877 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
878 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
879 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
880 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
881 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
882 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
883 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
884 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
885 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
886 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
887 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
888 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
889 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
890 dst0[chan_index] = tmp0;
891 }
892 break;
893
894 case TGSI_OPCODE_COS:
895 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
896 tmp0 = lp_build_cos( &bld->base, tmp0 );
897 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
898 dst0[chan_index] = tmp0;
899 }
900 break;
901
902 case TGSI_OPCODE_DDX:
903 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
904 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
905 }
906 break;
907
908 case TGSI_OPCODE_DDY:
909 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
910 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
911 }
912 break;
913
914 case TGSI_OPCODE_KILP:
915 /* predicated kill */
916 /* FIXME */
917 return 0;
918 break;
919
920 case TGSI_OPCODE_KIL:
921 /* conditional kill */
922 emit_kil( bld, inst );
923 break;
924
925 case TGSI_OPCODE_PK2H:
926 return 0;
927 break;
928
929 case TGSI_OPCODE_PK2US:
930 return 0;
931 break;
932
933 case TGSI_OPCODE_PK4B:
934 return 0;
935 break;
936
937 case TGSI_OPCODE_PK4UB:
938 return 0;
939 break;
940
941 case TGSI_OPCODE_RFL:
942 return 0;
943 break;
944
945 case TGSI_OPCODE_SEQ:
946 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
947 src0 = emit_fetch( bld, inst, 0, chan_index );
948 src1 = emit_fetch( bld, inst, 1, chan_index );
949 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
950 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
951 }
952 break;
953
954 case TGSI_OPCODE_SFL:
955 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
956 dst0[chan_index] = bld->base.zero;
957 }
958 break;
959
960 case TGSI_OPCODE_SGT:
961 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
962 src0 = emit_fetch( bld, inst, 0, chan_index );
963 src1 = emit_fetch( bld, inst, 1, chan_index );
964 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
965 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
966 }
967 break;
968
969 case TGSI_OPCODE_SIN:
970 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
971 tmp0 = lp_build_sin( &bld->base, tmp0 );
972 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
973 dst0[chan_index] = tmp0;
974 }
975 break;
976
977 case TGSI_OPCODE_SLE:
978 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
979 src0 = emit_fetch( bld, inst, 0, chan_index );
980 src1 = emit_fetch( bld, inst, 1, chan_index );
981 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
982 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
983 }
984 break;
985
986 case TGSI_OPCODE_SNE:
987 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
988 src0 = emit_fetch( bld, inst, 0, chan_index );
989 src1 = emit_fetch( bld, inst, 1, chan_index );
990 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
991 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
992 }
993 break;
994
995 case TGSI_OPCODE_STR:
996 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
997 dst0[chan_index] = bld->base.one;
998 }
999 break;
1000
1001 case TGSI_OPCODE_TEX:
1002 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1003 break;
1004
1005 case TGSI_OPCODE_TXD:
1006 /* FIXME */
1007 return 0;
1008 break;
1009
1010 case TGSI_OPCODE_UP2H:
1011 /* deprecated */
1012 assert (0);
1013 return 0;
1014 break;
1015
1016 case TGSI_OPCODE_UP2US:
1017 /* deprecated */
1018 assert(0);
1019 return 0;
1020 break;
1021
1022 case TGSI_OPCODE_UP4B:
1023 /* deprecated */
1024 assert(0);
1025 return 0;
1026 break;
1027
1028 case TGSI_OPCODE_UP4UB:
1029 /* deprecated */
1030 assert(0);
1031 return 0;
1032 break;
1033
1034 case TGSI_OPCODE_X2D:
1035 /* deprecated? */
1036 assert(0);
1037 return 0;
1038 break;
1039
1040 case TGSI_OPCODE_ARA:
1041 /* deprecated */
1042 assert(0);
1043 return 0;
1044 break;
1045
1046 #if 0
1047 case TGSI_OPCODE_ARR:
1048 /* FIXME */
1049 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1050 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1051 emit_rnd( bld, 0, 0 );
1052 emit_f2it( bld, 0 );
1053 dst0[chan_index] = tmp0;
1054 }
1055 break;
1056 #endif
1057
1058 case TGSI_OPCODE_BRA:
1059 /* deprecated */
1060 assert(0);
1061 return 0;
1062 break;
1063
1064 case TGSI_OPCODE_CAL:
1065 /* FIXME */
1066 return 0;
1067 break;
1068
1069 case TGSI_OPCODE_RET:
1070 /* FIXME */
1071 return 0;
1072 break;
1073
1074 case TGSI_OPCODE_END:
1075 break;
1076
1077 case TGSI_OPCODE_SSG:
1078 /* TGSI_OPCODE_SGN */
1079 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1080 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1081 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1082 }
1083 break;
1084
1085 case TGSI_OPCODE_CMP:
1086 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1087 src0 = emit_fetch( bld, inst, 0, chan_index );
1088 src1 = emit_fetch( bld, inst, 1, chan_index );
1089 src2 = emit_fetch( bld, inst, 2, chan_index );
1090 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1091 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1092 }
1093 break;
1094
1095 case TGSI_OPCODE_SCS:
1096 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1097 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1098 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1099 }
1100 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1101 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1102 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1103 }
1104 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1105 dst0[CHAN_Z] = bld->base.zero;
1106 }
1107 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1108 dst0[CHAN_W] = bld->base.one;
1109 }
1110 break;
1111
1112 case TGSI_OPCODE_TXB:
1113 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1114 break;
1115
1116 case TGSI_OPCODE_NRM:
1117 /* fall-through */
1118 case TGSI_OPCODE_NRM4:
1119 /* 3 or 4-component normalization */
1120 {
1121 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1122
1123 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1124 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1125 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1126 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1127
1128 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1129
1130 /* xmm4 = src.x */
1131 /* xmm0 = src.x * src.x */
1132 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1133 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1134 tmp4 = tmp0;
1135 }
1136 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1137
1138 /* xmm5 = src.y */
1139 /* xmm0 = xmm0 + src.y * src.y */
1140 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1141 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1142 tmp5 = tmp1;
1143 }
1144 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1145 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1146
1147 /* xmm6 = src.z */
1148 /* xmm0 = xmm0 + src.z * src.z */
1149 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1150 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1151 tmp6 = tmp1;
1152 }
1153 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1154 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1155
1156 if (dims == 4) {
1157 /* xmm7 = src.w */
1158 /* xmm0 = xmm0 + src.w * src.w */
1159 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1160 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1161 tmp7 = tmp1;
1162 }
1163 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1164 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1165 }
1166
1167 /* xmm1 = 1 / sqrt(xmm0) */
1168 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1169
1170 /* dst.x = xmm1 * src.x */
1171 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1172 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1173 }
1174
1175 /* dst.y = xmm1 * src.y */
1176 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1177 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1178 }
1179
1180 /* dst.z = xmm1 * src.z */
1181 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1182 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1183 }
1184
1185 /* dst.w = xmm1 * src.w */
1186 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1187 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1188 }
1189 }
1190
1191 /* dst.w = 1.0 */
1192 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1193 dst0[CHAN_W] = bld->base.one;
1194 }
1195 }
1196 break;
1197
1198 case TGSI_OPCODE_DIV:
1199 /* deprecated */
1200 assert( 0 );
1201 return 0;
1202 break;
1203
1204 case TGSI_OPCODE_DP2:
1205 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1206 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1207 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1208 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1209 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1210 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1211 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1212 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1213 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1214 }
1215 break;
1216
1217 case TGSI_OPCODE_TXL:
1218 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1219 break;
1220
1221 case TGSI_OPCODE_TXP:
1222 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1223 break;
1224
1225 case TGSI_OPCODE_BRK:
1226 /* FIXME */
1227 return 0;
1228 break;
1229
1230 case TGSI_OPCODE_IF:
1231 /* FIXME */
1232 return 0;
1233 break;
1234
1235 case TGSI_OPCODE_BGNFOR:
1236 /* deprecated */
1237 assert(0);
1238 return 0;
1239 break;
1240
1241 case TGSI_OPCODE_REP:
1242 /* deprecated */
1243 assert(0);
1244 return 0;
1245 break;
1246
1247 case TGSI_OPCODE_ELSE:
1248 /* FIXME */
1249 return 0;
1250 break;
1251
1252 case TGSI_OPCODE_ENDIF:
1253 /* FIXME */
1254 return 0;
1255 break;
1256
1257 case TGSI_OPCODE_ENDFOR:
1258 /* deprecated */
1259 assert(0);
1260 return 0;
1261 break;
1262
1263 case TGSI_OPCODE_ENDREP:
1264 /* deprecated */
1265 assert(0);
1266 return 0;
1267 break;
1268
1269 case TGSI_OPCODE_PUSHA:
1270 /* deprecated? */
1271 assert(0);
1272 return 0;
1273 break;
1274
1275 case TGSI_OPCODE_POPA:
1276 /* deprecated? */
1277 assert(0);
1278 return 0;
1279 break;
1280
1281 case TGSI_OPCODE_CEIL:
1282 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1283 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1284 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1285 }
1286 break;
1287
1288 case TGSI_OPCODE_I2F:
1289 /* deprecated? */
1290 assert(0);
1291 return 0;
1292 break;
1293
1294 case TGSI_OPCODE_NOT:
1295 /* deprecated? */
1296 assert(0);
1297 return 0;
1298 break;
1299
1300 case TGSI_OPCODE_TRUNC:
1301 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1302 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1303 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1304 }
1305 break;
1306
1307 case TGSI_OPCODE_SHL:
1308 /* deprecated? */
1309 assert(0);
1310 return 0;
1311 break;
1312
1313 case TGSI_OPCODE_SHR:
1314 /* deprecated? */
1315 assert(0);
1316 return 0;
1317 break;
1318
1319 case TGSI_OPCODE_AND:
1320 /* deprecated? */
1321 assert(0);
1322 return 0;
1323 break;
1324
1325 case TGSI_OPCODE_OR:
1326 /* deprecated? */
1327 assert(0);
1328 return 0;
1329 break;
1330
1331 case TGSI_OPCODE_MOD:
1332 /* deprecated? */
1333 assert(0);
1334 return 0;
1335 break;
1336
1337 case TGSI_OPCODE_XOR:
1338 /* deprecated? */
1339 assert(0);
1340 return 0;
1341 break;
1342
1343 case TGSI_OPCODE_SAD:
1344 /* deprecated? */
1345 assert(0);
1346 return 0;
1347 break;
1348
1349 case TGSI_OPCODE_TXF:
1350 /* deprecated? */
1351 assert(0);
1352 return 0;
1353 break;
1354
1355 case TGSI_OPCODE_TXQ:
1356 /* deprecated? */
1357 assert(0);
1358 return 0;
1359 break;
1360
1361 case TGSI_OPCODE_CONT:
1362 /* deprecated? */
1363 assert(0);
1364 return 0;
1365 break;
1366
1367 case TGSI_OPCODE_EMIT:
1368 return 0;
1369 break;
1370
1371 case TGSI_OPCODE_ENDPRIM:
1372 return 0;
1373 break;
1374
1375 case TGSI_OPCODE_NOP:
1376 break;
1377
1378 default:
1379 return 0;
1380 }
1381
1382 if(info->num_dst) {
1383 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1384 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1385 }
1386 }
1387
1388 return 1;
1389 }
1390
1391
1392 void
1393 lp_build_tgsi_soa(LLVMBuilderRef builder,
1394 const struct tgsi_token *tokens,
1395 struct lp_type type,
1396 struct lp_build_mask_context *mask,
1397 LLVMValueRef consts_ptr,
1398 const LLVMValueRef *pos,
1399 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1400 LLVMValueRef (*outputs)[NUM_CHANNELS],
1401 struct lp_build_sampler_soa *sampler)
1402 {
1403 struct lp_build_tgsi_soa_context bld;
1404 struct tgsi_parse_context parse;
1405 uint num_immediates = 0;
1406 unsigned i;
1407
1408 /* Setup build context */
1409 memset(&bld, 0, sizeof bld);
1410 lp_build_context_init(&bld.base, builder, type);
1411 bld.mask = mask;
1412 bld.pos = pos;
1413 bld.inputs = inputs;
1414 bld.outputs = outputs;
1415 bld.consts_ptr = consts_ptr;
1416 bld.sampler = sampler;
1417
1418 tgsi_parse_init( &parse, tokens );
1419
1420 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1421 tgsi_parse_token( &parse );
1422
1423 switch( parse.FullToken.Token.Type ) {
1424 case TGSI_TOKEN_TYPE_DECLARATION:
1425 /* Inputs already interpolated */
1426 break;
1427
1428 case TGSI_TOKEN_TYPE_INSTRUCTION:
1429 {
1430 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1431 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1432 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1433 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1434 info ? info->mnemonic : "<invalid>");
1435 }
1436
1437 break;
1438
1439 case TGSI_TOKEN_TYPE_IMMEDIATE:
1440 /* simply copy the immediate values into the next immediates[] slot */
1441 {
1442 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1443 assert(size <= 4);
1444 assert(num_immediates < LP_MAX_IMMEDIATES);
1445 for( i = 0; i < size; ++i )
1446 bld.immediates[num_immediates][i] =
1447 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1448 for( i = size; i < 4; ++i )
1449 bld.immediates[num_immediates][i] = bld.base.undef;
1450 num_immediates++;
1451 }
1452 break;
1453
1454 default:
1455 assert( 0 );
1456 }
1457 }
1458
1459 tgsi_parse_free( &parse );
1460 }
1461