swrast: use different temp array in _swrast_get_dest_rgba()
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/** Number of rows in the temps[] table of struct lp_build_tgsi_soa_context. */
#define LP_MAX_TEMPS 256
/** Number of rows in the immediates[] table of struct lp_build_tgsi_soa_context. */
#define LP_MAX_IMMEDIATES 256


/**
 * Loop header that steps CHAN through 0 .. NUM_CHANNELS-1.
 *
 * CHAN must be a modifiable integer lvalue; the statement following the
 * macro becomes the loop body.  The argument is parenthesized so that any
 * lvalue expression can be passed safely.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/**
 * Non-zero when bit CHAN is set in the write mask of the first destination
 * register of instruction INST (a pointer to the instruction).
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Statement prefix: run the following statement only when channel CHAN of
 * the first destination is write-enabled.
 *
 * NOTE: this expands to a bare `if`, so an `else` written after the guarded
 * statement attaches to this hidden `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header visiting only the channels that are write-enabled in the
 * first destination register of INST.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices, usable as the second subscript of the register tables. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Indices of the four elements of a quad, used by the swizzle_* tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
85
86
87 struct lp_build_tgsi_soa_context
88 {
89 struct lp_build_context base;
90
91 LLVMValueRef consts_ptr;
92 const LLVMValueRef *pos;
93 const LLVMValueRef (*inputs)[NUM_CHANNELS];
94 LLVMValueRef (*outputs)[NUM_CHANNELS];
95
96 struct lp_build_sampler_soa *sampler;
97
98 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
99 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
100
101 struct lp_build_mask_context *mask;
102 };
103
104
105 static const unsigned char
106 swizzle_left[4] = {
107 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
108 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
109 };
110
111 static const unsigned char
112 swizzle_right[4] = {
113 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
114 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
115 };
116
117 static const unsigned char
118 swizzle_top[4] = {
119 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
120 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
121 };
122
123 static const unsigned char
124 swizzle_bottom[4] = {
125 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
126 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
127 };
128
129
130 static LLVMValueRef
131 emit_ddx(struct lp_build_tgsi_soa_context *bld,
132 LLVMValueRef src)
133 {
134 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
135 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
136 return lp_build_sub(&bld->base, src_right, src_left);
137 }
138
139
140 static LLVMValueRef
141 emit_ddy(struct lp_build_tgsi_soa_context *bld,
142 LLVMValueRef src)
143 {
144 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
145 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
146 return lp_build_sub(&bld->base, src_top, src_bottom);
147 }
148
149
150 /**
151 * Register fetch.
152 */
153 static LLVMValueRef
154 emit_fetch(
155 struct lp_build_tgsi_soa_context *bld,
156 const struct tgsi_full_instruction *inst,
157 unsigned index,
158 const unsigned chan_index )
159 {
160 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
161 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
162 LLVMValueRef res;
163
164 switch (swizzle) {
165 case TGSI_SWIZZLE_X:
166 case TGSI_SWIZZLE_Y:
167 case TGSI_SWIZZLE_Z:
168 case TGSI_SWIZZLE_W:
169
170 switch (reg->SrcRegister.File) {
171 case TGSI_FILE_CONSTANT: {
172 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
173 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
174 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
175 res = lp_build_broadcast_scalar(&bld->base, scalar);
176 break;
177 }
178
179 case TGSI_FILE_IMMEDIATE:
180 res = bld->immediates[reg->SrcRegister.Index][swizzle];
181 assert(res);
182 break;
183
184 case TGSI_FILE_INPUT:
185 res = bld->inputs[reg->SrcRegister.Index][swizzle];
186 assert(res);
187 break;
188
189 case TGSI_FILE_TEMPORARY:
190 res = bld->temps[reg->SrcRegister.Index][swizzle];
191 if(!res)
192 return bld->base.undef;
193 break;
194
195 default:
196 assert( 0 );
197 return bld->base.undef;
198 }
199 break;
200
201 default:
202 assert( 0 );
203 return bld->base.undef;
204 }
205
206 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
207 case TGSI_UTIL_SIGN_CLEAR:
208 res = lp_build_abs( &bld->base, res );
209 break;
210
211 case TGSI_UTIL_SIGN_SET:
212 /* TODO: Use bitwese OR for floating point */
213 res = lp_build_abs( &bld->base, res );
214 res = LLVMBuildNeg( bld->base.builder, res, "" );
215 break;
216
217 case TGSI_UTIL_SIGN_TOGGLE:
218 res = LLVMBuildNeg( bld->base.builder, res, "" );
219 break;
220
221 case TGSI_UTIL_SIGN_KEEP:
222 break;
223 }
224
225 return res;
226 }
227
228
229 /**
230 * Register fetch with derivatives.
231 */
232 static void
233 emit_fetch_deriv(
234 struct lp_build_tgsi_soa_context *bld,
235 const struct tgsi_full_instruction *inst,
236 unsigned index,
237 const unsigned chan_index,
238 LLVMValueRef *res,
239 LLVMValueRef *ddx,
240 LLVMValueRef *ddy)
241 {
242 LLVMValueRef src;
243
244 src = emit_fetch(bld, inst, index, chan_index);
245
246 if(res)
247 *res = src;
248
249 /* TODO: use interpolation coeffs for inputs */
250
251 if(ddx)
252 *ddx = emit_ddx(bld, src);
253
254 if(ddy)
255 *ddy = emit_ddy(bld, src);
256 }
257
258
259 /**
260 * Register store.
261 */
262 static void
263 emit_store(
264 struct lp_build_tgsi_soa_context *bld,
265 const struct tgsi_full_instruction *inst,
266 unsigned index,
267 unsigned chan_index,
268 LLVMValueRef value)
269 {
270 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
271
272 switch( inst->Instruction.Saturate ) {
273 case TGSI_SAT_NONE:
274 break;
275
276 case TGSI_SAT_ZERO_ONE:
277 value = lp_build_max(&bld->base, value, bld->base.zero);
278 value = lp_build_min(&bld->base, value, bld->base.one);
279 break;
280
281 case TGSI_SAT_MINUS_PLUS_ONE:
282 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
283 value = lp_build_min(&bld->base, value, bld->base.one);
284 break;
285
286 default:
287 assert(0);
288 }
289
290 switch( reg->DstRegister.File ) {
291 case TGSI_FILE_OUTPUT:
292 bld->outputs[reg->DstRegister.Index][chan_index] = value;
293 break;
294
295 case TGSI_FILE_TEMPORARY:
296 bld->temps[reg->DstRegister.Index][chan_index] = value;
297 break;
298
299 case TGSI_FILE_ADDRESS:
300 /* FIXME */
301 assert(0);
302 break;
303
304 default:
305 assert( 0 );
306 }
307 }
308
309
310 /**
311 * High-level instruction translators.
312 */
313
314
315 static void
316 emit_tex( struct lp_build_tgsi_soa_context *bld,
317 const struct tgsi_full_instruction *inst,
318 boolean apply_lodbias,
319 boolean projected,
320 LLVMValueRef *texel)
321 {
322 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
323 LLVMValueRef lodbias;
324 LLVMValueRef oow = NULL;
325 LLVMValueRef coords[3];
326 unsigned num_coords;
327 unsigned i;
328
329 switch (inst->InstructionExtTexture.Texture) {
330 case TGSI_TEXTURE_1D:
331 num_coords = 1;
332 break;
333 case TGSI_TEXTURE_2D:
334 case TGSI_TEXTURE_RECT:
335 num_coords = 2;
336 break;
337 case TGSI_TEXTURE_SHADOW1D:
338 case TGSI_TEXTURE_SHADOW2D:
339 case TGSI_TEXTURE_SHADOWRECT:
340 case TGSI_TEXTURE_3D:
341 case TGSI_TEXTURE_CUBE:
342 num_coords = 3;
343 break;
344 default:
345 assert(0);
346 return;
347 }
348
349 if(apply_lodbias)
350 lodbias = emit_fetch( bld, inst, 0, 3 );
351 else
352 lodbias = bld->base.zero;
353
354 if (projected) {
355 oow = emit_fetch( bld, inst, 0, 3 );
356 oow = lp_build_rcp(&bld->base, oow);
357 }
358
359 for (i = 0; i < num_coords; i++) {
360 coords[i] = emit_fetch( bld, inst, 0, i );
361 if (projected)
362 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
363 }
364
365 bld->sampler->emit_fetch_texel(bld->sampler,
366 bld->base.builder,
367 bld->base.type,
368 unit, num_coords, coords, lodbias,
369 texel);
370 }
371
372
373 static void
374 emit_kil(
375 struct lp_build_tgsi_soa_context *bld,
376 const struct tgsi_full_instruction *inst )
377 {
378 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
379 LLVMValueRef terms[NUM_CHANNELS];
380 LLVMValueRef mask;
381 unsigned chan_index;
382
383 memset(&terms, 0, sizeof terms);
384
385 FOR_EACH_CHANNEL( chan_index ) {
386 unsigned swizzle;
387
388 /* Unswizzle channel */
389 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
390
391 /* Check if the component has not been already tested. */
392 assert(swizzle < NUM_CHANNELS);
393 if( !terms[swizzle] )
394 /* TODO: change the comparison operator instead of setting the sign */
395 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
396 }
397
398 mask = NULL;
399 FOR_EACH_CHANNEL( chan_index ) {
400 if(terms[chan_index]) {
401 LLVMValueRef chan_mask;
402
403 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
404
405 if(mask)
406 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
407 else
408 mask = chan_mask;
409 }
410 }
411
412 if(mask)
413 lp_build_mask_update(bld->mask, mask);
414 }
415
416
417 /**
418 * Check if inst src/dest regs use indirect addressing into temporary
419 * register file.
420 */
421 static boolean
422 indirect_temp_reference(const struct tgsi_full_instruction *inst)
423 {
424 uint i;
425 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
426 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
427 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
428 reg->SrcRegister.Indirect)
429 return TRUE;
430 }
431 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
432 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
433 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
434 reg->DstRegister.Indirect)
435 return TRUE;
436 }
437 return FALSE;
438 }
439
440
441 static int
442 emit_instruction(
443 struct lp_build_tgsi_soa_context *bld,
444 const struct tgsi_full_instruction *inst,
445 const struct tgsi_opcode_info *info)
446 {
447 unsigned chan_index;
448 LLVMValueRef src0, src1, src2;
449 LLVMValueRef tmp0, tmp1, tmp2;
450 LLVMValueRef tmp3 = NULL;
451 LLVMValueRef tmp4 = NULL;
452 LLVMValueRef tmp5 = NULL;
453 LLVMValueRef tmp6 = NULL;
454 LLVMValueRef tmp7 = NULL;
455 LLVMValueRef res;
456 LLVMValueRef dst0[NUM_CHANNELS];
457
458 /* we can't handle indirect addressing into temp register file yet */
459 if (indirect_temp_reference(inst))
460 return FALSE;
461
462 assert(info->num_dst <= 1);
463 if(info->num_dst) {
464 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
465 dst0[chan_index] = bld->base.undef;
466 }
467 }
468
469 switch (inst->Instruction.Opcode) {
470 #if 0
471 case TGSI_OPCODE_ARL:
472 /* FIXME */
473 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
474 tmp0 = emit_fetch( bld, inst, 0, chan_index );
475 emit_flr(bld, 0, 0);
476 emit_f2it( bld, 0 );
477 dst0[chan_index] = tmp0;
478 }
479 break;
480 #endif
481
482 case TGSI_OPCODE_MOV:
483 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
484 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
485 }
486 break;
487
488 case TGSI_OPCODE_LIT:
489 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
490 dst0[CHAN_X] = bld->base.one;
491 }
492 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
493 src0 = emit_fetch( bld, inst, 0, CHAN_X );
494 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
495 }
496 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
497 /* XMM[1] = SrcReg[0].yyyy */
498 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
499 /* XMM[1] = max(XMM[1], 0) */
500 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
501 /* XMM[2] = SrcReg[0].wwww */
502 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
503 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
504 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
505 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
506 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
507 }
508 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
509 dst0[CHAN_W] = bld->base.one;
510 }
511 break;
512
513 case TGSI_OPCODE_RCP:
514 /* TGSI_OPCODE_RECIP */
515 src0 = emit_fetch( bld, inst, 0, CHAN_X );
516 res = lp_build_rcp(&bld->base, src0);
517 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
518 dst0[chan_index] = res;
519 }
520 break;
521
522 case TGSI_OPCODE_RSQ:
523 /* TGSI_OPCODE_RECIPSQRT */
524 src0 = emit_fetch( bld, inst, 0, CHAN_X );
525 src0 = lp_build_abs(&bld->base, src0);
526 res = lp_build_rsqrt(&bld->base, src0);
527 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
528 dst0[chan_index] = res;
529 }
530 break;
531
532 case TGSI_OPCODE_EXP:
533 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
534 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
535 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
536 LLVMValueRef *p_exp2_int_part = NULL;
537 LLVMValueRef *p_frac_part = NULL;
538 LLVMValueRef *p_exp2 = NULL;
539
540 src0 = emit_fetch( bld, inst, 0, CHAN_X );
541
542 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
543 p_exp2_int_part = &tmp0;
544 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
545 p_frac_part = &tmp1;
546 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
547 p_exp2 = &tmp2;
548
549 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
550
551 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
552 dst0[CHAN_X] = tmp0;
553 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
554 dst0[CHAN_Y] = tmp1;
555 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
556 dst0[CHAN_Z] = tmp2;
557 }
558 /* dst.w = 1.0 */
559 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
560 dst0[CHAN_W] = bld->base.one;
561 }
562 break;
563
564 case TGSI_OPCODE_LOG:
565 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
566 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
567 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
568 LLVMValueRef *p_floor_log2 = NULL;
569 LLVMValueRef *p_exp = NULL;
570 LLVMValueRef *p_log2 = NULL;
571
572 src0 = emit_fetch( bld, inst, 0, CHAN_X );
573 src0 = lp_build_abs( &bld->base, src0 );
574
575 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
576 p_floor_log2 = &tmp0;
577 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
578 p_exp = &tmp1;
579 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
580 p_log2 = &tmp2;
581
582 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
583
584 /* dst.x = floor(lg2(abs(src.x))) */
585 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
586 dst0[CHAN_X] = tmp0;
587 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
588 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
589 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
590 }
591 /* dst.z = lg2(abs(src.x)) */
592 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
593 dst0[CHAN_Z] = tmp2;
594 }
595 /* dst.w = 1.0 */
596 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
597 dst0[CHAN_W] = bld->base.one;
598 }
599 break;
600
601 case TGSI_OPCODE_MUL:
602 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
603 src0 = emit_fetch( bld, inst, 0, chan_index );
604 src1 = emit_fetch( bld, inst, 1, chan_index );
605 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
606 }
607 break;
608
609 case TGSI_OPCODE_ADD:
610 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
611 src0 = emit_fetch( bld, inst, 0, chan_index );
612 src1 = emit_fetch( bld, inst, 1, chan_index );
613 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
614 }
615 break;
616
617 case TGSI_OPCODE_DP3:
618 /* TGSI_OPCODE_DOT3 */
619 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
620 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
621 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
622 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
623 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
624 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
625 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
626 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
627 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
628 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
629 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
630 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
631 dst0[chan_index] = tmp0;
632 }
633 break;
634
635 case TGSI_OPCODE_DP4:
636 /* TGSI_OPCODE_DOT4 */
637 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
638 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
639 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
640 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
641 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
642 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
643 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
644 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
645 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
646 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
647 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
648 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
649 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
650 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
651 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
652 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
653 dst0[chan_index] = tmp0;
654 }
655 break;
656
657 case TGSI_OPCODE_DST:
658 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
659 dst0[CHAN_X] = bld->base.one;
660 }
661 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
662 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
663 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
664 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
665 }
666 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
667 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
668 }
669 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
670 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
671 }
672 break;
673
674 case TGSI_OPCODE_MIN:
675 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
676 src0 = emit_fetch( bld, inst, 0, chan_index );
677 src1 = emit_fetch( bld, inst, 1, chan_index );
678 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
679 }
680 break;
681
682 case TGSI_OPCODE_MAX:
683 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
684 src0 = emit_fetch( bld, inst, 0, chan_index );
685 src1 = emit_fetch( bld, inst, 1, chan_index );
686 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
687 }
688 break;
689
690 case TGSI_OPCODE_SLT:
691 /* TGSI_OPCODE_SETLT */
692 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
693 src0 = emit_fetch( bld, inst, 0, chan_index );
694 src1 = emit_fetch( bld, inst, 1, chan_index );
695 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
696 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
697 }
698 break;
699
700 case TGSI_OPCODE_SGE:
701 /* TGSI_OPCODE_SETGE */
702 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
703 src0 = emit_fetch( bld, inst, 0, chan_index );
704 src1 = emit_fetch( bld, inst, 1, chan_index );
705 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
706 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
707 }
708 break;
709
710 case TGSI_OPCODE_MAD:
711 /* TGSI_OPCODE_MADD */
712 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
713 tmp0 = emit_fetch( bld, inst, 0, chan_index );
714 tmp1 = emit_fetch( bld, inst, 1, chan_index );
715 tmp2 = emit_fetch( bld, inst, 2, chan_index );
716 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
717 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
718 dst0[chan_index] = tmp0;
719 }
720 break;
721
722 case TGSI_OPCODE_SUB:
723 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
724 tmp0 = emit_fetch( bld, inst, 0, chan_index );
725 tmp1 = emit_fetch( bld, inst, 1, chan_index );
726 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
727 }
728 break;
729
730 case TGSI_OPCODE_LRP:
731 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
732 src0 = emit_fetch( bld, inst, 0, chan_index );
733 src1 = emit_fetch( bld, inst, 1, chan_index );
734 src2 = emit_fetch( bld, inst, 2, chan_index );
735 tmp0 = lp_build_sub( &bld->base, src1, src2 );
736 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
737 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
738 }
739 break;
740
741 case TGSI_OPCODE_CND:
742 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
743 src0 = emit_fetch( bld, inst, 0, chan_index );
744 src1 = emit_fetch( bld, inst, 1, chan_index );
745 src2 = emit_fetch( bld, inst, 2, chan_index );
746 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
747 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
748 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
749 }
750 break;
751
752 case TGSI_OPCODE_DP2A:
753 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
754 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
755 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
756 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
757 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
758 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
759 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
760 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
761 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
762 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
763 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
764 }
765 break;
766
767 case TGSI_OPCODE_FRC:
768 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
769 src0 = emit_fetch( bld, inst, 0, chan_index );
770 tmp0 = lp_build_floor(&bld->base, src0);
771 tmp0 = lp_build_sub(&bld->base, tmp0, src0);
772 dst0[chan_index] = tmp0;
773 }
774 break;
775
776 case TGSI_OPCODE_CLAMP:
777 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
778 tmp0 = emit_fetch( bld, inst, 0, chan_index );
779 src1 = emit_fetch( bld, inst, 1, chan_index );
780 src2 = emit_fetch( bld, inst, 2, chan_index );
781 tmp0 = lp_build_max(&bld->base, tmp0, src1);
782 tmp0 = lp_build_min(&bld->base, tmp0, src2);
783 dst0[chan_index] = tmp0;
784 }
785 break;
786
787 case TGSI_OPCODE_FLR:
788 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
789 tmp0 = emit_fetch( bld, inst, 0, chan_index );
790 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
791 }
792 break;
793
794 case TGSI_OPCODE_ROUND:
795 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
796 tmp0 = emit_fetch( bld, inst, 0, chan_index );
797 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
798 }
799 break;
800
801 case TGSI_OPCODE_EX2: {
802 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
803 tmp0 = lp_build_exp2( &bld->base, tmp0);
804 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
805 dst0[chan_index] = tmp0;
806 }
807 break;
808 }
809
810 case TGSI_OPCODE_LG2:
811 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
812 tmp0 = lp_build_log2( &bld->base, tmp0);
813 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
814 dst0[chan_index] = tmp0;
815 }
816 break;
817
818 case TGSI_OPCODE_POW:
819 src0 = emit_fetch( bld, inst, 0, CHAN_X );
820 src1 = emit_fetch( bld, inst, 1, CHAN_X );
821 res = lp_build_pow( &bld->base, src0, src1 );
822 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
823 dst0[chan_index] = res;
824 }
825 break;
826
827 case TGSI_OPCODE_XPD:
828 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
829 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
830 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
831 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
832 }
833 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
834 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
835 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
836 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
837 }
838 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
839 tmp2 = tmp0;
840 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
841 tmp5 = tmp3;
842 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
843 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
844 dst0[CHAN_X] = tmp2;
845 }
846 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
847 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
848 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
849 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
850 }
851 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
852 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
853 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
854 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
855 dst0[CHAN_Y] = tmp3;
856 }
857 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
858 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
859 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
860 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
861 dst0[CHAN_Z] = tmp5;
862 }
863 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
864 dst0[CHAN_W] = bld->base.one;
865 }
866 break;
867
868 case TGSI_OPCODE_ABS:
869 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
870 tmp0 = emit_fetch( bld, inst, 0, chan_index );
871 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
872 }
873 break;
874
875 case TGSI_OPCODE_RCC:
876 /* deprecated? */
877 assert(0);
878 return 0;
879
880 case TGSI_OPCODE_DPH:
881 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
882 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
883 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
884 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
885 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
886 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
887 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
888 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
889 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
890 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
891 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
892 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
893 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
894 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
895 dst0[chan_index] = tmp0;
896 }
897 break;
898
899 case TGSI_OPCODE_COS:
900 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
901 tmp0 = lp_build_cos( &bld->base, tmp0 );
902 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
903 dst0[chan_index] = tmp0;
904 }
905 break;
906
907 case TGSI_OPCODE_DDX:
908 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
909 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
910 }
911 break;
912
913 case TGSI_OPCODE_DDY:
914 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
915 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
916 }
917 break;
918
919 case TGSI_OPCODE_KILP:
920 /* predicated kill */
921 /* FIXME */
922 return 0;
923 break;
924
925 case TGSI_OPCODE_KIL:
926 /* conditional kill */
927 emit_kil( bld, inst );
928 break;
929
930 case TGSI_OPCODE_PK2H:
931 return 0;
932 break;
933
934 case TGSI_OPCODE_PK2US:
935 return 0;
936 break;
937
938 case TGSI_OPCODE_PK4B:
939 return 0;
940 break;
941
942 case TGSI_OPCODE_PK4UB:
943 return 0;
944 break;
945
946 case TGSI_OPCODE_RFL:
947 return 0;
948 break;
949
950 case TGSI_OPCODE_SEQ:
951 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
952 src0 = emit_fetch( bld, inst, 0, chan_index );
953 src1 = emit_fetch( bld, inst, 1, chan_index );
954 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
955 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
956 }
957 break;
958
959 case TGSI_OPCODE_SFL:
960 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
961 dst0[chan_index] = bld->base.zero;
962 }
963 break;
964
965 case TGSI_OPCODE_SGT:
966 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
967 src0 = emit_fetch( bld, inst, 0, chan_index );
968 src1 = emit_fetch( bld, inst, 1, chan_index );
969 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
970 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
971 }
972 break;
973
974 case TGSI_OPCODE_SIN:
975 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
976 tmp0 = lp_build_sin( &bld->base, tmp0 );
977 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
978 dst0[chan_index] = tmp0;
979 }
980 break;
981
982 case TGSI_OPCODE_SLE:
983 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
984 src0 = emit_fetch( bld, inst, 0, chan_index );
985 src1 = emit_fetch( bld, inst, 1, chan_index );
986 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
987 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
988 }
989 break;
990
991 case TGSI_OPCODE_SNE:
992 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
993 src0 = emit_fetch( bld, inst, 0, chan_index );
994 src1 = emit_fetch( bld, inst, 1, chan_index );
995 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
996 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
997 }
998 break;
999
1000 case TGSI_OPCODE_STR:
1001 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1002 dst0[chan_index] = bld->base.one;
1003 }
1004 break;
1005
1006 case TGSI_OPCODE_TEX:
1007 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1008 break;
1009
1010 case TGSI_OPCODE_TXD:
1011 /* FIXME */
1012 return 0;
1013 break;
1014
1015 case TGSI_OPCODE_UP2H:
1016 /* deprecated */
1017 assert (0);
1018 return 0;
1019 break;
1020
1021 case TGSI_OPCODE_UP2US:
1022 /* deprecated */
1023 assert(0);
1024 return 0;
1025 break;
1026
1027 case TGSI_OPCODE_UP4B:
1028 /* deprecated */
1029 assert(0);
1030 return 0;
1031 break;
1032
1033 case TGSI_OPCODE_UP4UB:
1034 /* deprecated */
1035 assert(0);
1036 return 0;
1037 break;
1038
1039 case TGSI_OPCODE_X2D:
1040 /* deprecated? */
1041 assert(0);
1042 return 0;
1043 break;
1044
1045 case TGSI_OPCODE_ARA:
1046 /* deprecated */
1047 assert(0);
1048 return 0;
1049 break;
1050
1051 #if 0
1052 case TGSI_OPCODE_ARR:
1053 /* FIXME */
1054 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1055 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1056 emit_rnd( bld, 0, 0 );
1057 emit_f2it( bld, 0 );
1058 dst0[chan_index] = tmp0;
1059 }
1060 break;
1061 #endif
1062
1063 case TGSI_OPCODE_BRA:
1064 /* deprecated */
1065 assert(0);
1066 return 0;
1067 break;
1068
1069 case TGSI_OPCODE_CAL:
1070 /* FIXME */
1071 return 0;
1072 break;
1073
1074 case TGSI_OPCODE_RET:
1075 /* FIXME */
1076 return 0;
1077 break;
1078
1079 case TGSI_OPCODE_END:
1080 break;
1081
1082 case TGSI_OPCODE_SSG:
1083 /* TGSI_OPCODE_SGN */
1084 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1085 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1086 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1087 }
1088 break;
1089
1090 case TGSI_OPCODE_CMP:
1091 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1092 src0 = emit_fetch( bld, inst, 0, chan_index );
1093 src1 = emit_fetch( bld, inst, 1, chan_index );
1094 src2 = emit_fetch( bld, inst, 2, chan_index );
1095 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1096 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1097 }
1098 break;
1099
1100 case TGSI_OPCODE_SCS:
1101 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1102 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1103 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1104 }
1105 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1106 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1107 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1108 }
1109 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1110 dst0[CHAN_Z] = bld->base.zero;
1111 }
1112 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1113 dst0[CHAN_W] = bld->base.one;
1114 }
1115 break;
1116
1117 case TGSI_OPCODE_TXB:
1118 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1119 break;
1120
1121 case TGSI_OPCODE_NRM:
1122 /* fall-through */
1123 case TGSI_OPCODE_NRM4:
1124 /* 3 or 4-component normalization */
1125 {
1126 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1127
1128 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1129 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1130 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1131 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1132
1133 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1134
1135 /* xmm4 = src.x */
1136 /* xmm0 = src.x * src.x */
1137 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1138 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1139 tmp4 = tmp0;
1140 }
1141 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1142
1143 /* xmm5 = src.y */
1144 /* xmm0 = xmm0 + src.y * src.y */
1145 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1146 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1147 tmp5 = tmp1;
1148 }
1149 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1150 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1151
1152 /* xmm6 = src.z */
1153 /* xmm0 = xmm0 + src.z * src.z */
1154 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1155 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1156 tmp6 = tmp1;
1157 }
1158 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1159 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1160
1161 if (dims == 4) {
1162 /* xmm7 = src.w */
1163 /* xmm0 = xmm0 + src.w * src.w */
1164 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1165 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1166 tmp7 = tmp1;
1167 }
1168 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1169 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1170 }
1171
1172 /* xmm1 = 1 / sqrt(xmm0) */
1173 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1174
1175 /* dst.x = xmm1 * src.x */
1176 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1177 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1178 }
1179
1180 /* dst.y = xmm1 * src.y */
1181 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1182 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1183 }
1184
1185 /* dst.z = xmm1 * src.z */
1186 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1187 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1188 }
1189
1190 /* dst.w = xmm1 * src.w */
1191 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1192 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1193 }
1194 }
1195
1196 /* dst.w = 1.0 */
1197 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1198 dst0[CHAN_W] = bld->base.one;
1199 }
1200 }
1201 break;
1202
1203 case TGSI_OPCODE_DIV:
1204 /* deprecated */
1205 assert( 0 );
1206 return 0;
1207 break;
1208
1209 case TGSI_OPCODE_DP2:
1210 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1211 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1212 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1213 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1214 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1215 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1216 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1217 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1218 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1219 }
1220 break;
1221
1222 case TGSI_OPCODE_TXL:
1223 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1224 break;
1225
1226 case TGSI_OPCODE_TXP:
1227 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1228 break;
1229
1230 case TGSI_OPCODE_BRK:
1231 /* FIXME */
1232 return 0;
1233 break;
1234
1235 case TGSI_OPCODE_IF:
1236 /* FIXME */
1237 return 0;
1238 break;
1239
1240 case TGSI_OPCODE_BGNFOR:
1241 /* deprecated */
1242 assert(0);
1243 return 0;
1244 break;
1245
1246 case TGSI_OPCODE_REP:
1247 /* deprecated */
1248 assert(0);
1249 return 0;
1250 break;
1251
1252 case TGSI_OPCODE_ELSE:
1253 /* FIXME */
1254 return 0;
1255 break;
1256
1257 case TGSI_OPCODE_ENDIF:
1258 /* FIXME */
1259 return 0;
1260 break;
1261
1262 case TGSI_OPCODE_ENDFOR:
1263 /* deprecated */
1264 assert(0);
1265 return 0;
1266 break;
1267
1268 case TGSI_OPCODE_ENDREP:
1269 /* deprecated */
1270 assert(0);
1271 return 0;
1272 break;
1273
1274 case TGSI_OPCODE_PUSHA:
1275 /* deprecated? */
1276 assert(0);
1277 return 0;
1278 break;
1279
1280 case TGSI_OPCODE_POPA:
1281 /* deprecated? */
1282 assert(0);
1283 return 0;
1284 break;
1285
1286 case TGSI_OPCODE_CEIL:
1287 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1288 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1289 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1290 }
1291 break;
1292
1293 case TGSI_OPCODE_I2F:
1294 /* deprecated? */
1295 assert(0);
1296 return 0;
1297 break;
1298
1299 case TGSI_OPCODE_NOT:
1300 /* deprecated? */
1301 assert(0);
1302 return 0;
1303 break;
1304
1305 case TGSI_OPCODE_TRUNC:
1306 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1307 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1308 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1309 }
1310 break;
1311
1312 case TGSI_OPCODE_SHL:
1313 /* deprecated? */
1314 assert(0);
1315 return 0;
1316 break;
1317
1318 case TGSI_OPCODE_SHR:
1319 /* deprecated? */
1320 assert(0);
1321 return 0;
1322 break;
1323
1324 case TGSI_OPCODE_AND:
1325 /* deprecated? */
1326 assert(0);
1327 return 0;
1328 break;
1329
1330 case TGSI_OPCODE_OR:
1331 /* deprecated? */
1332 assert(0);
1333 return 0;
1334 break;
1335
1336 case TGSI_OPCODE_MOD:
1337 /* deprecated? */
1338 assert(0);
1339 return 0;
1340 break;
1341
1342 case TGSI_OPCODE_XOR:
1343 /* deprecated? */
1344 assert(0);
1345 return 0;
1346 break;
1347
1348 case TGSI_OPCODE_SAD:
1349 /* deprecated? */
1350 assert(0);
1351 return 0;
1352 break;
1353
1354 case TGSI_OPCODE_TXF:
1355 /* deprecated? */
1356 assert(0);
1357 return 0;
1358 break;
1359
1360 case TGSI_OPCODE_TXQ:
1361 /* deprecated? */
1362 assert(0);
1363 return 0;
1364 break;
1365
1366 case TGSI_OPCODE_CONT:
1367 /* deprecated? */
1368 assert(0);
1369 return 0;
1370 break;
1371
1372 case TGSI_OPCODE_EMIT:
1373 return 0;
1374 break;
1375
1376 case TGSI_OPCODE_ENDPRIM:
1377 return 0;
1378 break;
1379
1380 case TGSI_OPCODE_NOP:
1381 break;
1382
1383 default:
1384 return 0;
1385 }
1386
1387 if(info->num_dst) {
1388 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1389 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1390 }
1391 }
1392
1393 return 1;
1394 }
1395
1396
1397 void
1398 lp_build_tgsi_soa(LLVMBuilderRef builder,
1399 const struct tgsi_token *tokens,
1400 struct lp_type type,
1401 struct lp_build_mask_context *mask,
1402 LLVMValueRef consts_ptr,
1403 const LLVMValueRef *pos,
1404 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1405 LLVMValueRef (*outputs)[NUM_CHANNELS],
1406 struct lp_build_sampler_soa *sampler)
1407 {
1408 struct lp_build_tgsi_soa_context bld;
1409 struct tgsi_parse_context parse;
1410 uint num_immediates = 0;
1411 unsigned i;
1412
1413 /* Setup build context */
1414 memset(&bld, 0, sizeof bld);
1415 lp_build_context_init(&bld.base, builder, type);
1416 bld.mask = mask;
1417 bld.pos = pos;
1418 bld.inputs = inputs;
1419 bld.outputs = outputs;
1420 bld.consts_ptr = consts_ptr;
1421 bld.sampler = sampler;
1422
1423 tgsi_parse_init( &parse, tokens );
1424
1425 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1426 tgsi_parse_token( &parse );
1427
1428 switch( parse.FullToken.Token.Type ) {
1429 case TGSI_TOKEN_TYPE_DECLARATION:
1430 /* Inputs already interpolated */
1431 break;
1432
1433 case TGSI_TOKEN_TYPE_INSTRUCTION:
1434 {
1435 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1436 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1437 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1438 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1439 info ? info->mnemonic : "<invalid>");
1440 }
1441
1442 break;
1443
1444 case TGSI_TOKEN_TYPE_IMMEDIATE:
1445 /* simply copy the immediate values into the next immediates[] slot */
1446 {
1447 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1448 assert(size <= 4);
1449 assert(num_immediates < LP_MAX_IMMEDIATES);
1450 for( i = 0; i < size; ++i )
1451 bld.immediates[num_immediates][i] =
1452 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1453 for( i = size; i < 4; ++i )
1454 bld.immediates[num_immediates][i] = bld.base.undef;
1455 num_immediates++;
1456 }
1457 break;
1458
1459 default:
1460 assert( 0 );
1461 }
1462 }
1463
1464 tgsi_parse_free( &parse );
1465 }
1466