Merge branch 'mesa_7_6_branch'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/** Number of vec4 slots in the temporary register table of the context. */
#define LP_MAX_TEMPS 256

/** Number of vec4 slots in the immediate table of the context. */
#define LP_MAX_IMMEDIATES 256


/**
 * Non-zero when channel CHAN is set in the write mask of the first
 * destination register of instruction INST.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Statement prefix: guards the statement (or braced block) that follows
 * with IS_DST0_CHANNEL_ENABLED.  Expands to a bare `if`, so an `else`
 * placed after it would bind to this `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header that walks CHAN over 0 .. NUM_CHANNELS-1.
 * CHAN must be an assignable integer variable declared by the caller.
 */
#define FOR_EACH_CHANNEL( CHAN ) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/**
 * Loop header that visits only the channels enabled in the write mask of
 * the first destination register of INST.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )


/* Channel indices of a four-component register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Slot indices used by the swizzle tables below.
 * NOTE(review): presumably the four pixels of a 2x2 quad packed in one
 * vector -- confirm against the code that builds the input vectors.
 */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
85
86
87 struct lp_build_tgsi_soa_context
88 {
89 struct lp_build_context base;
90
91 LLVMValueRef consts_ptr;
92 const LLVMValueRef *pos;
93 const LLVMValueRef (*inputs)[NUM_CHANNELS];
94 LLVMValueRef (*outputs)[NUM_CHANNELS];
95
96 struct lp_build_sampler_soa *sampler;
97
98 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
99 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
100
101 struct lp_build_mask_context *mask;
102 };
103
104
105 static const unsigned char
106 swizzle_left[4] = {
107 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
108 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
109 };
110
111 static const unsigned char
112 swizzle_right[4] = {
113 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
114 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
115 };
116
117 static const unsigned char
118 swizzle_top[4] = {
119 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
120 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
121 };
122
123 static const unsigned char
124 swizzle_bottom[4] = {
125 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
126 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
127 };
128
129
130 static LLVMValueRef
131 emit_ddx(struct lp_build_tgsi_soa_context *bld,
132 LLVMValueRef src)
133 {
134 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
135 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
136 return lp_build_sub(&bld->base, src_right, src_left);
137 }
138
139
140 static LLVMValueRef
141 emit_ddy(struct lp_build_tgsi_soa_context *bld,
142 LLVMValueRef src)
143 {
144 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
145 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
146 return lp_build_sub(&bld->base, src_top, src_bottom);
147 }
148
149
150 /**
151 * Register fetch.
152 */
153 static LLVMValueRef
154 emit_fetch(
155 struct lp_build_tgsi_soa_context *bld,
156 const struct tgsi_full_instruction *inst,
157 unsigned index,
158 const unsigned chan_index )
159 {
160 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
161 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
162 LLVMValueRef res;
163
164 switch (swizzle) {
165 case TGSI_EXTSWIZZLE_X:
166 case TGSI_EXTSWIZZLE_Y:
167 case TGSI_EXTSWIZZLE_Z:
168 case TGSI_EXTSWIZZLE_W:
169
170 switch (reg->SrcRegister.File) {
171 case TGSI_FILE_CONSTANT: {
172 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
173 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
174 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
175 res = lp_build_broadcast_scalar(&bld->base, scalar);
176 break;
177 }
178
179 case TGSI_FILE_IMMEDIATE:
180 res = bld->immediates[reg->SrcRegister.Index][swizzle];
181 assert(res);
182 break;
183
184 case TGSI_FILE_INPUT:
185 res = bld->inputs[reg->SrcRegister.Index][swizzle];
186 assert(res);
187 break;
188
189 case TGSI_FILE_TEMPORARY:
190 res = bld->temps[reg->SrcRegister.Index][swizzle];
191 if(!res)
192 return bld->base.undef;
193 break;
194
195 default:
196 assert( 0 );
197 return bld->base.undef;
198 }
199 break;
200
201 case TGSI_EXTSWIZZLE_ZERO:
202 res = bld->base.zero;
203 break;
204
205 case TGSI_EXTSWIZZLE_ONE:
206 res = bld->base.one;
207 break;
208
209 default:
210 assert( 0 );
211 return bld->base.undef;
212 }
213
214 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
215 case TGSI_UTIL_SIGN_CLEAR:
216 res = lp_build_abs( &bld->base, res );
217 break;
218
219 case TGSI_UTIL_SIGN_SET:
220 /* TODO: Use bitwese OR for floating point */
221 res = lp_build_abs( &bld->base, res );
222 res = LLVMBuildNeg( bld->base.builder, res, "" );
223 break;
224
225 case TGSI_UTIL_SIGN_TOGGLE:
226 res = LLVMBuildNeg( bld->base.builder, res, "" );
227 break;
228
229 case TGSI_UTIL_SIGN_KEEP:
230 break;
231 }
232
233 return res;
234 }
235
236
237 /**
238 * Register fetch with derivatives.
239 */
240 static void
241 emit_fetch_deriv(
242 struct lp_build_tgsi_soa_context *bld,
243 const struct tgsi_full_instruction *inst,
244 unsigned index,
245 const unsigned chan_index,
246 LLVMValueRef *res,
247 LLVMValueRef *ddx,
248 LLVMValueRef *ddy)
249 {
250 LLVMValueRef src;
251
252 src = emit_fetch(bld, inst, index, chan_index);
253
254 if(res)
255 *res = src;
256
257 /* TODO: use interpolation coeffs for inputs */
258
259 if(ddx)
260 *ddx = emit_ddx(bld, src);
261
262 if(ddy)
263 *ddy = emit_ddy(bld, src);
264 }
265
266
267 /**
268 * Register store.
269 */
270 static void
271 emit_store(
272 struct lp_build_tgsi_soa_context *bld,
273 const struct tgsi_full_instruction *inst,
274 unsigned index,
275 unsigned chan_index,
276 LLVMValueRef value)
277 {
278 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
279
280 switch( inst->Instruction.Saturate ) {
281 case TGSI_SAT_NONE:
282 break;
283
284 case TGSI_SAT_ZERO_ONE:
285 value = lp_build_max(&bld->base, value, bld->base.zero);
286 value = lp_build_min(&bld->base, value, bld->base.one);
287 break;
288
289 case TGSI_SAT_MINUS_PLUS_ONE:
290 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
291 value = lp_build_min(&bld->base, value, bld->base.one);
292 break;
293
294 default:
295 assert(0);
296 }
297
298 switch( reg->DstRegister.File ) {
299 case TGSI_FILE_OUTPUT:
300 bld->outputs[reg->DstRegister.Index][chan_index] = value;
301 break;
302
303 case TGSI_FILE_TEMPORARY:
304 bld->temps[reg->DstRegister.Index][chan_index] = value;
305 break;
306
307 case TGSI_FILE_ADDRESS:
308 /* FIXME */
309 assert(0);
310 break;
311
312 default:
313 assert( 0 );
314 }
315 }
316
317
318 /**
319 * High-level instruction translators.
320 */
321
322
323 static void
324 emit_tex( struct lp_build_tgsi_soa_context *bld,
325 const struct tgsi_full_instruction *inst,
326 boolean apply_lodbias,
327 boolean projected,
328 LLVMValueRef *texel)
329 {
330 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
331 LLVMValueRef lodbias;
332 LLVMValueRef oow;
333 LLVMValueRef coords[3];
334 unsigned num_coords;
335 unsigned i;
336
337 switch (inst->InstructionExtTexture.Texture) {
338 case TGSI_TEXTURE_1D:
339 num_coords = 1;
340 break;
341 case TGSI_TEXTURE_2D:
342 case TGSI_TEXTURE_RECT:
343 num_coords = 2;
344 break;
345 case TGSI_TEXTURE_SHADOW1D:
346 case TGSI_TEXTURE_SHADOW2D:
347 case TGSI_TEXTURE_SHADOWRECT:
348 case TGSI_TEXTURE_3D:
349 case TGSI_TEXTURE_CUBE:
350 num_coords = 3;
351 break;
352 default:
353 assert(0);
354 return;
355 }
356
357 if(apply_lodbias)
358 lodbias = emit_fetch( bld, inst, 0, 3 );
359 else
360 lodbias = bld->base.zero;
361
362 if (projected) {
363 oow = emit_fetch( bld, inst, 0, 3 );
364 oow = lp_build_rcp(&bld->base, oow);
365 }
366
367 for (i = 0; i < num_coords; i++) {
368 coords[i] = emit_fetch( bld, inst, 0, i );
369 if (projected)
370 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
371 }
372
373 bld->sampler->emit_fetch_texel(bld->sampler,
374 bld->base.builder,
375 bld->base.type,
376 unit, num_coords, coords, lodbias,
377 texel);
378 }
379
380
381 static void
382 emit_kil(
383 struct lp_build_tgsi_soa_context *bld,
384 const struct tgsi_full_instruction *inst )
385 {
386 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
387 LLVMValueRef terms[NUM_CHANNELS];
388 LLVMValueRef mask;
389 unsigned chan_index;
390
391 memset(&terms, 0, sizeof terms);
392
393 FOR_EACH_CHANNEL( chan_index ) {
394 unsigned swizzle;
395
396 /* Unswizzle channel */
397 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
398
399 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
400 * not to be tested. */
401 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
402 continue;
403
404 /* Check if the component has not been already tested. */
405 assert(swizzle < NUM_CHANNELS);
406 if( !terms[swizzle] )
407 /* TODO: change the comparison operator instead of setting the sign */
408 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
409 }
410
411 mask = NULL;
412 FOR_EACH_CHANNEL( chan_index ) {
413 if(terms[chan_index]) {
414 LLVMValueRef chan_mask;
415
416 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
417
418 if(mask)
419 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
420 else
421 mask = chan_mask;
422 }
423 }
424
425 if(mask)
426 lp_build_mask_update(bld->mask, mask);
427 }
428
429
430 /**
431 * Check if inst src/dest regs use indirect addressing into temporary
432 * register file.
433 */
434 static boolean
435 indirect_temp_reference(const struct tgsi_full_instruction *inst)
436 {
437 uint i;
438 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
439 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
440 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
441 reg->SrcRegister.Indirect)
442 return TRUE;
443 }
444 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
445 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
446 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
447 reg->DstRegister.Indirect)
448 return TRUE;
449 }
450 return FALSE;
451 }
452
453
454 static int
455 emit_instruction(
456 struct lp_build_tgsi_soa_context *bld,
457 const struct tgsi_full_instruction *inst,
458 const struct tgsi_opcode_info *info)
459 {
460 unsigned chan_index;
461 LLVMValueRef src0, src1, src2;
462 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
463 LLVMValueRef res;
464 LLVMValueRef dst0[NUM_CHANNELS];
465
466 /* we can't handle indirect addressing into temp register file yet */
467 if (indirect_temp_reference(inst))
468 return FALSE;
469
470 assert(info->num_dst <= 1);
471 if(info->num_dst) {
472 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
473 dst0[chan_index] = bld->base.undef;
474 }
475 }
476
477 switch (inst->Instruction.Opcode) {
478 #if 0
479 case TGSI_OPCODE_ARL:
480 /* FIXME */
481 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
482 tmp0 = emit_fetch( bld, inst, 0, chan_index );
483 emit_flr(bld, 0, 0);
484 emit_f2it( bld, 0 );
485 dst0[chan_index] = tmp0;
486 }
487 break;
488 #endif
489
490 case TGSI_OPCODE_MOV:
491 case TGSI_OPCODE_SWZ:
492 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
493 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
494 }
495 break;
496
497 case TGSI_OPCODE_LIT:
498 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
499 dst0[CHAN_X] = bld->base.one;
500 }
501 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
502 src0 = emit_fetch( bld, inst, 0, CHAN_X );
503 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
504 }
505 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
506 /* XMM[1] = SrcReg[0].yyyy */
507 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
508 /* XMM[1] = max(XMM[1], 0) */
509 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
510 /* XMM[2] = SrcReg[0].wwww */
511 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
512 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
513 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
514 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
515 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
516 }
517 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
518 dst0[CHAN_W] = bld->base.one;
519 }
520 break;
521
522 case TGSI_OPCODE_RCP:
523 /* TGSI_OPCODE_RECIP */
524 src0 = emit_fetch( bld, inst, 0, CHAN_X );
525 res = lp_build_rcp(&bld->base, src0);
526 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
527 dst0[chan_index] = res;
528 }
529 break;
530
531 case TGSI_OPCODE_RSQ:
532 /* TGSI_OPCODE_RECIPSQRT */
533 src0 = emit_fetch( bld, inst, 0, CHAN_X );
534 src0 = lp_build_abs(&bld->base, src0);
535 res = lp_build_rsqrt(&bld->base, src0);
536 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
537 dst0[chan_index] = res;
538 }
539 break;
540
541 case TGSI_OPCODE_EXP:
542 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
543 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
544 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
545 LLVMValueRef *p_exp2_int_part = NULL;
546 LLVMValueRef *p_frac_part = NULL;
547 LLVMValueRef *p_exp2 = NULL;
548
549 src0 = emit_fetch( bld, inst, 0, CHAN_X );
550
551 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
552 p_exp2_int_part = &tmp0;
553 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
554 p_frac_part = &tmp1;
555 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
556 p_exp2 = &tmp2;
557
558 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
559
560 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
561 dst0[CHAN_X] = tmp0;
562 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
563 dst0[CHAN_Y] = tmp1;
564 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
565 dst0[CHAN_Z] = tmp2;
566 }
567 /* dst.w = 1.0 */
568 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
569 dst0[CHAN_W] = bld->base.one;
570 }
571 break;
572
573 case TGSI_OPCODE_LOG:
574 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
575 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
576 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
577 LLVMValueRef *p_floor_log2;
578 LLVMValueRef *p_exp;
579 LLVMValueRef *p_log2;
580
581 src0 = emit_fetch( bld, inst, 0, CHAN_X );
582 src0 = lp_build_abs( &bld->base, src0 );
583
584 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
585 p_floor_log2 = &tmp0;
586 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
587 p_exp = &tmp1;
588 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
589 p_log2 = &tmp2;
590
591 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
592
593 /* dst.x = floor(lg2(abs(src.x))) */
594 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
595 dst0[CHAN_X] = tmp0;
596 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
597 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
598 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
599 }
600 /* dst.z = lg2(abs(src.x)) */
601 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
602 dst0[CHAN_Z] = tmp2;
603 }
604 /* dst.w = 1.0 */
605 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
606 dst0[CHAN_W] = bld->base.one;
607 }
608 break;
609
610 case TGSI_OPCODE_MUL:
611 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
612 src0 = emit_fetch( bld, inst, 0, chan_index );
613 src1 = emit_fetch( bld, inst, 1, chan_index );
614 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
615 }
616 break;
617
618 case TGSI_OPCODE_ADD:
619 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
620 src0 = emit_fetch( bld, inst, 0, chan_index );
621 src1 = emit_fetch( bld, inst, 1, chan_index );
622 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
623 }
624 break;
625
626 case TGSI_OPCODE_DP3:
627 /* TGSI_OPCODE_DOT3 */
628 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
629 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
630 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
631 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
632 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
633 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
634 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
635 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
636 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
637 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
638 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
639 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
640 dst0[chan_index] = tmp0;
641 }
642 break;
643
644 case TGSI_OPCODE_DP4:
645 /* TGSI_OPCODE_DOT4 */
646 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
647 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
648 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
649 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
650 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
651 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
652 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
653 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
654 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
655 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
656 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
657 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
658 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
659 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
660 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
661 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
662 dst0[chan_index] = tmp0;
663 }
664 break;
665
666 case TGSI_OPCODE_DST:
667 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
668 dst0[CHAN_X] = bld->base.one;
669 }
670 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
671 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
672 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
673 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
674 }
675 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
676 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
677 }
678 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
679 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
680 }
681 break;
682
683 case TGSI_OPCODE_MIN:
684 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
685 src0 = emit_fetch( bld, inst, 0, chan_index );
686 src1 = emit_fetch( bld, inst, 1, chan_index );
687 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
688 }
689 break;
690
691 case TGSI_OPCODE_MAX:
692 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
693 src0 = emit_fetch( bld, inst, 0, chan_index );
694 src1 = emit_fetch( bld, inst, 1, chan_index );
695 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
696 }
697 break;
698
699 case TGSI_OPCODE_SLT:
700 /* TGSI_OPCODE_SETLT */
701 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
702 src0 = emit_fetch( bld, inst, 0, chan_index );
703 src1 = emit_fetch( bld, inst, 1, chan_index );
704 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
705 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
706 }
707 break;
708
709 case TGSI_OPCODE_SGE:
710 /* TGSI_OPCODE_SETGE */
711 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
712 src0 = emit_fetch( bld, inst, 0, chan_index );
713 src1 = emit_fetch( bld, inst, 1, chan_index );
714 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
715 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
716 }
717 break;
718
719 case TGSI_OPCODE_MAD:
720 /* TGSI_OPCODE_MADD */
721 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
722 tmp0 = emit_fetch( bld, inst, 0, chan_index );
723 tmp1 = emit_fetch( bld, inst, 1, chan_index );
724 tmp2 = emit_fetch( bld, inst, 2, chan_index );
725 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
726 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
727 dst0[chan_index] = tmp0;
728 }
729 break;
730
731 case TGSI_OPCODE_SUB:
732 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
733 tmp0 = emit_fetch( bld, inst, 0, chan_index );
734 tmp1 = emit_fetch( bld, inst, 1, chan_index );
735 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
736 }
737 break;
738
739 case TGSI_OPCODE_LRP:
740 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
741 src0 = emit_fetch( bld, inst, 0, chan_index );
742 src1 = emit_fetch( bld, inst, 1, chan_index );
743 src2 = emit_fetch( bld, inst, 2, chan_index );
744 tmp0 = lp_build_sub( &bld->base, src1, src2 );
745 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
746 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
747 }
748 break;
749
750 case TGSI_OPCODE_CND:
751 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
752 src0 = emit_fetch( bld, inst, 0, chan_index );
753 src1 = emit_fetch( bld, inst, 1, chan_index );
754 src2 = emit_fetch( bld, inst, 2, chan_index );
755 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
756 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
757 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
758 }
759 break;
760
761 case TGSI_OPCODE_DP2A:
762 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
763 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
764 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
765 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
766 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
767 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
768 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
769 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
770 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
771 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
772 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
773 }
774 break;
775
776 case TGSI_OPCODE_FRC:
777 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
778 src0 = emit_fetch( bld, inst, 0, chan_index );
779 tmp0 = lp_build_floor(&bld->base, src0);
780 tmp0 = lp_build_sub(&bld->base, tmp0, src0);
781 dst0[chan_index] = tmp0;
782 }
783 break;
784
785 case TGSI_OPCODE_CLAMP:
786 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
787 tmp0 = emit_fetch( bld, inst, 0, chan_index );
788 src1 = emit_fetch( bld, inst, 1, chan_index );
789 src2 = emit_fetch( bld, inst, 2, chan_index );
790 tmp0 = lp_build_max(&bld->base, tmp0, src1);
791 tmp0 = lp_build_min(&bld->base, tmp0, src2);
792 dst0[chan_index] = tmp0;
793 }
794 break;
795
796 case TGSI_OPCODE_FLR:
797 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
798 tmp0 = emit_fetch( bld, inst, 0, chan_index );
799 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
800 }
801 break;
802
803 case TGSI_OPCODE_ROUND:
804 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
805 tmp0 = emit_fetch( bld, inst, 0, chan_index );
806 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
807 }
808 break;
809
810 case TGSI_OPCODE_EX2: {
811 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
812 tmp0 = lp_build_exp2( &bld->base, tmp0);
813 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
814 dst0[chan_index] = tmp0;
815 }
816 break;
817 }
818
819 case TGSI_OPCODE_LG2:
820 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
821 tmp0 = lp_build_log2( &bld->base, tmp0);
822 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
823 dst0[chan_index] = tmp0;
824 }
825 break;
826
827 case TGSI_OPCODE_POW:
828 src0 = emit_fetch( bld, inst, 0, CHAN_X );
829 src1 = emit_fetch( bld, inst, 1, CHAN_X );
830 res = lp_build_pow( &bld->base, src0, src1 );
831 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
832 dst0[chan_index] = res;
833 }
834 break;
835
836 case TGSI_OPCODE_XPD:
837 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
838 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
839 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
840 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
841 }
842 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
843 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
844 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
845 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
846 }
847 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
848 tmp2 = tmp0;
849 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
850 tmp5 = tmp3;
851 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
852 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
853 dst0[CHAN_X] = tmp2;
854 }
855 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
856 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
857 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
858 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
859 }
860 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
861 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
862 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
863 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
864 dst0[CHAN_Y] = tmp3;
865 }
866 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
867 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
868 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
869 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
870 dst0[CHAN_Z] = tmp5;
871 }
872 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
873 dst0[CHAN_W] = bld->base.one;
874 }
875 break;
876
877 case TGSI_OPCODE_ABS:
878 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
879 tmp0 = emit_fetch( bld, inst, 0, chan_index );
880 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
881 }
882 break;
883
884 case TGSI_OPCODE_RCC:
885 /* deprecated? */
886 assert(0);
887 return 0;
888
889 case TGSI_OPCODE_DPH:
890 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
891 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
892 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
893 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
894 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
895 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
896 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
897 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
898 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
899 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
900 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
901 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
902 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
903 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
904 dst0[chan_index] = tmp0;
905 }
906 break;
907
908 case TGSI_OPCODE_COS:
909 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
910 tmp0 = lp_build_cos( &bld->base, tmp0 );
911 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
912 dst0[chan_index] = tmp0;
913 }
914 break;
915
916 case TGSI_OPCODE_DDX:
917 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
918 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
919 }
920 break;
921
922 case TGSI_OPCODE_DDY:
923 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
924 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
925 }
926 break;
927
928 case TGSI_OPCODE_KILP:
929 /* predicated kill */
930 /* FIXME */
931 return 0;
932 break;
933
934 case TGSI_OPCODE_KIL:
935 /* conditional kill */
936 emit_kil( bld, inst );
937 break;
938
939 case TGSI_OPCODE_PK2H:
940 return 0;
941 break;
942
943 case TGSI_OPCODE_PK2US:
944 return 0;
945 break;
946
947 case TGSI_OPCODE_PK4B:
948 return 0;
949 break;
950
951 case TGSI_OPCODE_PK4UB:
952 return 0;
953 break;
954
955 case TGSI_OPCODE_RFL:
956 return 0;
957 break;
958
959 case TGSI_OPCODE_SEQ:
960 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
961 src0 = emit_fetch( bld, inst, 0, chan_index );
962 src1 = emit_fetch( bld, inst, 1, chan_index );
963 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
964 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
965 }
966 break;
967
968 case TGSI_OPCODE_SFL:
969 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
970 dst0[chan_index] = bld->base.zero;
971 }
972 break;
973
974 case TGSI_OPCODE_SGT:
975 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
976 src0 = emit_fetch( bld, inst, 0, chan_index );
977 src1 = emit_fetch( bld, inst, 1, chan_index );
978 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
979 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
980 }
981 break;
982
983 case TGSI_OPCODE_SIN:
984 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
985 tmp0 = lp_build_sin( &bld->base, tmp0 );
986 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
987 dst0[chan_index] = tmp0;
988 }
989 break;
990
991 case TGSI_OPCODE_SLE:
992 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
993 src0 = emit_fetch( bld, inst, 0, chan_index );
994 src1 = emit_fetch( bld, inst, 1, chan_index );
995 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
996 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
997 }
998 break;
999
1000 case TGSI_OPCODE_SNE:
1001 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1002 src0 = emit_fetch( bld, inst, 0, chan_index );
1003 src1 = emit_fetch( bld, inst, 1, chan_index );
1004 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1005 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1006 }
1007 break;
1008
1009 case TGSI_OPCODE_STR:
1010 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1011 dst0[chan_index] = bld->base.one;
1012 }
1013 break;
1014
1015 case TGSI_OPCODE_TEX:
1016 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1017 break;
1018
1019 case TGSI_OPCODE_TXD:
1020 /* FIXME */
1021 return 0;
1022 break;
1023
1024 case TGSI_OPCODE_UP2H:
1025 /* deprecated */
1026 assert (0);
1027 return 0;
1028 break;
1029
1030 case TGSI_OPCODE_UP2US:
1031 /* deprecated */
1032 assert(0);
1033 return 0;
1034 break;
1035
1036 case TGSI_OPCODE_UP4B:
1037 /* deprecated */
1038 assert(0);
1039 return 0;
1040 break;
1041
1042 case TGSI_OPCODE_UP4UB:
1043 /* deprecated */
1044 assert(0);
1045 return 0;
1046 break;
1047
1048 case TGSI_OPCODE_X2D:
1049 /* deprecated? */
1050 assert(0);
1051 return 0;
1052 break;
1053
1054 case TGSI_OPCODE_ARA:
1055 /* deprecated */
1056 assert(0);
1057 return 0;
1058 break;
1059
1060 #if 0
1061 case TGSI_OPCODE_ARR:
1062 /* FIXME */
1063 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1064 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1065 emit_rnd( bld, 0, 0 );
1066 emit_f2it( bld, 0 );
1067 dst0[chan_index] = tmp0;
1068 }
1069 break;
1070 #endif
1071
1072 case TGSI_OPCODE_BRA:
1073 /* deprecated */
1074 assert(0);
1075 return 0;
1076 break;
1077
1078 case TGSI_OPCODE_CAL:
1079 /* FIXME */
1080 return 0;
1081 break;
1082
1083 case TGSI_OPCODE_RET:
1084 /* FIXME */
1085 return 0;
1086 break;
1087
1088 case TGSI_OPCODE_END:
1089 break;
1090
1091 case TGSI_OPCODE_SSG:
1092 /* TGSI_OPCODE_SGN */
1093 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1094 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1095 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1096 }
1097 break;
1098
1099 case TGSI_OPCODE_CMP:
1100 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1101 src0 = emit_fetch( bld, inst, 0, chan_index );
1102 src1 = emit_fetch( bld, inst, 1, chan_index );
1103 src2 = emit_fetch( bld, inst, 2, chan_index );
1104 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1105 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1106 }
1107 break;
1108
1109 case TGSI_OPCODE_SCS:
1110 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1111 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1112 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1113 }
1114 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1115 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1116 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1117 }
1118 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1119 dst0[CHAN_Z] = bld->base.zero;
1120 }
1121 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1122 dst0[CHAN_W] = bld->base.one;
1123 }
1124 break;
1125
1126 case TGSI_OPCODE_TXB:
1127 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1128 break;
1129
1130 case TGSI_OPCODE_NRM:
1131 /* fall-through */
1132 case TGSI_OPCODE_NRM4:
1133 /* 3 or 4-component normalization */
1134 {
1135 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1136
1137 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1138 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1139 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1140 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1141
1142 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1143
1144 /* xmm4 = src.x */
1145 /* xmm0 = src.x * src.x */
1146 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1147 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1148 tmp4 = tmp0;
1149 }
1150 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1151
1152 /* xmm5 = src.y */
1153 /* xmm0 = xmm0 + src.y * src.y */
1154 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1155 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1156 tmp5 = tmp1;
1157 }
1158 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1159 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1160
1161 /* xmm6 = src.z */
1162 /* xmm0 = xmm0 + src.z * src.z */
1163 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1164 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1165 tmp6 = tmp1;
1166 }
1167 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1168 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1169
1170 if (dims == 4) {
1171 /* xmm7 = src.w */
1172 /* xmm0 = xmm0 + src.w * src.w */
1173 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1174 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1175 tmp7 = tmp1;
1176 }
1177 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1178 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1179 }
1180
1181 /* xmm1 = 1 / sqrt(xmm0) */
1182 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1183
1184 /* dst.x = xmm1 * src.x */
1185 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1186 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1187 }
1188
1189 /* dst.y = xmm1 * src.y */
1190 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1191 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1192 }
1193
1194 /* dst.z = xmm1 * src.z */
1195 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1196 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1197 }
1198
1199 /* dst.w = xmm1 * src.w */
1200 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1201 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1202 }
1203 }
1204
1205 /* dst.w = 1.0 */
1206 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1207 dst0[CHAN_W] = bld->base.one;
1208 }
1209 }
1210 break;
1211
1212 case TGSI_OPCODE_DIV:
1213 /* deprecated */
1214 assert( 0 );
1215 return 0;
1216 break;
1217
1218 case TGSI_OPCODE_DP2:
1219 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1220 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1221 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1222 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1223 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1224 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1225 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1226 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1227 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1228 }
1229 break;
1230
1231 case TGSI_OPCODE_TXL:
1232 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1233 break;
1234
1235 case TGSI_OPCODE_TXP:
1236 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1237 break;
1238
1239 case TGSI_OPCODE_BRK:
1240 /* FIXME */
1241 return 0;
1242 break;
1243
1244 case TGSI_OPCODE_IF:
1245 /* FIXME */
1246 return 0;
1247 break;
1248
1249 case TGSI_OPCODE_BGNFOR:
1250 /* deprecated */
1251 assert(0);
1252 return 0;
1253 break;
1254
1255 case TGSI_OPCODE_REP:
1256 /* deprecated */
1257 assert(0);
1258 return 0;
1259 break;
1260
1261 case TGSI_OPCODE_ELSE:
1262 /* FIXME */
1263 return 0;
1264 break;
1265
1266 case TGSI_OPCODE_ENDIF:
1267 /* FIXME */
1268 return 0;
1269 break;
1270
1271 case TGSI_OPCODE_ENDFOR:
1272 /* deprecated */
1273 assert(0);
1274 return 0;
1275 break;
1276
1277 case TGSI_OPCODE_ENDREP:
1278 /* deprecated */
1279 assert(0);
1280 return 0;
1281 break;
1282
1283 case TGSI_OPCODE_PUSHA:
1284 /* deprecated? */
1285 assert(0);
1286 return 0;
1287 break;
1288
1289 case TGSI_OPCODE_POPA:
1290 /* deprecated? */
1291 assert(0);
1292 return 0;
1293 break;
1294
1295 case TGSI_OPCODE_CEIL:
1296 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1297 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1298 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1299 }
1300 break;
1301
1302 case TGSI_OPCODE_I2F:
1303 /* deprecated? */
1304 assert(0);
1305 return 0;
1306 break;
1307
1308 case TGSI_OPCODE_NOT:
1309 /* deprecated? */
1310 assert(0);
1311 return 0;
1312 break;
1313
1314 case TGSI_OPCODE_TRUNC:
1315 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1316 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1317 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1318 }
1319 break;
1320
1321 case TGSI_OPCODE_SHL:
1322 /* deprecated? */
1323 assert(0);
1324 return 0;
1325 break;
1326
1327 case TGSI_OPCODE_SHR:
1328 /* deprecated? */
1329 assert(0);
1330 return 0;
1331 break;
1332
1333 case TGSI_OPCODE_AND:
1334 /* deprecated? */
1335 assert(0);
1336 return 0;
1337 break;
1338
1339 case TGSI_OPCODE_OR:
1340 /* deprecated? */
1341 assert(0);
1342 return 0;
1343 break;
1344
1345 case TGSI_OPCODE_MOD:
1346 /* deprecated? */
1347 assert(0);
1348 return 0;
1349 break;
1350
1351 case TGSI_OPCODE_XOR:
1352 /* deprecated? */
1353 assert(0);
1354 return 0;
1355 break;
1356
1357 case TGSI_OPCODE_SAD:
1358 /* deprecated? */
1359 assert(0);
1360 return 0;
1361 break;
1362
1363 case TGSI_OPCODE_TXF:
1364 /* deprecated? */
1365 assert(0);
1366 return 0;
1367 break;
1368
1369 case TGSI_OPCODE_TXQ:
1370 /* deprecated? */
1371 assert(0);
1372 return 0;
1373 break;
1374
1375 case TGSI_OPCODE_CONT:
1376 /* deprecated? */
1377 assert(0);
1378 return 0;
1379 break;
1380
1381 case TGSI_OPCODE_EMIT:
1382 return 0;
1383 break;
1384
1385 case TGSI_OPCODE_ENDPRIM:
1386 return 0;
1387 break;
1388
1389 case TGSI_OPCODE_NOISE1:
1390 case TGSI_OPCODE_NOISE2:
1391 case TGSI_OPCODE_NOISE3:
1392 case TGSI_OPCODE_NOISE4:
1393 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1394 dst0[chan_index] = bld->base.zero;
1395 }
1396 break;
1397
1398 case TGSI_OPCODE_NOP:
1399 break;
1400
1401 default:
1402 return 0;
1403 }
1404
1405 if(info->num_dst) {
1406 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1407 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1408 }
1409 }
1410
1411 return 1;
1412 }
1413
1414
1415 void
1416 lp_build_tgsi_soa(LLVMBuilderRef builder,
1417 const struct tgsi_token *tokens,
1418 struct lp_type type,
1419 struct lp_build_mask_context *mask,
1420 LLVMValueRef consts_ptr,
1421 const LLVMValueRef *pos,
1422 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1423 LLVMValueRef (*outputs)[NUM_CHANNELS],
1424 struct lp_build_sampler_soa *sampler)
1425 {
1426 struct lp_build_tgsi_soa_context bld;
1427 struct tgsi_parse_context parse;
1428 uint num_immediates = 0;
1429 unsigned i;
1430
1431 /* Setup build context */
1432 memset(&bld, 0, sizeof bld);
1433 lp_build_context_init(&bld.base, builder, type);
1434 bld.mask = mask;
1435 bld.pos = pos;
1436 bld.inputs = inputs;
1437 bld.outputs = outputs;
1438 bld.consts_ptr = consts_ptr;
1439 bld.sampler = sampler;
1440
1441 tgsi_parse_init( &parse, tokens );
1442
1443 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1444 tgsi_parse_token( &parse );
1445
1446 switch( parse.FullToken.Token.Type ) {
1447 case TGSI_TOKEN_TYPE_DECLARATION:
1448 /* Inputs already interpolated */
1449 break;
1450
1451 case TGSI_TOKEN_TYPE_INSTRUCTION:
1452 {
1453 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1454 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1455 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1456 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1457 info ? info->mnemonic : "<invalid>");
1458 }
1459
1460 break;
1461
1462 case TGSI_TOKEN_TYPE_IMMEDIATE:
1463 /* simply copy the immediate values into the next immediates[] slot */
1464 {
1465 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1466 assert(size <= 4);
1467 assert(num_immediates < LP_MAX_IMMEDIATES);
1468 for( i = 0; i < size; ++i )
1469 bld.immediates[num_immediates][i] =
1470 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1471 for( i = size; i < 4; ++i )
1472 bld.immediates[num_immediates][i] = bld.base.undef;
1473 num_immediates++;
1474 }
1475 break;
1476
1477 default:
1478 assert( 0 );
1479 }
1480 }
1481
1482 tgsi_parse_free( &parse );
1483 }
1484