gallivm: implement loops
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55 #include "lp_bld_debug.h"
56
57
58 #define LP_MAX_TEMPS 256
59 #define LP_MAX_IMMEDIATES 256
60
61
62 #define FOR_EACH_CHANNEL( CHAN )\
63 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
64
65 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
66 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
67
68 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
69 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
70
71 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
72 FOR_EACH_CHANNEL( CHAN )\
73 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
74
75 #define CHAN_X 0
76 #define CHAN_Y 1
77 #define CHAN_Z 2
78 #define CHAN_W 3
79
80 #define QUAD_TOP_LEFT 0
81 #define QUAD_TOP_RIGHT 1
82 #define QUAD_BOTTOM_LEFT 2
83 #define QUAD_BOTTOM_RIGHT 3
84
85 #define LP_TGSI_MAX_NESTING 16
86
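/*
 * Execution mask bookkeeping for SoA control flow.
 *
 * TGSI control flow (IF/ELSE/ENDIF, BGNLOOP/ENDLOOP, BRK, CONT) is emulated
 * with per-channel masks: cond_mask tracks nested conditionals, break_mask
 * and cont_mask track channels disabled by BRK/CONT in the current loop,
 * and exec_mask is their combination, applied on every register store.
 */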
87 struct lp_exec_mask {
88 struct lp_build_context *bld;
89
90 boolean has_mask;
91
92 LLVMTypeRef int_vec_type;
93
94 LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
95 int cond_stack_size;
96 LLVMValueRef cond_mask;
97
98 LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
99 int break_stack_size;
100 LLVMValueRef break_mask;
101
102 LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
103 int cont_stack_size;
104 LLVMValueRef cont_mask;
105
106 LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
107 int loop_stack_size;
108 LLVMBasicBlockRef loop_block;
109
110
111 LLVMValueRef exec_mask;
112 };
113
114 struct lp_build_tgsi_soa_context
115 {
116 struct lp_build_context base;
117
118 LLVMValueRef consts_ptr;
119 const LLVMValueRef *pos;
120 const LLVMValueRef (*inputs)[NUM_CHANNELS];
121 LLVMValueRef (*outputs)[NUM_CHANNELS];
122
123 struct lp_build_sampler_soa *sampler;
124
125 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
126 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
127
128 struct lp_build_mask_context *mask;
129 struct lp_exec_mask exec_mask;
130 };
131
132 static const unsigned char
133 swizzle_left[4] = {
134 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
135 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
136 };
137
138 static const unsigned char
139 swizzle_right[4] = {
140 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
141 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
142 };
143
144 static const unsigned char
145 swizzle_top[4] = {
146 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
147 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
148 };
149
150 static const unsigned char
151 swizzle_bottom[4] = {
152 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
153 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
154 };
155
156 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
157 {
158 mask->bld = bld;
159 mask->has_mask = FALSE;
160 mask->cond_stack_size = 0;
161 mask->loop_stack_size = 0;
162 mask->break_stack_size = 0;
163 mask->cont_stack_size = 0;
164
165 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
166 }
167
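/**
 * Recompute exec_mask from the individual masks: inside a loop it is
 * cond_mask & cont_mask & break_mask, otherwise just cond_mask.
 */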
168 static void lp_exec_mask_update(struct lp_exec_mask *mask)
169 {
170 if (mask->loop_stack_size) {
171 /* for loops we need to update the entire mask at
172 * runtime */
173 LLVMValueRef tmp;
174 tmp = LLVMBuildAnd(mask->bld->builder,
175 mask->cont_mask,
176 mask->break_mask,
177 "maskcb");
178 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
179 mask->cond_mask,
180 tmp,
181 "maskfull");
182 } else
183 mask->exec_mask = mask->cond_mask;
184
185
186 mask->has_mask = (mask->cond_stack_size > 0 ||
187 mask->loop_stack_size > 0);
188 }
189
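/**
 * IF: save the current condition mask on the stack and make the IF
 * condition the active one.
 */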
190 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
191 LLVMValueRef val)
192 {
193 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
194 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
195 mask->int_vec_type, "");
196
197 lp_exec_mask_update(mask);
198 }
199
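/**
 * ELSE: negate the current condition mask and combine it with the mask
 * that was active when the matching IF was entered.
 */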
200 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
201 {
202 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
203 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
204 mask->cond_mask, "");
205
206 /* cond_stack_size <= 1 means that we didn't have any mask
207 * before and that we were fully enabled */
208 if (mask->cond_stack_size <= 1) {
209 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
210 }
211
212 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
213 inv_mask,
214 prev_mask, "");
215 lp_exec_mask_update(mask);
216 }
217
218 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
219 {
220 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
221 lp_exec_mask_update(mask);
222 }
223
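/**
 * BGNLOOP: initialise the break/continue/condition masks to all-ones when
 * they are not already in use, push the current loop header and start a
 * new "bgnloop" basic block that ENDLOOP branches back to.
 */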
224 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
225 {
226
227 if (mask->cont_stack_size == 0)
228 mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
229 if (mask->break_stack_size == 0)
230 mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
231 if (mask->cond_stack_size == 0)
232 mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
233 mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
234 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
235 LLVMBuildBr(mask->bld->builder, mask->loop_block);
236 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
237
238 lp_exec_mask_update(mask);
239 }
240
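/**
 * BRK: clear the break-mask bits of the channels that are currently
 * executing, so they stay disabled until the loop is left.
 */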
241 static void lp_exec_break(struct lp_exec_mask *mask)
242 {
243 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
244 mask->exec_mask,
245 "break");
246
247 mask->break_stack[mask->break_stack_size++] = mask->break_mask;
248 if (mask->break_stack_size > 1) {
249 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
250 mask->break_mask,
251 exec_mask, "break_full");
252 } else
253 mask->break_mask = exec_mask;
254
255 lp_exec_mask_update(mask);
256 }
257
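/**
 * CONT: clear the continue-mask bits of the channels that are currently
 * executing, so they skip the remainder of this loop iteration.
 */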
258 static void lp_exec_continue(struct lp_exec_mask *mask)
259 {
260 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
261 mask->exec_mask,
262 "");
263
264 mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
265 if (mask->cont_stack_size > 1) {
266 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
267 mask->cont_mask,
268 exec_mask, "");
269 } else
270 mask->cont_mask = exec_mask;
271
272 lp_exec_mask_update(mask);
273 }
274
275
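/**
 * ENDLOOP: OR-reduce the break mask across the vector; branch back to the
 * loop header while any channel is still allowed to iterate, otherwise
 * fall through to a new "endloop" block and restore the enclosing masks.
 */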
276 static void lp_exec_endloop(struct lp_exec_mask *mask)
277 {
278 LLVMBasicBlockRef endloop;
279 LLVMValueRef i1cond;
280
281 { /* convert our soa vector into i1 */
282 int i;
283 LLVMValueRef packed = 0;
284 for (i = 0; i < mask->bld->type.length; ++i) {
285 LLVMValueRef component = LLVMBuildExtractElement(
286 mask->bld->builder,
287 mask->break_mask,
288 LLVMConstInt(LLVMInt32Type(), i, 0), "");
289 if (packed)
290 packed = LLVMBuildOr(mask->bld->builder,
291 packed, component, "");
292 else
293 packed = component;
294 }
295 i1cond = LLVMBuildICmp(mask->bld->builder, LLVMIntNE,
296 packed,
297 LLVMConstNull(LLVMTypeOf(packed)),
298 "");
299 }
300
301 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
302
303 LLVMBuildCondBr(mask->bld->builder,
304 i1cond, mask->loop_block, endloop);
305
306 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
307
308 mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
309 /* restore the continue and break masks of the enclosing loop */
310 if (mask->cont_stack_size) {
311 mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
312 }
313 if (mask->break_stack_size) {
314 mask->break_mask = mask->break_stack[--mask->break_stack_size];
315 }
316
317 lp_exec_mask_update(mask);
318 }
319
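/**
 * Masked store: when any control-flow mask is active, blend the new value
 * with the current memory contents using exec_mask, so inactive channels
 * keep their previous value.
 */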
320 static void lp_exec_mask_store(struct lp_exec_mask *mask,
321 LLVMValueRef val,
322 LLVMValueRef dst)
323 {
324 if (mask->has_mask) {
325 LLVMValueRef real_val, dst_val;
326
327 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
328 real_val = lp_build_select(mask->bld,
329 mask->exec_mask,
330 val, dst_val);
331
332 LLVMBuildStore(mask->bld->builder, real_val, dst);
333 } else
334 LLVMBuildStore(mask->bld->builder, val, dst);
335 }
336
337
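/**
 * emit_ddx/emit_ddy: approximate screen-space derivatives by differencing
 * values of neighbouring pixels within the 2x2 quad, using the quad
 * swizzles defined above.
 */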
338 static LLVMValueRef
339 emit_ddx(struct lp_build_tgsi_soa_context *bld,
340 LLVMValueRef src)
341 {
342 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
343 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
344 return lp_build_sub(&bld->base, src_right, src_left);
345 }
346
347
348 static LLVMValueRef
349 emit_ddy(struct lp_build_tgsi_soa_context *bld,
350 LLVMValueRef src)
351 {
352 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
353 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
354 return lp_build_sub(&bld->base, src_top, src_bottom);
355 }
356
357
358 /**
359 * Register fetch.
360 */
361 static LLVMValueRef
362 emit_fetch(
363 struct lp_build_tgsi_soa_context *bld,
364 const struct tgsi_full_instruction *inst,
365 unsigned index,
366 const unsigned chan_index )
367 {
368 const struct tgsi_full_src_register *reg = &inst->Src[index];
369 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
370 LLVMValueRef res;
371
372 switch (swizzle) {
373 case TGSI_SWIZZLE_X:
374 case TGSI_SWIZZLE_Y:
375 case TGSI_SWIZZLE_Z:
376 case TGSI_SWIZZLE_W:
377
378 switch (reg->Register.File) {
379 case TGSI_FILE_CONSTANT: {
380 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
381 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
382 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
383 res = lp_build_broadcast_scalar(&bld->base, scalar);
384 break;
385 }
386
387 case TGSI_FILE_IMMEDIATE:
388 res = bld->immediates[reg->Register.Index][swizzle];
389 assert(res);
390 break;
391
392 case TGSI_FILE_INPUT:
393 res = bld->inputs[reg->Register.Index][swizzle];
394 assert(res);
395 break;
396
397 case TGSI_FILE_TEMPORARY:
398 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
399 if(!res)
400 return bld->base.undef;
401 break;
402
403 default:
404 assert( 0 );
405 return bld->base.undef;
406 }
407 break;
408
409 default:
410 assert( 0 );
411 return bld->base.undef;
412 }
413
414 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
415 case TGSI_UTIL_SIGN_CLEAR:
416 res = lp_build_abs( &bld->base, res );
417 break;
418
419 case TGSI_UTIL_SIGN_SET:
420 /* TODO: Use bitwise OR for floating point */
421 res = lp_build_abs( &bld->base, res );
422 res = LLVMBuildNeg( bld->base.builder, res, "" );
423 break;
424
425 case TGSI_UTIL_SIGN_TOGGLE:
426 res = LLVMBuildNeg( bld->base.builder, res, "" );
427 break;
428
429 case TGSI_UTIL_SIGN_KEEP:
430 break;
431 }
432
433 return res;
434 }
435
436
437 /**
438 * Register fetch with derivatives.
439 */
440 static void
441 emit_fetch_deriv(
442 struct lp_build_tgsi_soa_context *bld,
443 const struct tgsi_full_instruction *inst,
444 unsigned index,
445 const unsigned chan_index,
446 LLVMValueRef *res,
447 LLVMValueRef *ddx,
448 LLVMValueRef *ddy)
449 {
450 LLVMValueRef src;
451
452 src = emit_fetch(bld, inst, index, chan_index);
453
454 if(res)
455 *res = src;
456
457 /* TODO: use interpolation coeffs for inputs */
458
459 if(ddx)
460 *ddx = emit_ddx(bld, src);
461
462 if(ddy)
463 *ddy = emit_ddy(bld, src);
464 }
465
466
467 /**
468 * Register store.
469 */
470 static void
471 emit_store(
472 struct lp_build_tgsi_soa_context *bld,
473 const struct tgsi_full_instruction *inst,
474 unsigned index,
475 unsigned chan_index,
476 LLVMValueRef value)
477 {
478 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
479
480 switch( inst->Instruction.Saturate ) {
481 case TGSI_SAT_NONE:
482 break;
483
484 case TGSI_SAT_ZERO_ONE:
485 value = lp_build_max(&bld->base, value, bld->base.zero);
486 value = lp_build_min(&bld->base, value, bld->base.one);
487 break;
488
489 case TGSI_SAT_MINUS_PLUS_ONE:
490 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
491 value = lp_build_min(&bld->base, value, bld->base.one);
492 break;
493
494 default:
495 assert(0);
496 }
497
498 switch( reg->Register.File ) {
499 case TGSI_FILE_OUTPUT:
500 lp_exec_mask_store(&bld->exec_mask, value,
501 bld->outputs[reg->Register.Index][chan_index]);
502 break;
503
504 case TGSI_FILE_TEMPORARY:
505 lp_exec_mask_store(&bld->exec_mask, value,
506 bld->temps[reg->Register.Index][chan_index]);
507 break;
508
509 case TGSI_FILE_ADDRESS:
510 /* FIXME */
511 assert(0);
512 break;
513
514 case TGSI_FILE_PREDICATE:
515 /* FIXME */
516 assert(0);
517 break;
518
519 default:
520 assert( 0 );
521 }
522 }
523
524
525 /**
526 * High-level instruction translators.
527 */
528
529
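/**
 * Texture sampling (TEX/TXB/TXL/TXP): fetch the coordinates, optionally
 * divide by .w (projection) and/or fetch a LOD bias, then hand off to the
 * SoA sampler to fetch the texel.
 */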
530 static void
531 emit_tex( struct lp_build_tgsi_soa_context *bld,
532 const struct tgsi_full_instruction *inst,
533 boolean apply_lodbias,
534 boolean projected,
535 LLVMValueRef *texel)
536 {
537 const uint unit = inst->Src[1].Register.Index;
538 LLVMValueRef lodbias;
539 LLVMValueRef oow = NULL;
540 LLVMValueRef coords[3];
541 unsigned num_coords;
542 unsigned i;
543
544 switch (inst->Texture.Texture) {
545 case TGSI_TEXTURE_1D:
546 num_coords = 1;
547 break;
548 case TGSI_TEXTURE_2D:
549 case TGSI_TEXTURE_RECT:
550 num_coords = 2;
551 break;
552 case TGSI_TEXTURE_SHADOW1D:
553 case TGSI_TEXTURE_SHADOW2D:
554 case TGSI_TEXTURE_SHADOWRECT:
555 case TGSI_TEXTURE_3D:
556 case TGSI_TEXTURE_CUBE:
557 num_coords = 3;
558 break;
559 default:
560 assert(0);
561 return;
562 }
563
564 if(apply_lodbias)
565 lodbias = emit_fetch( bld, inst, 0, 3 );
566 else
567 lodbias = bld->base.zero;
568
569 if (projected) {
570 oow = emit_fetch( bld, inst, 0, 3 );
571 oow = lp_build_rcp(&bld->base, oow);
572 }
573
574 for (i = 0; i < num_coords; i++) {
575 coords[i] = emit_fetch( bld, inst, 0, i );
576 if (projected)
577 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
578 }
579 for (i = num_coords; i < 3; i++) {
580 coords[i] = bld->base.undef;
581 }
582
583 bld->sampler->emit_fetch_texel(bld->sampler,
584 bld->base.builder,
585 bld->base.type,
586 unit, num_coords, coords, lodbias,
587 texel);
588 }
589
590
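/**
 * KIL: build a per-channel "keep" mask (component >= 0) for every
 * referenced component and AND it into the fragment mask, killing
 * fragments with any negative component.
 */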
591 static void
592 emit_kil(
593 struct lp_build_tgsi_soa_context *bld,
594 const struct tgsi_full_instruction *inst )
595 {
596 const struct tgsi_full_src_register *reg = &inst->Src[0];
597 LLVMValueRef terms[NUM_CHANNELS];
598 LLVMValueRef mask;
599 unsigned chan_index;
600
601 memset(&terms, 0, sizeof terms);
602
603 FOR_EACH_CHANNEL( chan_index ) {
604 unsigned swizzle;
605
606 /* Unswizzle channel */
607 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
608
609 /* Check if the component has not been already tested. */
610 assert(swizzle < NUM_CHANNELS);
611 if( !terms[swizzle] )
612 /* TODO: change the comparison operator instead of setting the sign */
613 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
614 }
615
616 mask = NULL;
617 FOR_EACH_CHANNEL( chan_index ) {
618 if(terms[chan_index]) {
619 LLVMValueRef chan_mask;
620
621 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
622
623 if(mask)
624 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
625 else
626 mask = chan_mask;
627 }
628 }
629
630 if(mask)
631 lp_build_mask_update(bld->mask, mask);
632 }
633
634
635 /**
636 * Check if inst src/dest regs use indirect addressing into temporary
637 * register file.
638 */
639 static boolean
640 indirect_temp_reference(const struct tgsi_full_instruction *inst)
641 {
642 uint i;
643 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
644 const struct tgsi_full_src_register *reg = &inst->Src[i];
645 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
646 reg->Register.Indirect)
647 return TRUE;
648 }
649 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
650 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
651 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
652 reg->Register.Indirect)
653 return TRUE;
654 }
655 return FALSE;
656 }
657
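/**
 * Allocate stack slots for temporary and output registers so that they
 * can be written through lp_exec_mask_store under control flow.
 */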
658 static int
659 emit_declaration(
660 struct lp_build_tgsi_soa_context *bld,
661 const struct tgsi_full_declaration *decl)
662 {
663 unsigned first = decl->Range.First;
664 unsigned last = decl->Range.Last;
665 unsigned idx, i;
666
667 for (idx = first; idx <= last; ++idx) {
668 boolean ok;
669
670 switch (decl->Declaration.File) {
671 case TGSI_FILE_TEMPORARY:
672 for (i = 0; i < NUM_CHANNELS; i++)
673 bld->temps[idx][i] = lp_build_alloca(&bld->base);
674 ok = TRUE;
675 break;
676
677 case TGSI_FILE_OUTPUT:
678 for (i = 0; i < NUM_CHANNELS; i++)
679 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
680 ok = TRUE;
681 break;
682
683 default:
684 /* don't need to declare other vars */
685 ok = TRUE;
686 }
687
688 if (!ok)
689 return FALSE;
690 }
691
692 return TRUE;
693 }
694
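/**
 * Translate a single TGSI instruction to LLVM IR.
 * Returns 1 on success, 0 if the instruction cannot be translated.
 */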
695 static int
696 emit_instruction(
697 struct lp_build_tgsi_soa_context *bld,
698 const struct tgsi_full_instruction *inst,
699 const struct tgsi_opcode_info *info)
700 {
701 unsigned chan_index;
702 LLVMValueRef src0, src1, src2;
703 LLVMValueRef tmp0, tmp1, tmp2;
704 LLVMValueRef tmp3 = NULL;
705 LLVMValueRef tmp4 = NULL;
706 LLVMValueRef tmp5 = NULL;
707 LLVMValueRef tmp6 = NULL;
708 LLVMValueRef tmp7 = NULL;
709 LLVMValueRef res;
710 LLVMValueRef dst0[NUM_CHANNELS];
711
712 /* we can't handle indirect addressing into temp register file yet */
713 if (indirect_temp_reference(inst))
714 return FALSE;
715
716 /*
717 * Stores and write masks are handled in a general fashion after the long
718 * instruction opcode switch statement.
719 *
720 * Although not strictly necessary, we avoid generating instructions for
721 * channels which won't be stored, in cases where that's easy. For some
722 * complex instructions, like texture sampling, it is more convenient to
723 * assume a full writemask and then let LLVM optimization passes eliminate
724 * redundant code.
725 */
726
727 assert(info->num_dst <= 1);
728 if(info->num_dst) {
729 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
730 dst0[chan_index] = bld->base.undef;
731 }
732 }
733
734 switch (inst->Instruction.Opcode) {
735 #if 0
736 case TGSI_OPCODE_ARL:
737 /* FIXME */
738 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
739 tmp0 = emit_fetch( bld, inst, 0, chan_index );
740 emit_flr(bld, 0, 0);
741 emit_f2it( bld, 0 );
742 dst0[chan_index] = tmp0;
743 }
744 break;
745 #endif
746
747 case TGSI_OPCODE_MOV:
748 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
749 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
750 }
751 break;
752
753 case TGSI_OPCODE_LIT:
754 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
755 dst0[CHAN_X] = bld->base.one;
756 }
757 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
758 src0 = emit_fetch( bld, inst, 0, CHAN_X );
759 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
760 }
761 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
762 /* XMM[1] = SrcReg[0].yyyy */
763 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
764 /* XMM[1] = max(XMM[1], 0) */
765 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
766 /* XMM[2] = SrcReg[0].wwww */
767 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
768 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
769 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
770 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
771 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
772 }
773 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
774 dst0[CHAN_W] = bld->base.one;
775 }
776 break;
777
778 case TGSI_OPCODE_RCP:
779 /* TGSI_OPCODE_RECIP */
780 src0 = emit_fetch( bld, inst, 0, CHAN_X );
781 res = lp_build_rcp(&bld->base, src0);
782 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
783 dst0[chan_index] = res;
784 }
785 break;
786
787 case TGSI_OPCODE_RSQ:
788 /* TGSI_OPCODE_RECIPSQRT */
789 src0 = emit_fetch( bld, inst, 0, CHAN_X );
790 src0 = lp_build_abs(&bld->base, src0);
791 res = lp_build_rsqrt(&bld->base, src0);
792 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
793 dst0[chan_index] = res;
794 }
795 break;
796
797 case TGSI_OPCODE_EXP:
798 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
799 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
800 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
801 LLVMValueRef *p_exp2_int_part = NULL;
802 LLVMValueRef *p_frac_part = NULL;
803 LLVMValueRef *p_exp2 = NULL;
804
805 src0 = emit_fetch( bld, inst, 0, CHAN_X );
806
807 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
808 p_exp2_int_part = &tmp0;
809 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
810 p_frac_part = &tmp1;
811 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
812 p_exp2 = &tmp2;
813
814 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
815
816 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
817 dst0[CHAN_X] = tmp0;
818 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
819 dst0[CHAN_Y] = tmp1;
820 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
821 dst0[CHAN_Z] = tmp2;
822 }
823 /* dst.w = 1.0 */
824 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
825 dst0[CHAN_W] = bld->base.one;
826 }
827 break;
828
829 case TGSI_OPCODE_LOG:
830 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
831 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
832 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
833 LLVMValueRef *p_floor_log2 = NULL;
834 LLVMValueRef *p_exp = NULL;
835 LLVMValueRef *p_log2 = NULL;
836
837 src0 = emit_fetch( bld, inst, 0, CHAN_X );
838 src0 = lp_build_abs( &bld->base, src0 );
839
840 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
841 p_floor_log2 = &tmp0;
842 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
843 p_exp = &tmp1;
844 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
845 p_log2 = &tmp2;
846
847 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
848
849 /* dst.x = floor(lg2(abs(src.x))) */
850 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
851 dst0[CHAN_X] = tmp0;
852 /* dst.y = abs(src.x)/ex2(floor(lg2(abs(src.x)))) */
853 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
854 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
855 }
856 /* dst.z = lg2(abs(src.x)) */
857 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
858 dst0[CHAN_Z] = tmp2;
859 }
860 /* dst.w = 1.0 */
861 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
862 dst0[CHAN_W] = bld->base.one;
863 }
864 break;
865
866 case TGSI_OPCODE_MUL:
867 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
868 src0 = emit_fetch( bld, inst, 0, chan_index );
869 src1 = emit_fetch( bld, inst, 1, chan_index );
870 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
871 }
872 break;
873
874 case TGSI_OPCODE_ADD:
875 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
876 src0 = emit_fetch( bld, inst, 0, chan_index );
877 src1 = emit_fetch( bld, inst, 1, chan_index );
878 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
879 }
880 break;
881
882 case TGSI_OPCODE_DP3:
883 /* TGSI_OPCODE_DOT3 */
884 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
885 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
886 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
887 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
888 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
889 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
890 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
891 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
892 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
893 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
894 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
895 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
896 dst0[chan_index] = tmp0;
897 }
898 break;
899
900 case TGSI_OPCODE_DP4:
901 /* TGSI_OPCODE_DOT4 */
902 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
903 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
904 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
905 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
906 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
907 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
908 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
909 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
910 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
911 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
912 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
913 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
914 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
915 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
916 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
917 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
918 dst0[chan_index] = tmp0;
919 }
920 break;
921
922 case TGSI_OPCODE_DST:
923 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
924 dst0[CHAN_X] = bld->base.one;
925 }
926 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
927 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
928 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
929 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
930 }
931 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
932 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
933 }
934 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
935 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
936 }
937 break;
938
939 case TGSI_OPCODE_MIN:
940 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
941 src0 = emit_fetch( bld, inst, 0, chan_index );
942 src1 = emit_fetch( bld, inst, 1, chan_index );
943 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
944 }
945 break;
946
947 case TGSI_OPCODE_MAX:
948 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
949 src0 = emit_fetch( bld, inst, 0, chan_index );
950 src1 = emit_fetch( bld, inst, 1, chan_index );
951 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
952 }
953 break;
954
955 case TGSI_OPCODE_SLT:
956 /* TGSI_OPCODE_SETLT */
957 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
958 src0 = emit_fetch( bld, inst, 0, chan_index );
959 src1 = emit_fetch( bld, inst, 1, chan_index );
960 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
961 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
962 }
963 break;
964
965 case TGSI_OPCODE_SGE:
966 /* TGSI_OPCODE_SETGE */
967 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
968 src0 = emit_fetch( bld, inst, 0, chan_index );
969 src1 = emit_fetch( bld, inst, 1, chan_index );
970 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
971 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
972 }
973 break;
974
975 case TGSI_OPCODE_MAD:
976 /* TGSI_OPCODE_MADD */
977 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
978 tmp0 = emit_fetch( bld, inst, 0, chan_index );
979 tmp1 = emit_fetch( bld, inst, 1, chan_index );
980 tmp2 = emit_fetch( bld, inst, 2, chan_index );
981 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
982 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
983 dst0[chan_index] = tmp0;
984 }
985 break;
986
987 case TGSI_OPCODE_SUB:
988 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
989 tmp0 = emit_fetch( bld, inst, 0, chan_index );
990 tmp1 = emit_fetch( bld, inst, 1, chan_index );
991 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
992 }
993 break;
994
995 case TGSI_OPCODE_LRP:
996 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
997 src0 = emit_fetch( bld, inst, 0, chan_index );
998 src1 = emit_fetch( bld, inst, 1, chan_index );
999 src2 = emit_fetch( bld, inst, 2, chan_index );
1000 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1001 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1002 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1003 }
1004 break;
1005
1006 case TGSI_OPCODE_CND:
1007 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1008 src0 = emit_fetch( bld, inst, 0, chan_index );
1009 src1 = emit_fetch( bld, inst, 1, chan_index );
1010 src2 = emit_fetch( bld, inst, 2, chan_index );
1011 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
1012 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1013 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1014 }
1015 break;
1016
1017 case TGSI_OPCODE_DP2A:
1018 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1019 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1020 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1021 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1022 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1023 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1024 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1025 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1026 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1027 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1028 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1029 }
1030 break;
1031
1032 case TGSI_OPCODE_FRC:
1033 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1034 src0 = emit_fetch( bld, inst, 0, chan_index );
1035 tmp0 = lp_build_floor(&bld->base, src0);
1036 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1037 dst0[chan_index] = tmp0;
1038 }
1039 break;
1040
1041 case TGSI_OPCODE_CLAMP:
1042 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1043 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1044 src1 = emit_fetch( bld, inst, 1, chan_index );
1045 src2 = emit_fetch( bld, inst, 2, chan_index );
1046 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1047 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1048 dst0[chan_index] = tmp0;
1049 }
1050 break;
1051
1052 case TGSI_OPCODE_FLR:
1053 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1054 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1055 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1056 }
1057 break;
1058
1059 case TGSI_OPCODE_ROUND:
1060 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1061 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1062 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1063 }
1064 break;
1065
1066 case TGSI_OPCODE_EX2: {
1067 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1068 tmp0 = lp_build_exp2( &bld->base, tmp0);
1069 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1070 dst0[chan_index] = tmp0;
1071 }
1072 break;
1073 }
1074
1075 case TGSI_OPCODE_LG2:
1076 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1077 tmp0 = lp_build_log2( &bld->base, tmp0);
1078 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1079 dst0[chan_index] = tmp0;
1080 }
1081 break;
1082
1083 case TGSI_OPCODE_POW:
1084 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1085 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1086 res = lp_build_pow( &bld->base, src0, src1 );
1087 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1088 dst0[chan_index] = res;
1089 }
1090 break;
1091
1092 case TGSI_OPCODE_XPD:
1093 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1094 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1095 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1096 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1097 }
1098 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1099 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1100 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1101 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1102 }
1103 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1104 tmp2 = tmp0;
1105 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1106 tmp5 = tmp3;
1107 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1108 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1109 dst0[CHAN_X] = tmp2;
1110 }
1111 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1112 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1113 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1114 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1115 }
1116 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1117 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1118 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1119 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1120 dst0[CHAN_Y] = tmp3;
1121 }
1122 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1123 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1124 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1125 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1126 dst0[CHAN_Z] = tmp5;
1127 }
1128 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1129 dst0[CHAN_W] = bld->base.one;
1130 }
1131 break;
1132
1133 case TGSI_OPCODE_ABS:
1134 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1135 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1136 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1137 }
1138 break;
1139
1140 case TGSI_OPCODE_RCC:
1141 /* deprecated? */
1142 assert(0);
1143 return 0;
1144
1145 case TGSI_OPCODE_DPH:
1146 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1147 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1148 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1149 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1150 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1151 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1152 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1153 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1154 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1155 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1156 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1157 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1158 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1159 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1160 dst0[chan_index] = tmp0;
1161 }
1162 break;
1163
1164 case TGSI_OPCODE_COS:
1165 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1166 tmp0 = lp_build_cos( &bld->base, tmp0 );
1167 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1168 dst0[chan_index] = tmp0;
1169 }
1170 break;
1171
1172 case TGSI_OPCODE_DDX:
1173 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1174 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1175 }
1176 break;
1177
1178 case TGSI_OPCODE_DDY:
1179 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1180 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1181 }
1182 break;
1183
1184 case TGSI_OPCODE_KILP:
1185 /* predicated kill */
1186 /* FIXME */
1187 return 0;
1188 break;
1189
1190 case TGSI_OPCODE_KIL:
1191 /* conditional kill */
1192 emit_kil( bld, inst );
1193 break;
1194
1195 case TGSI_OPCODE_PK2H:
1196 return 0;
1197 break;
1198
1199 case TGSI_OPCODE_PK2US:
1200 return 0;
1201 break;
1202
1203 case TGSI_OPCODE_PK4B:
1204 return 0;
1205 break;
1206
1207 case TGSI_OPCODE_PK4UB:
1208 return 0;
1209 break;
1210
1211 case TGSI_OPCODE_RFL:
1212 return 0;
1213 break;
1214
1215 case TGSI_OPCODE_SEQ:
1216 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1217 src0 = emit_fetch( bld, inst, 0, chan_index );
1218 src1 = emit_fetch( bld, inst, 1, chan_index );
1219 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1220 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1221 }
1222 break;
1223
1224 case TGSI_OPCODE_SFL:
1225 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1226 dst0[chan_index] = bld->base.zero;
1227 }
1228 break;
1229
1230 case TGSI_OPCODE_SGT:
1231 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1232 src0 = emit_fetch( bld, inst, 0, chan_index );
1233 src1 = emit_fetch( bld, inst, 1, chan_index );
1234 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1235 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1236 }
1237 break;
1238
1239 case TGSI_OPCODE_SIN:
1240 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1241 tmp0 = lp_build_sin( &bld->base, tmp0 );
1242 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1243 dst0[chan_index] = tmp0;
1244 }
1245 break;
1246
1247 case TGSI_OPCODE_SLE:
1248 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1249 src0 = emit_fetch( bld, inst, 0, chan_index );
1250 src1 = emit_fetch( bld, inst, 1, chan_index );
1251 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1252 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1253 }
1254 break;
1255
1256 case TGSI_OPCODE_SNE:
1257 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1258 src0 = emit_fetch( bld, inst, 0, chan_index );
1259 src1 = emit_fetch( bld, inst, 1, chan_index );
1260 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1261 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1262 }
1263 break;
1264
1265 case TGSI_OPCODE_STR:
1266 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1267 dst0[chan_index] = bld->base.one;
1268 }
1269 break;
1270
1271 case TGSI_OPCODE_TEX:
1272 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1273 break;
1274
1275 case TGSI_OPCODE_TXD:
1276 /* FIXME */
1277 return 0;
1278 break;
1279
1280 case TGSI_OPCODE_UP2H:
1281 /* deprecated */
1282 assert (0);
1283 return 0;
1284 break;
1285
1286 case TGSI_OPCODE_UP2US:
1287 /* deprecated */
1288 assert(0);
1289 return 0;
1290 break;
1291
1292 case TGSI_OPCODE_UP4B:
1293 /* deprecated */
1294 assert(0);
1295 return 0;
1296 break;
1297
1298 case TGSI_OPCODE_UP4UB:
1299 /* deprecated */
1300 assert(0);
1301 return 0;
1302 break;
1303
1304 case TGSI_OPCODE_X2D:
1305 /* deprecated? */
1306 assert(0);
1307 return 0;
1308 break;
1309
1310 case TGSI_OPCODE_ARA:
1311 /* deprecated */
1312 assert(0);
1313 return 0;
1314 break;
1315
1316 #if 0
1317 case TGSI_OPCODE_ARR:
1318 /* FIXME */
1319 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1320 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1321 emit_rnd( bld, 0, 0 );
1322 emit_f2it( bld, 0 );
1323 dst0[chan_index] = tmp0;
1324 }
1325 break;
1326 #endif
1327
1328 case TGSI_OPCODE_BRA:
1329 /* deprecated */
1330 assert(0);
1331 return 0;
1332 break;
1333
1334 case TGSI_OPCODE_CAL:
1335 /* FIXME */
1336 return 0;
1337 break;
1338
1339 case TGSI_OPCODE_RET:
1340 /* FIXME */
1341 return 0;
1342 break;
1343
1344 case TGSI_OPCODE_END:
1345 break;
1346
1347 case TGSI_OPCODE_SSG:
1348 /* TGSI_OPCODE_SGN */
1349 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1350 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1351 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1352 }
1353 break;
1354
1355 case TGSI_OPCODE_CMP:
1356 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1357 src0 = emit_fetch( bld, inst, 0, chan_index );
1358 src1 = emit_fetch( bld, inst, 1, chan_index );
1359 src2 = emit_fetch( bld, inst, 2, chan_index );
1360 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1361 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1362 }
1363 break;
1364
1365 case TGSI_OPCODE_SCS:
1366 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1367 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1368 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1369 }
1370 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1371 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1372 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1373 }
1374 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1375 dst0[CHAN_Z] = bld->base.zero;
1376 }
1377 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1378 dst0[CHAN_W] = bld->base.one;
1379 }
1380 break;
1381
1382 case TGSI_OPCODE_TXB:
1383 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1384 break;
1385
1386 case TGSI_OPCODE_NRM:
1387 /* fall-through */
1388 case TGSI_OPCODE_NRM4:
1389 /* 3 or 4-component normalization */
1390 {
1391 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1392
1393 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1394 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1395 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1396 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1397
1398 /* NOTE: the tmpN temporaries below mirror the xmm register usage of the original tgsi_sse2.c code. */
1399
1400 /* xmm4 = src.x */
1401 /* xmm0 = src.x * src.x */
1402 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1403 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1404 tmp4 = tmp0;
1405 }
1406 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1407
1408 /* xmm5 = src.y */
1409 /* xmm0 = xmm0 + src.y * src.y */
1410 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1411 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1412 tmp5 = tmp1;
1413 }
1414 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1415 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1416
1417 /* xmm6 = src.z */
1418 /* xmm0 = xmm0 + src.z * src.z */
1419 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1420 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1421 tmp6 = tmp1;
1422 }
1423 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1424 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1425
1426 if (dims == 4) {
1427 /* xmm7 = src.w */
1428 /* xmm0 = xmm0 + src.w * src.w */
1429 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1430 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1431 tmp7 = tmp1;
1432 }
1433 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1434 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1435 }
1436
1437 /* xmm1 = 1 / sqrt(xmm0) */
1438 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1439
1440 /* dst.x = xmm1 * src.x */
1441 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1442 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1443 }
1444
1445 /* dst.y = xmm1 * src.y */
1446 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1447 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1448 }
1449
1450 /* dst.z = xmm1 * src.z */
1451 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1452 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1453 }
1454
1455 /* dst.w = xmm1 * src.w */
1456 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4) {
1457 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1458 }
1459 }
1460
1461 /* dst.w = 1.0 */
1462 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1463 dst0[CHAN_W] = bld->base.one;
1464 }
1465 }
1466 break;
1467
1468 case TGSI_OPCODE_DIV:
1469 /* deprecated */
1470 assert( 0 );
1471 return 0;
1472 break;
1473
1474 case TGSI_OPCODE_DP2:
1475 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1476 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1477 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1478 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1479 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1480 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1481 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1482 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1483 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1484 }
1485 break;
1486
1487 case TGSI_OPCODE_TXL:
1488 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1489 break;
1490
1491 case TGSI_OPCODE_TXP:
1492 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1493 break;
1494
1495 case TGSI_OPCODE_BRK:
1496 lp_exec_break(&bld->exec_mask);
1497 break;
1498
1499 case TGSI_OPCODE_IF:
1500 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1501 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1502 break;
1503
1504 case TGSI_OPCODE_BGNFOR:
1505 /* deprecated */
1506 assert(0);
1507 return 0;
1508 break;
1509
1510 case TGSI_OPCODE_BGNLOOP:
1511 lp_exec_bgnloop(&bld->exec_mask);
1512 break;
1513
1514 case TGSI_OPCODE_REP:
1515 /* deprecated */
1516 assert(0);
1517 return 0;
1518 break;
1519
1520 case TGSI_OPCODE_ELSE:
1521 lp_exec_mask_cond_invert(&bld->exec_mask);
1522 break;
1523
1524 case TGSI_OPCODE_ENDIF:
1525 lp_exec_mask_cond_pop(&bld->exec_mask);
1526 break;
1527
1528 case TGSI_OPCODE_ENDFOR:
1529 /* deprecated */
1530 assert(0);
1531 return 0;
1532 break;
1533
1534 case TGSI_OPCODE_ENDLOOP:
1535 lp_exec_endloop(&bld->exec_mask);
1536 break;
1537
1538 case TGSI_OPCODE_ENDREP:
1539 /* deprecated */
1540 assert(0);
1541 return 0;
1542 break;
1543
1544 case TGSI_OPCODE_PUSHA:
1545 /* deprecated? */
1546 assert(0);
1547 return 0;
1548 break;
1549
1550 case TGSI_OPCODE_POPA:
1551 /* deprecated? */
1552 assert(0);
1553 return 0;
1554 break;
1555
1556 case TGSI_OPCODE_CEIL:
1557 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1558 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1559 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1560 }
1561 break;
1562
1563 case TGSI_OPCODE_I2F:
1564 /* deprecated? */
1565 assert(0);
1566 return 0;
1567 break;
1568
1569 case TGSI_OPCODE_NOT:
1570 /* deprecated? */
1571 assert(0);
1572 return 0;
1573 break;
1574
1575 case TGSI_OPCODE_TRUNC:
1576 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1577 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1578 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1579 }
1580 break;
1581
1582 case TGSI_OPCODE_SHL:
1583 /* deprecated? */
1584 assert(0);
1585 return 0;
1586 break;
1587
1588 case TGSI_OPCODE_ISHR:
1589 /* deprecated? */
1590 assert(0);
1591 return 0;
1592 break;
1593
1594 case TGSI_OPCODE_AND:
1595 /* deprecated? */
1596 assert(0);
1597 return 0;
1598 break;
1599
1600 case TGSI_OPCODE_OR:
1601 /* deprecated? */
1602 assert(0);
1603 return 0;
1604 break;
1605
1606 case TGSI_OPCODE_MOD:
1607 /* deprecated? */
1608 assert(0);
1609 return 0;
1610 break;
1611
1612 case TGSI_OPCODE_XOR:
1613 /* deprecated? */
1614 assert(0);
1615 return 0;
1616 break;
1617
1618 case TGSI_OPCODE_SAD:
1619 /* deprecated? */
1620 assert(0);
1621 return 0;
1622 break;
1623
1624 case TGSI_OPCODE_TXF:
1625 /* deprecated? */
1626 assert(0);
1627 return 0;
1628 break;
1629
1630 case TGSI_OPCODE_TXQ:
1631 /* deprecated? */
1632 assert(0);
1633 return 0;
1634 break;
1635
1636 case TGSI_OPCODE_CONT:
1637 lp_exec_continue(&bld->exec_mask);
1638 break;
1639
1640 case TGSI_OPCODE_EMIT:
1641 return 0;
1642 break;
1643
1644 case TGSI_OPCODE_ENDPRIM:
1645 return 0;
1646 break;
1647
1648 case TGSI_OPCODE_NOP:
1649 break;
1650
1651 default:
1652 return 0;
1653 }
1654
1655 if(info->num_dst) {
1656 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1657 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1658 }
1659 }
1660
1661 return 1;
1662 }
1663
1664
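/**
 * Main entry point: walk the TGSI token stream and emit equivalent LLVM IR
 * in SoA form. Declarations become allocas, immediates are turned into
 * build-time constants, and each instruction is handled by
 * emit_instruction().
 */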
1665 void
1666 lp_build_tgsi_soa(LLVMBuilderRef builder,
1667 const struct tgsi_token *tokens,
1668 struct lp_type type,
1669 struct lp_build_mask_context *mask,
1670 LLVMValueRef consts_ptr,
1671 const LLVMValueRef *pos,
1672 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1673 LLVMValueRef (*outputs)[NUM_CHANNELS],
1674 struct lp_build_sampler_soa *sampler)
1675 {
1676 struct lp_build_tgsi_soa_context bld;
1677 struct tgsi_parse_context parse;
1678 uint num_immediates = 0;
1679 unsigned i;
1680
1681 /* Setup build context */
1682 memset(&bld, 0, sizeof bld);
1683 lp_build_context_init(&bld.base, builder, type);
1684 bld.mask = mask;
1685 bld.pos = pos;
1686 bld.inputs = inputs;
1687 bld.outputs = outputs;
1688 bld.consts_ptr = consts_ptr;
1689 bld.sampler = sampler;
1690
1691 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1692
1693 tgsi_parse_init( &parse, tokens );
1694
1695 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1696 tgsi_parse_token( &parse );
1697
1698 switch( parse.FullToken.Token.Type ) {
1699 case TGSI_TOKEN_TYPE_DECLARATION:
1700 /* Inputs already interpolated */
1701 {
1702 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1703 _debug_printf("warning: failed to define LLVM variable\n");
1704 }
1705 break;
1706
1707 case TGSI_TOKEN_TYPE_INSTRUCTION:
1708 {
1709 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1710 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1711 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1712 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1713 info ? info->mnemonic : "<invalid>");
1714 }
1715
1716 break;
1717
1718 case TGSI_TOKEN_TYPE_IMMEDIATE:
1719 /* simply copy the immediate values into the next immediates[] slot */
1720 {
1721 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1722 assert(size <= 4);
1723 assert(num_immediates < LP_MAX_IMMEDIATES);
1724 for( i = 0; i < size; ++i )
1725 bld.immediates[num_immediates][i] =
1726 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1727 for( i = size; i < 4; ++i )
1728 bld.immediates[num_immediates][i] = bld.base.undef;
1729 num_immediates++;
1730 }
1731 break;
1732
1733 case TGSI_TOKEN_TYPE_PROPERTY:
1734 break;
1735
1736 default:
1737 assert( 0 );
1738 }
1739 }
1740 if (0) {
1741 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1742 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1743 debug_printf("11111111111111111111111111111 \n");
1744 tgsi_dump(tokens, 0);
1745 LLVMDumpValue(function);
1746 debug_printf("2222222222222222222222222222 \n");
1747 }
1748 tgsi_parse_free( &parse );
1749 }
1750