gallivm: simplify conditional branching
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_tgsi.h"
55 #include "lp_bld_debug.h"
56
57
/* Capacities of the per-shader temporary and immediate register arrays. */
#define LP_MAX_TEMPS      256
#define LP_MAX_IMMEDIATES 256


/*
 * Channel iteration helpers.
 *
 * IF_IS_DST0_CHANNEL_ENABLED and FOR_EACH_DST0_ENABLED_CHANNEL expand to a
 * bare "if" / "for ... if" header, so the statement (or braced block) that
 * follows the macro invocation becomes the body.
 */

/* Loop CHAN over every channel index in [0, NUM_CHANNELS). */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/* Non-zero when bit CHAN is set in the write mask of destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* "if" header guarding on IS_DST0_CHANNEL_ENABLED. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop CHAN over the channels enabled in the write mask of destination 0. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element indices used to build the quad swizzle tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* Capacity of each stack in struct lp_exec_mask. */
#define LP_TGSI_MAX_NESTING 16
86
87 struct lp_exec_mask {
88 struct lp_build_context *bld;
89
90 boolean has_mask;
91
92 LLVMTypeRef int_vec_type;
93
94 LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
95 int cond_stack_size;
96 LLVMValueRef cond_mask;
97
98 LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
99 int break_stack_size;
100 LLVMValueRef break_mask;
101
102 LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
103 int cont_stack_size;
104 LLVMValueRef cont_mask;
105
106 LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
107 int loop_stack_size;
108 LLVMBasicBlockRef loop_block;
109
110
111 LLVMValueRef exec_mask;
112 };
113
114 struct lp_build_tgsi_soa_context
115 {
116 struct lp_build_context base;
117
118 LLVMValueRef consts_ptr;
119 const LLVMValueRef *pos;
120 const LLVMValueRef (*inputs)[NUM_CHANNELS];
121 LLVMValueRef (*outputs)[NUM_CHANNELS];
122
123 struct lp_build_sampler_soa *sampler;
124
125 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
126 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
127
128 struct lp_build_mask_context *mask;
129 struct lp_exec_mask exec_mask;
130 };
131
132 static const unsigned char
133 swizzle_left[4] = {
134 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
135 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
136 };
137
138 static const unsigned char
139 swizzle_right[4] = {
140 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
141 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
142 };
143
144 static const unsigned char
145 swizzle_top[4] = {
146 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
147 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
148 };
149
150 static const unsigned char
151 swizzle_bottom[4] = {
152 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
153 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
154 };
155
156 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
157 {
158 mask->bld = bld;
159 mask->has_mask = FALSE;
160 mask->cond_stack_size = 0;
161 mask->loop_stack_size = 0;
162 mask->break_stack_size = 0;
163 mask->cont_stack_size = 0;
164
165 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
166 }
167
168 static void lp_exec_mask_update(struct lp_exec_mask *mask)
169 {
170 if (mask->loop_stack_size) {
171 /*for loops we need to update the entire mask at
172 * runtime */
173 LLVMValueRef tmp;
174 tmp = LLVMBuildAnd(mask->bld->builder,
175 mask->cont_mask,
176 mask->break_mask,
177 "maskcb");
178 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
179 mask->cond_mask,
180 tmp,
181 "maskfull");
182 } else
183 mask->exec_mask = mask->cond_mask;
184
185
186 mask->has_mask = (mask->cond_stack_size > 0 ||
187 mask->loop_stack_size > 0);
188 }
189
190 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
191 LLVMValueRef val)
192 {
193 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
194 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
195 mask->int_vec_type, "");
196
197 lp_exec_mask_update(mask);
198 }
199
200 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
201 {
202 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
203 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
204 mask->cond_mask, "");
205
206 /* means that we didn't have any mask before and that
207 * we were fully enabled */
208 if (mask->cond_stack_size <= 1) {
209 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
210 }
211
212 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
213 inv_mask,
214 prev_mask, "");
215 lp_exec_mask_update(mask);
216 }
217
218 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
219 {
220 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
221 lp_exec_mask_update(mask);
222 }
223
224 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
225 {
226
227 if (mask->cont_stack_size == 0)
228 mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
229 if (mask->cont_stack_size == 0)
230 mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
231 if (mask->cond_stack_size == 0)
232 mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
233 mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
234 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
235 LLVMBuildBr(mask->bld->builder, mask->loop_block);
236 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
237
238 lp_exec_mask_update(mask);
239 }
240
241 static void lp_exec_break(struct lp_exec_mask *mask)
242 {
243 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
244 mask->exec_mask,
245 "break");
246
247 mask->break_stack[mask->break_stack_size++] = mask->break_mask;
248 if (mask->break_stack_size > 1) {
249 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
250 mask->break_mask,
251 exec_mask, "break_full");
252 } else
253 mask->break_mask = exec_mask;
254
255 lp_exec_mask_update(mask);
256 }
257
258 static void lp_exec_continue(struct lp_exec_mask *mask)
259 {
260 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
261 mask->exec_mask,
262 "");
263
264 mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
265 if (mask->cont_stack_size > 1) {
266 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
267 mask->cont_mask,
268 exec_mask, "");
269 } else
270 mask->cont_mask = exec_mask;
271
272 lp_exec_mask_update(mask);
273 }
274
275
276 static void lp_exec_endloop(struct lp_exec_mask *mask)
277 {
278 LLVMBasicBlockRef endloop;
279 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
280 mask->bld->type.length);
281 /* i1cond = (mask == 0) */
282 LLVMValueRef i1cond = LLVMBuildICmp(
283 mask->bld->builder,
284 LLVMIntNE,
285 LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
286 LLVMConstNull(reg_type), "");
287
288 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
289
290 LLVMBuildCondBr(mask->bld->builder,
291 i1cond, mask->loop_block, endloop);
292
293 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
294
295 mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
296 /* pop the break mask */
297 if (mask->cont_stack_size) {
298 mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
299 }
300 if (mask->break_stack_size) {
301 mask->break_mask = mask->cont_stack[--mask->break_stack_size];
302 }
303
304 lp_exec_mask_update(mask);
305 }
306
307 static void lp_exec_mask_store(struct lp_exec_mask *mask,
308 LLVMValueRef val,
309 LLVMValueRef dst)
310 {
311 if (mask->has_mask) {
312 LLVMValueRef real_val, dst_val;
313
314 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
315 real_val = lp_build_select(mask->bld,
316 mask->exec_mask,
317 val, dst_val);
318
319 LLVMBuildStore(mask->bld->builder, real_val, dst);
320 } else
321 LLVMBuildStore(mask->bld->builder, val, dst);
322 }
323
324
325 static LLVMValueRef
326 emit_ddx(struct lp_build_tgsi_soa_context *bld,
327 LLVMValueRef src)
328 {
329 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
330 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
331 return lp_build_sub(&bld->base, src_right, src_left);
332 }
333
334
335 static LLVMValueRef
336 emit_ddy(struct lp_build_tgsi_soa_context *bld,
337 LLVMValueRef src)
338 {
339 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
340 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
341 return lp_build_sub(&bld->base, src_top, src_bottom);
342 }
343
344
345 /**
346 * Register fetch.
347 */
348 static LLVMValueRef
349 emit_fetch(
350 struct lp_build_tgsi_soa_context *bld,
351 const struct tgsi_full_instruction *inst,
352 unsigned index,
353 const unsigned chan_index )
354 {
355 const struct tgsi_full_src_register *reg = &inst->Src[index];
356 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
357 LLVMValueRef res;
358
359 switch (swizzle) {
360 case TGSI_SWIZZLE_X:
361 case TGSI_SWIZZLE_Y:
362 case TGSI_SWIZZLE_Z:
363 case TGSI_SWIZZLE_W:
364
365 switch (reg->Register.File) {
366 case TGSI_FILE_CONSTANT: {
367 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
368 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
369 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
370 res = lp_build_broadcast_scalar(&bld->base, scalar);
371 break;
372 }
373
374 case TGSI_FILE_IMMEDIATE:
375 res = bld->immediates[reg->Register.Index][swizzle];
376 assert(res);
377 break;
378
379 case TGSI_FILE_INPUT:
380 res = bld->inputs[reg->Register.Index][swizzle];
381 assert(res);
382 break;
383
384 case TGSI_FILE_TEMPORARY:
385 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
386 if(!res)
387 return bld->base.undef;
388 break;
389
390 default:
391 assert( 0 );
392 return bld->base.undef;
393 }
394 break;
395
396 default:
397 assert( 0 );
398 return bld->base.undef;
399 }
400
401 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
402 case TGSI_UTIL_SIGN_CLEAR:
403 res = lp_build_abs( &bld->base, res );
404 break;
405
406 case TGSI_UTIL_SIGN_SET:
407 /* TODO: Use bitwese OR for floating point */
408 res = lp_build_abs( &bld->base, res );
409 res = LLVMBuildNeg( bld->base.builder, res, "" );
410 break;
411
412 case TGSI_UTIL_SIGN_TOGGLE:
413 res = LLVMBuildNeg( bld->base.builder, res, "" );
414 break;
415
416 case TGSI_UTIL_SIGN_KEEP:
417 break;
418 }
419
420 return res;
421 }
422
423
424 /**
425 * Register fetch with derivatives.
426 */
427 static void
428 emit_fetch_deriv(
429 struct lp_build_tgsi_soa_context *bld,
430 const struct tgsi_full_instruction *inst,
431 unsigned index,
432 const unsigned chan_index,
433 LLVMValueRef *res,
434 LLVMValueRef *ddx,
435 LLVMValueRef *ddy)
436 {
437 LLVMValueRef src;
438
439 src = emit_fetch(bld, inst, index, chan_index);
440
441 if(res)
442 *res = src;
443
444 /* TODO: use interpolation coeffs for inputs */
445
446 if(ddx)
447 *ddx = emit_ddx(bld, src);
448
449 if(ddy)
450 *ddy = emit_ddy(bld, src);
451 }
452
453
454 /**
455 * Register store.
456 */
457 static void
458 emit_store(
459 struct lp_build_tgsi_soa_context *bld,
460 const struct tgsi_full_instruction *inst,
461 unsigned index,
462 unsigned chan_index,
463 LLVMValueRef value)
464 {
465 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
466
467 switch( inst->Instruction.Saturate ) {
468 case TGSI_SAT_NONE:
469 break;
470
471 case TGSI_SAT_ZERO_ONE:
472 value = lp_build_max(&bld->base, value, bld->base.zero);
473 value = lp_build_min(&bld->base, value, bld->base.one);
474 break;
475
476 case TGSI_SAT_MINUS_PLUS_ONE:
477 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
478 value = lp_build_min(&bld->base, value, bld->base.one);
479 break;
480
481 default:
482 assert(0);
483 }
484
485 switch( reg->Register.File ) {
486 case TGSI_FILE_OUTPUT:
487 lp_exec_mask_store(&bld->exec_mask, value,
488 bld->outputs[reg->Register.Index][chan_index]);
489 break;
490
491 case TGSI_FILE_TEMPORARY:
492 lp_exec_mask_store(&bld->exec_mask, value,
493 bld->temps[reg->Register.Index][chan_index]);
494 break;
495
496 case TGSI_FILE_ADDRESS:
497 /* FIXME */
498 assert(0);
499 break;
500
501 case TGSI_FILE_PREDICATE:
502 /* FIXME */
503 assert(0);
504 break;
505
506 default:
507 assert( 0 );
508 }
509 }
510
511
512 /**
513 * High-level instruction translators.
514 */
515
516
517 static void
518 emit_tex( struct lp_build_tgsi_soa_context *bld,
519 const struct tgsi_full_instruction *inst,
520 boolean apply_lodbias,
521 boolean projected,
522 LLVMValueRef *texel)
523 {
524 const uint unit = inst->Src[1].Register.Index;
525 LLVMValueRef lodbias;
526 LLVMValueRef oow = NULL;
527 LLVMValueRef coords[3];
528 unsigned num_coords;
529 unsigned i;
530
531 switch (inst->Texture.Texture) {
532 case TGSI_TEXTURE_1D:
533 num_coords = 1;
534 break;
535 case TGSI_TEXTURE_2D:
536 case TGSI_TEXTURE_RECT:
537 num_coords = 2;
538 break;
539 case TGSI_TEXTURE_SHADOW1D:
540 case TGSI_TEXTURE_SHADOW2D:
541 case TGSI_TEXTURE_SHADOWRECT:
542 case TGSI_TEXTURE_3D:
543 case TGSI_TEXTURE_CUBE:
544 num_coords = 3;
545 break;
546 default:
547 assert(0);
548 return;
549 }
550
551 if(apply_lodbias)
552 lodbias = emit_fetch( bld, inst, 0, 3 );
553 else
554 lodbias = bld->base.zero;
555
556 if (projected) {
557 oow = emit_fetch( bld, inst, 0, 3 );
558 oow = lp_build_rcp(&bld->base, oow);
559 }
560
561 for (i = 0; i < num_coords; i++) {
562 coords[i] = emit_fetch( bld, inst, 0, i );
563 if (projected)
564 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
565 }
566 for (i = num_coords; i < 3; i++) {
567 coords[i] = bld->base.undef;
568 }
569
570 bld->sampler->emit_fetch_texel(bld->sampler,
571 bld->base.builder,
572 bld->base.type,
573 unit, num_coords, coords, lodbias,
574 texel);
575 }
576
577
578 static void
579 emit_kil(
580 struct lp_build_tgsi_soa_context *bld,
581 const struct tgsi_full_instruction *inst )
582 {
583 const struct tgsi_full_src_register *reg = &inst->Src[0];
584 LLVMValueRef terms[NUM_CHANNELS];
585 LLVMValueRef mask;
586 unsigned chan_index;
587
588 memset(&terms, 0, sizeof terms);
589
590 FOR_EACH_CHANNEL( chan_index ) {
591 unsigned swizzle;
592
593 /* Unswizzle channel */
594 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
595
596 /* Check if the component has not been already tested. */
597 assert(swizzle < NUM_CHANNELS);
598 if( !terms[swizzle] )
599 /* TODO: change the comparison operator instead of setting the sign */
600 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
601 }
602
603 mask = NULL;
604 FOR_EACH_CHANNEL( chan_index ) {
605 if(terms[chan_index]) {
606 LLVMValueRef chan_mask;
607
608 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
609
610 if(mask)
611 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
612 else
613 mask = chan_mask;
614 }
615 }
616
617 if(mask)
618 lp_build_mask_update(bld->mask, mask);
619 }
620
621
622 /**
623 * Check if inst src/dest regs use indirect addressing into temporary
624 * register file.
625 */
626 static boolean
627 indirect_temp_reference(const struct tgsi_full_instruction *inst)
628 {
629 uint i;
630 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
631 const struct tgsi_full_src_register *reg = &inst->Src[i];
632 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
633 reg->Register.Indirect)
634 return TRUE;
635 }
636 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
637 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
638 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
639 reg->Register.Indirect)
640 return TRUE;
641 }
642 return FALSE;
643 }
644
645 static int
646 emit_declaration(
647 struct lp_build_tgsi_soa_context *bld,
648 const struct tgsi_full_declaration *decl)
649 {
650 unsigned first = decl->Range.First;
651 unsigned last = decl->Range.Last;
652 unsigned idx, i;
653
654 for (idx = first; idx <= last; ++idx) {
655 boolean ok;
656
657 switch (decl->Declaration.File) {
658 case TGSI_FILE_TEMPORARY:
659 for (i = 0; i < NUM_CHANNELS; i++)
660 bld->temps[idx][i] = lp_build_alloca(&bld->base);
661 ok = TRUE;
662 break;
663
664 case TGSI_FILE_OUTPUT:
665 for (i = 0; i < NUM_CHANNELS; i++)
666 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
667 ok = TRUE;
668 break;
669
670 default:
671 /* don't need to declare other vars */
672 ok = TRUE;
673 }
674
675 if (!ok)
676 return FALSE;
677 }
678
679 return TRUE;
680 }
681
682 static int
683 emit_instruction(
684 struct lp_build_tgsi_soa_context *bld,
685 const struct tgsi_full_instruction *inst,
686 const struct tgsi_opcode_info *info)
687 {
688 unsigned chan_index;
689 LLVMValueRef src0, src1, src2;
690 LLVMValueRef tmp0, tmp1, tmp2;
691 LLVMValueRef tmp3 = NULL;
692 LLVMValueRef tmp4 = NULL;
693 LLVMValueRef tmp5 = NULL;
694 LLVMValueRef tmp6 = NULL;
695 LLVMValueRef tmp7 = NULL;
696 LLVMValueRef res;
697 LLVMValueRef dst0[NUM_CHANNELS];
698
699 /* we can't handle indirect addressing into temp register file yet */
700 if (indirect_temp_reference(inst))
701 return FALSE;
702
703 /*
704 * Stores and write masks are handled in a general fashion after the long
705 * instruction opcode switch statement.
706 *
707 * Although not stricitly necessary, we avoid generating instructions for
708 * channels which won't be stored, in cases where's that easy. For some
709 * complex instructions, like texture sampling, it is more convenient to
710 * assume a full writemask and then let LLVM optimization passes eliminate
711 * redundant code.
712 */
713
714 assert(info->num_dst <= 1);
715 if(info->num_dst) {
716 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
717 dst0[chan_index] = bld->base.undef;
718 }
719 }
720
721 switch (inst->Instruction.Opcode) {
722 #if 0
723 case TGSI_OPCODE_ARL:
724 /* FIXME */
725 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
726 tmp0 = emit_fetch( bld, inst, 0, chan_index );
727 emit_flr(bld, 0, 0);
728 emit_f2it( bld, 0 );
729 dst0[chan_index] = tmp0;
730 }
731 break;
732 #endif
733
734 case TGSI_OPCODE_MOV:
735 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
736 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
737 }
738 break;
739
740 case TGSI_OPCODE_LIT:
741 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
742 dst0[CHAN_X] = bld->base.one;
743 }
744 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
745 src0 = emit_fetch( bld, inst, 0, CHAN_X );
746 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
747 }
748 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
749 /* XMM[1] = SrcReg[0].yyyy */
750 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
751 /* XMM[1] = max(XMM[1], 0) */
752 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
753 /* XMM[2] = SrcReg[0].wwww */
754 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
755 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
756 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
757 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
758 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
759 }
760 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
761 dst0[CHAN_W] = bld->base.one;
762 }
763 break;
764
765 case TGSI_OPCODE_RCP:
766 /* TGSI_OPCODE_RECIP */
767 src0 = emit_fetch( bld, inst, 0, CHAN_X );
768 res = lp_build_rcp(&bld->base, src0);
769 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
770 dst0[chan_index] = res;
771 }
772 break;
773
774 case TGSI_OPCODE_RSQ:
775 /* TGSI_OPCODE_RECIPSQRT */
776 src0 = emit_fetch( bld, inst, 0, CHAN_X );
777 src0 = lp_build_abs(&bld->base, src0);
778 res = lp_build_rsqrt(&bld->base, src0);
779 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
780 dst0[chan_index] = res;
781 }
782 break;
783
784 case TGSI_OPCODE_EXP:
785 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
786 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
787 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
788 LLVMValueRef *p_exp2_int_part = NULL;
789 LLVMValueRef *p_frac_part = NULL;
790 LLVMValueRef *p_exp2 = NULL;
791
792 src0 = emit_fetch( bld, inst, 0, CHAN_X );
793
794 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
795 p_exp2_int_part = &tmp0;
796 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
797 p_frac_part = &tmp1;
798 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
799 p_exp2 = &tmp2;
800
801 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
802
803 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
804 dst0[CHAN_X] = tmp0;
805 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
806 dst0[CHAN_Y] = tmp1;
807 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
808 dst0[CHAN_Z] = tmp2;
809 }
810 /* dst.w = 1.0 */
811 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
812 dst0[CHAN_W] = bld->base.one;
813 }
814 break;
815
816 case TGSI_OPCODE_LOG:
817 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
818 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
819 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
820 LLVMValueRef *p_floor_log2 = NULL;
821 LLVMValueRef *p_exp = NULL;
822 LLVMValueRef *p_log2 = NULL;
823
824 src0 = emit_fetch( bld, inst, 0, CHAN_X );
825 src0 = lp_build_abs( &bld->base, src0 );
826
827 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
828 p_floor_log2 = &tmp0;
829 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
830 p_exp = &tmp1;
831 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
832 p_log2 = &tmp2;
833
834 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
835
836 /* dst.x = floor(lg2(abs(src.x))) */
837 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
838 dst0[CHAN_X] = tmp0;
839 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
840 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
841 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
842 }
843 /* dst.z = lg2(abs(src.x)) */
844 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
845 dst0[CHAN_Z] = tmp2;
846 }
847 /* dst.w = 1.0 */
848 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
849 dst0[CHAN_W] = bld->base.one;
850 }
851 break;
852
853 case TGSI_OPCODE_MUL:
854 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
855 src0 = emit_fetch( bld, inst, 0, chan_index );
856 src1 = emit_fetch( bld, inst, 1, chan_index );
857 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
858 }
859 break;
860
861 case TGSI_OPCODE_ADD:
862 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
863 src0 = emit_fetch( bld, inst, 0, chan_index );
864 src1 = emit_fetch( bld, inst, 1, chan_index );
865 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
866 }
867 break;
868
869 case TGSI_OPCODE_DP3:
870 /* TGSI_OPCODE_DOT3 */
871 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
872 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
873 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
874 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
875 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
876 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
877 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
878 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
879 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
880 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
881 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
882 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
883 dst0[chan_index] = tmp0;
884 }
885 break;
886
887 case TGSI_OPCODE_DP4:
888 /* TGSI_OPCODE_DOT4 */
889 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
890 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
891 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
892 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
893 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
894 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
895 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
896 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
897 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
898 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
899 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
900 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
901 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
902 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
903 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
904 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
905 dst0[chan_index] = tmp0;
906 }
907 break;
908
909 case TGSI_OPCODE_DST:
910 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
911 dst0[CHAN_X] = bld->base.one;
912 }
913 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
914 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
915 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
916 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
917 }
918 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
919 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
920 }
921 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
922 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
923 }
924 break;
925
926 case TGSI_OPCODE_MIN:
927 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
928 src0 = emit_fetch( bld, inst, 0, chan_index );
929 src1 = emit_fetch( bld, inst, 1, chan_index );
930 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
931 }
932 break;
933
934 case TGSI_OPCODE_MAX:
935 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
936 src0 = emit_fetch( bld, inst, 0, chan_index );
937 src1 = emit_fetch( bld, inst, 1, chan_index );
938 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
939 }
940 break;
941
942 case TGSI_OPCODE_SLT:
943 /* TGSI_OPCODE_SETLT */
944 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
945 src0 = emit_fetch( bld, inst, 0, chan_index );
946 src1 = emit_fetch( bld, inst, 1, chan_index );
947 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
948 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
949 }
950 break;
951
952 case TGSI_OPCODE_SGE:
953 /* TGSI_OPCODE_SETGE */
954 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
955 src0 = emit_fetch( bld, inst, 0, chan_index );
956 src1 = emit_fetch( bld, inst, 1, chan_index );
957 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
958 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
959 }
960 break;
961
962 case TGSI_OPCODE_MAD:
963 /* TGSI_OPCODE_MADD */
964 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
965 tmp0 = emit_fetch( bld, inst, 0, chan_index );
966 tmp1 = emit_fetch( bld, inst, 1, chan_index );
967 tmp2 = emit_fetch( bld, inst, 2, chan_index );
968 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
969 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
970 dst0[chan_index] = tmp0;
971 }
972 break;
973
974 case TGSI_OPCODE_SUB:
975 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
976 tmp0 = emit_fetch( bld, inst, 0, chan_index );
977 tmp1 = emit_fetch( bld, inst, 1, chan_index );
978 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
979 }
980 break;
981
982 case TGSI_OPCODE_LRP:
983 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
984 src0 = emit_fetch( bld, inst, 0, chan_index );
985 src1 = emit_fetch( bld, inst, 1, chan_index );
986 src2 = emit_fetch( bld, inst, 2, chan_index );
987 tmp0 = lp_build_sub( &bld->base, src1, src2 );
988 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
989 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
990 }
991 break;
992
993 case TGSI_OPCODE_CND:
994 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
995 src0 = emit_fetch( bld, inst, 0, chan_index );
996 src1 = emit_fetch( bld, inst, 1, chan_index );
997 src2 = emit_fetch( bld, inst, 2, chan_index );
998 tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
999 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1000 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1001 }
1002 break;
1003
1004 case TGSI_OPCODE_DP2A:
1005 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1006 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1007 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1008 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1009 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1010 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1011 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1012 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1013 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1014 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1015 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1016 }
1017 break;
1018
1019 case TGSI_OPCODE_FRC:
1020 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1021 src0 = emit_fetch( bld, inst, 0, chan_index );
1022 tmp0 = lp_build_floor(&bld->base, src0);
1023 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1024 dst0[chan_index] = tmp0;
1025 }
1026 break;
1027
1028 case TGSI_OPCODE_CLAMP:
1029 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1030 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1031 src1 = emit_fetch( bld, inst, 1, chan_index );
1032 src2 = emit_fetch( bld, inst, 2, chan_index );
1033 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1034 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1035 dst0[chan_index] = tmp0;
1036 }
1037 break;
1038
1039 case TGSI_OPCODE_FLR:
1040 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1041 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1042 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1043 }
1044 break;
1045
1046 case TGSI_OPCODE_ROUND:
1047 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1048 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1049 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1050 }
1051 break;
1052
1053 case TGSI_OPCODE_EX2: {
1054 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1055 tmp0 = lp_build_exp2( &bld->base, tmp0);
1056 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1057 dst0[chan_index] = tmp0;
1058 }
1059 break;
1060 }
1061
1062 case TGSI_OPCODE_LG2:
1063 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1064 tmp0 = lp_build_log2( &bld->base, tmp0);
1065 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1066 dst0[chan_index] = tmp0;
1067 }
1068 break;
1069
1070 case TGSI_OPCODE_POW:
1071 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1072 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1073 res = lp_build_pow( &bld->base, src0, src1 );
1074 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1075 dst0[chan_index] = res;
1076 }
1077 break;
1078
1079 case TGSI_OPCODE_XPD:
1080 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1081 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1082 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1083 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1084 }
1085 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1086 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1087 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1088 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1089 }
1090 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1091 tmp2 = tmp0;
1092 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1093 tmp5 = tmp3;
1094 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1095 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1096 dst0[CHAN_X] = tmp2;
1097 }
1098 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1099 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1100 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1101 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1102 }
1103 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1104 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1105 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1106 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1107 dst0[CHAN_Y] = tmp3;
1108 }
1109 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1110 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1111 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1112 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1113 dst0[CHAN_Z] = tmp5;
1114 }
1115 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1116 dst0[CHAN_W] = bld->base.one;
1117 }
1118 break;
1119
1120 case TGSI_OPCODE_ABS:
1121 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1122 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1123 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1124 }
1125 break;
1126
1127 case TGSI_OPCODE_RCC:
1128 /* deprecated? */
1129 assert(0);
1130 return 0;
1131
1132 case TGSI_OPCODE_DPH:
1133 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1134 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1135 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1136 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1137 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1138 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1139 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1140 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1141 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1142 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1143 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1144 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1145 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1146 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1147 dst0[chan_index] = tmp0;
1148 }
1149 break;
1150
1151 case TGSI_OPCODE_COS:
1152 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1153 tmp0 = lp_build_cos( &bld->base, tmp0 );
1154 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1155 dst0[chan_index] = tmp0;
1156 }
1157 break;
1158
1159 case TGSI_OPCODE_DDX:
1160 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1161 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1162 }
1163 break;
1164
1165 case TGSI_OPCODE_DDY:
1166 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1167 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1168 }
1169 break;
1170
1171 case TGSI_OPCODE_KILP:
1172 /* predicated kill */
1173 /* FIXME */
1174 return 0;
1175 break;
1176
1177 case TGSI_OPCODE_KIL:
1178 /* conditional kill */
1179 emit_kil( bld, inst );
1180 break;
1181
1182 case TGSI_OPCODE_PK2H:
1183 return 0;
1184 break;
1185
1186 case TGSI_OPCODE_PK2US:
1187 return 0;
1188 break;
1189
1190 case TGSI_OPCODE_PK4B:
1191 return 0;
1192 break;
1193
1194 case TGSI_OPCODE_PK4UB:
1195 return 0;
1196 break;
1197
1198 case TGSI_OPCODE_RFL:
1199 return 0;
1200 break;
1201
1202 case TGSI_OPCODE_SEQ:
1203 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1204 src0 = emit_fetch( bld, inst, 0, chan_index );
1205 src1 = emit_fetch( bld, inst, 1, chan_index );
1206 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1207 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1208 }
1209 break;
1210
1211 case TGSI_OPCODE_SFL:
1212 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1213 dst0[chan_index] = bld->base.zero;
1214 }
1215 break;
1216
1217 case TGSI_OPCODE_SGT:
1218 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1219 src0 = emit_fetch( bld, inst, 0, chan_index );
1220 src1 = emit_fetch( bld, inst, 1, chan_index );
1221 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1222 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1223 }
1224 break;
1225
1226 case TGSI_OPCODE_SIN:
1227 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1228 tmp0 = lp_build_sin( &bld->base, tmp0 );
1229 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1230 dst0[chan_index] = tmp0;
1231 }
1232 break;
1233
1234 case TGSI_OPCODE_SLE:
1235 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1236 src0 = emit_fetch( bld, inst, 0, chan_index );
1237 src1 = emit_fetch( bld, inst, 1, chan_index );
1238 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1239 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1240 }
1241 break;
1242
1243 case TGSI_OPCODE_SNE:
1244 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1245 src0 = emit_fetch( bld, inst, 0, chan_index );
1246 src1 = emit_fetch( bld, inst, 1, chan_index );
1247 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1248 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1249 }
1250 break;
1251
1252 case TGSI_OPCODE_STR:
1253 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1254 dst0[chan_index] = bld->base.one;
1255 }
1256 break;
1257
1258 case TGSI_OPCODE_TEX:
1259 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1260 break;
1261
1262 case TGSI_OPCODE_TXD:
1263 /* FIXME */
1264 return 0;
1265 break;
1266
1267 case TGSI_OPCODE_UP2H:
1268 /* deprecated */
1269 assert (0);
1270 return 0;
1271 break;
1272
1273 case TGSI_OPCODE_UP2US:
1274 /* deprecated */
1275 assert(0);
1276 return 0;
1277 break;
1278
1279 case TGSI_OPCODE_UP4B:
1280 /* deprecated */
1281 assert(0);
1282 return 0;
1283 break;
1284
1285 case TGSI_OPCODE_UP4UB:
1286 /* deprecated */
1287 assert(0);
1288 return 0;
1289 break;
1290
1291 case TGSI_OPCODE_X2D:
1292 /* deprecated? */
1293 assert(0);
1294 return 0;
1295 break;
1296
1297 case TGSI_OPCODE_ARA:
1298 /* deprecated */
1299 assert(0);
1300 return 0;
1301 break;
1302
1303 #if 0
1304 case TGSI_OPCODE_ARR:
1305 /* FIXME */
1306 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1307 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1308 emit_rnd( bld, 0, 0 );
1309 emit_f2it( bld, 0 );
1310 dst0[chan_index] = tmp0;
1311 }
1312 break;
1313 #endif
1314
1315 case TGSI_OPCODE_BRA:
1316 /* deprecated */
1317 assert(0);
1318 return 0;
1319 break;
1320
1321 case TGSI_OPCODE_CAL:
1322 /* FIXME */
1323 return 0;
1324 break;
1325
1326 case TGSI_OPCODE_RET:
1327 /* FIXME */
1328 return 0;
1329 break;
1330
1331 case TGSI_OPCODE_END:
1332 break;
1333
1334 case TGSI_OPCODE_SSG:
1335 /* TGSI_OPCODE_SGN */
1336 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1337 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1338 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1339 }
1340 break;
1341
1342 case TGSI_OPCODE_CMP:
1343 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1344 src0 = emit_fetch( bld, inst, 0, chan_index );
1345 src1 = emit_fetch( bld, inst, 1, chan_index );
1346 src2 = emit_fetch( bld, inst, 2, chan_index );
1347 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1348 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1349 }
1350 break;
1351
1352 case TGSI_OPCODE_SCS:
1353 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1354 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1355 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1356 }
1357 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1358 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1359 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1360 }
1361 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1362 dst0[CHAN_Z] = bld->base.zero;
1363 }
1364 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1365 dst0[CHAN_W] = bld->base.one;
1366 }
1367 break;
1368
1369 case TGSI_OPCODE_TXB:
1370 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1371 break;
1372
1373 case TGSI_OPCODE_NRM:
1374 /* fall-through */
1375 case TGSI_OPCODE_NRM4:
1376 /* 3 or 4-component normalization */
1377 {
1378 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1379
1380 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1381 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1382 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1383 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1384
1385 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1386
1387 /* xmm4 = src.x */
1388 /* xmm0 = src.x * src.x */
1389 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1390 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1391 tmp4 = tmp0;
1392 }
1393 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1394
1395 /* xmm5 = src.y */
1396 /* xmm0 = xmm0 + src.y * src.y */
1397 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1398 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1399 tmp5 = tmp1;
1400 }
1401 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1402 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1403
1404 /* xmm6 = src.z */
1405 /* xmm0 = xmm0 + src.z * src.z */
1406 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1407 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1408 tmp6 = tmp1;
1409 }
1410 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1411 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1412
1413 if (dims == 4) {
1414 /* xmm7 = src.w */
1415 /* xmm0 = xmm0 + src.w * src.w */
1416 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1417 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1418 tmp7 = tmp1;
1419 }
1420 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1421 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1422 }
1423
1424 /* xmm1 = 1 / sqrt(xmm0) */
1425 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1426
1427 /* dst.x = xmm1 * src.x */
1428 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1429 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1430 }
1431
1432 /* dst.y = xmm1 * src.y */
1433 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1434 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1435 }
1436
1437 /* dst.z = xmm1 * src.z */
1438 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1439 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1440 }
1441
1442 /* dst.w = xmm1 * src.w */
1443 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1444 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1445 }
1446 }
1447
1448 /* dst.w = 1.0 */
1449 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1450 dst0[CHAN_W] = bld->base.one;
1451 }
1452 }
1453 break;
1454
1455 case TGSI_OPCODE_DIV:
1456 /* deprecated */
1457 assert( 0 );
1458 return 0;
1459 break;
1460
1461 case TGSI_OPCODE_DP2:
1462 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1463 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1464 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1465 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1466 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1467 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1468 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1469 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1470 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1471 }
1472 break;
1473
1474 case TGSI_OPCODE_TXL:
1475 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1476 break;
1477
1478 case TGSI_OPCODE_TXP:
1479 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1480 break;
1481
1482 case TGSI_OPCODE_BRK:
1483 lp_exec_break(&bld->exec_mask);
1484 break;
1485
1486 case TGSI_OPCODE_IF:
1487 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1488 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1489 tmp0, bld->base.zero);
1490 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1491 break;
1492
1493 case TGSI_OPCODE_BGNFOR:
1494 /* deprecated */
1495 assert(0);
1496 return 0;
1497 break;
1498
1499 case TGSI_OPCODE_BGNLOOP:
1500 lp_exec_bgnloop(&bld->exec_mask);
1501 break;
1502
1503 case TGSI_OPCODE_REP:
1504 /* deprecated */
1505 assert(0);
1506 return 0;
1507 break;
1508
1509 case TGSI_OPCODE_ELSE:
1510 lp_exec_mask_cond_invert(&bld->exec_mask);
1511 break;
1512
1513 case TGSI_OPCODE_ENDIF:
1514 lp_exec_mask_cond_pop(&bld->exec_mask);
1515 break;
1516
1517 case TGSI_OPCODE_ENDFOR:
1518 /* deprecated */
1519 assert(0);
1520 return 0;
1521 break;
1522
1523 case TGSI_OPCODE_ENDLOOP:
1524 lp_exec_endloop(&bld->exec_mask);
1525 break;
1526
1527 case TGSI_OPCODE_ENDREP:
1528 /* deprecated */
1529 assert(0);
1530 return 0;
1531 break;
1532
1533 case TGSI_OPCODE_PUSHA:
1534 /* deprecated? */
1535 assert(0);
1536 return 0;
1537 break;
1538
1539 case TGSI_OPCODE_POPA:
1540 /* deprecated? */
1541 assert(0);
1542 return 0;
1543 break;
1544
1545 case TGSI_OPCODE_CEIL:
1546 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1547 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1548 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1549 }
1550 break;
1551
1552 case TGSI_OPCODE_I2F:
1553 /* deprecated? */
1554 assert(0);
1555 return 0;
1556 break;
1557
1558 case TGSI_OPCODE_NOT:
1559 /* deprecated? */
1560 assert(0);
1561 return 0;
1562 break;
1563
1564 case TGSI_OPCODE_TRUNC:
1565 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1566 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1567 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1568 }
1569 break;
1570
1571 case TGSI_OPCODE_SHL:
1572 /* deprecated? */
1573 assert(0);
1574 return 0;
1575 break;
1576
1577 case TGSI_OPCODE_ISHR:
1578 /* deprecated? */
1579 assert(0);
1580 return 0;
1581 break;
1582
1583 case TGSI_OPCODE_AND:
1584 /* deprecated? */
1585 assert(0);
1586 return 0;
1587 break;
1588
1589 case TGSI_OPCODE_OR:
1590 /* deprecated? */
1591 assert(0);
1592 return 0;
1593 break;
1594
1595 case TGSI_OPCODE_MOD:
1596 /* deprecated? */
1597 assert(0);
1598 return 0;
1599 break;
1600
1601 case TGSI_OPCODE_XOR:
1602 /* deprecated? */
1603 assert(0);
1604 return 0;
1605 break;
1606
1607 case TGSI_OPCODE_SAD:
1608 /* deprecated? */
1609 assert(0);
1610 return 0;
1611 break;
1612
1613 case TGSI_OPCODE_TXF:
1614 /* deprecated? */
1615 assert(0);
1616 return 0;
1617 break;
1618
1619 case TGSI_OPCODE_TXQ:
1620 /* deprecated? */
1621 assert(0);
1622 return 0;
1623 break;
1624
1625 case TGSI_OPCODE_CONT:
1626 lp_exec_continue(&bld->exec_mask);
1627 break;
1628
1629 case TGSI_OPCODE_EMIT:
1630 return 0;
1631 break;
1632
1633 case TGSI_OPCODE_ENDPRIM:
1634 return 0;
1635 break;
1636
1637 case TGSI_OPCODE_NOP:
1638 break;
1639
1640 default:
1641 return 0;
1642 }
1643
1644 if(info->num_dst) {
1645 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1646 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1647 }
1648 }
1649
1650 return 1;
1651 }
1652
1653
1654 void
1655 lp_build_tgsi_soa(LLVMBuilderRef builder,
1656 const struct tgsi_token *tokens,
1657 struct lp_type type,
1658 struct lp_build_mask_context *mask,
1659 LLVMValueRef consts_ptr,
1660 const LLVMValueRef *pos,
1661 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1662 LLVMValueRef (*outputs)[NUM_CHANNELS],
1663 struct lp_build_sampler_soa *sampler)
1664 {
1665 struct lp_build_tgsi_soa_context bld;
1666 struct tgsi_parse_context parse;
1667 uint num_immediates = 0;
1668 unsigned i;
1669
1670 /* Setup build context */
1671 memset(&bld, 0, sizeof bld);
1672 lp_build_context_init(&bld.base, builder, type);
1673 bld.mask = mask;
1674 bld.pos = pos;
1675 bld.inputs = inputs;
1676 bld.outputs = outputs;
1677 bld.consts_ptr = consts_ptr;
1678 bld.sampler = sampler;
1679
1680 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1681
1682 tgsi_parse_init( &parse, tokens );
1683
1684 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1685 tgsi_parse_token( &parse );
1686
1687 switch( parse.FullToken.Token.Type ) {
1688 case TGSI_TOKEN_TYPE_DECLARATION:
1689 /* Inputs already interpolated */
1690 {
1691 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1692 _debug_printf("warning: failed to define LLVM variable\n");
1693 }
1694 break;
1695
1696 case TGSI_TOKEN_TYPE_INSTRUCTION:
1697 {
1698 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1699 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1700 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1701 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1702 info ? info->mnemonic : "<invalid>");
1703 }
1704
1705 break;
1706
1707 case TGSI_TOKEN_TYPE_IMMEDIATE:
1708 /* simply copy the immediate values into the next immediates[] slot */
1709 {
1710 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1711 assert(size <= 4);
1712 assert(num_immediates < LP_MAX_IMMEDIATES);
1713 for( i = 0; i < size; ++i )
1714 bld.immediates[num_immediates][i] =
1715 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1716 for( i = size; i < 4; ++i )
1717 bld.immediates[num_immediates][i] = bld.base.undef;
1718 num_immediates++;
1719 }
1720 break;
1721
1722 case TGSI_TOKEN_TYPE_PROPERTY:
1723 break;
1724
1725 default:
1726 assert( 0 );
1727 }
1728 }
1729 if (0) {
1730 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1731 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1732 debug_printf("11111111111111111111111111111 \n");
1733 tgsi_dump(tokens, 0);
1734 LLVMDumpValue(function);
1735 debug_printf("2222222222222222222222222222 \n");
1736 }
1737 tgsi_parse_free( &parse );
1738 }
1739