gallivm: make sure that the alloca's are the very first thing in the function
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/* Capacity of the temporary and immediate register tables kept in
 * struct lp_build_tgsi_soa_context. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/* Loop header: step CHAN through every channel index, 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of INST's first
 * destination register. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* Statement prefix: run the following statement only if CHAN is enabled.
 * Expands to a bare "if", so a trailing "else" would bind to it. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop header: visit only the channels enabled in INST's first destination. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element positions inside a 2x2 quad, used by the swizzle tables that
 * feed emit_ddx()/emit_ddy(). */
#define QUAD_TOP_LEFT 0
#define QUAD_TOP_RIGHT 1
#define QUAD_BOTTOM_LEFT 2
#define QUAD_BOTTOM_RIGHT 3

/* Depth of each fixed-size stack in struct lp_exec_mask. */
#define LP_TGSI_MAX_NESTING 16
87
88 struct lp_exec_mask {
89 struct lp_build_context *bld;
90
91 boolean has_mask;
92
93 LLVMTypeRef int_vec_type;
94
95 LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
96 int cond_stack_size;
97 LLVMValueRef cond_mask;
98
99 LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
100 int break_stack_size;
101 LLVMValueRef break_mask;
102
103 LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
104 int cont_stack_size;
105 LLVMValueRef cont_mask;
106
107 LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
108 int loop_stack_size;
109 LLVMBasicBlockRef loop_block;
110
111
112 LLVMValueRef exec_mask;
113 };
114
115 struct lp_build_tgsi_soa_context
116 {
117 struct lp_build_context base;
118
119 LLVMValueRef consts_ptr;
120 const LLVMValueRef *pos;
121 const LLVMValueRef (*inputs)[NUM_CHANNELS];
122 LLVMValueRef (*outputs)[NUM_CHANNELS];
123
124 struct lp_build_sampler_soa *sampler;
125
126 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
127 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
128
129 struct lp_build_mask_context *mask;
130 struct lp_exec_mask exec_mask;
131 };
132
133 static const unsigned char
134 swizzle_left[4] = {
135 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
136 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
137 };
138
139 static const unsigned char
140 swizzle_right[4] = {
141 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
142 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
143 };
144
145 static const unsigned char
146 swizzle_top[4] = {
147 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
148 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
149 };
150
151 static const unsigned char
152 swizzle_bottom[4] = {
153 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
154 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
155 };
156
157 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
158 {
159 mask->bld = bld;
160 mask->has_mask = FALSE;
161 mask->cond_stack_size = 0;
162 mask->loop_stack_size = 0;
163 mask->break_stack_size = 0;
164 mask->cont_stack_size = 0;
165
166 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
167 }
168
169 static void lp_exec_mask_update(struct lp_exec_mask *mask)
170 {
171 if (mask->loop_stack_size) {
172 /*for loops we need to update the entire mask at
173 * runtime */
174 LLVMValueRef tmp;
175 tmp = LLVMBuildAnd(mask->bld->builder,
176 mask->cont_mask,
177 mask->break_mask,
178 "maskcb");
179 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
180 mask->cond_mask,
181 tmp,
182 "maskfull");
183 } else
184 mask->exec_mask = mask->cond_mask;
185
186
187 mask->has_mask = (mask->cond_stack_size > 0 ||
188 mask->loop_stack_size > 0);
189 }
190
191 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
192 LLVMValueRef val)
193 {
194 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
195 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
196 mask->int_vec_type, "");
197
198 lp_exec_mask_update(mask);
199 }
200
201 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
202 {
203 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
204 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
205 mask->cond_mask, "");
206
207 /* means that we didn't have any mask before and that
208 * we were fully enabled */
209 if (mask->cond_stack_size <= 1) {
210 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
211 }
212
213 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
214 inv_mask,
215 prev_mask, "");
216 lp_exec_mask_update(mask);
217 }
218
219 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
220 {
221 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
222 lp_exec_mask_update(mask);
223 }
224
225 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
226 {
227
228 if (mask->cont_stack_size == 0)
229 mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
230 if (mask->cont_stack_size == 0)
231 mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
232 if (mask->cond_stack_size == 0)
233 mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
234 mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
235 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
236 LLVMBuildBr(mask->bld->builder, mask->loop_block);
237 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
238
239 lp_exec_mask_update(mask);
240 }
241
242 static void lp_exec_break(struct lp_exec_mask *mask)
243 {
244 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
245 mask->exec_mask,
246 "break");
247
248 mask->break_stack[mask->break_stack_size++] = mask->break_mask;
249 if (mask->break_stack_size > 1) {
250 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
251 mask->break_mask,
252 exec_mask, "break_full");
253 } else
254 mask->break_mask = exec_mask;
255
256 lp_exec_mask_update(mask);
257 }
258
259 static void lp_exec_continue(struct lp_exec_mask *mask)
260 {
261 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
262 mask->exec_mask,
263 "");
264
265 mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
266 if (mask->cont_stack_size > 1) {
267 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
268 mask->cont_mask,
269 exec_mask, "");
270 } else
271 mask->cont_mask = exec_mask;
272
273 lp_exec_mask_update(mask);
274 }
275
276
277 static void lp_exec_endloop(struct lp_exec_mask *mask)
278 {
279 LLVMBasicBlockRef endloop;
280 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
281 mask->bld->type.length);
282 /* i1cond = (mask == 0) */
283 LLVMValueRef i1cond = LLVMBuildICmp(
284 mask->bld->builder,
285 LLVMIntNE,
286 LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
287 LLVMConstNull(reg_type), "");
288
289 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
290
291 LLVMBuildCondBr(mask->bld->builder,
292 i1cond, mask->loop_block, endloop);
293
294 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
295
296 mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
297 /* pop the break mask */
298 if (mask->cont_stack_size) {
299 mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
300 }
301 if (mask->break_stack_size) {
302 mask->break_mask = mask->cont_stack[--mask->break_stack_size];
303 }
304
305 lp_exec_mask_update(mask);
306 }
307
308 static void lp_exec_mask_store(struct lp_exec_mask *mask,
309 LLVMValueRef val,
310 LLVMValueRef dst)
311 {
312 if (mask->has_mask) {
313 LLVMValueRef real_val, dst_val;
314
315 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
316 real_val = lp_build_select(mask->bld,
317 mask->exec_mask,
318 val, dst_val);
319
320 LLVMBuildStore(mask->bld->builder, real_val, dst);
321 } else
322 LLVMBuildStore(mask->bld->builder, val, dst);
323 }
324
325
326 static LLVMValueRef
327 emit_ddx(struct lp_build_tgsi_soa_context *bld,
328 LLVMValueRef src)
329 {
330 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
331 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
332 return lp_build_sub(&bld->base, src_right, src_left);
333 }
334
335
336 static LLVMValueRef
337 emit_ddy(struct lp_build_tgsi_soa_context *bld,
338 LLVMValueRef src)
339 {
340 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
341 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
342 return lp_build_sub(&bld->base, src_top, src_bottom);
343 }
344
345
346 /**
347 * Register fetch.
348 */
349 static LLVMValueRef
350 emit_fetch(
351 struct lp_build_tgsi_soa_context *bld,
352 const struct tgsi_full_instruction *inst,
353 unsigned index,
354 const unsigned chan_index )
355 {
356 const struct tgsi_full_src_register *reg = &inst->Src[index];
357 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
358 LLVMValueRef res;
359
360 switch (swizzle) {
361 case TGSI_SWIZZLE_X:
362 case TGSI_SWIZZLE_Y:
363 case TGSI_SWIZZLE_Z:
364 case TGSI_SWIZZLE_W:
365
366 switch (reg->Register.File) {
367 case TGSI_FILE_CONSTANT: {
368 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
369 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
370 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
371 res = lp_build_broadcast_scalar(&bld->base, scalar);
372 break;
373 }
374
375 case TGSI_FILE_IMMEDIATE:
376 res = bld->immediates[reg->Register.Index][swizzle];
377 assert(res);
378 break;
379
380 case TGSI_FILE_INPUT:
381 res = bld->inputs[reg->Register.Index][swizzle];
382 assert(res);
383 break;
384
385 case TGSI_FILE_TEMPORARY:
386 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
387 if(!res)
388 return bld->base.undef;
389 break;
390
391 default:
392 assert( 0 );
393 return bld->base.undef;
394 }
395 break;
396
397 default:
398 assert( 0 );
399 return bld->base.undef;
400 }
401
402 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
403 case TGSI_UTIL_SIGN_CLEAR:
404 res = lp_build_abs( &bld->base, res );
405 break;
406
407 case TGSI_UTIL_SIGN_SET:
408 /* TODO: Use bitwese OR for floating point */
409 res = lp_build_abs( &bld->base, res );
410 res = LLVMBuildNeg( bld->base.builder, res, "" );
411 break;
412
413 case TGSI_UTIL_SIGN_TOGGLE:
414 res = LLVMBuildNeg( bld->base.builder, res, "" );
415 break;
416
417 case TGSI_UTIL_SIGN_KEEP:
418 break;
419 }
420
421 return res;
422 }
423
424
425 /**
426 * Register fetch with derivatives.
427 */
428 static void
429 emit_fetch_deriv(
430 struct lp_build_tgsi_soa_context *bld,
431 const struct tgsi_full_instruction *inst,
432 unsigned index,
433 const unsigned chan_index,
434 LLVMValueRef *res,
435 LLVMValueRef *ddx,
436 LLVMValueRef *ddy)
437 {
438 LLVMValueRef src;
439
440 src = emit_fetch(bld, inst, index, chan_index);
441
442 if(res)
443 *res = src;
444
445 /* TODO: use interpolation coeffs for inputs */
446
447 if(ddx)
448 *ddx = emit_ddx(bld, src);
449
450 if(ddy)
451 *ddy = emit_ddy(bld, src);
452 }
453
454
455 /**
456 * Register store.
457 */
458 static void
459 emit_store(
460 struct lp_build_tgsi_soa_context *bld,
461 const struct tgsi_full_instruction *inst,
462 unsigned index,
463 unsigned chan_index,
464 LLVMValueRef value)
465 {
466 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
467
468 switch( inst->Instruction.Saturate ) {
469 case TGSI_SAT_NONE:
470 break;
471
472 case TGSI_SAT_ZERO_ONE:
473 value = lp_build_max(&bld->base, value, bld->base.zero);
474 value = lp_build_min(&bld->base, value, bld->base.one);
475 break;
476
477 case TGSI_SAT_MINUS_PLUS_ONE:
478 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
479 value = lp_build_min(&bld->base, value, bld->base.one);
480 break;
481
482 default:
483 assert(0);
484 }
485
486 switch( reg->Register.File ) {
487 case TGSI_FILE_OUTPUT:
488 lp_exec_mask_store(&bld->exec_mask, value,
489 bld->outputs[reg->Register.Index][chan_index]);
490 break;
491
492 case TGSI_FILE_TEMPORARY:
493 lp_exec_mask_store(&bld->exec_mask, value,
494 bld->temps[reg->Register.Index][chan_index]);
495 break;
496
497 case TGSI_FILE_ADDRESS:
498 /* FIXME */
499 assert(0);
500 break;
501
502 case TGSI_FILE_PREDICATE:
503 /* FIXME */
504 assert(0);
505 break;
506
507 default:
508 assert( 0 );
509 }
510 }
511
512
513 /**
514 * High-level instruction translators.
515 */
516
517
518 static void
519 emit_tex( struct lp_build_tgsi_soa_context *bld,
520 const struct tgsi_full_instruction *inst,
521 boolean apply_lodbias,
522 boolean projected,
523 LLVMValueRef *texel)
524 {
525 const uint unit = inst->Src[1].Register.Index;
526 LLVMValueRef lodbias;
527 LLVMValueRef oow = NULL;
528 LLVMValueRef coords[3];
529 unsigned num_coords;
530 unsigned i;
531
532 switch (inst->Texture.Texture) {
533 case TGSI_TEXTURE_1D:
534 num_coords = 1;
535 break;
536 case TGSI_TEXTURE_2D:
537 case TGSI_TEXTURE_RECT:
538 num_coords = 2;
539 break;
540 case TGSI_TEXTURE_SHADOW1D:
541 case TGSI_TEXTURE_SHADOW2D:
542 case TGSI_TEXTURE_SHADOWRECT:
543 case TGSI_TEXTURE_3D:
544 case TGSI_TEXTURE_CUBE:
545 num_coords = 3;
546 break;
547 default:
548 assert(0);
549 return;
550 }
551
552 if(apply_lodbias)
553 lodbias = emit_fetch( bld, inst, 0, 3 );
554 else
555 lodbias = bld->base.zero;
556
557 if (projected) {
558 oow = emit_fetch( bld, inst, 0, 3 );
559 oow = lp_build_rcp(&bld->base, oow);
560 }
561
562 for (i = 0; i < num_coords; i++) {
563 coords[i] = emit_fetch( bld, inst, 0, i );
564 if (projected)
565 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
566 }
567 for (i = num_coords; i < 3; i++) {
568 coords[i] = bld->base.undef;
569 }
570
571 bld->sampler->emit_fetch_texel(bld->sampler,
572 bld->base.builder,
573 bld->base.type,
574 unit, num_coords, coords, lodbias,
575 texel);
576 }
577
578
579 static void
580 emit_kil(
581 struct lp_build_tgsi_soa_context *bld,
582 const struct tgsi_full_instruction *inst )
583 {
584 const struct tgsi_full_src_register *reg = &inst->Src[0];
585 LLVMValueRef terms[NUM_CHANNELS];
586 LLVMValueRef mask;
587 unsigned chan_index;
588
589 memset(&terms, 0, sizeof terms);
590
591 FOR_EACH_CHANNEL( chan_index ) {
592 unsigned swizzle;
593
594 /* Unswizzle channel */
595 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
596
597 /* Check if the component has not been already tested. */
598 assert(swizzle < NUM_CHANNELS);
599 if( !terms[swizzle] )
600 /* TODO: change the comparison operator instead of setting the sign */
601 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
602 }
603
604 mask = NULL;
605 FOR_EACH_CHANNEL( chan_index ) {
606 if(terms[chan_index]) {
607 LLVMValueRef chan_mask;
608
609 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
610
611 if(mask)
612 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
613 else
614 mask = chan_mask;
615 }
616 }
617
618 if(mask)
619 lp_build_mask_update(bld->mask, mask);
620 }
621
622
623 /**
624 * Check if inst src/dest regs use indirect addressing into temporary
625 * register file.
626 */
627 static boolean
628 indirect_temp_reference(const struct tgsi_full_instruction *inst)
629 {
630 uint i;
631 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
632 const struct tgsi_full_src_register *reg = &inst->Src[i];
633 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
634 reg->Register.Indirect)
635 return TRUE;
636 }
637 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
638 const struct tgsi_full_dst_register *reg = &inst->Dst[i];
639 if (reg->Register.File == TGSI_FILE_TEMPORARY &&
640 reg->Register.Indirect)
641 return TRUE;
642 }
643 return FALSE;
644 }
645
646 static int
647 emit_declaration(
648 struct lp_build_tgsi_soa_context *bld,
649 const struct tgsi_full_declaration *decl)
650 {
651 unsigned first = decl->Range.First;
652 unsigned last = decl->Range.Last;
653 unsigned idx, i;
654 LLVMBasicBlockRef current_block =
655 LLVMGetInsertBlock(bld->base.builder);
656 LLVMBasicBlockRef first_block =
657 LLVMGetEntryBasicBlock(
658 LLVMGetBasicBlockParent(current_block));
659 LLVMValueRef first_inst =
660 LLVMGetFirstInstruction(first_block);
661
662 /* we want alloca's to be the first instruction
663 * in the function so we need to rewind the builder
664 * to the very beginning */
665 LLVMPositionBuilderBefore(bld->base.builder,
666 first_inst);
667
668 for (idx = first; idx <= last; ++idx) {
669 boolean ok;
670
671 switch (decl->Declaration.File) {
672 case TGSI_FILE_TEMPORARY:
673 for (i = 0; i < NUM_CHANNELS; i++)
674 bld->temps[idx][i] = lp_build_alloca(&bld->base);
675 ok = TRUE;
676 break;
677
678 case TGSI_FILE_OUTPUT:
679 for (i = 0; i < NUM_CHANNELS; i++)
680 bld->outputs[idx][i] = lp_build_alloca(&bld->base);
681 ok = TRUE;
682 break;
683
684 default:
685 /* don't need to declare other vars */
686 ok = TRUE;
687 }
688
689 if (!ok) {
690 LLVMPositionBuilderAtEnd(bld->base.builder,
691 current_block);
692 return FALSE;
693 }
694 }
695
696 LLVMPositionBuilderAtEnd(bld->base.builder,
697 current_block);
698 return TRUE;
699 }
700
701 static int
702 emit_instruction(
703 struct lp_build_tgsi_soa_context *bld,
704 const struct tgsi_full_instruction *inst,
705 const struct tgsi_opcode_info *info)
706 {
707 unsigned chan_index;
708 LLVMValueRef src0, src1, src2;
709 LLVMValueRef tmp0, tmp1, tmp2;
710 LLVMValueRef tmp3 = NULL;
711 LLVMValueRef tmp4 = NULL;
712 LLVMValueRef tmp5 = NULL;
713 LLVMValueRef tmp6 = NULL;
714 LLVMValueRef tmp7 = NULL;
715 LLVMValueRef res;
716 LLVMValueRef dst0[NUM_CHANNELS];
717
718 /* we can't handle indirect addressing into temp register file yet */
719 if (indirect_temp_reference(inst))
720 return FALSE;
721
722 /*
723 * Stores and write masks are handled in a general fashion after the long
724 * instruction opcode switch statement.
725 *
726 * Although not strictly necessary, we avoid generating instructions for
727 * channels which won't be stored, in cases where that's easy. For some
728 * complex instructions, like texture sampling, it is more convenient to
729 * assume a full writemask and then let LLVM optimization passes eliminate
730 * redundant code.
731 */
732
733 assert(info->num_dst <= 1);
734 if(info->num_dst) {
735 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
736 dst0[chan_index] = bld->base.undef;
737 }
738 }
739
740 switch (inst->Instruction.Opcode) {
741 #if 0
742 case TGSI_OPCODE_ARL:
743 /* FIXME */
744 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
745 tmp0 = emit_fetch( bld, inst, 0, chan_index );
746 emit_flr(bld, 0, 0);
747 emit_f2it( bld, 0 );
748 dst0[chan_index] = tmp0;
749 }
750 break;
751 #endif
752
753 case TGSI_OPCODE_MOV:
754 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
755 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
756 }
757 break;
758
759 case TGSI_OPCODE_LIT:
760 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
761 dst0[CHAN_X] = bld->base.one;
762 }
763 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
764 src0 = emit_fetch( bld, inst, 0, CHAN_X );
765 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
766 }
767 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
768 /* XMM[1] = SrcReg[0].yyyy */
769 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
770 /* XMM[1] = max(XMM[1], 0) */
771 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
772 /* XMM[2] = SrcReg[0].wwww */
773 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
774 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
775 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
776 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
777 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
778 }
779 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
780 dst0[CHAN_W] = bld->base.one;
781 }
782 break;
783
784 case TGSI_OPCODE_RCP:
785 /* TGSI_OPCODE_RECIP */
786 src0 = emit_fetch( bld, inst, 0, CHAN_X );
787 res = lp_build_rcp(&bld->base, src0);
788 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
789 dst0[chan_index] = res;
790 }
791 break;
792
793 case TGSI_OPCODE_RSQ:
794 /* TGSI_OPCODE_RECIPSQRT */
795 src0 = emit_fetch( bld, inst, 0, CHAN_X );
796 src0 = lp_build_abs(&bld->base, src0);
797 res = lp_build_rsqrt(&bld->base, src0);
798 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
799 dst0[chan_index] = res;
800 }
801 break;
802
803 case TGSI_OPCODE_EXP:
804 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
805 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
806 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
807 LLVMValueRef *p_exp2_int_part = NULL;
808 LLVMValueRef *p_frac_part = NULL;
809 LLVMValueRef *p_exp2 = NULL;
810
811 src0 = emit_fetch( bld, inst, 0, CHAN_X );
812
813 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
814 p_exp2_int_part = &tmp0;
815 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
816 p_frac_part = &tmp1;
817 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
818 p_exp2 = &tmp2;
819
820 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
821
822 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
823 dst0[CHAN_X] = tmp0;
824 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
825 dst0[CHAN_Y] = tmp1;
826 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
827 dst0[CHAN_Z] = tmp2;
828 }
829 /* dst.w = 1.0 */
830 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
831 dst0[CHAN_W] = bld->base.one;
832 }
833 break;
834
835 case TGSI_OPCODE_LOG:
836 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
837 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
838 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
839 LLVMValueRef *p_floor_log2 = NULL;
840 LLVMValueRef *p_exp = NULL;
841 LLVMValueRef *p_log2 = NULL;
842
843 src0 = emit_fetch( bld, inst, 0, CHAN_X );
844 src0 = lp_build_abs( &bld->base, src0 );
845
846 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
847 p_floor_log2 = &tmp0;
848 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
849 p_exp = &tmp1;
850 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
851 p_log2 = &tmp2;
852
853 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
854
855 /* dst.x = floor(lg2(abs(src.x))) */
856 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
857 dst0[CHAN_X] = tmp0;
858 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
859 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
860 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
861 }
862 /* dst.z = lg2(abs(src.x)) */
863 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
864 dst0[CHAN_Z] = tmp2;
865 }
866 /* dst.w = 1.0 */
867 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
868 dst0[CHAN_W] = bld->base.one;
869 }
870 break;
871
872 case TGSI_OPCODE_MUL:
873 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
874 src0 = emit_fetch( bld, inst, 0, chan_index );
875 src1 = emit_fetch( bld, inst, 1, chan_index );
876 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
877 }
878 break;
879
880 case TGSI_OPCODE_ADD:
881 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
882 src0 = emit_fetch( bld, inst, 0, chan_index );
883 src1 = emit_fetch( bld, inst, 1, chan_index );
884 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
885 }
886 break;
887
888 case TGSI_OPCODE_DP3:
889 /* TGSI_OPCODE_DOT3 */
890 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
891 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
892 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
893 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
894 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
895 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
896 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
897 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
898 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
899 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
900 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
901 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
902 dst0[chan_index] = tmp0;
903 }
904 break;
905
906 case TGSI_OPCODE_DP4:
907 /* TGSI_OPCODE_DOT4 */
908 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
909 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
910 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
911 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
912 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
913 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
914 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
915 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
916 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
917 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
918 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
919 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
920 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
921 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
922 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
923 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
924 dst0[chan_index] = tmp0;
925 }
926 break;
927
928 case TGSI_OPCODE_DST:
929 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
930 dst0[CHAN_X] = bld->base.one;
931 }
932 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
933 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
934 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
935 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
936 }
937 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
938 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
939 }
940 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
941 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
942 }
943 break;
944
945 case TGSI_OPCODE_MIN:
946 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
947 src0 = emit_fetch( bld, inst, 0, chan_index );
948 src1 = emit_fetch( bld, inst, 1, chan_index );
949 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
950 }
951 break;
952
953 case TGSI_OPCODE_MAX:
954 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
955 src0 = emit_fetch( bld, inst, 0, chan_index );
956 src1 = emit_fetch( bld, inst, 1, chan_index );
957 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
958 }
959 break;
960
961 case TGSI_OPCODE_SLT:
962 /* TGSI_OPCODE_SETLT */
963 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
964 src0 = emit_fetch( bld, inst, 0, chan_index );
965 src1 = emit_fetch( bld, inst, 1, chan_index );
966 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
967 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
968 }
969 break;
970
971 case TGSI_OPCODE_SGE:
972 /* TGSI_OPCODE_SETGE */
973 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
974 src0 = emit_fetch( bld, inst, 0, chan_index );
975 src1 = emit_fetch( bld, inst, 1, chan_index );
976 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
977 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
978 }
979 break;
980
981 case TGSI_OPCODE_MAD:
982 /* TGSI_OPCODE_MADD */
983 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
984 tmp0 = emit_fetch( bld, inst, 0, chan_index );
985 tmp1 = emit_fetch( bld, inst, 1, chan_index );
986 tmp2 = emit_fetch( bld, inst, 2, chan_index );
987 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
988 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
989 dst0[chan_index] = tmp0;
990 }
991 break;
992
993 case TGSI_OPCODE_SUB:
994 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
995 tmp0 = emit_fetch( bld, inst, 0, chan_index );
996 tmp1 = emit_fetch( bld, inst, 1, chan_index );
997 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
998 }
999 break;
1000
1001 case TGSI_OPCODE_LRP:
1002 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1003 src0 = emit_fetch( bld, inst, 0, chan_index );
1004 src1 = emit_fetch( bld, inst, 1, chan_index );
1005 src2 = emit_fetch( bld, inst, 2, chan_index );
1006 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1007 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1008 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1009 }
1010 break;
1011
1012 case TGSI_OPCODE_CND:
1013 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1014 src0 = emit_fetch( bld, inst, 0, chan_index );
1015 src1 = emit_fetch( bld, inst, 1, chan_index );
1016 src2 = emit_fetch( bld, inst, 2, chan_index );
1017 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1018 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1019 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1020 }
1021 break;
1022
1023 case TGSI_OPCODE_DP2A:
1024 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1025 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1026 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1027 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1028 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1029 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1030 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1031 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1032 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1033 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1034 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1035 }
1036 break;
1037
1038 case TGSI_OPCODE_FRC:
1039 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1040 src0 = emit_fetch( bld, inst, 0, chan_index );
1041 tmp0 = lp_build_floor(&bld->base, src0);
1042 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1043 dst0[chan_index] = tmp0;
1044 }
1045 break;
1046
1047 case TGSI_OPCODE_CLAMP:
1048 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1049 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1050 src1 = emit_fetch( bld, inst, 1, chan_index );
1051 src2 = emit_fetch( bld, inst, 2, chan_index );
1052 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1053 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1054 dst0[chan_index] = tmp0;
1055 }
1056 break;
1057
1058 case TGSI_OPCODE_FLR:
1059 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1060 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1061 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1062 }
1063 break;
1064
1065 case TGSI_OPCODE_ROUND:
1066 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1067 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1068 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1069 }
1070 break;
1071
1072 case TGSI_OPCODE_EX2: {
1073 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1074 tmp0 = lp_build_exp2( &bld->base, tmp0);
1075 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1076 dst0[chan_index] = tmp0;
1077 }
1078 break;
1079 }
1080
1081 case TGSI_OPCODE_LG2:
1082 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1083 tmp0 = lp_build_log2( &bld->base, tmp0);
1084 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1085 dst0[chan_index] = tmp0;
1086 }
1087 break;
1088
1089 case TGSI_OPCODE_POW:
1090 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1091 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1092 res = lp_build_pow( &bld->base, src0, src1 );
1093 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1094 dst0[chan_index] = res;
1095 }
1096 break;
1097
1098 case TGSI_OPCODE_XPD:
1099 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1100 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1101 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1102 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1103 }
1104 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1105 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1106 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1107 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1108 }
1109 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1110 tmp2 = tmp0;
1111 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1112 tmp5 = tmp3;
1113 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1114 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1115 dst0[CHAN_X] = tmp2;
1116 }
1117 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1118 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1119 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1120 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1121 }
1122 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1123 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1124 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1125 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1126 dst0[CHAN_Y] = tmp3;
1127 }
1128 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1129 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1130 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1131 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1132 dst0[CHAN_Z] = tmp5;
1133 }
1134 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1135 dst0[CHAN_W] = bld->base.one;
1136 }
1137 break;
1138
1139 case TGSI_OPCODE_ABS:
1140 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1141 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1142 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1143 }
1144 break;
1145
1146 case TGSI_OPCODE_RCC:
1147 /* deprecated? */
1148 assert(0);
1149 return 0;
1150
1151 case TGSI_OPCODE_DPH:
1152 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1153 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1154 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1155 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1156 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1157 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1158 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1159 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1160 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1161 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1162 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1163 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1164 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1165 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1166 dst0[chan_index] = tmp0;
1167 }
1168 break;
1169
1170 case TGSI_OPCODE_COS:
1171 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1172 tmp0 = lp_build_cos( &bld->base, tmp0 );
1173 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1174 dst0[chan_index] = tmp0;
1175 }
1176 break;
1177
1178 case TGSI_OPCODE_DDX:
1179 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1180 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1181 }
1182 break;
1183
1184 case TGSI_OPCODE_DDY:
1185 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1186 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1187 }
1188 break;
1189
1190 case TGSI_OPCODE_KILP:
1191 /* predicated kill */
1192 /* FIXME */
1193 return 0;
1194 break;
1195
1196 case TGSI_OPCODE_KIL:
1197 /* conditional kill */
1198 emit_kil( bld, inst );
1199 break;
1200
1201 case TGSI_OPCODE_PK2H:
1202 return 0;
1203 break;
1204
1205 case TGSI_OPCODE_PK2US:
1206 return 0;
1207 break;
1208
1209 case TGSI_OPCODE_PK4B:
1210 return 0;
1211 break;
1212
1213 case TGSI_OPCODE_PK4UB:
1214 return 0;
1215 break;
1216
1217 case TGSI_OPCODE_RFL:
1218 return 0;
1219 break;
1220
1221 case TGSI_OPCODE_SEQ:
1222 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1223 src0 = emit_fetch( bld, inst, 0, chan_index );
1224 src1 = emit_fetch( bld, inst, 1, chan_index );
1225 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1226 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1227 }
1228 break;
1229
1230 case TGSI_OPCODE_SFL:
1231 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1232 dst0[chan_index] = bld->base.zero;
1233 }
1234 break;
1235
1236 case TGSI_OPCODE_SGT:
1237 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1238 src0 = emit_fetch( bld, inst, 0, chan_index );
1239 src1 = emit_fetch( bld, inst, 1, chan_index );
1240 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1241 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1242 }
1243 break;
1244
1245 case TGSI_OPCODE_SIN:
1246 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1247 tmp0 = lp_build_sin( &bld->base, tmp0 );
1248 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1249 dst0[chan_index] = tmp0;
1250 }
1251 break;
1252
1253 case TGSI_OPCODE_SLE:
1254 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1255 src0 = emit_fetch( bld, inst, 0, chan_index );
1256 src1 = emit_fetch( bld, inst, 1, chan_index );
1257 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1258 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1259 }
1260 break;
1261
1262 case TGSI_OPCODE_SNE:
1263 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1264 src0 = emit_fetch( bld, inst, 0, chan_index );
1265 src1 = emit_fetch( bld, inst, 1, chan_index );
1266 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1267 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1268 }
1269 break;
1270
1271 case TGSI_OPCODE_STR:
1272 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1273 dst0[chan_index] = bld->base.one;
1274 }
1275 break;
1276
1277 case TGSI_OPCODE_TEX:
1278 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1279 break;
1280
1281 case TGSI_OPCODE_TXD:
1282 /* FIXME */
1283 return 0;
1284 break;
1285
1286 case TGSI_OPCODE_UP2H:
1287 /* deprecated */
1288 assert (0);
1289 return 0;
1290 break;
1291
1292 case TGSI_OPCODE_UP2US:
1293 /* deprecated */
1294 assert(0);
1295 return 0;
1296 break;
1297
1298 case TGSI_OPCODE_UP4B:
1299 /* deprecated */
1300 assert(0);
1301 return 0;
1302 break;
1303
1304 case TGSI_OPCODE_UP4UB:
1305 /* deprecated */
1306 assert(0);
1307 return 0;
1308 break;
1309
1310 case TGSI_OPCODE_X2D:
1311 /* deprecated? */
1312 assert(0);
1313 return 0;
1314 break;
1315
1316 case TGSI_OPCODE_ARA:
1317 /* deprecated */
1318 assert(0);
1319 return 0;
1320 break;
1321
1322 #if 0
1323 case TGSI_OPCODE_ARR:
1324 /* FIXME */
1325 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1326 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1327 emit_rnd( bld, 0, 0 );
1328 emit_f2it( bld, 0 );
1329 dst0[chan_index] = tmp0;
1330 }
1331 break;
1332 #endif
1333
1334 case TGSI_OPCODE_BRA:
1335 /* deprecated */
1336 assert(0);
1337 return 0;
1338 break;
1339
1340 case TGSI_OPCODE_CAL:
1341 /* FIXME */
1342 return 0;
1343 break;
1344
1345 case TGSI_OPCODE_RET:
1346 /* FIXME */
1347 return 0;
1348 break;
1349
1350 case TGSI_OPCODE_END:
1351 break;
1352
1353 case TGSI_OPCODE_SSG:
1354 /* TGSI_OPCODE_SGN */
1355 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1356 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1357 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1358 }
1359 break;
1360
1361 case TGSI_OPCODE_CMP:
1362 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1363 src0 = emit_fetch( bld, inst, 0, chan_index );
1364 src1 = emit_fetch( bld, inst, 1, chan_index );
1365 src2 = emit_fetch( bld, inst, 2, chan_index );
1366 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1367 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1368 }
1369 break;
1370
1371 case TGSI_OPCODE_SCS:
1372 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1373 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1374 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1375 }
1376 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1377 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1378 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1379 }
1380 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1381 dst0[CHAN_Z] = bld->base.zero;
1382 }
1383 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1384 dst0[CHAN_W] = bld->base.one;
1385 }
1386 break;
1387
1388 case TGSI_OPCODE_TXB:
1389 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1390 break;
1391
1392 case TGSI_OPCODE_NRM:
1393 /* fall-through */
1394 case TGSI_OPCODE_NRM4:
1395 /* 3 or 4-component normalization */
1396 {
1397 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1398
1399 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1400 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1401 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1402 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1403
1404 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1405
1406 /* xmm4 = src.x */
1407 /* xmm0 = src.x * src.x */
1408 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1409 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1410 tmp4 = tmp0;
1411 }
1412 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1413
1414 /* xmm5 = src.y */
1415 /* xmm0 = xmm0 + src.y * src.y */
1416 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1417 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1418 tmp5 = tmp1;
1419 }
1420 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1421 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1422
1423 /* xmm6 = src.z */
1424 /* xmm0 = xmm0 + src.z * src.z */
1425 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1426 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1427 tmp6 = tmp1;
1428 }
1429 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1430 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1431
1432 if (dims == 4) {
1433 /* xmm7 = src.w */
1434 /* xmm0 = xmm0 + src.w * src.w */
1435 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1436 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1437 tmp7 = tmp1;
1438 }
1439 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1440 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1441 }
1442
1443 /* xmm1 = 1 / sqrt(xmm0) */
1444 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1445
1446 /* dst.x = xmm1 * src.x */
1447 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1448 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1449 }
1450
1451 /* dst.y = xmm1 * src.y */
1452 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1453 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1454 }
1455
1456 /* dst.z = xmm1 * src.z */
1457 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1458 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1459 }
1460
1461 /* dst.w = xmm1 * src.w */
1462 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1463 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1464 }
1465 }
1466
1467 /* dst.w = 1.0 */
1468 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1469 dst0[CHAN_W] = bld->base.one;
1470 }
1471 }
1472 break;
1473
1474 case TGSI_OPCODE_DIV:
1475 /* deprecated */
1476 assert( 0 );
1477 return 0;
1478 break;
1479
1480 case TGSI_OPCODE_DP2:
1481 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1482 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1483 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1484 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1485 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1486 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1487 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1488 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1489 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1490 }
1491 break;
1492
1493 case TGSI_OPCODE_TXL:
1494 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1495 break;
1496
1497 case TGSI_OPCODE_TXP:
1498 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1499 break;
1500
1501 case TGSI_OPCODE_BRK:
1502 lp_exec_break(&bld->exec_mask);
1503 break;
1504
1505 case TGSI_OPCODE_IF:
1506 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1507 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1508 tmp0, bld->base.zero);
1509 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1510 break;
1511
1512 case TGSI_OPCODE_BGNFOR:
1513 /* deprecated */
1514 assert(0);
1515 return 0;
1516 break;
1517
1518 case TGSI_OPCODE_BGNLOOP:
1519 lp_exec_bgnloop(&bld->exec_mask);
1520 break;
1521
1522 case TGSI_OPCODE_REP:
1523 /* deprecated */
1524 assert(0);
1525 return 0;
1526 break;
1527
1528 case TGSI_OPCODE_ELSE:
1529 lp_exec_mask_cond_invert(&bld->exec_mask);
1530 break;
1531
1532 case TGSI_OPCODE_ENDIF:
1533 lp_exec_mask_cond_pop(&bld->exec_mask);
1534 break;
1535
1536 case TGSI_OPCODE_ENDFOR:
1537 /* deprecated */
1538 assert(0);
1539 return 0;
1540 break;
1541
1542 case TGSI_OPCODE_ENDLOOP:
1543 lp_exec_endloop(&bld->exec_mask);
1544 break;
1545
1546 case TGSI_OPCODE_ENDREP:
1547 /* deprecated */
1548 assert(0);
1549 return 0;
1550 break;
1551
1552 case TGSI_OPCODE_PUSHA:
1553 /* deprecated? */
1554 assert(0);
1555 return 0;
1556 break;
1557
1558 case TGSI_OPCODE_POPA:
1559 /* deprecated? */
1560 assert(0);
1561 return 0;
1562 break;
1563
1564 case TGSI_OPCODE_CEIL:
1565 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1566 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1567 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1568 }
1569 break;
1570
1571 case TGSI_OPCODE_I2F:
1572 /* deprecated? */
1573 assert(0);
1574 return 0;
1575 break;
1576
1577 case TGSI_OPCODE_NOT:
1578 /* deprecated? */
1579 assert(0);
1580 return 0;
1581 break;
1582
1583 case TGSI_OPCODE_TRUNC:
1584 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1585 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1586 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1587 }
1588 break;
1589
1590 case TGSI_OPCODE_SHL:
1591 /* deprecated? */
1592 assert(0);
1593 return 0;
1594 break;
1595
1596 case TGSI_OPCODE_ISHR:
1597 /* deprecated? */
1598 assert(0);
1599 return 0;
1600 break;
1601
1602 case TGSI_OPCODE_AND:
1603 /* deprecated? */
1604 assert(0);
1605 return 0;
1606 break;
1607
1608 case TGSI_OPCODE_OR:
1609 /* deprecated? */
1610 assert(0);
1611 return 0;
1612 break;
1613
1614 case TGSI_OPCODE_MOD:
1615 /* deprecated? */
1616 assert(0);
1617 return 0;
1618 break;
1619
1620 case TGSI_OPCODE_XOR:
1621 /* deprecated? */
1622 assert(0);
1623 return 0;
1624 break;
1625
1626 case TGSI_OPCODE_SAD:
1627 /* deprecated? */
1628 assert(0);
1629 return 0;
1630 break;
1631
1632 case TGSI_OPCODE_TXF:
1633 /* deprecated? */
1634 assert(0);
1635 return 0;
1636 break;
1637
1638 case TGSI_OPCODE_TXQ:
1639 /* deprecated? */
1640 assert(0);
1641 return 0;
1642 break;
1643
1644 case TGSI_OPCODE_CONT:
1645 lp_exec_continue(&bld->exec_mask);
1646 break;
1647
1648 case TGSI_OPCODE_EMIT:
1649 return 0;
1650 break;
1651
1652 case TGSI_OPCODE_ENDPRIM:
1653 return 0;
1654 break;
1655
1656 case TGSI_OPCODE_NOP:
1657 break;
1658
1659 default:
1660 return 0;
1661 }
1662
1663 if(info->num_dst) {
1664 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1665 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1666 }
1667 }
1668
1669 return 1;
1670 }
1671
1672
1673 void
1674 lp_build_tgsi_soa(LLVMBuilderRef builder,
1675 const struct tgsi_token *tokens,
1676 struct lp_type type,
1677 struct lp_build_mask_context *mask,
1678 LLVMValueRef consts_ptr,
1679 const LLVMValueRef *pos,
1680 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1681 LLVMValueRef (*outputs)[NUM_CHANNELS],
1682 struct lp_build_sampler_soa *sampler)
1683 {
1684 struct lp_build_tgsi_soa_context bld;
1685 struct tgsi_parse_context parse;
1686 uint num_immediates = 0;
1687 unsigned i;
1688
1689 /* Setup build context */
1690 memset(&bld, 0, sizeof bld);
1691 lp_build_context_init(&bld.base, builder, type);
1692 bld.mask = mask;
1693 bld.pos = pos;
1694 bld.inputs = inputs;
1695 bld.outputs = outputs;
1696 bld.consts_ptr = consts_ptr;
1697 bld.sampler = sampler;
1698
1699 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1700
1701 tgsi_parse_init( &parse, tokens );
1702
1703 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1704 tgsi_parse_token( &parse );
1705
1706 switch( parse.FullToken.Token.Type ) {
1707 case TGSI_TOKEN_TYPE_DECLARATION:
1708 /* Inputs already interpolated */
1709 {
1710 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1711 _debug_printf("warning: failed to define LLVM variable\n");
1712 }
1713 break;
1714
1715 case TGSI_TOKEN_TYPE_INSTRUCTION:
1716 {
1717 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1718 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1719 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1720 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1721 info ? info->mnemonic : "<invalid>");
1722 }
1723
1724 break;
1725
1726 case TGSI_TOKEN_TYPE_IMMEDIATE:
1727 /* simply copy the immediate values into the next immediates[] slot */
1728 {
1729 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1730 assert(size <= 4);
1731 assert(num_immediates < LP_MAX_IMMEDIATES);
1732 for( i = 0; i < size; ++i )
1733 bld.immediates[num_immediates][i] =
1734 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1735 for( i = size; i < 4; ++i )
1736 bld.immediates[num_immediates][i] = bld.base.undef;
1737 num_immediates++;
1738 }
1739 break;
1740
1741 case TGSI_TOKEN_TYPE_PROPERTY:
1742 break;
1743
1744 default:
1745 assert( 0 );
1746 }
1747 }
1748 if (0) {
1749 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1750 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1751 debug_printf("11111111111111111111111111111 \n");
1752 tgsi_dump(tokens, 0);
1753 LLVMDumpValue(function);
1754 debug_printf("2222222222222222222222222222 \n");
1755 }
1756 tgsi_parse_free( &parse );
1757 }
1758