Merge branch '7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_tgsi.h"
57 #include "lp_bld_debug.h"
58
59
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/* iterate CHAN over all four register channels (x, y, z, w) */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* test bit CHAN of the write mask of destination operand 0 of INST */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* iterate CHAN over only the channels enabled in dst 0's write mask */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* lane layout of a 2x2 fragment quad within one SoA vector */
#define QUAD_TOP_LEFT 0
#define QUAD_TOP_RIGHT 1
#define QUAD_BOTTOM_LEFT 2
#define QUAD_BOTTOM_RIGHT 3

/* maximum nesting depth of IF / loop constructs tracked per shader */
#define LP_TGSI_MAX_NESTING 16
88
/**
 * Execution-mask state used to emulate TGSI control flow (IF/ELSE,
 * BGNLOOP/ENDLOOP, BRK, CONT) with SoA vectors: each lane of a mask is
 * all-ones when the corresponding pixel is active.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when exec_mask may be partial (inside a conditional or loop) */
   boolean has_mask;

   /* integer vector type matching bld->type, used for mask bit ops */
   LLVMTypeRef int_vec_type;

   /* saved condition masks for nested IF/ELSE */
   LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* saved break masks for nested loops; a lane goes to 0 on BRK */
   LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
   int break_stack_size;
   LLVMValueRef break_mask;

   /* saved continue masks for nested loops; a lane goes to 0 on CONT */
   LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
   int cont_stack_size;
   LLVMValueRef cont_mask;

   /* saved loop-header blocks for nested loops */
   LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
   int loop_stack_size;
   LLVMBasicBlockRef loop_block;


   /* combined cond & cont & break mask; see lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
115
/**
 * Per-shader translation state for the TGSI -> LLVM IR SoA conversion.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* pointer to the constant buffer (array of scalars) */
   LLVMValueRef consts_ptr;
   /* fragment position vectors supplied by the caller */
   const LLVMValueRef *pos;
   /* interpolated input values, one vector per register channel */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* output allocas, written through lp_exec_mask_store() */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* callbacks used to emit texture-sampling code */
   struct lp_build_sampler_soa *sampler;

   /* immediate values, materialized at declaration time */
   LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
   /* per-channel allocas for TEMP registers (direct addressing only) */
   LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
   /* per-channel allocas for ADDR registers */
   LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS];

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   /* fragment "alive" mask, updated by KIL/KILP */
   struct lp_build_mask_context *mask;
   /* control-flow execution mask */
   struct lp_exec_mask exec_mask;
};
139
/*
 * Quad channel selectors used for screen-space derivatives: each SoA
 * vector holds a 2x2 fragment quad laid out (TL, TR, BL, BR).  E.g.
 * swizzle_left replicates the left column into every lane so that
 * right - left yields ddx for all four fragments at once.
 */
static const unsigned char
swizzle_left[4] = {
   QUAD_TOP_LEFT, QUAD_TOP_LEFT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
};

static const unsigned char
swizzle_right[4] = {
   QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
};

static const unsigned char
swizzle_top[4] = {
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT
};

static const unsigned char
swizzle_bottom[4] = {
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
};
163
164 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
165 {
166 mask->bld = bld;
167 mask->has_mask = FALSE;
168 mask->cond_stack_size = 0;
169 mask->loop_stack_size = 0;
170 mask->break_stack_size = 0;
171 mask->cont_stack_size = 0;
172
173 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
174 }
175
176 static void lp_exec_mask_update(struct lp_exec_mask *mask)
177 {
178 if (mask->loop_stack_size) {
179 /*for loops we need to update the entire mask at runtime */
180 LLVMValueRef tmp;
181 assert(mask->break_mask);
182 tmp = LLVMBuildAnd(mask->bld->builder,
183 mask->cont_mask,
184 mask->break_mask,
185 "maskcb");
186 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
187 mask->cond_mask,
188 tmp,
189 "maskfull");
190 } else
191 mask->exec_mask = mask->cond_mask;
192
193
194 mask->has_mask = (mask->cond_stack_size > 0 ||
195 mask->loop_stack_size > 0);
196 }
197
198 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
199 LLVMValueRef val)
200 {
201 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
202 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
203 mask->int_vec_type, "");
204
205 lp_exec_mask_update(mask);
206 }
207
208 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
209 {
210 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
211 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
212 mask->cond_mask, "");
213
214 /* means that we didn't have any mask before and that
215 * we were fully enabled */
216 if (mask->cond_stack_size <= 1) {
217 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
218 }
219
220 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
221 inv_mask,
222 prev_mask, "");
223 lp_exec_mask_update(mask);
224 }
225
226 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
227 {
228 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
229 lp_exec_mask_update(mask);
230 }
231
232 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
233 {
234
235 if (mask->cont_stack_size == 0)
236 mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
237 if (mask->break_stack_size == 0)
238 mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
239 if (mask->cond_stack_size == 0)
240 mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
241
242 mask->break_stack[mask->break_stack_size++] = mask->break_mask;
243 mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
244 mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
245 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
246 LLVMBuildBr(mask->bld->builder, mask->loop_block);
247 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
248
249 lp_exec_mask_update(mask);
250 }
251
252 static void lp_exec_break(struct lp_exec_mask *mask)
253 {
254 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
255 mask->exec_mask,
256 "break");
257
258 /* mask->break_stack_size > 1 implies that we encountered a break
259 * statemant already and if that's the case we want to make sure
260 * our mask is a combination of the previous break and the current
261 * execution mask */
262 if (mask->break_stack_size > 1) {
263 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
264 mask->break_mask,
265 exec_mask, "break_full");
266 } else
267 mask->break_mask = exec_mask;
268
269 lp_exec_mask_update(mask);
270 }
271
272 static void lp_exec_continue(struct lp_exec_mask *mask)
273 {
274 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
275 mask->exec_mask,
276 "");
277
278 if (mask->cont_stack_size > 1) {
279 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
280 mask->cont_mask,
281 exec_mask, "");
282 } else
283 mask->cont_mask = exec_mask;
284
285 lp_exec_mask_update(mask);
286 }
287
288
/**
 * Handle ENDLOOP: branch back to the loop header while any lane's
 * break mask is still set, then restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* a single wide integer spanning the whole mask vector, so the
    * entire mask can be tested against zero with one scalar compare */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /* i1cond = (break_mask != 0), i.e. at least one lane still loops */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* iterate again while any lane is live, otherwise fall through */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
   /* pop the cont mask */
   if (mask->cont_stack_size) {
      mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
   }
   /* pop the break mask */
   if (mask->break_stack_size) {
      mask->break_mask = mask->break_stack[--mask->break_stack_size];
   }

   lp_exec_mask_update(mask);
}
324
325 /* stores val into an address pointed to by dst.
326 * mask->exec_mask is used to figure out which bits of val
327 * should be stored into the address
328 * (0 means don't store this bit, 1 means do store).
329 */
330 static void lp_exec_mask_store(struct lp_exec_mask *mask,
331 LLVMValueRef val,
332 LLVMValueRef dst)
333 {
334 if (mask->has_mask) {
335 LLVMValueRef real_val, dst_val;
336
337 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
338 real_val = lp_build_select(mask->bld,
339 mask->exec_mask,
340 val, dst_val);
341
342 LLVMBuildStore(mask->bld->builder, real_val, dst);
343 } else
344 LLVMBuildStore(mask->bld->builder, val, dst);
345 }
346
347
348 static LLVMValueRef
349 emit_ddx(struct lp_build_tgsi_soa_context *bld,
350 LLVMValueRef src)
351 {
352 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
353 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
354 return lp_build_sub(&bld->base, src_right, src_left);
355 }
356
357
358 static LLVMValueRef
359 emit_ddy(struct lp_build_tgsi_soa_context *bld,
360 LLVMValueRef src)
361 {
362 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
363 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
364 return lp_build_sub(&bld->base, src_top, src_bottom);
365 }
366
367 static LLVMValueRef
368 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
369 unsigned index,
370 unsigned swizzle,
371 boolean is_indirect,
372 LLVMValueRef addr)
373 {
374 if (!bld->has_indirect_addressing) {
375 return bld->temps[index][swizzle];
376 } else {
377 LLVMValueRef lindex =
378 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
379 if (is_indirect)
380 lindex = lp_build_add(&bld->base, lindex, addr);
381 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
382 }
383 }
384
385 /**
386 * Register fetch.
387 */
/**
 * Register fetch.
 *
 * Loads one SoA channel (chan_index, after applying the source
 * swizzle) of source operand 'index' of 'inst'.  Handles the
 * CONSTANT, IMMEDIATE, INPUT and TEMPORARY files, optional indirect
 * addressing via the ADDR file, and the TGSI sign modifiers.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[index];
   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
   LLVMValueRef res;
   LLVMValueRef addr;   /* only assigned/used when reg->Register.Indirect */

   switch (swizzle) {
   case TGSI_SWIZZLE_X:
   case TGSI_SWIZZLE_Y:
   case TGSI_SWIZZLE_Z:
   case TGSI_SWIZZLE_W:

      if (reg->Register.Indirect) {
         LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
         /* NOTE: deliberately shadows the outer 'swizzle'; this one
          * selects the ADDR register component used for indirection */
         unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
         addr = LLVMBuildLoad(bld->base.builder,
                              bld->addr[reg->Indirect.Index][swizzle],
                              "");
         /* for indexing we want integers */
         addr = LLVMBuildFPToSI(bld->base.builder, addr,
                                int_vec_type, "");
         /* assumes all lanes hold the same address: use lane 0 only
          * -- TODO confirm for per-lane divergent addressing */
         addr = LLVMBuildExtractElement(bld->base.builder,
                                        addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
                                        "");
         /* scale by 4: each register occupies 4 channel slots */
         addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
      }

      switch (reg->Register.File) {
      case TGSI_FILE_CONSTANT: {
         /* constants are stored as flat scalars: reg*4 + channel */
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
         LLVMValueRef scalar, scalar_ptr;

         if (reg->Register.Indirect) {
            /*lp_build_printf(bld->base.builder,
              "\taddr = %d\n", addr);*/
            index = lp_build_add(&bld->base, index, addr);
         }
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         /* splat the scalar constant across all lanes */
         res = lp_build_broadcast_scalar(&bld->base, scalar);
         break;
      }

      case TGSI_FILE_IMMEDIATE:
         res = bld->immediates[reg->Register.Index][swizzle];
         assert(res);
         break;

      case TGSI_FILE_INPUT:
         res = bld->inputs[reg->Register.Index][swizzle];
         assert(res);
         break;

      case TGSI_FILE_TEMPORARY: {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              swizzle,
                                              reg->Register.Indirect,
                                              addr);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if(!res)
            return bld->base.undef;
         break;
      }

      default:
         assert( 0 );
         return bld->base.undef;
      }
      break;

   default:
      assert( 0 );
      return bld->base.undef;
   }

   /* apply absolute-value / negate source modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_TOGGLE:
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
491
492
493 /**
494 * Register fetch with derivatives.
495 */
496 static void
497 emit_fetch_deriv(
498 struct lp_build_tgsi_soa_context *bld,
499 const struct tgsi_full_instruction *inst,
500 unsigned index,
501 const unsigned chan_index,
502 LLVMValueRef *res,
503 LLVMValueRef *ddx,
504 LLVMValueRef *ddy)
505 {
506 LLVMValueRef src;
507
508 src = emit_fetch(bld, inst, index, chan_index);
509
510 if(res)
511 *res = src;
512
513 /* TODO: use interpolation coeffs for inputs */
514
515 if(ddx)
516 *ddx = emit_ddx(bld, src);
517
518 if(ddy)
519 *ddy = emit_ddy(bld, src);
520 }
521
522
523 /**
524 * Register store.
525 */
526 static void
527 emit_store(
528 struct lp_build_tgsi_soa_context *bld,
529 const struct tgsi_full_instruction *inst,
530 unsigned index,
531 unsigned chan_index,
532 LLVMValueRef value)
533 {
534 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
535 LLVMValueRef addr;
536
537 switch( inst->Instruction.Saturate ) {
538 case TGSI_SAT_NONE:
539 break;
540
541 case TGSI_SAT_ZERO_ONE:
542 value = lp_build_max(&bld->base, value, bld->base.zero);
543 value = lp_build_min(&bld->base, value, bld->base.one);
544 break;
545
546 case TGSI_SAT_MINUS_PLUS_ONE:
547 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
548 value = lp_build_min(&bld->base, value, bld->base.one);
549 break;
550
551 default:
552 assert(0);
553 }
554
555 if (reg->Register.Indirect) {
556 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
557 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
558 addr = LLVMBuildLoad(bld->base.builder,
559 bld->addr[reg->Indirect.Index][swizzle],
560 "");
561 /* for indexing we want integers */
562 addr = LLVMBuildFPToSI(bld->base.builder, addr,
563 int_vec_type, "");
564 addr = LLVMBuildExtractElement(bld->base.builder,
565 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
566 "");
567 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
568 }
569
570 switch( reg->Register.File ) {
571 case TGSI_FILE_OUTPUT:
572 lp_exec_mask_store(&bld->exec_mask, value,
573 bld->outputs[reg->Register.Index][chan_index]);
574 break;
575
576 case TGSI_FILE_TEMPORARY: {
577 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
578 chan_index,
579 reg->Register.Indirect,
580 addr);
581 lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
582 break;
583 }
584
585 case TGSI_FILE_ADDRESS:
586 lp_exec_mask_store(&bld->exec_mask, value,
587 bld->addr[reg->Indirect.Index][chan_index]);
588 break;
589
590 case TGSI_FILE_PREDICATE:
591 /* FIXME */
592 assert(0);
593 break;
594
595 default:
596 assert( 0 );
597 }
598 }
599
600
601 /**
602 * High-level instruction translators.
603 */
604
605
/**
 * Emit code to sample a texture.
 *
 * Fetches texcoords from src[0] (divided by src[0].w when
 * 'projected'), optionally fetches a LOD bias from src[0].w, and
 * delegates the texel fetch to the bound SoA sampler callbacks.
 * The sampler unit index comes from src[1].
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          boolean apply_lodbias,
          boolean projected,
          LLVMValueRef *texel)
{
   const uint unit = inst->Src[1].Register.Index;
   LLVMValueRef lodbias;
   LLVMValueRef oow = NULL;   /* 1/w for projected lookups */
   LLVMValueRef coords[3];
   unsigned num_coords;
   unsigned i;

   /* number of coordinates consumed by the texture target
    * (shadow targets carry the reference value as a 3rd coord) */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   if(apply_lodbias)
      lodbias = emit_fetch( bld, inst, 0, 3 );
   else
      lodbias = bld->base.zero;

   if (projected) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (projected)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   /* unused coordinate slots are left undefined */
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords, lodbias,
                                  texel);
}
665
666
667 /**
668 * Kill fragment if any of the src register values are negative.
669 */
/**
 * Kill fragment if any of the src register values are negative.
 *
 * Builds a per-lane survival mask (~0 = keep, 0 = kill) from each
 * distinct swizzled source component and folds it into the shader's
 * "alive" mask.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         /* a lane survives only if every tested component is >= 0 */
         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask)
      lp_build_mask_update(bld->mask, mask);
}
715
716
717 /**
718 * Predicated fragment kill.
719 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
720 * The only predication is the execution mask which will apply if
721 * we're inside a loop or conditional.
722 */
723 static void
724 emit_kilp(struct lp_build_tgsi_soa_context *bld,
725 const struct tgsi_full_instruction *inst)
726 {
727 LLVMValueRef mask;
728
729 /* For those channels which are "alive", disable fragment shader
730 * execution.
731 */
732 if (bld->exec_mask.has_mask) {
733 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
734 }
735 else {
736 mask = bld->base.zero;
737 }
738
739 lp_build_mask_update(bld->mask, mask);
740 }
741
/**
 * Allocate stack storage (allocas) for the registers of one TGSI
 * declaration.  The allocas are emitted at the top of the function's
 * entry block (rewinding the builder there temporarily) so LLVM's
 * mem2reg pass can promote them to SSA values.
 * \return TRUE (there are no failure paths)
 */
static int
emit_declaration(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_declaration *decl)
{
   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx, i;
   LLVMBasicBlockRef current_block =
      LLVMGetInsertBlock(bld->base.builder);
   LLVMBasicBlockRef first_block =
      LLVMGetEntryBasicBlock(
         LLVMGetBasicBlockParent(current_block));
   LLVMValueRef first_inst =
      LLVMGetFirstInstruction(first_block);

   /* we want alloca's to be the first instructions
    * in the function so we need to rewind the builder
    * to the very beginning */
   LLVMPositionBuilderBefore(bld->base.builder,
                             first_inst);

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         if (bld->has_indirect_addressing) {
            /* one flat array of vectors, 4 channel slots per register;
             * NOTE(review): re-allocated on every TEMP declaration --
             * presumably a single declaration range covers all temps;
             * verify against the TGSI produced by the state tracker */
            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
                                            last*4 + 4, 0);
            bld->temps_array = LLVMBuildArrayAlloca(bld->base.builder,
                                                    lp_build_vec_type(bld->base.type),
                                                    val, "");
         } else {
            for (i = 0; i < NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(&bld->base);
         }
         break;

      case TGSI_FILE_OUTPUT:
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->outputs[idx][i] = lp_build_alloca(&bld->base);
         break;

      case TGSI_FILE_ADDRESS:
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(&bld->base);
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }

   /* restore the builder to where code generation left off */
   LLVMPositionBuilderAtEnd(bld->base.builder,
                            current_block);
   return TRUE;
}
799
800
801 /**
802 * Emit LLVM for one TGSI instruction.
803 * \param return TRUE for success, FALSE otherwise
804 */
805 static boolean
806 emit_instruction(
807 struct lp_build_tgsi_soa_context *bld,
808 const struct tgsi_full_instruction *inst,
809 const struct tgsi_opcode_info *info)
810 {
811 unsigned chan_index;
812 LLVMValueRef src0, src1, src2;
813 LLVMValueRef tmp0, tmp1, tmp2;
814 LLVMValueRef tmp3 = NULL;
815 LLVMValueRef tmp4 = NULL;
816 LLVMValueRef tmp5 = NULL;
817 LLVMValueRef tmp6 = NULL;
818 LLVMValueRef tmp7 = NULL;
819 LLVMValueRef res;
820 LLVMValueRef dst0[NUM_CHANNELS];
821
822 /*
823 * Stores and write masks are handled in a general fashion after the long
824 * instruction opcode switch statement.
825 *
826 * Although not stricitly necessary, we avoid generating instructions for
827 * channels which won't be stored, in cases where's that easy. For some
828 * complex instructions, like texture sampling, it is more convenient to
829 * assume a full writemask and then let LLVM optimization passes eliminate
830 * redundant code.
831 */
832
833 assert(info->num_dst <= 1);
834 if(info->num_dst) {
835 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
836 dst0[chan_index] = bld->base.undef;
837 }
838 }
839
840 switch (inst->Instruction.Opcode) {
841 case TGSI_OPCODE_ARL:
842 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
843 tmp0 = emit_fetch( bld, inst, 0, chan_index );
844 tmp0 = lp_build_floor(&bld->base, tmp0);
845 dst0[chan_index] = tmp0;
846 }
847 break;
848
849 case TGSI_OPCODE_MOV:
850 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
851 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
852 }
853 break;
854
855 case TGSI_OPCODE_LIT:
856 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
857 dst0[CHAN_X] = bld->base.one;
858 }
859 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
860 src0 = emit_fetch( bld, inst, 0, CHAN_X );
861 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
862 }
863 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
864 /* XMM[1] = SrcReg[0].yyyy */
865 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
866 /* XMM[1] = max(XMM[1], 0) */
867 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
868 /* XMM[2] = SrcReg[0].wwww */
869 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
870 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
871 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
872 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
873 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
874 }
875 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
876 dst0[CHAN_W] = bld->base.one;
877 }
878 break;
879
880 case TGSI_OPCODE_RCP:
881 /* TGSI_OPCODE_RECIP */
882 src0 = emit_fetch( bld, inst, 0, CHAN_X );
883 res = lp_build_rcp(&bld->base, src0);
884 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
885 dst0[chan_index] = res;
886 }
887 break;
888
889 case TGSI_OPCODE_RSQ:
890 /* TGSI_OPCODE_RECIPSQRT */
891 src0 = emit_fetch( bld, inst, 0, CHAN_X );
892 src0 = lp_build_abs(&bld->base, src0);
893 res = lp_build_rsqrt(&bld->base, src0);
894 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
895 dst0[chan_index] = res;
896 }
897 break;
898
899 case TGSI_OPCODE_EXP:
900 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
901 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
902 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
903 LLVMValueRef *p_exp2_int_part = NULL;
904 LLVMValueRef *p_frac_part = NULL;
905 LLVMValueRef *p_exp2 = NULL;
906
907 src0 = emit_fetch( bld, inst, 0, CHAN_X );
908
909 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
910 p_exp2_int_part = &tmp0;
911 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
912 p_frac_part = &tmp1;
913 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
914 p_exp2 = &tmp2;
915
916 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
917
918 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
919 dst0[CHAN_X] = tmp0;
920 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
921 dst0[CHAN_Y] = tmp1;
922 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
923 dst0[CHAN_Z] = tmp2;
924 }
925 /* dst.w = 1.0 */
926 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
927 dst0[CHAN_W] = bld->base.one;
928 }
929 break;
930
931 case TGSI_OPCODE_LOG:
932 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
933 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
934 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
935 LLVMValueRef *p_floor_log2 = NULL;
936 LLVMValueRef *p_exp = NULL;
937 LLVMValueRef *p_log2 = NULL;
938
939 src0 = emit_fetch( bld, inst, 0, CHAN_X );
940 src0 = lp_build_abs( &bld->base, src0 );
941
942 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
943 p_floor_log2 = &tmp0;
944 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
945 p_exp = &tmp1;
946 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
947 p_log2 = &tmp2;
948
949 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
950
951 /* dst.x = floor(lg2(abs(src.x))) */
952 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
953 dst0[CHAN_X] = tmp0;
954 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
955 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
956 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
957 }
958 /* dst.z = lg2(abs(src.x)) */
959 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
960 dst0[CHAN_Z] = tmp2;
961 }
962 /* dst.w = 1.0 */
963 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
964 dst0[CHAN_W] = bld->base.one;
965 }
966 break;
967
968 case TGSI_OPCODE_MUL:
969 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
970 src0 = emit_fetch( bld, inst, 0, chan_index );
971 src1 = emit_fetch( bld, inst, 1, chan_index );
972 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
973 }
974 break;
975
976 case TGSI_OPCODE_ADD:
977 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
978 src0 = emit_fetch( bld, inst, 0, chan_index );
979 src1 = emit_fetch( bld, inst, 1, chan_index );
980 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
981 }
982 break;
983
984 case TGSI_OPCODE_DP3:
985 /* TGSI_OPCODE_DOT3 */
986 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
987 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
988 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
989 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
990 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
991 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
992 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
993 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
994 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
995 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
996 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
997 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
998 dst0[chan_index] = tmp0;
999 }
1000 break;
1001
1002 case TGSI_OPCODE_DP4:
1003 /* TGSI_OPCODE_DOT4 */
1004 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1005 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1006 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1007 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1008 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1009 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1010 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1011 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1012 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1013 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1014 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1015 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1016 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1017 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1018 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1019 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1020 dst0[chan_index] = tmp0;
1021 }
1022 break;
1023
1024 case TGSI_OPCODE_DST:
1025 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1026 dst0[CHAN_X] = bld->base.one;
1027 }
1028 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1029 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1030 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1031 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1032 }
1033 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1034 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1035 }
1036 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1037 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1038 }
1039 break;
1040
1041 case TGSI_OPCODE_MIN:
1042 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1043 src0 = emit_fetch( bld, inst, 0, chan_index );
1044 src1 = emit_fetch( bld, inst, 1, chan_index );
1045 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1046 }
1047 break;
1048
1049 case TGSI_OPCODE_MAX:
1050 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1051 src0 = emit_fetch( bld, inst, 0, chan_index );
1052 src1 = emit_fetch( bld, inst, 1, chan_index );
1053 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1054 }
1055 break;
1056
1057 case TGSI_OPCODE_SLT:
1058 /* TGSI_OPCODE_SETLT */
1059 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1060 src0 = emit_fetch( bld, inst, 0, chan_index );
1061 src1 = emit_fetch( bld, inst, 1, chan_index );
1062 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1063 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1064 }
1065 break;
1066
1067 case TGSI_OPCODE_SGE:
1068 /* TGSI_OPCODE_SETGE */
1069 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1070 src0 = emit_fetch( bld, inst, 0, chan_index );
1071 src1 = emit_fetch( bld, inst, 1, chan_index );
1072 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1073 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1074 }
1075 break;
1076
1077 case TGSI_OPCODE_MAD:
1078 /* TGSI_OPCODE_MADD */
1079 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1080 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1081 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1082 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1083 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1084 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1085 dst0[chan_index] = tmp0;
1086 }
1087 break;
1088
1089 case TGSI_OPCODE_SUB:
1090 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1091 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1092 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1093 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1094 }
1095 break;
1096
1097 case TGSI_OPCODE_LRP:
1098 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1099 src0 = emit_fetch( bld, inst, 0, chan_index );
1100 src1 = emit_fetch( bld, inst, 1, chan_index );
1101 src2 = emit_fetch( bld, inst, 2, chan_index );
1102 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1103 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1104 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1105 }
1106 break;
1107
1108 case TGSI_OPCODE_CND:
1109 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1110 src0 = emit_fetch( bld, inst, 0, chan_index );
1111 src1 = emit_fetch( bld, inst, 1, chan_index );
1112 src2 = emit_fetch( bld, inst, 2, chan_index );
1113 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1114 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1115 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1116 }
1117 break;
1118
1119 case TGSI_OPCODE_DP2A:
1120 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1121 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1122 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1123 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1124 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1125 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1126 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1127 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1128 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1129 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1130 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1131 }
1132 break;
1133
1134 case TGSI_OPCODE_FRC:
1135 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1136 src0 = emit_fetch( bld, inst, 0, chan_index );
1137 tmp0 = lp_build_floor(&bld->base, src0);
1138 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1139 dst0[chan_index] = tmp0;
1140 }
1141 break;
1142
1143 case TGSI_OPCODE_CLAMP:
1144 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1145 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1146 src1 = emit_fetch( bld, inst, 1, chan_index );
1147 src2 = emit_fetch( bld, inst, 2, chan_index );
1148 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1149 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1150 dst0[chan_index] = tmp0;
1151 }
1152 break;
1153
1154 case TGSI_OPCODE_FLR:
1155 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1156 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1157 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1158 }
1159 break;
1160
1161 case TGSI_OPCODE_ROUND:
1162 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1163 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1164 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1165 }
1166 break;
1167
1168 case TGSI_OPCODE_EX2: {
1169 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1170 tmp0 = lp_build_exp2( &bld->base, tmp0);
1171 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1172 dst0[chan_index] = tmp0;
1173 }
1174 break;
1175 }
1176
1177 case TGSI_OPCODE_LG2:
1178 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1179 tmp0 = lp_build_log2( &bld->base, tmp0);
1180 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1181 dst0[chan_index] = tmp0;
1182 }
1183 break;
1184
1185 case TGSI_OPCODE_POW:
1186 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1187 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1188 res = lp_build_pow( &bld->base, src0, src1 );
1189 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1190 dst0[chan_index] = res;
1191 }
1192 break;
1193
1194 case TGSI_OPCODE_XPD:
1195 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1196 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1197 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1198 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1199 }
1200 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1201 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1202 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1203 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1204 }
1205 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1206 tmp2 = tmp0;
1207 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1208 tmp5 = tmp3;
1209 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1210 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1211 dst0[CHAN_X] = tmp2;
1212 }
1213 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1214 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1215 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1216 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1217 }
1218 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1219 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1220 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1221 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1222 dst0[CHAN_Y] = tmp3;
1223 }
1224 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1225 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1226 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1227 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1228 dst0[CHAN_Z] = tmp5;
1229 }
1230 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1231 dst0[CHAN_W] = bld->base.one;
1232 }
1233 break;
1234
1235 case TGSI_OPCODE_ABS:
1236 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1237 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1238 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1239 }
1240 break;
1241
1242 case TGSI_OPCODE_RCC:
1243 /* deprecated? */
1244 assert(0);
1245 return FALSE;
1246
1247 case TGSI_OPCODE_DPH:
1248 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1249 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1250 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1251 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1252 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1253 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1254 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1255 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1256 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1257 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1258 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1259 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1260 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1261 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1262 dst0[chan_index] = tmp0;
1263 }
1264 break;
1265
1266 case TGSI_OPCODE_COS:
1267 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1268 tmp0 = lp_build_cos( &bld->base, tmp0 );
1269 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1270 dst0[chan_index] = tmp0;
1271 }
1272 break;
1273
1274 case TGSI_OPCODE_DDX:
1275 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1276 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1277 }
1278 break;
1279
1280 case TGSI_OPCODE_DDY:
1281 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1282 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1283 }
1284 break;
1285
1286 case TGSI_OPCODE_KILP:
1287 /* predicated kill */
1288 emit_kilp( bld, inst );
1289 break;
1290
1291 case TGSI_OPCODE_KIL:
1292 /* conditional kill */
1293 emit_kil( bld, inst );
1294 break;
1295
1296 case TGSI_OPCODE_PK2H:
1297 return FALSE;
1298 break;
1299
1300 case TGSI_OPCODE_PK2US:
1301 return FALSE;
1302 break;
1303
1304 case TGSI_OPCODE_PK4B:
1305 return FALSE;
1306 break;
1307
1308 case TGSI_OPCODE_PK4UB:
1309 return FALSE;
1310 break;
1311
1312 case TGSI_OPCODE_RFL:
1313 return FALSE;
1314 break;
1315
1316 case TGSI_OPCODE_SEQ:
1317 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1318 src0 = emit_fetch( bld, inst, 0, chan_index );
1319 src1 = emit_fetch( bld, inst, 1, chan_index );
1320 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1321 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1322 }
1323 break;
1324
1325 case TGSI_OPCODE_SFL:
1326 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1327 dst0[chan_index] = bld->base.zero;
1328 }
1329 break;
1330
1331 case TGSI_OPCODE_SGT:
1332 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1333 src0 = emit_fetch( bld, inst, 0, chan_index );
1334 src1 = emit_fetch( bld, inst, 1, chan_index );
1335 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1336 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1337 }
1338 break;
1339
1340 case TGSI_OPCODE_SIN:
1341 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1342 tmp0 = lp_build_sin( &bld->base, tmp0 );
1343 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1344 dst0[chan_index] = tmp0;
1345 }
1346 break;
1347
1348 case TGSI_OPCODE_SLE:
1349 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1350 src0 = emit_fetch( bld, inst, 0, chan_index );
1351 src1 = emit_fetch( bld, inst, 1, chan_index );
1352 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1353 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1354 }
1355 break;
1356
1357 case TGSI_OPCODE_SNE:
1358 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1359 src0 = emit_fetch( bld, inst, 0, chan_index );
1360 src1 = emit_fetch( bld, inst, 1, chan_index );
1361 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1362 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1363 }
1364 break;
1365
1366 case TGSI_OPCODE_STR:
1367 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1368 dst0[chan_index] = bld->base.one;
1369 }
1370 break;
1371
1372 case TGSI_OPCODE_TEX:
1373 emit_tex( bld, inst, FALSE, FALSE, dst0 );
1374 break;
1375
1376 case TGSI_OPCODE_TXD:
1377 /* FIXME */
1378 return FALSE;
1379 break;
1380
1381 case TGSI_OPCODE_UP2H:
1382 /* deprecated */
1383 assert (0);
1384 return FALSE;
1385 break;
1386
1387 case TGSI_OPCODE_UP2US:
1388 /* deprecated */
1389 assert(0);
1390 return FALSE;
1391 break;
1392
1393 case TGSI_OPCODE_UP4B:
1394 /* deprecated */
1395 assert(0);
1396 return FALSE;
1397 break;
1398
1399 case TGSI_OPCODE_UP4UB:
1400 /* deprecated */
1401 assert(0);
1402 return FALSE;
1403 break;
1404
1405 case TGSI_OPCODE_X2D:
1406 /* deprecated? */
1407 assert(0);
1408 return FALSE;
1409 break;
1410
1411 case TGSI_OPCODE_ARA:
1412 /* deprecated */
1413 assert(0);
1414 return FALSE;
1415 break;
1416
1417 case TGSI_OPCODE_ARR:
1418 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1419 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1420 tmp0 = lp_build_round(&bld->base, tmp0);
1421 dst0[chan_index] = tmp0;
1422 }
1423 break;
1424
1425 case TGSI_OPCODE_BRA:
1426 /* deprecated */
1427 assert(0);
1428 return FALSE;
1429 break;
1430
1431 case TGSI_OPCODE_CAL:
1432 /* FIXME */
1433 return FALSE;
1434 break;
1435
1436 case TGSI_OPCODE_RET:
1437 /* FIXME */
1438 return FALSE;
1439 break;
1440
1441 case TGSI_OPCODE_END:
1442 break;
1443
1444 case TGSI_OPCODE_SSG:
1445 /* TGSI_OPCODE_SGN */
1446 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1447 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1448 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1449 }
1450 break;
1451
1452 case TGSI_OPCODE_CMP:
1453 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1454 src0 = emit_fetch( bld, inst, 0, chan_index );
1455 src1 = emit_fetch( bld, inst, 1, chan_index );
1456 src2 = emit_fetch( bld, inst, 2, chan_index );
1457 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1458 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1459 }
1460 break;
1461
1462 case TGSI_OPCODE_SCS:
1463 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1464 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1465 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1466 }
1467 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1468 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1469 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1470 }
1471 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1472 dst0[CHAN_Z] = bld->base.zero;
1473 }
1474 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1475 dst0[CHAN_W] = bld->base.one;
1476 }
1477 break;
1478
1479 case TGSI_OPCODE_TXB:
1480 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1481 break;
1482
1483 case TGSI_OPCODE_NRM:
1484 /* fall-through */
1485 case TGSI_OPCODE_NRM4:
1486 /* 3 or 4-component normalization */
1487 {
1488 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1489
1490 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1491 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1492 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1493 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1494
1495 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1496
1497 /* xmm4 = src.x */
1498 /* xmm0 = src.x * src.x */
1499 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1500 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1501 tmp4 = tmp0;
1502 }
1503 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1504
1505 /* xmm5 = src.y */
1506 /* xmm0 = xmm0 + src.y * src.y */
1507 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1508 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1509 tmp5 = tmp1;
1510 }
1511 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1512 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1513
1514 /* xmm6 = src.z */
1515 /* xmm0 = xmm0 + src.z * src.z */
1516 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1517 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1518 tmp6 = tmp1;
1519 }
1520 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1521 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1522
1523 if (dims == 4) {
1524 /* xmm7 = src.w */
1525 /* xmm0 = xmm0 + src.w * src.w */
1526 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1527 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1528 tmp7 = tmp1;
1529 }
1530 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1531 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1532 }
1533
1534 /* xmm1 = 1 / sqrt(xmm0) */
1535 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1536
1537 /* dst.x = xmm1 * src.x */
1538 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1539 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1540 }
1541
1542 /* dst.y = xmm1 * src.y */
1543 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1544 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1545 }
1546
1547 /* dst.z = xmm1 * src.z */
1548 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1549 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1550 }
1551
1552 /* dst.w = xmm1 * src.w */
1553 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1554 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1555 }
1556 }
1557
1558 /* dst.w = 1.0 */
1559 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1560 dst0[CHAN_W] = bld->base.one;
1561 }
1562 }
1563 break;
1564
1565 case TGSI_OPCODE_DIV:
1566 /* deprecated */
1567 assert( 0 );
1568 return FALSE;
1569 break;
1570
1571 case TGSI_OPCODE_DP2:
1572 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1573 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1574 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1575 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1576 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1577 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1578 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1579 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1580 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1581 }
1582 break;
1583
1584 case TGSI_OPCODE_TXL:
1585 emit_tex( bld, inst, TRUE, FALSE, dst0 );
1586 break;
1587
1588 case TGSI_OPCODE_TXP:
1589 emit_tex( bld, inst, FALSE, TRUE, dst0 );
1590 break;
1591
1592 case TGSI_OPCODE_BRK:
1593 lp_exec_break(&bld->exec_mask);
1594 break;
1595
1596 case TGSI_OPCODE_IF:
1597 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1598 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1599 tmp0, bld->base.zero);
1600 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1601 break;
1602
1603 case TGSI_OPCODE_BGNFOR:
1604 /* deprecated */
1605 assert(0);
1606 return FALSE;
1607 break;
1608
1609 case TGSI_OPCODE_BGNLOOP:
1610 lp_exec_bgnloop(&bld->exec_mask);
1611 break;
1612
1613 case TGSI_OPCODE_REP:
1614 /* deprecated */
1615 assert(0);
1616 return FALSE;
1617 break;
1618
1619 case TGSI_OPCODE_ELSE:
1620 lp_exec_mask_cond_invert(&bld->exec_mask);
1621 break;
1622
1623 case TGSI_OPCODE_ENDIF:
1624 lp_exec_mask_cond_pop(&bld->exec_mask);
1625 break;
1626
1627 case TGSI_OPCODE_ENDFOR:
1628 /* deprecated */
1629 assert(0);
1630 return FALSE;
1631 break;
1632
1633 case TGSI_OPCODE_ENDLOOP:
1634 lp_exec_endloop(&bld->exec_mask);
1635 break;
1636
1637 case TGSI_OPCODE_ENDREP:
1638 /* deprecated */
1639 assert(0);
1640 return FALSE;
1641 break;
1642
1643 case TGSI_OPCODE_PUSHA:
1644 /* deprecated? */
1645 assert(0);
1646 return FALSE;
1647 break;
1648
1649 case TGSI_OPCODE_POPA:
1650 /* deprecated? */
1651 assert(0);
1652 return FALSE;
1653 break;
1654
1655 case TGSI_OPCODE_CEIL:
1656 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1657 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1658 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1659 }
1660 break;
1661
1662 case TGSI_OPCODE_I2F:
1663 /* deprecated? */
1664 assert(0);
1665 return FALSE;
1666 break;
1667
1668 case TGSI_OPCODE_NOT:
1669 /* deprecated? */
1670 assert(0);
1671 return FALSE;
1672 break;
1673
1674 case TGSI_OPCODE_TRUNC:
1675 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1676 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1677 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1678 }
1679 break;
1680
1681 case TGSI_OPCODE_SHL:
1682 /* deprecated? */
1683 assert(0);
1684 return FALSE;
1685 break;
1686
1687 case TGSI_OPCODE_ISHR:
1688 /* deprecated? */
1689 assert(0);
1690 return FALSE;
1691 break;
1692
1693 case TGSI_OPCODE_AND:
1694 /* deprecated? */
1695 assert(0);
1696 return FALSE;
1697 break;
1698
1699 case TGSI_OPCODE_OR:
1700 /* deprecated? */
1701 assert(0);
1702 return FALSE;
1703 break;
1704
1705 case TGSI_OPCODE_MOD:
1706 /* deprecated? */
1707 assert(0);
1708 return FALSE;
1709 break;
1710
1711 case TGSI_OPCODE_XOR:
1712 /* deprecated? */
1713 assert(0);
1714 return FALSE;
1715 break;
1716
1717 case TGSI_OPCODE_SAD:
1718 /* deprecated? */
1719 assert(0);
1720 return FALSE;
1721 break;
1722
1723 case TGSI_OPCODE_TXF:
1724 /* deprecated? */
1725 assert(0);
1726 return FALSE;
1727 break;
1728
1729 case TGSI_OPCODE_TXQ:
1730 /* deprecated? */
1731 assert(0);
1732 return FALSE;
1733 break;
1734
1735 case TGSI_OPCODE_CONT:
1736 lp_exec_continue(&bld->exec_mask);
1737 break;
1738
1739 case TGSI_OPCODE_EMIT:
1740 return FALSE;
1741 break;
1742
1743 case TGSI_OPCODE_ENDPRIM:
1744 return FALSE;
1745 break;
1746
1747 case TGSI_OPCODE_NOP:
1748 break;
1749
1750 default:
1751 return FALSE;
1752 }
1753
1754 if(info->num_dst) {
1755 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1756 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1757 }
1758 }
1759
1760 return TRUE;
1761 }
1762
1763
1764 void
1765 lp_build_tgsi_soa(LLVMBuilderRef builder,
1766 const struct tgsi_token *tokens,
1767 struct lp_type type,
1768 struct lp_build_mask_context *mask,
1769 LLVMValueRef consts_ptr,
1770 const LLVMValueRef *pos,
1771 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1772 LLVMValueRef (*outputs)[NUM_CHANNELS],
1773 struct lp_build_sampler_soa *sampler,
1774 struct tgsi_shader_info *info)
1775 {
1776 struct lp_build_tgsi_soa_context bld;
1777 struct tgsi_parse_context parse;
1778 uint num_immediates = 0;
1779 unsigned i;
1780
1781 /* Setup build context */
1782 memset(&bld, 0, sizeof bld);
1783 lp_build_context_init(&bld.base, builder, type);
1784 bld.mask = mask;
1785 bld.pos = pos;
1786 bld.inputs = inputs;
1787 bld.outputs = outputs;
1788 bld.consts_ptr = consts_ptr;
1789 bld.sampler = sampler;
1790 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1791 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1792
1793 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1794
1795 tgsi_parse_init( &parse, tokens );
1796
1797 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1798 tgsi_parse_token( &parse );
1799
1800 switch( parse.FullToken.Token.Type ) {
1801 case TGSI_TOKEN_TYPE_DECLARATION:
1802 /* Inputs already interpolated */
1803 {
1804 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1805 _debug_printf("warning: failed to define LLVM variable\n");
1806 }
1807 break;
1808
1809 case TGSI_TOKEN_TYPE_INSTRUCTION:
1810 {
1811 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1812 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1813 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1814 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1815 info ? info->mnemonic : "<invalid>");
1816 }
1817
1818 break;
1819
1820 case TGSI_TOKEN_TYPE_IMMEDIATE:
1821 /* simply copy the immediate values into the next immediates[] slot */
1822 {
1823 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1824 assert(size <= 4);
1825 assert(num_immediates < LP_MAX_IMMEDIATES);
1826 for( i = 0; i < size; ++i )
1827 bld.immediates[num_immediates][i] =
1828 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1829 for( i = size; i < 4; ++i )
1830 bld.immediates[num_immediates][i] = bld.base.undef;
1831 num_immediates++;
1832 }
1833 break;
1834
1835 case TGSI_TOKEN_TYPE_PROPERTY:
1836 break;
1837
1838 default:
1839 assert( 0 );
1840 }
1841 }
1842 if (0) {
1843 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1844 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1845 debug_printf("11111111111111111111111111111 \n");
1846 tgsi_dump(tokens, 0);
1847 LLVMDumpValue(function);
1848 debug_printf("2222222222222222222222222222 \n");
1849 }
1850 tgsi_parse_free( &parse );
1851 }
1852