1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_tgsi.h"
57 #include "lp_bld_limits.h"
58 #include "lp_bld_debug.h"
59
60
61 #define FOR_EACH_CHANNEL( CHAN )\
62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
63
64 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
66
67 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
69
70 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
71 FOR_EACH_CHANNEL( CHAN )\
72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
73
74 #define CHAN_X 0
75 #define CHAN_Y 1
76 #define CHAN_Z 2
77 #define CHAN_W 3
78
79 #define QUAD_TOP_LEFT 0
80 #define QUAD_TOP_RIGHT 1
81 #define QUAD_BOTTOM_LEFT 2
82 #define QUAD_BOTTOM_RIGHT 3
83
84
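/*
 * Execution mask bookkeeping for control flow.
 *
 * cond_mask tracks IF/ELSE nesting, cont_mask and break_mask track CONT/BRK
 * inside loops, and exec_mask is their conjunction -- the set of lanes that
 * are currently "alive".  Each mask is an integer vector of all-ones/all-zeros
 * elements, one element per pixel in the SoA vector.
 */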
85 struct lp_exec_mask {
86 struct lp_build_context *bld;
87
88 boolean has_mask;
89
90 LLVMTypeRef int_vec_type;
91
92 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
93 int cond_stack_size;
94 LLVMValueRef cond_mask;
95
96 LLVMBasicBlockRef loop_block;
97 LLVMValueRef cont_mask;
98 LLVMValueRef break_mask;
99 LLVMValueRef break_var;
100 struct {
101 LLVMBasicBlockRef loop_block;
102 LLVMValueRef cont_mask;
103 LLVMValueRef break_mask;
104 LLVMValueRef break_var;
105 } loop_stack[LP_MAX_TGSI_NESTING];
106 int loop_stack_size;
107
108 LLVMValueRef exec_mask;
109 };
110
111 struct lp_build_tgsi_soa_context
112 {
113 struct lp_build_context base;
114
115 /* Builder for integer masks and indices */
116 struct lp_build_context int_bld;
117
118 LLVMValueRef consts_ptr;
119 const LLVMValueRef *pos;
120 const LLVMValueRef (*inputs)[NUM_CHANNELS];
121 LLVMValueRef (*outputs)[NUM_CHANNELS];
122
123 const struct lp_build_sampler_soa *sampler;
124
125 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
126 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
127 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
128 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
129
130    /* If the shader uses indirect addressing we allocate a flat array of
131     * temps instead, and the temps[] array above is then unused. */
132 LLVMValueRef temps_array;
133 boolean has_indirect_addressing;
134
135 struct lp_build_mask_context *mask;
136 struct lp_exec_mask exec_mask;
137 };
138
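/*
 * Swizzle patterns over the 2x2 quad laid out in each SoA vector (see the
 * QUAD_* defines above).  emit_ddx()/emit_ddy() broadcast the left/right and
 * top/bottom neighbours and subtract them to approximate the screen-space
 * derivatives.
 */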
139 static const unsigned char
140 swizzle_left[4] = {
141 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
142 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
143 };
144
145 static const unsigned char
146 swizzle_right[4] = {
147 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
148 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
149 };
150
151 static const unsigned char
152 swizzle_top[4] = {
153 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
154 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
155 };
156
157 static const unsigned char
158 swizzle_bottom[4] = {
159 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
160 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
161 };
162
163 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
164 {
165 mask->bld = bld;
166 mask->has_mask = FALSE;
167 mask->cond_stack_size = 0;
168 mask->loop_stack_size = 0;
169
170 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
171 mask->break_mask = mask->cont_mask = mask->cond_mask =
172 LLVMConstAllOnes(mask->int_vec_type);
173 }
174
175 static void lp_exec_mask_update(struct lp_exec_mask *mask)
176 {
177 if (mask->loop_stack_size) {
178      /* For loops we need to update the entire mask at runtime */
179 LLVMValueRef tmp;
180 assert(mask->break_mask);
181 tmp = LLVMBuildAnd(mask->bld->builder,
182 mask->cont_mask,
183 mask->break_mask,
184 "maskcb");
185 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
186 mask->cond_mask,
187 tmp,
188 "maskfull");
189 } else
190 mask->exec_mask = mask->cond_mask;
191
192
193 mask->has_mask = (mask->cond_stack_size > 0 ||
194 mask->loop_stack_size > 0);
195 }
196
197 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
198 LLVMValueRef val)
199 {
200 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
201 if (mask->cond_stack_size == 0) {
202 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
203 }
204 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
205 assert(LLVMTypeOf(val) == mask->int_vec_type);
206 mask->cond_mask = val;
207
208 lp_exec_mask_update(mask);
209 }
210
211 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
212 {
213 LLVMValueRef prev_mask;
214 LLVMValueRef inv_mask;
215
216 assert(mask->cond_stack_size);
217 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
218 if (mask->cond_stack_size == 1) {
219 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
220 }
221
222 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
223
224 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
225 inv_mask,
226 prev_mask, "");
227 lp_exec_mask_update(mask);
228 }
229
230 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
231 {
232 assert(mask->cond_stack_size);
233 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
234 lp_exec_mask_update(mask);
235 }
236
237 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
238 {
239 if (mask->loop_stack_size == 0) {
240 assert(mask->loop_block == NULL);
241 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
242 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
243 assert(mask->break_var == NULL);
244 }
245
246 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
247
248 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
249 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
250 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
251 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
252 ++mask->loop_stack_size;
253
254 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
255 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
256
257 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
258 LLVMBuildBr(mask->bld->builder, mask->loop_block);
259 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
260
261 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
262
263 lp_exec_mask_update(mask);
264 }
265
266 static void lp_exec_break(struct lp_exec_mask *mask)
267 {
268 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
269 mask->exec_mask,
270 "break");
271
272 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
273 mask->break_mask,
274 exec_mask, "break_full");
275
276 lp_exec_mask_update(mask);
277 }
278
279 static void lp_exec_continue(struct lp_exec_mask *mask)
280 {
281 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
282 mask->exec_mask,
283 "");
284
285 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
286 mask->cont_mask,
287 exec_mask, "");
288
289 lp_exec_mask_update(mask);
290 }
291
292
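/*
 * Close the loop opened by lp_exec_bgnloop(): restore cont_mask for the next
 * iteration, write break_mask back to its alloca so it survives the back
 * edge, and branch back to the loop header while any lane is still active
 * (the vector mask is bitcast to one wide integer and compared against 0).
 */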
293 static void lp_exec_endloop(struct lp_exec_mask *mask)
294 {
295 LLVMBasicBlockRef endloop;
296 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
297 mask->bld->type.length);
298 LLVMValueRef i1cond;
299
300 assert(mask->break_mask);
301
302 /*
303 * Restore the cont_mask, but don't pop
304 */
305 assert(mask->loop_stack_size);
306 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
307 lp_exec_mask_update(mask);
308
309 /*
310 * Unlike the continue mask, the break_mask must be preserved across loop
311 * iterations
312 */
313 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
314
315    /* i1cond = (exec_mask != 0), i.e. any lane is still active */
316 i1cond = LLVMBuildICmp(
317 mask->bld->builder,
318 LLVMIntNE,
319 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
320 LLVMConstNull(reg_type), "");
321
322 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
323
324 LLVMBuildCondBr(mask->bld->builder,
325 i1cond, mask->loop_block, endloop);
326
327 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
328
329 assert(mask->loop_stack_size);
330 --mask->loop_stack_size;
331 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
332 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
333 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
334 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
335
336 lp_exec_mask_update(mask);
337 }
338
339 /* stores val into an address pointed to by dst.
340 * mask->exec_mask is used to figure out which bits of val
341 * should be stored into the address
342 * (0 means don't store this bit, 1 means do store).
343 */
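/* Conceptually this is a per-channel read-modify-write:
 *
 *    *dst = select(pred & exec_mask, val, *dst);
 *
 * falling back to a plain store when neither a mask nor a predicate applies.
 */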
344 static void lp_exec_mask_store(struct lp_exec_mask *mask,
345 LLVMValueRef pred,
346 LLVMValueRef val,
347 LLVMValueRef dst)
348 {
349 /* Mix the predicate and execution mask */
350 if (mask->has_mask) {
351 if (pred) {
352 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
353 } else {
354 pred = mask->exec_mask;
355 }
356 }
357
358 if (pred) {
359 LLVMValueRef real_val, dst_val;
360
361 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
362 real_val = lp_build_select(mask->bld,
363 pred,
364 val, dst_val);
365
366 LLVMBuildStore(mask->bld->builder, real_val, dst);
367 } else
368 LLVMBuildStore(mask->bld->builder, val, dst);
369 }
370
371
372 static LLVMValueRef
373 emit_ddx(struct lp_build_tgsi_soa_context *bld,
374 LLVMValueRef src)
375 {
376 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
377 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
378 return lp_build_sub(&bld->base, src_right, src_left);
379 }
380
381
382 static LLVMValueRef
383 emit_ddy(struct lp_build_tgsi_soa_context *bld,
384 LLVMValueRef src)
385 {
386 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
387 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
388 return lp_build_sub(&bld->base, src_top, src_bottom);
389 }
390
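/*
 * Return a pointer to temporary register (index, chan).  Without indirect
 * addressing each channel has its own alloca; with it, all temporaries live
 * in one flat array and the element is found at index*4 + chan (plus the
 * runtime offset from the ADDR register, already scaled by 4).
 */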
391 static LLVMValueRef
392 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
393 unsigned index,
394 unsigned chan,
395 boolean is_indirect,
396 LLVMValueRef addr)
397 {
398 assert(chan < 4);
399 if (!bld->has_indirect_addressing) {
400 return bld->temps[index][chan];
401 } else {
402 LLVMValueRef lindex =
403 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
404 if (is_indirect)
405 lindex = lp_build_add(&bld->base, lindex, addr);
406 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
407 }
408 }
409
410 /**
411 * Register fetch.
412 */
413 static LLVMValueRef
414 emit_fetch(
415 struct lp_build_tgsi_soa_context *bld,
416 const struct tgsi_full_instruction *inst,
417 unsigned index,
418 const unsigned chan_index )
419 {
420 const struct tgsi_full_src_register *reg = &inst->Src[index];
421 const unsigned swizzle =
422 tgsi_util_get_full_src_register_swizzle(reg, chan_index);
423 LLVMValueRef res;
424 LLVMValueRef addr = NULL;
425
426 if (swizzle > 3) {
427 assert(0 && "invalid swizzle in emit_fetch()");
428 return bld->base.undef;
429 }
430
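   /* Indirect addressing: the ADDR register holds floats, so convert it to
    * integers, take lane 0 and scale by 4 (four channels per register) to
    * get a linear offset for the temporary/constant fetches below.
    */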
431 if (reg->Register.Indirect) {
432 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
433 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
434 addr = LLVMBuildLoad(bld->base.builder,
435 bld->addr[reg->Indirect.Index][swizzle],
436 "");
437 /* for indexing we want integers */
438 addr = LLVMBuildFPToSI(bld->base.builder, addr,
439 int_vec_type, "");
440 addr = LLVMBuildExtractElement(bld->base.builder,
441 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
442 "");
443 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
444 }
445
446 switch (reg->Register.File) {
447 case TGSI_FILE_CONSTANT:
448 {
449 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
450 reg->Register.Index*4 + swizzle, 0);
451 LLVMValueRef scalar, scalar_ptr;
452
453 if (reg->Register.Indirect) {
454 /*lp_build_printf(bld->base.builder,
455 "\taddr = %d\n", addr);*/
456 index = lp_build_add(&bld->base, index, addr);
457 }
458 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
459 &index, 1, "");
460 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
461
462 res = lp_build_broadcast_scalar(&bld->base, scalar);
463 }
464 break;
465
466 case TGSI_FILE_IMMEDIATE:
467 res = bld->immediates[reg->Register.Index][swizzle];
468 assert(res);
469 break;
470
471 case TGSI_FILE_INPUT:
472 res = bld->inputs[reg->Register.Index][swizzle];
473 assert(res);
474 break;
475
476 case TGSI_FILE_TEMPORARY:
477 {
478 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
479 swizzle,
480 reg->Register.Indirect,
481 addr);
482 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
483 if(!res)
484 return bld->base.undef;
485 }
486 break;
487
488 default:
489 assert(0 && "invalid src register in emit_fetch()");
490 return bld->base.undef;
491 }
492
493 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
494 case TGSI_UTIL_SIGN_CLEAR:
495 res = lp_build_abs( &bld->base, res );
496 break;
497
498 case TGSI_UTIL_SIGN_SET:
499       /* TODO: Use bitwise OR for floating point */
500 res = lp_build_abs( &bld->base, res );
501 res = LLVMBuildNeg( bld->base.builder, res, "" );
502 break;
503
504 case TGSI_UTIL_SIGN_TOGGLE:
505 res = LLVMBuildNeg( bld->base.builder, res, "" );
506 break;
507
508 case TGSI_UTIL_SIGN_KEEP:
509 break;
510 }
511
512 return res;
513 }
514
515
516 /**
517 * Register fetch with derivatives.
518 */
519 static void
520 emit_fetch_deriv(
521 struct lp_build_tgsi_soa_context *bld,
522 const struct tgsi_full_instruction *inst,
523 unsigned index,
524 const unsigned chan_index,
525 LLVMValueRef *res,
526 LLVMValueRef *ddx,
527 LLVMValueRef *ddy)
528 {
529 LLVMValueRef src;
530
531 src = emit_fetch(bld, inst, index, chan_index);
532
533 if(res)
534 *res = src;
535
536 /* TODO: use interpolation coeffs for inputs */
537
538 if(ddx)
539 *ddx = emit_ddx(bld, src);
540
541 if(ddy)
542 *ddy = emit_ddy(bld, src);
543 }
544
545
546 /**
547 * Predicate.
548 */
549 static void
550 emit_fetch_predicate(
551 struct lp_build_tgsi_soa_context *bld,
552 const struct tgsi_full_instruction *inst,
553 LLVMValueRef *pred)
554 {
555 unsigned index;
556 unsigned char swizzles[4];
557 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
558 LLVMValueRef value;
559 unsigned chan;
560
561 if (!inst->Instruction.Predicate) {
562 FOR_EACH_CHANNEL( chan ) {
563 pred[chan] = NULL;
564 }
565 return;
566 }
567
568 swizzles[0] = inst->Predicate.SwizzleX;
569 swizzles[1] = inst->Predicate.SwizzleY;
570 swizzles[2] = inst->Predicate.SwizzleZ;
571 swizzles[3] = inst->Predicate.SwizzleW;
572
573 index = inst->Predicate.Index;
574 assert(index < LP_MAX_TGSI_PREDS);
575
576 FOR_EACH_CHANNEL( chan ) {
577 unsigned swizzle = swizzles[chan];
578
579 /*
580 * Only fetch the predicate register channels that are actually listed
581 * in the swizzles
582 */
583 if (!unswizzled[swizzle]) {
584 value = LLVMBuildLoad(bld->base.builder,
585 bld->preds[index][swizzle], "");
586
587 /*
588 * Convert the value to an integer mask.
589 *
590           * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
591           * needlessly causes two comparisons because the intermediate result is
592           * stored as a float vector instead of an integer mask vector.
593 */
594 value = lp_build_compare(bld->base.builder,
595 bld->base.type,
596 PIPE_FUNC_NOTEQUAL,
597 value,
598 bld->base.zero);
599 if (inst->Predicate.Negate) {
600 value = LLVMBuildNot(bld->base.builder, value, "");
601 }
602
603 unswizzled[swizzle] = value;
604 } else {
605 value = unswizzled[swizzle];
606 }
607
608 pred[chan] = value;
609 }
610 }
611
612
613 /**
614 * Register store.
615 */
616 static void
617 emit_store(
618 struct lp_build_tgsi_soa_context *bld,
619 const struct tgsi_full_instruction *inst,
620 unsigned index,
621 unsigned chan_index,
622 LLVMValueRef pred,
623 LLVMValueRef value)
624 {
625 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
626 LLVMValueRef addr = NULL;
627
628 switch( inst->Instruction.Saturate ) {
629 case TGSI_SAT_NONE:
630 break;
631
632 case TGSI_SAT_ZERO_ONE:
633 value = lp_build_max(&bld->base, value, bld->base.zero);
634 value = lp_build_min(&bld->base, value, bld->base.one);
635 break;
636
637 case TGSI_SAT_MINUS_PLUS_ONE:
638 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
639 value = lp_build_min(&bld->base, value, bld->base.one);
640 break;
641
642 default:
643 assert(0);
644 }
645
646 if (reg->Register.Indirect) {
647 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
648 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
649 addr = LLVMBuildLoad(bld->base.builder,
650 bld->addr[reg->Indirect.Index][swizzle],
651 "");
652 /* for indexing we want integers */
653 addr = LLVMBuildFPToSI(bld->base.builder, addr,
654 int_vec_type, "");
655 addr = LLVMBuildExtractElement(bld->base.builder,
656 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
657 "");
658 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
659 }
660
661 switch( reg->Register.File ) {
662 case TGSI_FILE_OUTPUT:
663 lp_exec_mask_store(&bld->exec_mask, pred, value,
664 bld->outputs[reg->Register.Index][chan_index]);
665 break;
666
667 case TGSI_FILE_TEMPORARY: {
668 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
669 chan_index,
670 reg->Register.Indirect,
671 addr);
672 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
673 break;
674 }
675
676 case TGSI_FILE_ADDRESS:
677 lp_exec_mask_store(&bld->exec_mask, pred, value,
678 bld->addr[reg->Indirect.Index][chan_index]);
679 break;
680
681 case TGSI_FILE_PREDICATE:
682 lp_exec_mask_store(&bld->exec_mask, pred, value,
683 bld->preds[index][chan_index]);
684 break;
685
686 default:
687 assert( 0 );
688 }
689 }
690
691
692 /**
693 * High-level instruction translators.
694 */
695
696 enum tex_modifier {
697 TEX_MODIFIER_NONE = 0,
698 TEX_MODIFIER_PROJECTED,
699 TEX_MODIFIER_LOD_BIAS,
700 TEX_MODIFIER_EXPLICIT_LOD,
701 TEX_MODIFIER_EXPLICIT_DERIV
702 };
703
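/*
 * Texture sampling.  The modifier selects how the LOD/derivatives are
 * obtained: TEX -> NONE, TXP -> PROJECTED, TXB -> LOD_BIAS,
 * TXL -> EXPLICIT_LOD, TXD -> EXPLICIT_DERIV (see the opcode switch below).
 */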
704 static void
705 emit_tex( struct lp_build_tgsi_soa_context *bld,
706 const struct tgsi_full_instruction *inst,
707 enum tex_modifier modifier,
708 LLVMValueRef *texel)
709 {
710 unsigned unit;
711 LLVMValueRef lod_bias, explicit_lod;
712 LLVMValueRef oow = NULL;
713 LLVMValueRef coords[3];
714 LLVMValueRef ddx[3];
715 LLVMValueRef ddy[3];
716 unsigned num_coords;
717 unsigned i;
718
719 if (!bld->sampler) {
720 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
721 for (i = 0; i < 4; i++) {
722 texel[i] = bld->base.undef;
723 }
724 return;
725 }
726
727 switch (inst->Texture.Texture) {
728 case TGSI_TEXTURE_1D:
729 num_coords = 1;
730 break;
731 case TGSI_TEXTURE_2D:
732 case TGSI_TEXTURE_RECT:
733 num_coords = 2;
734 break;
735 case TGSI_TEXTURE_SHADOW1D:
736 case TGSI_TEXTURE_SHADOW2D:
737 case TGSI_TEXTURE_SHADOWRECT:
738 case TGSI_TEXTURE_3D:
739 case TGSI_TEXTURE_CUBE:
740 num_coords = 3;
741 break;
742 default:
743 assert(0);
744 return;
745 }
746
747 if (modifier == TEX_MODIFIER_LOD_BIAS) {
748 lod_bias = emit_fetch( bld, inst, 0, 3 );
749 explicit_lod = NULL;
750 }
751 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
752 lod_bias = NULL;
753 explicit_lod = emit_fetch( bld, inst, 0, 3 );
754 }
755 else {
756 lod_bias = NULL;
757 explicit_lod = NULL;
758 }
759
760 if (modifier == TEX_MODIFIER_PROJECTED) {
761 oow = emit_fetch( bld, inst, 0, 3 );
762 oow = lp_build_rcp(&bld->base, oow);
763 }
764
765 for (i = 0; i < num_coords; i++) {
766 coords[i] = emit_fetch( bld, inst, 0, i );
767 if (modifier == TEX_MODIFIER_PROJECTED)
768 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
769 }
770 for (i = num_coords; i < 3; i++) {
771 coords[i] = bld->base.undef;
772 }
773
774 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
775 for (i = 0; i < num_coords; i++) {
776 ddx[i] = emit_fetch( bld, inst, 1, i );
777 ddy[i] = emit_fetch( bld, inst, 2, i );
778 }
779 unit = inst->Src[3].Register.Index;
780 } else {
781 for (i = 0; i < num_coords; i++) {
782 ddx[i] = emit_ddx( bld, coords[i] );
783 ddy[i] = emit_ddy( bld, coords[i] );
784 }
785 unit = inst->Src[1].Register.Index;
786 }
787 for (i = num_coords; i < 3; i++) {
788 ddx[i] = bld->base.undef;
789 ddy[i] = bld->base.undef;
790 }
791
792 bld->sampler->emit_fetch_texel(bld->sampler,
793 bld->base.builder,
794 bld->base.type,
795 unit, num_coords, coords,
796 ddx, ddy,
797 lod_bias, explicit_lod,
798 texel);
799 }
800
801
802 /**
803 * Kill fragment if any of the src register values are negative.
804 */
805 static void
806 emit_kil(
807 struct lp_build_tgsi_soa_context *bld,
808 const struct tgsi_full_instruction *inst )
809 {
810 const struct tgsi_full_src_register *reg = &inst->Src[0];
811 LLVMValueRef terms[NUM_CHANNELS];
812 LLVMValueRef mask;
813 unsigned chan_index;
814
815 memset(&terms, 0, sizeof terms);
816
817 FOR_EACH_CHANNEL( chan_index ) {
818 unsigned swizzle;
819
820 /* Unswizzle channel */
821 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
822
823 /* Check if the component has not been already tested. */
824 assert(swizzle < NUM_CHANNELS);
825 if( !terms[swizzle] )
826 /* TODO: change the comparison operator instead of setting the sign */
827 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
828 }
829
830 mask = NULL;
831 FOR_EACH_CHANNEL( chan_index ) {
832 if(terms[chan_index]) {
833 LLVMValueRef chan_mask;
834
835 /*
836 * If term < 0 then mask = 0 else mask = ~0.
837 */
838 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
839
840 if(mask)
841 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
842 else
843 mask = chan_mask;
844 }
845 }
846
847 if(mask)
848 lp_build_mask_update(bld->mask, mask);
849 }
850
851
852 /**
853 * Predicated fragment kill.
854 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
855 * The only predication is the execution mask which will apply if
856 * we're inside a loop or conditional.
857 */
858 static void
859 emit_kilp(struct lp_build_tgsi_soa_context *bld,
860 const struct tgsi_full_instruction *inst)
861 {
862 LLVMValueRef mask;
863
864 /* For those channels which are "alive", disable fragment shader
865 * execution.
866 */
867 if (bld->exec_mask.has_mask) {
868 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
869 }
870 else {
871 mask = bld->base.zero;
872 }
873
874 lp_build_mask_update(bld->mask, mask);
875 }
876
877 static void
878 emit_declaration(
879 struct lp_build_tgsi_soa_context *bld,
880 const struct tgsi_full_declaration *decl)
881 {
882 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
883
884 unsigned first = decl->Range.First;
885 unsigned last = decl->Range.Last;
886 unsigned idx, i;
887
888 for (idx = first; idx <= last; ++idx) {
889 switch (decl->Declaration.File) {
890 case TGSI_FILE_TEMPORARY:
891 assert(idx < LP_MAX_TGSI_TEMPS);
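         /* With indirect addressing every temporary must be addressable at
          * runtime, so back them all with a single array of (last+1)*4
          * vectors instead of individual allocas.
          */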
892 if (bld->has_indirect_addressing) {
893 LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
894 last*4 + 4, 0);
895 bld->temps_array = lp_build_array_alloca(bld->base.builder,
896 vec_type, val, "");
897 } else {
898 for (i = 0; i < NUM_CHANNELS; i++)
899 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
900 vec_type, "");
901 }
902 break;
903
904 case TGSI_FILE_OUTPUT:
905 for (i = 0; i < NUM_CHANNELS; i++)
906 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
907 vec_type, "");
908 break;
909
910 case TGSI_FILE_ADDRESS:
911 assert(idx < LP_MAX_TGSI_ADDRS);
912 for (i = 0; i < NUM_CHANNELS; i++)
913 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
914 vec_type, "");
915 break;
916
917 case TGSI_FILE_PREDICATE:
918 assert(idx < LP_MAX_TGSI_PREDS);
919 for (i = 0; i < NUM_CHANNELS; i++)
920 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
921 vec_type, "");
922 break;
923
924 default:
925 /* don't need to declare other vars */
926 break;
927 }
928 }
929 }
930
931
932 /**
933 * Emit LLVM for one TGSI instruction.
934     * \return TRUE for success, FALSE otherwise
935 */
936 static boolean
937 emit_instruction(
938 struct lp_build_tgsi_soa_context *bld,
939 const struct tgsi_full_instruction *inst,
940 const struct tgsi_opcode_info *info)
941 {
942 unsigned chan_index;
943 LLVMValueRef src0, src1, src2;
944 LLVMValueRef tmp0, tmp1, tmp2;
945 LLVMValueRef tmp3 = NULL;
946 LLVMValueRef tmp4 = NULL;
947 LLVMValueRef tmp5 = NULL;
948 LLVMValueRef tmp6 = NULL;
949 LLVMValueRef tmp7 = NULL;
950 LLVMValueRef res;
951 LLVMValueRef dst0[NUM_CHANNELS];
952
953 /*
954 * Stores and write masks are handled in a general fashion after the long
955 * instruction opcode switch statement.
956 *
957     * Although not strictly necessary, we avoid generating instructions for
958     * channels which won't be stored, in cases where that's easy. For some
959 * complex instructions, like texture sampling, it is more convenient to
960 * assume a full writemask and then let LLVM optimization passes eliminate
961 * redundant code.
962 */
963
964 assert(info->num_dst <= 1);
965 if (info->num_dst) {
966 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
967 dst0[chan_index] = bld->base.undef;
968 }
969 }
970
971 switch (inst->Instruction.Opcode) {
972 case TGSI_OPCODE_ARL:
973 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
974 tmp0 = emit_fetch( bld, inst, 0, chan_index );
975 tmp0 = lp_build_floor(&bld->base, tmp0);
976 dst0[chan_index] = tmp0;
977 }
978 break;
979
980 case TGSI_OPCODE_MOV:
981 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
982 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
983 }
984 break;
985
986 case TGSI_OPCODE_LIT:
987 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
988 dst0[CHAN_X] = bld->base.one;
989 }
990 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
991 src0 = emit_fetch( bld, inst, 0, CHAN_X );
992 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
993 }
994 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
995 /* XMM[1] = SrcReg[0].yyyy */
996 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
997 /* XMM[1] = max(XMM[1], 0) */
998 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
999 /* XMM[2] = SrcReg[0].wwww */
1000 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1001 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1002 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1003 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1004 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1005 }
1006 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1007 dst0[CHAN_W] = bld->base.one;
1008 }
1009 break;
1010
1011 case TGSI_OPCODE_RCP:
1012 /* TGSI_OPCODE_RECIP */
1013 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1014 res = lp_build_rcp(&bld->base, src0);
1015 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1016 dst0[chan_index] = res;
1017 }
1018 break;
1019
1020 case TGSI_OPCODE_RSQ:
1021 /* TGSI_OPCODE_RECIPSQRT */
1022 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1023 src0 = lp_build_abs(&bld->base, src0);
1024 res = lp_build_rsqrt(&bld->base, src0);
1025 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1026 dst0[chan_index] = res;
1027 }
1028 break;
1029
1030 case TGSI_OPCODE_EXP:
1031 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1032 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1033 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1034 LLVMValueRef *p_exp2_int_part = NULL;
1035 LLVMValueRef *p_frac_part = NULL;
1036 LLVMValueRef *p_exp2 = NULL;
1037
1038 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1039
1040 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1041 p_exp2_int_part = &tmp0;
1042 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1043 p_frac_part = &tmp1;
1044 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1045 p_exp2 = &tmp2;
1046
1047 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1048
1049 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1050 dst0[CHAN_X] = tmp0;
1051 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1052 dst0[CHAN_Y] = tmp1;
1053 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1054 dst0[CHAN_Z] = tmp2;
1055 }
1056 /* dst.w = 1.0 */
1057 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1058 dst0[CHAN_W] = bld->base.one;
1059 }
1060 break;
1061
1062 case TGSI_OPCODE_LOG:
1063 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1064 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1065 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1066 LLVMValueRef *p_floor_log2 = NULL;
1067 LLVMValueRef *p_exp = NULL;
1068 LLVMValueRef *p_log2 = NULL;
1069
1070 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1071 src0 = lp_build_abs( &bld->base, src0 );
1072
1073 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1074 p_floor_log2 = &tmp0;
1075 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1076 p_exp = &tmp1;
1077 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1078 p_log2 = &tmp2;
1079
1080 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1081
1082 /* dst.x = floor(lg2(abs(src.x))) */
1083 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1084 dst0[CHAN_X] = tmp0;
1085             /* dst.y = abs(src.x)/ex2(floor(lg2(abs(src.x)))) */
1086 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1087 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1088 }
1089 /* dst.z = lg2(abs(src.x)) */
1090 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1091 dst0[CHAN_Z] = tmp2;
1092 }
1093 /* dst.w = 1.0 */
1094 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1095 dst0[CHAN_W] = bld->base.one;
1096 }
1097 break;
1098
1099 case TGSI_OPCODE_MUL:
1100 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1101 src0 = emit_fetch( bld, inst, 0, chan_index );
1102 src1 = emit_fetch( bld, inst, 1, chan_index );
1103 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1104 }
1105 break;
1106
1107 case TGSI_OPCODE_ADD:
1108 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1109 src0 = emit_fetch( bld, inst, 0, chan_index );
1110 src1 = emit_fetch( bld, inst, 1, chan_index );
1111 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1112 }
1113 break;
1114
1115 case TGSI_OPCODE_DP3:
1116 /* TGSI_OPCODE_DOT3 */
1117 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1118 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1119 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1120 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1121 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1122 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1123 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1124 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1125 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1126 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1127 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1128 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1129 dst0[chan_index] = tmp0;
1130 }
1131 break;
1132
1133 case TGSI_OPCODE_DP4:
1134 /* TGSI_OPCODE_DOT4 */
1135 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1136 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1137 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1138 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1139 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1140 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1141 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1142 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1143 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1144 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1145 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1146 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1147 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1148 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1149 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1150 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1151 dst0[chan_index] = tmp0;
1152 }
1153 break;
1154
1155 case TGSI_OPCODE_DST:
1156 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1157 dst0[CHAN_X] = bld->base.one;
1158 }
1159 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1160 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1161 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1162 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1163 }
1164 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1165 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1166 }
1167 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1168 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1169 }
1170 break;
1171
1172 case TGSI_OPCODE_MIN:
1173 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1174 src0 = emit_fetch( bld, inst, 0, chan_index );
1175 src1 = emit_fetch( bld, inst, 1, chan_index );
1176 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1177 }
1178 break;
1179
1180 case TGSI_OPCODE_MAX:
1181 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1182 src0 = emit_fetch( bld, inst, 0, chan_index );
1183 src1 = emit_fetch( bld, inst, 1, chan_index );
1184 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1185 }
1186 break;
1187
1188 case TGSI_OPCODE_SLT:
1189 /* TGSI_OPCODE_SETLT */
1190 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1191 src0 = emit_fetch( bld, inst, 0, chan_index );
1192 src1 = emit_fetch( bld, inst, 1, chan_index );
1193 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1194 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1195 }
1196 break;
1197
1198 case TGSI_OPCODE_SGE:
1199 /* TGSI_OPCODE_SETGE */
1200 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1201 src0 = emit_fetch( bld, inst, 0, chan_index );
1202 src1 = emit_fetch( bld, inst, 1, chan_index );
1203 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1204 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1205 }
1206 break;
1207
1208 case TGSI_OPCODE_MAD:
1209 /* TGSI_OPCODE_MADD */
1210 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1211 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1212 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1213 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1214 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1215 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1216 dst0[chan_index] = tmp0;
1217 }
1218 break;
1219
1220 case TGSI_OPCODE_SUB:
1221 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1222 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1223 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1224 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1225 }
1226 break;
1227
1228 case TGSI_OPCODE_LRP:
1229 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1230 src0 = emit_fetch( bld, inst, 0, chan_index );
1231 src1 = emit_fetch( bld, inst, 1, chan_index );
1232 src2 = emit_fetch( bld, inst, 2, chan_index );
1233 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1234 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1235 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1236 }
1237 break;
1238
1239 case TGSI_OPCODE_CND:
1240 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1241 src0 = emit_fetch( bld, inst, 0, chan_index );
1242 src1 = emit_fetch( bld, inst, 1, chan_index );
1243 src2 = emit_fetch( bld, inst, 2, chan_index );
1244 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1245 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1246 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1247 }
1248 break;
1249
1250 case TGSI_OPCODE_DP2A:
1251 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1252 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1253 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1254 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1255 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1256 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1257 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1258 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1259 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1260 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1261 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1262 }
1263 break;
1264
1265 case TGSI_OPCODE_FRC:
1266 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1267 src0 = emit_fetch( bld, inst, 0, chan_index );
1268 tmp0 = lp_build_floor(&bld->base, src0);
1269 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1270 dst0[chan_index] = tmp0;
1271 }
1272 break;
1273
1274 case TGSI_OPCODE_CLAMP:
1275 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1276 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1277 src1 = emit_fetch( bld, inst, 1, chan_index );
1278 src2 = emit_fetch( bld, inst, 2, chan_index );
1279 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1280 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1281 dst0[chan_index] = tmp0;
1282 }
1283 break;
1284
1285 case TGSI_OPCODE_FLR:
1286 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1287 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1288 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1289 }
1290 break;
1291
1292 case TGSI_OPCODE_ROUND:
1293 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1294 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1295 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1296 }
1297 break;
1298
1299 case TGSI_OPCODE_EX2: {
1300 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1301 tmp0 = lp_build_exp2( &bld->base, tmp0);
1302 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1303 dst0[chan_index] = tmp0;
1304 }
1305 break;
1306 }
1307
1308 case TGSI_OPCODE_LG2:
1309 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1310 tmp0 = lp_build_log2( &bld->base, tmp0);
1311 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1312 dst0[chan_index] = tmp0;
1313 }
1314 break;
1315
1316 case TGSI_OPCODE_POW:
1317 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1318 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1319 res = lp_build_pow( &bld->base, src0, src1 );
1320 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1321 dst0[chan_index] = res;
1322 }
1323 break;
1324
1325 case TGSI_OPCODE_XPD:
1326 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1327 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1328 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1329 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1330 }
1331 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1332 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1333 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1334 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1335 }
1336 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1337 tmp2 = tmp0;
1338 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1339 tmp5 = tmp3;
1340 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1341 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1342 dst0[CHAN_X] = tmp2;
1343 }
1344 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1345 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1346 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1347 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1348 }
1349 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1350 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1351 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1352 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1353 dst0[CHAN_Y] = tmp3;
1354 }
1355 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1356 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1357 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1358 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1359 dst0[CHAN_Z] = tmp5;
1360 }
1361 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1362 dst0[CHAN_W] = bld->base.one;
1363 }
1364 break;
1365
1366 case TGSI_OPCODE_ABS:
1367 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1368 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1369 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1370 }
1371 break;
1372
1373 case TGSI_OPCODE_RCC:
1374 /* deprecated? */
1375 assert(0);
1376 return FALSE;
1377
1378 case TGSI_OPCODE_DPH:
1379 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1380 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1381 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1382 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1383 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1384 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1385 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1386 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1387 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1388 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1389 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1390 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1391 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1392 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1393 dst0[chan_index] = tmp0;
1394 }
1395 break;
1396
1397 case TGSI_OPCODE_COS:
1398 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1399 tmp0 = lp_build_cos( &bld->base, tmp0 );
1400 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1401 dst0[chan_index] = tmp0;
1402 }
1403 break;
1404
1405 case TGSI_OPCODE_DDX:
1406 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1407 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1408 }
1409 break;
1410
1411 case TGSI_OPCODE_DDY:
1412 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1413 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1414 }
1415 break;
1416
1417 case TGSI_OPCODE_KILP:
1418 /* predicated kill */
1419 emit_kilp( bld, inst );
1420 break;
1421
1422 case TGSI_OPCODE_KIL:
1423 /* conditional kill */
1424 emit_kil( bld, inst );
1425 break;
1426
1427 case TGSI_OPCODE_PK2H:
1428 return FALSE;
1429 break;
1430
1431 case TGSI_OPCODE_PK2US:
1432 return FALSE;
1433 break;
1434
1435 case TGSI_OPCODE_PK4B:
1436 return FALSE;
1437 break;
1438
1439 case TGSI_OPCODE_PK4UB:
1440 return FALSE;
1441 break;
1442
1443 case TGSI_OPCODE_RFL:
1444 return FALSE;
1445 break;
1446
1447 case TGSI_OPCODE_SEQ:
1448 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1449 src0 = emit_fetch( bld, inst, 0, chan_index );
1450 src1 = emit_fetch( bld, inst, 1, chan_index );
1451 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1452 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1453 }
1454 break;
1455
1456 case TGSI_OPCODE_SFL:
1457 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1458 dst0[chan_index] = bld->base.zero;
1459 }
1460 break;
1461
1462 case TGSI_OPCODE_SGT:
1463 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1464 src0 = emit_fetch( bld, inst, 0, chan_index );
1465 src1 = emit_fetch( bld, inst, 1, chan_index );
1466 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1467 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1468 }
1469 break;
1470
1471 case TGSI_OPCODE_SIN:
1472 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1473 tmp0 = lp_build_sin( &bld->base, tmp0 );
1474 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1475 dst0[chan_index] = tmp0;
1476 }
1477 break;
1478
1479 case TGSI_OPCODE_SLE:
1480 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1481 src0 = emit_fetch( bld, inst, 0, chan_index );
1482 src1 = emit_fetch( bld, inst, 1, chan_index );
1483 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1484 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1485 }
1486 break;
1487
1488 case TGSI_OPCODE_SNE:
1489 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1490 src0 = emit_fetch( bld, inst, 0, chan_index );
1491 src1 = emit_fetch( bld, inst, 1, chan_index );
1492 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1493 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1494 }
1495 break;
1496
1497 case TGSI_OPCODE_STR:
1498 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1499 dst0[chan_index] = bld->base.one;
1500 }
1501 break;
1502
1503 case TGSI_OPCODE_TEX:
1504 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1505 break;
1506
1507 case TGSI_OPCODE_TXD:
1508 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1509 break;
1510
1511 case TGSI_OPCODE_UP2H:
1512 /* deprecated */
1513 assert (0);
1514 return FALSE;
1515 break;
1516
1517 case TGSI_OPCODE_UP2US:
1518 /* deprecated */
1519 assert(0);
1520 return FALSE;
1521 break;
1522
1523 case TGSI_OPCODE_UP4B:
1524 /* deprecated */
1525 assert(0);
1526 return FALSE;
1527 break;
1528
1529 case TGSI_OPCODE_UP4UB:
1530 /* deprecated */
1531 assert(0);
1532 return FALSE;
1533 break;
1534
1535 case TGSI_OPCODE_X2D:
1536 /* deprecated? */
1537 assert(0);
1538 return FALSE;
1539 break;
1540
1541 case TGSI_OPCODE_ARA:
1542 /* deprecated */
1543 assert(0);
1544 return FALSE;
1545 break;
1546
1547 case TGSI_OPCODE_ARR:
1548 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1549 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1550 tmp0 = lp_build_round(&bld->base, tmp0);
1551 dst0[chan_index] = tmp0;
1552 }
1553 break;
1554
1555 case TGSI_OPCODE_BRA:
1556 /* deprecated */
1557 assert(0);
1558 return FALSE;
1559 break;
1560
1561 case TGSI_OPCODE_CAL:
1562 /* FIXME */
1563 return FALSE;
1564 break;
1565
1566 case TGSI_OPCODE_RET:
1567 /* FIXME */
1568 return FALSE;
1569 break;
1570
1571 case TGSI_OPCODE_END:
1572 break;
1573
1574 case TGSI_OPCODE_SSG:
1575 /* TGSI_OPCODE_SGN */
1576 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1577 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1578 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1579 }
1580 break;
1581
1582 case TGSI_OPCODE_CMP:
1583 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1584 src0 = emit_fetch( bld, inst, 0, chan_index );
1585 src1 = emit_fetch( bld, inst, 1, chan_index );
1586 src2 = emit_fetch( bld, inst, 2, chan_index );
1587 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1588 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1589 }
1590 break;
1591
1592 case TGSI_OPCODE_SCS:
1593 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1594 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1595 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1596 }
1597 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1598 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1599 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1600 }
1601 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1602 dst0[CHAN_Z] = bld->base.zero;
1603 }
1604 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1605 dst0[CHAN_W] = bld->base.one;
1606 }
1607 break;
1608
1609 case TGSI_OPCODE_TXB:
1610 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1611 break;
1612
1613 case TGSI_OPCODE_NRM:
1614 /* fall-through */
1615 case TGSI_OPCODE_NRM4:
1616 /* 3 or 4-component normalization */
1617 {
1618 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1619
1620 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1621 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1622 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1623 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1624
1625 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1626
1627 /* xmm4 = src.x */
1628 /* xmm0 = src.x * src.x */
1629 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1630 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1631 tmp4 = tmp0;
1632 }
1633 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1634
1635 /* xmm5 = src.y */
1636 /* xmm0 = xmm0 + src.y * src.y */
1637 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1638 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1639 tmp5 = tmp1;
1640 }
1641 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1642 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1643
1644 /* xmm6 = src.z */
1645 /* xmm0 = xmm0 + src.z * src.z */
1646 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1647 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1648 tmp6 = tmp1;
1649 }
1650 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1651 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1652
1653 if (dims == 4) {
1654 /* xmm7 = src.w */
1655 /* xmm0 = xmm0 + src.w * src.w */
1656 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1657 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1658 tmp7 = tmp1;
1659 }
1660 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1661 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1662 }
1663
1664 /* xmm1 = 1 / sqrt(xmm0) */
1665 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1666
1667 /* dst.x = xmm1 * src.x */
1668 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1669 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1670 }
1671
1672 /* dst.y = xmm1 * src.y */
1673 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1674 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1675 }
1676
1677 /* dst.z = xmm1 * src.z */
1678 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1679 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1680 }
1681
1682 /* dst.w = xmm1 * src.w */
1683             if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4) {
1684 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1685 }
1686 }
1687
1688 /* dst.w = 1.0 */
1689 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1690 dst0[CHAN_W] = bld->base.one;
1691 }
1692 }
1693 break;
1694
1695 case TGSI_OPCODE_DIV:
1696 /* deprecated */
1697 assert( 0 );
1698 return FALSE;
1699 break;
1700
1701 case TGSI_OPCODE_DP2:
1702 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1703 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1704 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1705 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1706 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1707 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1708 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1709 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1710 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1711 }
1712 break;
1713
1714 case TGSI_OPCODE_TXL:
1715 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1716 break;
1717
1718 case TGSI_OPCODE_TXP:
1719 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1720 break;
1721
1722 case TGSI_OPCODE_BRK:
1723 lp_exec_break(&bld->exec_mask);
1724 break;
1725
1726 case TGSI_OPCODE_IF:
1727 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1728 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1729 tmp0, bld->base.zero);
1730 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1731 break;
1732
1733 case TGSI_OPCODE_BGNLOOP:
1734 lp_exec_bgnloop(&bld->exec_mask);
1735 break;
1736
1737 case TGSI_OPCODE_ELSE:
1738 lp_exec_mask_cond_invert(&bld->exec_mask);
1739 break;
1740
1741 case TGSI_OPCODE_ENDIF:
1742 lp_exec_mask_cond_pop(&bld->exec_mask);
1743 break;
1744
1745 case TGSI_OPCODE_ENDLOOP:
1746 lp_exec_endloop(&bld->exec_mask);
1747 break;
1748
1749 case TGSI_OPCODE_PUSHA:
1750 /* deprecated? */
1751 assert(0);
1752 return FALSE;
1753 break;
1754
1755 case TGSI_OPCODE_POPA:
1756 /* deprecated? */
1757 assert(0);
1758 return FALSE;
1759 break;
1760
1761 case TGSI_OPCODE_CEIL:
1762 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1763 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1764 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1765 }
1766 break;
1767
1768 case TGSI_OPCODE_I2F:
1769 /* deprecated? */
1770 assert(0);
1771 return FALSE;
1772 break;
1773
1774 case TGSI_OPCODE_NOT:
1775 /* deprecated? */
1776 assert(0);
1777 return FALSE;
1778 break;
1779
1780 case TGSI_OPCODE_TRUNC:
1781 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1782 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1783 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1784 }
1785 break;
1786
1787 case TGSI_OPCODE_SHL:
1788 /* deprecated? */
1789 assert(0);
1790 return FALSE;
1791 break;
1792
1793 case TGSI_OPCODE_ISHR:
1794 /* deprecated? */
1795 assert(0);
1796 return FALSE;
1797 break;
1798
1799 case TGSI_OPCODE_AND:
1800 /* deprecated? */
1801 assert(0);
1802 return FALSE;
1803 break;
1804
1805 case TGSI_OPCODE_OR:
1806 /* deprecated? */
1807 assert(0);
1808 return FALSE;
1809 break;
1810
1811 case TGSI_OPCODE_MOD:
1812 /* deprecated? */
1813 assert(0);
1814 return FALSE;
1815 break;
1816
1817 case TGSI_OPCODE_XOR:
1818 /* deprecated? */
1819 assert(0);
1820 return FALSE;
1821 break;
1822
1823 case TGSI_OPCODE_SAD:
1824 /* deprecated? */
1825 assert(0);
1826 return FALSE;
1827 break;
1828
1829 case TGSI_OPCODE_TXF:
1830 /* deprecated? */
1831 assert(0);
1832 return FALSE;
1833 break;
1834
1835 case TGSI_OPCODE_TXQ:
1836 /* deprecated? */
1837 assert(0);
1838 return FALSE;
1839 break;
1840
1841 case TGSI_OPCODE_CONT:
1842 lp_exec_continue(&bld->exec_mask);
1843 break;
1844
1845 case TGSI_OPCODE_EMIT:
1846 return FALSE;
1847 break;
1848
1849 case TGSI_OPCODE_ENDPRIM:
1850 return FALSE;
1851 break;
1852
1853 case TGSI_OPCODE_NOP:
1854 break;
1855
1856 default:
1857 return FALSE;
1858 }
1859
1860 if(info->num_dst) {
1861 LLVMValueRef pred[NUM_CHANNELS];
1862
1863 emit_fetch_predicate( bld, inst, pred );
1864
1865 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1866 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1867 }
1868 }
1869
1870 return TRUE;
1871 }
1872
1873
1874 void
1875 lp_build_tgsi_soa(LLVMBuilderRef builder,
1876 const struct tgsi_token *tokens,
1877 struct lp_type type,
1878 struct lp_build_mask_context *mask,
1879 LLVMValueRef consts_ptr,
1880 const LLVMValueRef *pos,
1881 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1882 LLVMValueRef (*outputs)[NUM_CHANNELS],
1883 struct lp_build_sampler_soa *sampler,
1884 const struct tgsi_shader_info *info)
1885 {
1886 struct lp_build_tgsi_soa_context bld;
1887 struct tgsi_parse_context parse;
1888 uint num_immediates = 0;
1889 unsigned i;
1890
1891 /* Setup build context */
1892 memset(&bld, 0, sizeof bld);
1893 lp_build_context_init(&bld.base, builder, type);
1894 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1895 bld.mask = mask;
1896 bld.pos = pos;
1897 bld.inputs = inputs;
1898 bld.outputs = outputs;
1899 bld.consts_ptr = consts_ptr;
1900 bld.sampler = sampler;
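   /* ARL/ARR write the address register, so their presence implies the shader
    * may index temporaries or constants indirectly; see emit_declaration(). */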
1901 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1902 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1903
1904 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1905
1906 tgsi_parse_init( &parse, tokens );
1907
1908 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1909 tgsi_parse_token( &parse );
1910
1911 switch( parse.FullToken.Token.Type ) {
1912 case TGSI_TOKEN_TYPE_DECLARATION:
1913 /* Inputs already interpolated */
1914 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1915 break;
1916
1917 case TGSI_TOKEN_TYPE_INSTRUCTION:
1918 {
1919 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1920 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
1921 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
1922 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1923 opcode_info->mnemonic);
1924 }
1925
1926 break;
1927
1928 case TGSI_TOKEN_TYPE_IMMEDIATE:
1929 /* simply copy the immediate values into the next immediates[] slot */
1930 {
1931 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1932 assert(size <= 4);
1933 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1934 for( i = 0; i < size; ++i )
1935 bld.immediates[num_immediates][i] =
1936 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1937 for( i = size; i < 4; ++i )
1938 bld.immediates[num_immediates][i] = bld.base.undef;
1939 num_immediates++;
1940 }
1941 break;
1942
1943 case TGSI_TOKEN_TYPE_PROPERTY:
1944 break;
1945
1946 default:
1947 assert( 0 );
1948 }
1949 }
1950 if (0) {
1951 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1952 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1953 debug_printf("11111111111111111111111111111 \n");
1954 tgsi_dump(tokens, 0);
1955 lp_debug_dump_value(function);
1956 debug_printf("2222222222222222222222222222 \n");
1957 }
1958 tgsi_parse_free( &parse );
1959 }
1960
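/*
 * Rough usage sketch (hypothetical caller, for illustration only -- the real
 * entry points live in the llvmpipe/gallivm code that sets up the inputs,
 * sampler and execution mask):
 *
 *    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
 *    struct tgsi_shader_info info;
 *
 *    tgsi_scan_shader(tokens, &info);
 *    lp_build_tgsi_soa(builder, tokens, type, mask,
 *                      consts_ptr, pos, inputs, outputs,
 *                      sampler, &info);
 *
 *    // outputs[][] now holds pointers (allocas) to the shader's result
 *    // vectors, to be loaded and written out by the caller.
 */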