gallivm: rename a var
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_tgsi.h"
57 #include "lp_bld_limits.h"
58 #include "lp_bld_debug.h"
59
60
61 #define FOR_EACH_CHANNEL( CHAN )\
62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
63
64 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
66
67 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
69
70 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
71 FOR_EACH_CHANNEL( CHAN )\
72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
73
74 #define CHAN_X 0
75 #define CHAN_Y 1
76 #define CHAN_Z 2
77 #define CHAN_W 3
78
79 #define QUAD_TOP_LEFT 0
80 #define QUAD_TOP_RIGHT 1
81 #define QUAD_BOTTOM_LEFT 2
82 #define QUAD_BOTTOM_RIGHT 3
83
84
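/*
 * Per-channel execution mask used to emulate TGSI control flow in SoA form:
 * cond_mask tracks IF/ELSE nesting, cont_mask and break_mask track CONT/BRK
 * inside loops, and exec_mask is the AND of all three.  Register stores are
 * then predicated on exec_mask (see lp_exec_mask_store below).
 */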
85 struct lp_exec_mask {
86 struct lp_build_context *bld;
87
88 boolean has_mask;
89
90 LLVMTypeRef int_vec_type;
91
92 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
93 int cond_stack_size;
94 LLVMValueRef cond_mask;
95
96 LLVMBasicBlockRef loop_block;
97 LLVMValueRef cont_mask;
98 LLVMValueRef break_mask;
99 LLVMValueRef break_var;
100 struct {
101 LLVMBasicBlockRef loop_block;
102 LLVMValueRef cont_mask;
103 LLVMValueRef break_mask;
104 LLVMValueRef break_var;
105 } loop_stack[LP_MAX_TGSI_NESTING];
106 int loop_stack_size;
107
108 LLVMValueRef exec_mask;
109 };
110
111 struct lp_build_tgsi_soa_context
112 {
113 struct lp_build_context base;
114
115 /* Builder for integer masks and indices */
116 struct lp_build_context int_bld;
117
118 LLVMValueRef consts_ptr;
119 const LLVMValueRef *pos;
120 const LLVMValueRef (*inputs)[NUM_CHANNELS];
121 LLVMValueRef (*outputs)[NUM_CHANNELS];
122
123 const struct lp_build_sampler_soa *sampler;
124
125 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
126 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
127 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
128 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
129
  130    /* we allocate an array of temps if we have indirect
  131     * addressing and then the temps above are unused */
132 LLVMValueRef temps_array;
133 boolean has_indirect_addressing;
134
135 struct lp_build_mask_context *mask;
136 struct lp_exec_mask exec_mask;
137 };
138
139 static const unsigned char
140 swizzle_left[4] = {
141 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
142 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
143 };
144
145 static const unsigned char
146 swizzle_right[4] = {
147 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
148 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
149 };
150
151 static const unsigned char
152 swizzle_top[4] = {
153 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
154 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
155 };
156
157 static const unsigned char
158 swizzle_bottom[4] = {
159 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
160 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
161 };
162
163 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
164 {
165 mask->bld = bld;
166 mask->has_mask = FALSE;
167 mask->cond_stack_size = 0;
168 mask->loop_stack_size = 0;
169
170 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
171 mask->break_mask = mask->cont_mask = mask->cond_mask =
172 LLVMConstAllOnes(mask->int_vec_type);
173 }
174
175 static void lp_exec_mask_update(struct lp_exec_mask *mask)
176 {
177 if (mask->loop_stack_size) {
  178       /* for loops we need to update the entire mask at runtime */
179 LLVMValueRef tmp;
180 assert(mask->break_mask);
181 tmp = LLVMBuildAnd(mask->bld->builder,
182 mask->cont_mask,
183 mask->break_mask,
184 "maskcb");
185 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
186 mask->cond_mask,
187 tmp,
188 "maskfull");
189 } else
190 mask->exec_mask = mask->cond_mask;
191
192
193 mask->has_mask = (mask->cond_stack_size > 0 ||
194 mask->loop_stack_size > 0);
195 }
196
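/*
 * Conditionals (IF/ELSE/ENDIF) are handled without branching: the current
 * condition mask is pushed onto cond_stack, replaced by the new condition
 * (or inverted against the previous mask for ELSE), and restored on ENDIF.
 * For example, for a fragment like
 *
 *    IF SRC.x;  MOV DST, A;  ELSE;  MOV DST, B;  ENDIF
 *
 * both MOVs are emitted unconditionally and only their stores are masked.
 */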
197 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
198 LLVMValueRef val)
199 {
200 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
201 if (mask->cond_stack_size == 0) {
202 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
203 }
204 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
205 assert(LLVMTypeOf(val) == mask->int_vec_type);
206 mask->cond_mask = val;
207
208 lp_exec_mask_update(mask);
209 }
210
211 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
212 {
213 LLVMValueRef prev_mask;
214 LLVMValueRef inv_mask;
215
216 assert(mask->cond_stack_size);
217 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
218 if (mask->cond_stack_size == 1) {
219 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
220 }
221
222 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
223
224 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
225 inv_mask,
226 prev_mask, "");
227 lp_exec_mask_update(mask);
228 }
229
230 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
231 {
232 assert(mask->cond_stack_size);
233 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
234 lp_exec_mask_update(mask);
235 }
236
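/*
 * BGNLOOP: save the enclosing loop state, allocate break_var so the break
 * mask survives across iterations, then start a new basic block for the
 * loop body and reload break_mask from memory.
 */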
237 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
238 {
239 if (mask->loop_stack_size == 0) {
240 assert(mask->loop_block == NULL);
241 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
242 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
243 assert(mask->break_var == NULL);
244 }
245
246 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
247
248 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
249 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
250 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
251 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
252 ++mask->loop_stack_size;
253
254 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
255 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
256
257 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
258 LLVMBuildBr(mask->bld->builder, mask->loop_block);
259 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
260
261 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
262
263 lp_exec_mask_update(mask);
264 }
265
266 static void lp_exec_break(struct lp_exec_mask *mask)
267 {
268 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
269 mask->exec_mask,
270 "break");
271
272 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
273 mask->break_mask,
274 exec_mask, "break_full");
275
276 lp_exec_mask_update(mask);
277 }
278
279 static void lp_exec_continue(struct lp_exec_mask *mask)
280 {
281 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
282 mask->exec_mask,
283 "");
284
285 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
286 mask->cont_mask,
287 exec_mask, "");
288
289 lp_exec_mask_update(mask);
290 }
291
292
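/*
 * ENDLOOP: restore the continue mask, write the break mask back to
 * break_var, and branch back to the loop header while any channel of
 * exec_mask is still active; otherwise fall through to "endloop".
 */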
293 static void lp_exec_endloop(struct lp_exec_mask *mask)
294 {
295 LLVMBasicBlockRef endloop;
296 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
297 mask->bld->type.length);
298 LLVMValueRef i1cond;
299
300 assert(mask->break_mask);
301
302 /*
303 * Restore the cont_mask, but don't pop
304 */
305 assert(mask->loop_stack_size);
306 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
307 lp_exec_mask_update(mask);
308
309 /*
310 * Unlike the continue mask, the break_mask must be preserved across loop
311 * iterations
312 */
313 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
314
  315    /* i1cond = (mask != 0) */
316 i1cond = LLVMBuildICmp(
317 mask->bld->builder,
318 LLVMIntNE,
319 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
320 LLVMConstNull(reg_type), "");
321
322 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
323
324 LLVMBuildCondBr(mask->bld->builder,
325 i1cond, mask->loop_block, endloop);
326
327 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
328
329 assert(mask->loop_stack_size);
330 --mask->loop_stack_size;
331 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
332 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
333 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
334 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
335
336 lp_exec_mask_update(mask);
337 }
338
339 /* stores val into an address pointed to by dst.
340 * mask->exec_mask is used to figure out which bits of val
341 * should be stored into the address
342 * (0 means don't store this bit, 1 means do store).
343 */
344 static void lp_exec_mask_store(struct lp_exec_mask *mask,
345 LLVMValueRef pred,
346 LLVMValueRef val,
347 LLVMValueRef dst)
348 {
349 /* Mix the predicate and execution mask */
350 if (mask->has_mask) {
351 if (pred) {
352 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
353 } else {
354 pred = mask->exec_mask;
355 }
356 }
357
358 if (pred) {
359 LLVMValueRef real_val, dst_val;
360
361 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
362 real_val = lp_build_select(mask->bld,
363 pred,
364 val, dst_val);
365
366 LLVMBuildStore(mask->bld->builder, real_val, dst);
367 } else
368 LLVMBuildStore(mask->bld->builder, val, dst);
369 }
370
371
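/*
 * Approximate screen-space derivatives across the 2x2 fragment quad:
 * ddx = right - left and ddy = top - bottom, using the quad swizzle
 * tables above to replicate the relevant quad members.
 */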
372 static LLVMValueRef
373 emit_ddx(struct lp_build_tgsi_soa_context *bld,
374 LLVMValueRef src)
375 {
376 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
377 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
378 return lp_build_sub(&bld->base, src_right, src_left);
379 }
380
381
382 static LLVMValueRef
383 emit_ddy(struct lp_build_tgsi_soa_context *bld,
384 LLVMValueRef src)
385 {
386 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
387 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
388 return lp_build_sub(&bld->base, src_top, src_bottom);
389 }
390
391 static LLVMValueRef
392 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
393 unsigned index,
394 unsigned chan,
395 boolean is_indirect,
396 LLVMValueRef addr)
397 {
398 assert(chan < 4);
399 if (!bld->has_indirect_addressing) {
400 return bld->temps[index][chan];
401 } else {
402 LLVMValueRef lindex =
403 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
404 if (is_indirect)
405 lindex = lp_build_add(&bld->base, lindex, addr);
406 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
407 }
408 }
409
410 /**
411 * Register fetch.
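 *
 * Fetches one channel of a source operand, honouring the swizzle, optional
 * indirect addressing through the ADDR file, and the absolute/negate sign
 * modifiers.  Constants are loaded as scalars and broadcast across the
 * SoA vector.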
412 */
413 static LLVMValueRef
414 emit_fetch(
415 struct lp_build_tgsi_soa_context *bld,
416 const struct tgsi_full_instruction *inst,
417 unsigned index,
418 const unsigned chan_index )
419 {
420 const struct tgsi_full_src_register *reg = &inst->Src[index];
421 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
422 LLVMValueRef res;
423 LLVMValueRef addr;
424
425 switch (swizzle) {
426 case TGSI_SWIZZLE_X:
427 case TGSI_SWIZZLE_Y:
428 case TGSI_SWIZZLE_Z:
429 case TGSI_SWIZZLE_W:
430
431 if (reg->Register.Indirect) {
432 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
433 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
434 addr = LLVMBuildLoad(bld->base.builder,
435 bld->addr[reg->Indirect.Index][swizzle],
436 "");
437 /* for indexing we want integers */
438 addr = LLVMBuildFPToSI(bld->base.builder, addr,
439 int_vec_type, "");
440 addr = LLVMBuildExtractElement(bld->base.builder,
441 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
442 "");
443 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
444 }
445
446 switch (reg->Register.File) {
447 case TGSI_FILE_CONSTANT: {
448 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
449 LLVMValueRef scalar, scalar_ptr;
450
451 if (reg->Register.Indirect) {
452 /*lp_build_printf(bld->base.builder,
453 "\taddr = %d\n", addr);*/
454 index = lp_build_add(&bld->base, index, addr);
455 }
456 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
457 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
458
459 res = lp_build_broadcast_scalar(&bld->base, scalar);
460 break;
461 }
462
463 case TGSI_FILE_IMMEDIATE:
464 res = bld->immediates[reg->Register.Index][swizzle];
465 assert(res);
466 break;
467
468 case TGSI_FILE_INPUT:
469 res = bld->inputs[reg->Register.Index][swizzle];
470 assert(res);
471 break;
472
473 case TGSI_FILE_TEMPORARY: {
474 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
475 swizzle,
476 reg->Register.Indirect,
477 addr);
478 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
479 if(!res)
480 return bld->base.undef;
481 break;
482 }
483
484 default:
485 assert( 0 );
486 return bld->base.undef;
487 }
488 break;
489
490 default:
491 assert( 0 );
492 return bld->base.undef;
493 }
494
495 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
496 case TGSI_UTIL_SIGN_CLEAR:
497 res = lp_build_abs( &bld->base, res );
498 break;
499
500 case TGSI_UTIL_SIGN_SET:
  501       /* TODO: Use bitwise OR for floating point */
502 res = lp_build_abs( &bld->base, res );
503 res = LLVMBuildNeg( bld->base.builder, res, "" );
504 break;
505
506 case TGSI_UTIL_SIGN_TOGGLE:
507 res = LLVMBuildNeg( bld->base.builder, res, "" );
508 break;
509
510 case TGSI_UTIL_SIGN_KEEP:
511 break;
512 }
513
514 return res;
515 }
516
517
518 /**
519 * Register fetch with derivatives.
520 */
521 static void
522 emit_fetch_deriv(
523 struct lp_build_tgsi_soa_context *bld,
524 const struct tgsi_full_instruction *inst,
525 unsigned index,
526 const unsigned chan_index,
527 LLVMValueRef *res,
528 LLVMValueRef *ddx,
529 LLVMValueRef *ddy)
530 {
531 LLVMValueRef src;
532
533 src = emit_fetch(bld, inst, index, chan_index);
534
535 if(res)
536 *res = src;
537
538 /* TODO: use interpolation coeffs for inputs */
539
540 if(ddx)
541 *ddx = emit_ddx(bld, src);
542
543 if(ddy)
544 *ddy = emit_ddy(bld, src);
545 }
546
547
548 /**
549 * Predicate.
550 */
551 static void
552 emit_fetch_predicate(
553 struct lp_build_tgsi_soa_context *bld,
554 const struct tgsi_full_instruction *inst,
555 LLVMValueRef *pred)
556 {
557 unsigned index;
558 unsigned char swizzles[4];
559 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
560 LLVMValueRef value;
561 unsigned chan;
562
563 if (!inst->Instruction.Predicate) {
564 FOR_EACH_CHANNEL( chan ) {
565 pred[chan] = NULL;
566 }
567 return;
568 }
569
570 swizzles[0] = inst->Predicate.SwizzleX;
571 swizzles[1] = inst->Predicate.SwizzleY;
572 swizzles[2] = inst->Predicate.SwizzleZ;
573 swizzles[3] = inst->Predicate.SwizzleW;
574
575 index = inst->Predicate.Index;
576 assert(index < LP_MAX_TGSI_PREDS);
577
578 FOR_EACH_CHANNEL( chan ) {
579 unsigned swizzle = swizzles[chan];
580
581 /*
582 * Only fetch the predicate register channels that are actually listed
583 * in the swizzles
584 */
585 if (!unswizzled[swizzle]) {
586 value = LLVMBuildLoad(bld->base.builder,
587 bld->preds[index][swizzle], "");
588
589 /*
590 * Convert the value to an integer mask.
591 *
  592           * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
  593           * needlessly causes two comparisons due to storing the intermediate
  594           * result as a float vector instead of an integer mask vector.
595 */
596 value = lp_build_compare(bld->base.builder,
597 bld->base.type,
598 PIPE_FUNC_NOTEQUAL,
599 value,
600 bld->base.zero);
601 if (inst->Predicate.Negate) {
602 value = LLVMBuildNot(bld->base.builder, value, "");
603 }
604
605 unswizzled[swizzle] = value;
606 } else {
607 value = unswizzled[swizzle];
608 }
609
610 pred[chan] = value;
611 }
612 }
613
614
615 /**
616 * Register store.
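 *
 * Applies the instruction's saturate modifier, resolves optional indirect
 * addressing, and writes the value through lp_exec_mask_store() so that
 * only channels enabled by the execution mask and predicate are updated.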
617 */
618 static void
619 emit_store(
620 struct lp_build_tgsi_soa_context *bld,
621 const struct tgsi_full_instruction *inst,
622 unsigned index,
623 unsigned chan_index,
624 LLVMValueRef pred,
625 LLVMValueRef value)
626 {
627 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
628 LLVMValueRef addr;
629
630 switch( inst->Instruction.Saturate ) {
631 case TGSI_SAT_NONE:
632 break;
633
634 case TGSI_SAT_ZERO_ONE:
635 value = lp_build_max(&bld->base, value, bld->base.zero);
636 value = lp_build_min(&bld->base, value, bld->base.one);
637 break;
638
639 case TGSI_SAT_MINUS_PLUS_ONE:
640 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
641 value = lp_build_min(&bld->base, value, bld->base.one);
642 break;
643
644 default:
645 assert(0);
646 }
647
648 if (reg->Register.Indirect) {
649 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
650 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
651 addr = LLVMBuildLoad(bld->base.builder,
652 bld->addr[reg->Indirect.Index][swizzle],
653 "");
654 /* for indexing we want integers */
655 addr = LLVMBuildFPToSI(bld->base.builder, addr,
656 int_vec_type, "");
657 addr = LLVMBuildExtractElement(bld->base.builder,
658 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
659 "");
660 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
661 }
662
663 switch( reg->Register.File ) {
664 case TGSI_FILE_OUTPUT:
665 lp_exec_mask_store(&bld->exec_mask, pred, value,
666 bld->outputs[reg->Register.Index][chan_index]);
667 break;
668
669 case TGSI_FILE_TEMPORARY: {
670 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
671 chan_index,
672 reg->Register.Indirect,
673 addr);
674 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
675 break;
676 }
677
678 case TGSI_FILE_ADDRESS:
679 lp_exec_mask_store(&bld->exec_mask, pred, value,
680 bld->addr[reg->Indirect.Index][chan_index]);
681 break;
682
683 case TGSI_FILE_PREDICATE:
684 lp_exec_mask_store(&bld->exec_mask, pred, value,
685 bld->preds[index][chan_index]);
686 break;
687
688 default:
689 assert( 0 );
690 }
691 }
692
693
694 /**
695 * High-level instruction translators.
696 */
697
698 enum tex_modifier {
699 TEX_MODIFIER_NONE = 0,
700 TEX_MODIFIER_PROJECTED,
701 TEX_MODIFIER_LOD_BIAS,
702 TEX_MODIFIER_EXPLICIT_LOD,
703 TEX_MODIFIER_EXPLICIT_DERIV
704 };
705
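/*
 * Emit a texture lookup: gather the coordinates (dividing by w for
 * projective lookups), the LOD bias or explicit LOD, and either explicit
 * or quad-derived derivatives, then hand everything to the sampler code
 * generator via emit_fetch_texel().
 */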
706 static void
707 emit_tex( struct lp_build_tgsi_soa_context *bld,
708 const struct tgsi_full_instruction *inst,
709 enum tex_modifier modifier,
710 LLVMValueRef *texel)
711 {
712 unsigned unit;
713 LLVMValueRef lod_bias, explicit_lod;
714 LLVMValueRef oow = NULL;
715 LLVMValueRef coords[3];
716 LLVMValueRef ddx[3];
717 LLVMValueRef ddy[3];
718 unsigned num_coords;
719 unsigned i;
720
721 if (!bld->sampler) {
722 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
723 for (i = 0; i < 4; i++) {
724 texel[i] = bld->base.undef;
725 }
726 return;
727 }
728
729 switch (inst->Texture.Texture) {
730 case TGSI_TEXTURE_1D:
731 num_coords = 1;
732 break;
733 case TGSI_TEXTURE_2D:
734 case TGSI_TEXTURE_RECT:
735 num_coords = 2;
736 break;
737 case TGSI_TEXTURE_SHADOW1D:
738 case TGSI_TEXTURE_SHADOW2D:
739 case TGSI_TEXTURE_SHADOWRECT:
740 case TGSI_TEXTURE_3D:
741 case TGSI_TEXTURE_CUBE:
742 num_coords = 3;
743 break;
744 default:
745 assert(0);
746 return;
747 }
748
749 if (modifier == TEX_MODIFIER_LOD_BIAS) {
750 lod_bias = emit_fetch( bld, inst, 0, 3 );
751 explicit_lod = NULL;
752 }
753 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
754 lod_bias = NULL;
755 explicit_lod = emit_fetch( bld, inst, 0, 3 );
756 }
757 else {
758 lod_bias = NULL;
759 explicit_lod = NULL;
760 }
761
762 if (modifier == TEX_MODIFIER_PROJECTED) {
763 oow = emit_fetch( bld, inst, 0, 3 );
764 oow = lp_build_rcp(&bld->base, oow);
765 }
766
767 for (i = 0; i < num_coords; i++) {
768 coords[i] = emit_fetch( bld, inst, 0, i );
769 if (modifier == TEX_MODIFIER_PROJECTED)
770 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
771 }
772 for (i = num_coords; i < 3; i++) {
773 coords[i] = bld->base.undef;
774 }
775
776 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
777 for (i = 0; i < num_coords; i++) {
778 ddx[i] = emit_fetch( bld, inst, 1, i );
779 ddy[i] = emit_fetch( bld, inst, 2, i );
780 }
781 unit = inst->Src[3].Register.Index;
782 } else {
783 for (i = 0; i < num_coords; i++) {
784 ddx[i] = emit_ddx( bld, coords[i] );
785 ddy[i] = emit_ddy( bld, coords[i] );
786 }
787 unit = inst->Src[1].Register.Index;
788 }
789 for (i = num_coords; i < 3; i++) {
790 ddx[i] = bld->base.undef;
791 ddy[i] = bld->base.undef;
792 }
793
794 bld->sampler->emit_fetch_texel(bld->sampler,
795 bld->base.builder,
796 bld->base.type,
797 unit, num_coords, coords,
798 ddx, ddy,
799 lod_bias, explicit_lod,
800 texel);
801 }
802
803
804 /**
805 * Kill fragment if any of the src register values are negative.
806 */
807 static void
808 emit_kil(
809 struct lp_build_tgsi_soa_context *bld,
810 const struct tgsi_full_instruction *inst )
811 {
812 const struct tgsi_full_src_register *reg = &inst->Src[0];
813 LLVMValueRef terms[NUM_CHANNELS];
814 LLVMValueRef mask;
815 unsigned chan_index;
816
817 memset(&terms, 0, sizeof terms);
818
819 FOR_EACH_CHANNEL( chan_index ) {
820 unsigned swizzle;
821
822 /* Unswizzle channel */
823 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
824
825 /* Check if the component has not been already tested. */
826 assert(swizzle < NUM_CHANNELS);
827 if( !terms[swizzle] )
828 /* TODO: change the comparison operator instead of setting the sign */
829 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
830 }
831
832 mask = NULL;
833 FOR_EACH_CHANNEL( chan_index ) {
834 if(terms[chan_index]) {
835 LLVMValueRef chan_mask;
836
837 /*
838 * If term < 0 then mask = 0 else mask = ~0.
839 */
840 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
841
842 if(mask)
843 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
844 else
845 mask = chan_mask;
846 }
847 }
848
849 if(mask)
850 lp_build_mask_update(bld->mask, mask);
851 }
852
853
854 /**
855 * Predicated fragment kill.
856 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
857 * The only predication is the execution mask which will apply if
858 * we're inside a loop or conditional.
859 */
860 static void
861 emit_kilp(struct lp_build_tgsi_soa_context *bld,
862 const struct tgsi_full_instruction *inst)
863 {
864 LLVMValueRef mask;
865
866 /* For those channels which are "alive", disable fragment shader
867 * execution.
868 */
869 if (bld->exec_mask.has_mask) {
870 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
871 }
872 else {
873 mask = bld->base.zero;
874 }
875
876 lp_build_mask_update(bld->mask, mask);
877 }
878
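/*
 * Allocate storage for a declaration: one alloca per channel for
 * temporaries, outputs, address and predicate registers, or a single
 * array alloca covering all temporaries when indirect addressing is used.
 */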
879 static void
880 emit_declaration(
881 struct lp_build_tgsi_soa_context *bld,
882 const struct tgsi_full_declaration *decl)
883 {
884 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
885
886 unsigned first = decl->Range.First;
887 unsigned last = decl->Range.Last;
888 unsigned idx, i;
889
890 for (idx = first; idx <= last; ++idx) {
891 switch (decl->Declaration.File) {
892 case TGSI_FILE_TEMPORARY:
893 assert(idx < LP_MAX_TGSI_TEMPS);
894 if (bld->has_indirect_addressing) {
895 LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
896 last*4 + 4, 0);
897 bld->temps_array = lp_build_array_alloca(bld->base.builder,
898 vec_type, val, "");
899 } else {
900 for (i = 0; i < NUM_CHANNELS; i++)
901 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
902 vec_type, "");
903 }
904 break;
905
906 case TGSI_FILE_OUTPUT:
907 for (i = 0; i < NUM_CHANNELS; i++)
908 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
909 vec_type, "");
910 break;
911
912 case TGSI_FILE_ADDRESS:
913 assert(idx < LP_MAX_TGSI_ADDRS);
914 for (i = 0; i < NUM_CHANNELS; i++)
915 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
916 vec_type, "");
917 break;
918
919 case TGSI_FILE_PREDICATE:
920 assert(idx < LP_MAX_TGSI_PREDS);
921 for (i = 0; i < NUM_CHANNELS; i++)
922 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
923 vec_type, "");
924 break;
925
926 default:
927 /* don't need to declare other vars */
928 break;
929 }
930 }
931 }
932
933
934 /**
935 * Emit LLVM for one TGSI instruction.
  936  * \return TRUE for success, FALSE otherwise
937 */
938 static boolean
939 emit_instruction(
940 struct lp_build_tgsi_soa_context *bld,
941 const struct tgsi_full_instruction *inst,
942 const struct tgsi_opcode_info *info)
943 {
944 unsigned chan_index;
945 LLVMValueRef src0, src1, src2;
946 LLVMValueRef tmp0, tmp1, tmp2;
947 LLVMValueRef tmp3 = NULL;
948 LLVMValueRef tmp4 = NULL;
949 LLVMValueRef tmp5 = NULL;
950 LLVMValueRef tmp6 = NULL;
951 LLVMValueRef tmp7 = NULL;
952 LLVMValueRef res;
953 LLVMValueRef dst0[NUM_CHANNELS];
954
955 /*
956 * Stores and write masks are handled in a general fashion after the long
957 * instruction opcode switch statement.
958 *
  959     * Although not strictly necessary, we avoid generating instructions for
  960     * channels which won't be stored, in cases where that's easy. For some
961 * complex instructions, like texture sampling, it is more convenient to
962 * assume a full writemask and then let LLVM optimization passes eliminate
963 * redundant code.
964 */
965
966 assert(info->num_dst <= 1);
967 if (info->num_dst) {
968 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
969 dst0[chan_index] = bld->base.undef;
970 }
971 }
972
973 switch (inst->Instruction.Opcode) {
974 case TGSI_OPCODE_ARL:
975 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
976 tmp0 = emit_fetch( bld, inst, 0, chan_index );
977 tmp0 = lp_build_floor(&bld->base, tmp0);
978 dst0[chan_index] = tmp0;
979 }
980 break;
981
982 case TGSI_OPCODE_MOV:
983 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
984 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
985 }
986 break;
987
988 case TGSI_OPCODE_LIT:
989 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
990 dst0[CHAN_X] = bld->base.one;
991 }
992 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
993 src0 = emit_fetch( bld, inst, 0, CHAN_X );
994 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
995 }
996 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
997 /* XMM[1] = SrcReg[0].yyyy */
998 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
999 /* XMM[1] = max(XMM[1], 0) */
1000 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1001 /* XMM[2] = SrcReg[0].wwww */
1002 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1003 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1004 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1005 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1006 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1007 }
1008 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1009 dst0[CHAN_W] = bld->base.one;
1010 }
1011 break;
1012
1013 case TGSI_OPCODE_RCP:
1014 /* TGSI_OPCODE_RECIP */
1015 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1016 res = lp_build_rcp(&bld->base, src0);
1017 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1018 dst0[chan_index] = res;
1019 }
1020 break;
1021
1022 case TGSI_OPCODE_RSQ:
1023 /* TGSI_OPCODE_RECIPSQRT */
1024 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1025 src0 = lp_build_abs(&bld->base, src0);
1026 res = lp_build_rsqrt(&bld->base, src0);
1027 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1028 dst0[chan_index] = res;
1029 }
1030 break;
1031
1032 case TGSI_OPCODE_EXP:
1033 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1034 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1035 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1036 LLVMValueRef *p_exp2_int_part = NULL;
1037 LLVMValueRef *p_frac_part = NULL;
1038 LLVMValueRef *p_exp2 = NULL;
1039
1040 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1041
1042 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1043 p_exp2_int_part = &tmp0;
1044 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1045 p_frac_part = &tmp1;
1046 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1047 p_exp2 = &tmp2;
1048
1049 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1050
1051 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1052 dst0[CHAN_X] = tmp0;
1053 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1054 dst0[CHAN_Y] = tmp1;
1055 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1056 dst0[CHAN_Z] = tmp2;
1057 }
1058 /* dst.w = 1.0 */
1059 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1060 dst0[CHAN_W] = bld->base.one;
1061 }
1062 break;
1063
1064 case TGSI_OPCODE_LOG:
1065 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1066 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1067 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1068 LLVMValueRef *p_floor_log2 = NULL;
1069 LLVMValueRef *p_exp = NULL;
1070 LLVMValueRef *p_log2 = NULL;
1071
1072 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1073 src0 = lp_build_abs( &bld->base, src0 );
1074
1075 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1076 p_floor_log2 = &tmp0;
1077 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1078 p_exp = &tmp1;
1079 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1080 p_log2 = &tmp2;
1081
1082 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1083
1084 /* dst.x = floor(lg2(abs(src.x))) */
1085 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1086 dst0[CHAN_X] = tmp0;
1087 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1088 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1089 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1090 }
1091 /* dst.z = lg2(abs(src.x)) */
1092 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1093 dst0[CHAN_Z] = tmp2;
1094 }
1095 /* dst.w = 1.0 */
1096 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1097 dst0[CHAN_W] = bld->base.one;
1098 }
1099 break;
1100
1101 case TGSI_OPCODE_MUL:
1102 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1103 src0 = emit_fetch( bld, inst, 0, chan_index );
1104 src1 = emit_fetch( bld, inst, 1, chan_index );
1105 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1106 }
1107 break;
1108
1109 case TGSI_OPCODE_ADD:
1110 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1111 src0 = emit_fetch( bld, inst, 0, chan_index );
1112 src1 = emit_fetch( bld, inst, 1, chan_index );
1113 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1114 }
1115 break;
1116
1117 case TGSI_OPCODE_DP3:
1118 /* TGSI_OPCODE_DOT3 */
1119 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1120 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1121 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1122 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1123 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1124 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1125 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1126 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1127 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1128 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1129 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1130 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1131 dst0[chan_index] = tmp0;
1132 }
1133 break;
1134
1135 case TGSI_OPCODE_DP4:
1136 /* TGSI_OPCODE_DOT4 */
1137 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1138 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1139 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1140 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1141 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1142 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1143 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1144 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1145 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1146 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1147 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1148 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1149 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1150 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1151 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1152 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1153 dst0[chan_index] = tmp0;
1154 }
1155 break;
1156
1157 case TGSI_OPCODE_DST:
1158 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1159 dst0[CHAN_X] = bld->base.one;
1160 }
1161 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1162 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1163 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1164 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1165 }
1166 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1167 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1168 }
1169 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1170 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1171 }
1172 break;
1173
1174 case TGSI_OPCODE_MIN:
1175 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1176 src0 = emit_fetch( bld, inst, 0, chan_index );
1177 src1 = emit_fetch( bld, inst, 1, chan_index );
1178 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1179 }
1180 break;
1181
1182 case TGSI_OPCODE_MAX:
1183 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1184 src0 = emit_fetch( bld, inst, 0, chan_index );
1185 src1 = emit_fetch( bld, inst, 1, chan_index );
1186 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1187 }
1188 break;
1189
1190 case TGSI_OPCODE_SLT:
1191 /* TGSI_OPCODE_SETLT */
1192 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1193 src0 = emit_fetch( bld, inst, 0, chan_index );
1194 src1 = emit_fetch( bld, inst, 1, chan_index );
1195 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1196 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1197 }
1198 break;
1199
1200 case TGSI_OPCODE_SGE:
1201 /* TGSI_OPCODE_SETGE */
1202 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1203 src0 = emit_fetch( bld, inst, 0, chan_index );
1204 src1 = emit_fetch( bld, inst, 1, chan_index );
1205 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1206 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1207 }
1208 break;
1209
1210 case TGSI_OPCODE_MAD:
1211 /* TGSI_OPCODE_MADD */
1212 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1213 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1214 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1215 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1216 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1217 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1218 dst0[chan_index] = tmp0;
1219 }
1220 break;
1221
1222 case TGSI_OPCODE_SUB:
1223 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1224 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1225 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1226 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1227 }
1228 break;
1229
1230 case TGSI_OPCODE_LRP:
1231 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1232 src0 = emit_fetch( bld, inst, 0, chan_index );
1233 src1 = emit_fetch( bld, inst, 1, chan_index );
1234 src2 = emit_fetch( bld, inst, 2, chan_index );
1235 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1236 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1237 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1238 }
1239 break;
1240
1241 case TGSI_OPCODE_CND:
1242 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1243 src0 = emit_fetch( bld, inst, 0, chan_index );
1244 src1 = emit_fetch( bld, inst, 1, chan_index );
1245 src2 = emit_fetch( bld, inst, 2, chan_index );
1246 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1247 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1248 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1249 }
1250 break;
1251
1252 case TGSI_OPCODE_DP2A:
1253 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1254 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1255 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1256 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1257 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1258 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1259 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1260 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1261 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1262 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1263 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1264 }
1265 break;
1266
1267 case TGSI_OPCODE_FRC:
1268 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1269 src0 = emit_fetch( bld, inst, 0, chan_index );
1270 tmp0 = lp_build_floor(&bld->base, src0);
1271 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1272 dst0[chan_index] = tmp0;
1273 }
1274 break;
1275
1276 case TGSI_OPCODE_CLAMP:
1277 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1278 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1279 src1 = emit_fetch( bld, inst, 1, chan_index );
1280 src2 = emit_fetch( bld, inst, 2, chan_index );
1281 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1282 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1283 dst0[chan_index] = tmp0;
1284 }
1285 break;
1286
1287 case TGSI_OPCODE_FLR:
1288 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1289 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1290 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1291 }
1292 break;
1293
1294 case TGSI_OPCODE_ROUND:
1295 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1296 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1297 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1298 }
1299 break;
1300
1301 case TGSI_OPCODE_EX2: {
1302 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1303 tmp0 = lp_build_exp2( &bld->base, tmp0);
1304 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1305 dst0[chan_index] = tmp0;
1306 }
1307 break;
1308 }
1309
1310 case TGSI_OPCODE_LG2:
1311 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1312 tmp0 = lp_build_log2( &bld->base, tmp0);
1313 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1314 dst0[chan_index] = tmp0;
1315 }
1316 break;
1317
1318 case TGSI_OPCODE_POW:
1319 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1320 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1321 res = lp_build_pow( &bld->base, src0, src1 );
1322 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1323 dst0[chan_index] = res;
1324 }
1325 break;
1326
1327 case TGSI_OPCODE_XPD:
1328 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1329 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1330 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1331 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1332 }
1333 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1334 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1335 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1336 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1337 }
1338 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1339 tmp2 = tmp0;
1340 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1341 tmp5 = tmp3;
1342 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1343 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1344 dst0[CHAN_X] = tmp2;
1345 }
1346 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1347 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1348 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1349 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1350 }
1351 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1352 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1353 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1354 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1355 dst0[CHAN_Y] = tmp3;
1356 }
1357 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1358 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1359 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1360 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1361 dst0[CHAN_Z] = tmp5;
1362 }
1363 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1364 dst0[CHAN_W] = bld->base.one;
1365 }
1366 break;
1367
1368 case TGSI_OPCODE_ABS:
1369 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1370 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1371 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1372 }
1373 break;
1374
1375 case TGSI_OPCODE_RCC:
1376 /* deprecated? */
1377 assert(0);
1378 return FALSE;
1379
1380 case TGSI_OPCODE_DPH:
1381 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1382 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1383 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1384 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1385 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1386 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1387 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1388 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1389 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1390 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1391 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1392 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1393 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1394 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1395 dst0[chan_index] = tmp0;
1396 }
1397 break;
1398
1399 case TGSI_OPCODE_COS:
1400 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1401 tmp0 = lp_build_cos( &bld->base, tmp0 );
1402 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1403 dst0[chan_index] = tmp0;
1404 }
1405 break;
1406
1407 case TGSI_OPCODE_DDX:
1408 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1409 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1410 }
1411 break;
1412
1413 case TGSI_OPCODE_DDY:
1414 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1415 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1416 }
1417 break;
1418
1419 case TGSI_OPCODE_KILP:
1420 /* predicated kill */
1421 emit_kilp( bld, inst );
1422 break;
1423
1424 case TGSI_OPCODE_KIL:
1425 /* conditional kill */
1426 emit_kil( bld, inst );
1427 break;
1428
1429 case TGSI_OPCODE_PK2H:
1430 return FALSE;
1431 break;
1432
1433 case TGSI_OPCODE_PK2US:
1434 return FALSE;
1435 break;
1436
1437 case TGSI_OPCODE_PK4B:
1438 return FALSE;
1439 break;
1440
1441 case TGSI_OPCODE_PK4UB:
1442 return FALSE;
1443 break;
1444
1445 case TGSI_OPCODE_RFL:
1446 return FALSE;
1447 break;
1448
1449 case TGSI_OPCODE_SEQ:
1450 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1451 src0 = emit_fetch( bld, inst, 0, chan_index );
1452 src1 = emit_fetch( bld, inst, 1, chan_index );
1453 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1454 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1455 }
1456 break;
1457
1458 case TGSI_OPCODE_SFL:
1459 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1460 dst0[chan_index] = bld->base.zero;
1461 }
1462 break;
1463
1464 case TGSI_OPCODE_SGT:
1465 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1466 src0 = emit_fetch( bld, inst, 0, chan_index );
1467 src1 = emit_fetch( bld, inst, 1, chan_index );
1468 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1469 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1470 }
1471 break;
1472
1473 case TGSI_OPCODE_SIN:
1474 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1475 tmp0 = lp_build_sin( &bld->base, tmp0 );
1476 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1477 dst0[chan_index] = tmp0;
1478 }
1479 break;
1480
1481 case TGSI_OPCODE_SLE:
1482 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1483 src0 = emit_fetch( bld, inst, 0, chan_index );
1484 src1 = emit_fetch( bld, inst, 1, chan_index );
1485 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1486 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1487 }
1488 break;
1489
1490 case TGSI_OPCODE_SNE:
1491 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1492 src0 = emit_fetch( bld, inst, 0, chan_index );
1493 src1 = emit_fetch( bld, inst, 1, chan_index );
1494 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1495 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1496 }
1497 break;
1498
1499 case TGSI_OPCODE_STR:
1500 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1501 dst0[chan_index] = bld->base.one;
1502 }
1503 break;
1504
1505 case TGSI_OPCODE_TEX:
1506 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1507 break;
1508
1509 case TGSI_OPCODE_TXD:
1510 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1511 break;
1512
1513 case TGSI_OPCODE_UP2H:
1514 /* deprecated */
1515 assert (0);
1516 return FALSE;
1517 break;
1518
1519 case TGSI_OPCODE_UP2US:
1520 /* deprecated */
1521 assert(0);
1522 return FALSE;
1523 break;
1524
1525 case TGSI_OPCODE_UP4B:
1526 /* deprecated */
1527 assert(0);
1528 return FALSE;
1529 break;
1530
1531 case TGSI_OPCODE_UP4UB:
1532 /* deprecated */
1533 assert(0);
1534 return FALSE;
1535 break;
1536
1537 case TGSI_OPCODE_X2D:
1538 /* deprecated? */
1539 assert(0);
1540 return FALSE;
1541 break;
1542
1543 case TGSI_OPCODE_ARA:
1544 /* deprecated */
1545 assert(0);
1546 return FALSE;
1547 break;
1548
1549 case TGSI_OPCODE_ARR:
1550 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1551 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1552 tmp0 = lp_build_round(&bld->base, tmp0);
1553 dst0[chan_index] = tmp0;
1554 }
1555 break;
1556
1557 case TGSI_OPCODE_BRA:
1558 /* deprecated */
1559 assert(0);
1560 return FALSE;
1561 break;
1562
1563 case TGSI_OPCODE_CAL:
1564 /* FIXME */
1565 return FALSE;
1566 break;
1567
1568 case TGSI_OPCODE_RET:
1569 /* FIXME */
1570 return FALSE;
1571 break;
1572
1573 case TGSI_OPCODE_END:
1574 break;
1575
1576 case TGSI_OPCODE_SSG:
1577 /* TGSI_OPCODE_SGN */
1578 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1579 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1580 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1581 }
1582 break;
1583
1584 case TGSI_OPCODE_CMP:
1585 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1586 src0 = emit_fetch( bld, inst, 0, chan_index );
1587 src1 = emit_fetch( bld, inst, 1, chan_index );
1588 src2 = emit_fetch( bld, inst, 2, chan_index );
1589 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1590 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1591 }
1592 break;
1593
1594 case TGSI_OPCODE_SCS:
1595 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1596 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1597 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1598 }
1599 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1600 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1601 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1602 }
1603 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1604 dst0[CHAN_Z] = bld->base.zero;
1605 }
1606 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1607 dst0[CHAN_W] = bld->base.one;
1608 }
1609 break;
1610
1611 case TGSI_OPCODE_TXB:
1612 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1613 break;
1614
1615 case TGSI_OPCODE_NRM:
1616 /* fall-through */
1617 case TGSI_OPCODE_NRM4:
1618 /* 3 or 4-component normalization */
1619 {
1620 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1621
1622 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1623 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1624 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1625 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1626
1627 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1628
1629 /* xmm4 = src.x */
1630 /* xmm0 = src.x * src.x */
1631 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1632 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1633 tmp4 = tmp0;
1634 }
1635 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1636
1637 /* xmm5 = src.y */
1638 /* xmm0 = xmm0 + src.y * src.y */
1639 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1640 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1641 tmp5 = tmp1;
1642 }
1643 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1644 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1645
1646 /* xmm6 = src.z */
1647 /* xmm0 = xmm0 + src.z * src.z */
1648 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1649 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1650 tmp6 = tmp1;
1651 }
1652 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1653 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1654
1655 if (dims == 4) {
1656 /* xmm7 = src.w */
1657 /* xmm0 = xmm0 + src.w * src.w */
1658 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1659 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1660 tmp7 = tmp1;
1661 }
1662 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1663 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1664 }
1665
1666 /* xmm1 = 1 / sqrt(xmm0) */
1667 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1668
1669 /* dst.x = xmm1 * src.x */
1670 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1671 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1672 }
1673
1674 /* dst.y = xmm1 * src.y */
1675 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1676 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1677 }
1678
1679 /* dst.z = xmm1 * src.z */
1680 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1681 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1682 }
1683
1684 /* dst.w = xmm1 * src.w */
 1685            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4) {
1686 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1687 }
1688 }
1689
1690 /* dst.w = 1.0 */
1691 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1692 dst0[CHAN_W] = bld->base.one;
1693 }
1694 }
1695 break;
1696
1697 case TGSI_OPCODE_DIV:
1698 /* deprecated */
1699 assert( 0 );
1700 return FALSE;
1701 break;
1702
1703 case TGSI_OPCODE_DP2:
1704 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1705 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1706 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1707 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1708 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1709 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1710 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1711 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1712 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1713 }
1714 break;
1715
1716 case TGSI_OPCODE_TXL:
1717 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1718 break;
1719
1720 case TGSI_OPCODE_TXP:
1721 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1722 break;
1723
1724 case TGSI_OPCODE_BRK:
1725 lp_exec_break(&bld->exec_mask);
1726 break;
1727
1728 case TGSI_OPCODE_IF:
1729 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1730 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1731 tmp0, bld->base.zero);
1732 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1733 break;
1734
1735 case TGSI_OPCODE_BGNLOOP:
1736 lp_exec_bgnloop(&bld->exec_mask);
1737 break;
1738
1739 case TGSI_OPCODE_ELSE:
1740 lp_exec_mask_cond_invert(&bld->exec_mask);
1741 break;
1742
1743 case TGSI_OPCODE_ENDIF:
1744 lp_exec_mask_cond_pop(&bld->exec_mask);
1745 break;
1746
1747 case TGSI_OPCODE_ENDLOOP:
1748 lp_exec_endloop(&bld->exec_mask);
1749 break;
1750
1751 case TGSI_OPCODE_PUSHA:
1752 /* deprecated? */
1753 assert(0);
1754 return FALSE;
1755 break;
1756
1757 case TGSI_OPCODE_POPA:
1758 /* deprecated? */
1759 assert(0);
1760 return FALSE;
1761 break;
1762
1763 case TGSI_OPCODE_CEIL:
1764 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1765 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1766 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1767 }
1768 break;
1769
1770 case TGSI_OPCODE_I2F:
1771 /* deprecated? */
1772 assert(0);
1773 return FALSE;
1774 break;
1775
1776 case TGSI_OPCODE_NOT:
1777 /* deprecated? */
1778 assert(0);
1779 return FALSE;
1780 break;
1781
1782 case TGSI_OPCODE_TRUNC:
1783 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1784 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1785 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1786 }
1787 break;
1788
1789 case TGSI_OPCODE_SHL:
1790 /* deprecated? */
1791 assert(0);
1792 return FALSE;
1793 break;
1794
1795 case TGSI_OPCODE_ISHR:
1796 /* deprecated? */
1797 assert(0);
1798 return FALSE;
1799 break;
1800
1801 case TGSI_OPCODE_AND:
1802 /* deprecated? */
1803 assert(0);
1804 return FALSE;
1805 break;
1806
1807 case TGSI_OPCODE_OR:
1808 /* deprecated? */
1809 assert(0);
1810 return FALSE;
1811 break;
1812
1813 case TGSI_OPCODE_MOD:
1814 /* deprecated? */
1815 assert(0);
1816 return FALSE;
1817 break;
1818
1819 case TGSI_OPCODE_XOR:
1820 /* deprecated? */
1821 assert(0);
1822 return FALSE;
1823 break;
1824
1825 case TGSI_OPCODE_SAD:
1826 /* deprecated? */
1827 assert(0);
1828 return FALSE;
1829 break;
1830
1831 case TGSI_OPCODE_TXF:
1832 /* deprecated? */
1833 assert(0);
1834 return FALSE;
1835 break;
1836
1837 case TGSI_OPCODE_TXQ:
1838 /* deprecated? */
1839 assert(0);
1840 return FALSE;
1841 break;
1842
1843 case TGSI_OPCODE_CONT:
1844 lp_exec_continue(&bld->exec_mask);
1845 break;
1846
1847 case TGSI_OPCODE_EMIT:
1848 return FALSE;
1849 break;
1850
1851 case TGSI_OPCODE_ENDPRIM:
1852 return FALSE;
1853 break;
1854
1855 case TGSI_OPCODE_NOP:
1856 break;
1857
1858 default:
1859 return FALSE;
1860 }
1861
1862 if(info->num_dst) {
1863 LLVMValueRef pred[NUM_CHANNELS];
1864
1865 emit_fetch_predicate( bld, inst, pred );
1866
1867 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1868 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1869 }
1870 }
1871
1872 return TRUE;
1873 }
1874
1875
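/*
 * Main entry point: translate a TGSI token stream into LLVM IR using the
 * SoA layout.  Declarations allocate storage, immediates become constant
 * vectors, and each instruction is lowered by emit_instruction(); opcodes
 * that fail to translate only produce a warning.
 */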
1876 void
1877 lp_build_tgsi_soa(LLVMBuilderRef builder,
1878 const struct tgsi_token *tokens,
1879 struct lp_type type,
1880 struct lp_build_mask_context *mask,
1881 LLVMValueRef consts_ptr,
1882 const LLVMValueRef *pos,
1883 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1884 LLVMValueRef (*outputs)[NUM_CHANNELS],
1885 struct lp_build_sampler_soa *sampler,
1886 const struct tgsi_shader_info *info)
1887 {
1888 struct lp_build_tgsi_soa_context bld;
1889 struct tgsi_parse_context parse;
1890 uint num_immediates = 0;
1891 unsigned i;
1892
1893 /* Setup build context */
1894 memset(&bld, 0, sizeof bld);
1895 lp_build_context_init(&bld.base, builder, type);
1896 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1897 bld.mask = mask;
1898 bld.pos = pos;
1899 bld.inputs = inputs;
1900 bld.outputs = outputs;
1901 bld.consts_ptr = consts_ptr;
1902 bld.sampler = sampler;
1903 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1904 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1905
1906 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1907
1908 tgsi_parse_init( &parse, tokens );
1909
1910 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1911 tgsi_parse_token( &parse );
1912
1913 switch( parse.FullToken.Token.Type ) {
1914 case TGSI_TOKEN_TYPE_DECLARATION:
1915 /* Inputs already interpolated */
1916 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1917 break;
1918
1919 case TGSI_TOKEN_TYPE_INSTRUCTION:
1920 {
1921 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1922 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
1923 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
1924 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1925 opcode_info->mnemonic);
1926 }
1927
1928 break;
1929
1930 case TGSI_TOKEN_TYPE_IMMEDIATE:
1931 /* simply copy the immediate values into the next immediates[] slot */
1932 {
1933 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1934 assert(size <= 4);
1935 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1936 for( i = 0; i < size; ++i )
1937 bld.immediates[num_immediates][i] =
1938 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1939 for( i = size; i < 4; ++i )
1940 bld.immediates[num_immediates][i] = bld.base.undef;
1941 num_immediates++;
1942 }
1943 break;
1944
1945 case TGSI_TOKEN_TYPE_PROPERTY:
1946 break;
1947
1948 default:
1949 assert( 0 );
1950 }
1951 }
1952 if (0) {
1953 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1954 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1955 debug_printf("11111111111111111111111111111 \n");
1956 tgsi_dump(tokens, 0);
1957 lp_debug_dump_value(function);
1958 debug_printf("2222222222222222222222222222 \n");
1959 }
1960 tgsi_parse_free( &parse );
1961 }
1962