Merge branch 'gallium-msaa'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_tgsi.h"
57 #include "lp_bld_limits.h"
58 #include "lp_bld_debug.h"
59
60
/* Iterate CHAN over all four vector channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* TRUE if channel CHAN is enabled in the first destination's writemask. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over the channels enabled in dst0's writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Positions of the four pixels within a 2x2 quad (used for derivatives). */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* Initial capacity of the instruction array; grown on demand. */
#define LP_MAX_INSTRUCTIONS 256
86
/**
 * Execution-mask bookkeeping used to emulate TGSI control flow
 * (IF/ELSE, loops with BRK/CONT, CAL/RET) with per-channel masks
 * in SoA form.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any of the stacks below is non-empty, i.e. register
    * stores must be predicated on exec_mask. */
   boolean has_mask;

   /* Integer vector type shared by all the masks below. */
   LLVMTypeRef int_vec_type;

   /* IF/ELSE nesting: stack of saved condition masks. */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Innermost loop state.  break_mask is mirrored in the alloca
    * break_var so its value survives across loop iterations. */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Subroutine calls: channels disabled by RET, plus the saved
    * pc/ret_mask per call frame. */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* Combined (ANDed) cond/cont/break/ret mask: channels running now. */
   LLVMValueRef exec_mask;
};
119
/**
 * Per-shader translation context for the TGSI -> LLVM IR (SoA) pass.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   /* Pointer to the constant buffer (array of scalars). */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Optional texture-sampling code generator; may be NULL. */
   const struct lp_build_sampler_soa *sampler;

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* Parsed instructions, buffered so CAL/RET can jump by pc. */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
150
/* Quad swizzle tables for derivative computation: each table selects,
 * for every pixel of a 2x2 quad, the neighbouring pixel named by the
 * table (e.g. swizzle_left replicates the left column). */
static const unsigned char
swizzle_left[4] = {
   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
};

static const unsigned char
swizzle_right[4] = {
   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
};

static const unsigned char
swizzle_top[4] = {
   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
};

static const unsigned char
swizzle_bottom[4] = {
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
};
174
175 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
176 {
177 mask->bld = bld;
178 mask->has_mask = FALSE;
179 mask->cond_stack_size = 0;
180 mask->loop_stack_size = 0;
181 mask->call_stack_size = 0;
182
183 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
184 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
185 LLVMConstAllOnes(mask->int_vec_type);
186 }
187
/* Recompute exec_mask as the AND of the active condition, loop
 * (cont/break) and subroutine-return masks, and refresh has_mask. */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   /* inside a subroutine, channels disabled by RET must stay off */
   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}
216
/* IF: push the current condition mask and make val (the IF condition,
 * already an integer mask vector) the new one. */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* outermost IF: the current mask must still be the default */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = val;

   lp_exec_mask_update(mask);
}
230
/* ELSE: invert the innermost condition mask, still constrained by the
 * enclosing (previous) condition mask. */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
249
250 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
251 {
252 assert(mask->cond_stack_size);
253 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
254 lp_exec_mask_update(mask);
255 }
256
/* BGNLOOP: push the enclosing loop state, create a fresh loop header
 * basic block and branch into it.  The break mask lives in an alloca
 * (break_var) so that breaks taken in one iteration persist into the
 * next. */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* spill the break mask to memory so it survives the back edge */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload the break mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
285
/* BRK: permanently disable (for the rest of the loop) all channels
 * that are currently executing, by clearing them from break_mask. */
static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "break");

   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}
298
299 static void lp_exec_continue(struct lp_exec_mask *mask)
300 {
301 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
302 mask->exec_mask,
303 "");
304
305 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
306 mask->cont_mask,
307 exec_mask, "");
308
309 lp_exec_mask_update(mask);
310 }
311
312
/* ENDLOOP: decide whether to run another iteration (any channel still
 * active) or fall through, then pop the loop state. */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* one scalar integer wide enough to hold the whole mask vector,
    * so a single compare tests all channels at once */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. some channel is still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop back while any channel is active, otherwise exit */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the loop state saved by the matching BGNLOOP */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
358
/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * pred is an optional additional per-channel predicate (may be NULL).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      /* masked store: blend the new value with what's already there */
      LLVMValueRef real_val, dst_val;

      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* no mask active: plain unconditional store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}
390
391 static void lp_exec_mask_call(struct lp_exec_mask *mask,
392 int func,
393 int *pc)
394 {
395 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
396 mask->call_stack[mask->call_stack_size].pc = *pc;
397 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
398 mask->call_stack_size++;
399 *pc = func;
400 }
401
/* RET: from main() it terminates the shader (pc = -1); inside a
 * subroutine it disables the currently-active channels until the
 * matching ENDSUB restores ret_mask. */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
421
/* BGNSUB: nothing to do -- all bookkeeping happens in
 * lp_exec_mask_call() / lp_exec_mask_endsub(). */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
425
426 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
427 {
428 assert(mask->call_stack_size);
429 mask->call_stack_size--;
430 *pc = mask->call_stack[mask->call_stack_size].pc;
431 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
432 lp_exec_mask_update(mask);
433 }
434
435 static LLVMValueRef
436 emit_ddx(struct lp_build_tgsi_soa_context *bld,
437 LLVMValueRef src)
438 {
439 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
440 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
441 return lp_build_sub(&bld->base, src_right, src_left);
442 }
443
444
445 static LLVMValueRef
446 emit_ddy(struct lp_build_tgsi_soa_context *bld,
447 LLVMValueRef src)
448 {
449 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
450 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
451 return lp_build_sub(&bld->base, src_top, src_bottom);
452 }
453
454 static LLVMValueRef
455 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
456 unsigned index,
457 unsigned chan,
458 boolean is_indirect,
459 LLVMValueRef addr)
460 {
461 assert(chan < 4);
462 if (!bld->has_indirect_addressing) {
463 return bld->temps[index][chan];
464 } else {
465 LLVMValueRef lindex =
466 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
467 if (is_indirect)
468 lindex = lp_build_add(&bld->base, lindex, addr);
469 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
470 }
471 }
472
/**
 * Register fetch.
 *
 * Fetch one SoA channel of source operand `index` of `inst`, applying
 * the operand's swizzle, indirect addressing and sign mode.
 * Returns bld->base.undef on invalid input.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[index];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
      /* NOTE(review): this inner `swizzle` shadows the outer one above;
       * it selects the channel of the *address* register instead. */
      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
      addr = LLVMBuildLoad(bld->base.builder,
                           bld->addr[reg->Indirect.Index][swizzle],
                           "");
      /* for indexing we want integers */
      addr = LLVMBuildFPToSI(bld->base.builder, addr,
                             int_vec_type, "");
      /* take element 0 -- presumably all channels hold the same index;
       * TODO confirm */
      addr = LLVMBuildExtractElement(bld->base.builder,
                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
                                     "");
      /* scale to scalar-slot units: 4 channels per register */
      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      {
         /* constants are stored as scalars; load one and broadcast */
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
                                           reg->Register.Index*4 + swizzle, 0);
         LLVMValueRef scalar, scalar_ptr;

         if (reg->Register.Indirect) {
            /*lp_build_printf(bld->base.builder,
              "\taddr = %d\n", addr);*/
            index = lp_build_add(&bld->base, index, addr);
         }
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              swizzle,
                                              reg->Register.Indirect,
                                              addr);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if(!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the operand's sign mode (abs / -abs / negate / keep) */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_TOGGLE:
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
577
578
579 /**
580 * Register fetch with derivatives.
581 */
582 static void
583 emit_fetch_deriv(
584 struct lp_build_tgsi_soa_context *bld,
585 const struct tgsi_full_instruction *inst,
586 unsigned index,
587 const unsigned chan_index,
588 LLVMValueRef *res,
589 LLVMValueRef *ddx,
590 LLVMValueRef *ddy)
591 {
592 LLVMValueRef src;
593
594 src = emit_fetch(bld, inst, index, chan_index);
595
596 if(res)
597 *res = src;
598
599 /* TODO: use interpolation coeffs for inputs */
600
601 if(ddx)
602 *ddx = emit_ddx(bld, src);
603
604 if(ddy)
605 *ddy = emit_ddy(bld, src);
606 }
607
/**
 * Predicate.
 *
 * Build the per-channel predicate masks for a predicated instruction:
 * pred[chan] is an integer mask (or NULL when the instruction is not
 * predicated) obtained by comparing the swizzled predicate register
 * against zero, optionally negated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* not predicated: no masks */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
674
675
676 /**
677 * Register store.
678 */
679 static void
680 emit_store(
681 struct lp_build_tgsi_soa_context *bld,
682 const struct tgsi_full_instruction *inst,
683 unsigned index,
684 unsigned chan_index,
685 LLVMValueRef pred,
686 LLVMValueRef value)
687 {
688 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
689 LLVMValueRef addr = NULL;
690
691 switch( inst->Instruction.Saturate ) {
692 case TGSI_SAT_NONE:
693 break;
694
695 case TGSI_SAT_ZERO_ONE:
696 value = lp_build_max(&bld->base, value, bld->base.zero);
697 value = lp_build_min(&bld->base, value, bld->base.one);
698 break;
699
700 case TGSI_SAT_MINUS_PLUS_ONE:
701 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
702 value = lp_build_min(&bld->base, value, bld->base.one);
703 break;
704
705 default:
706 assert(0);
707 }
708
709 if (reg->Register.Indirect) {
710 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
711 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
712 addr = LLVMBuildLoad(bld->base.builder,
713 bld->addr[reg->Indirect.Index][swizzle],
714 "");
715 /* for indexing we want integers */
716 addr = LLVMBuildFPToSI(bld->base.builder, addr,
717 int_vec_type, "");
718 addr = LLVMBuildExtractElement(bld->base.builder,
719 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
720 "");
721 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
722 }
723
724 switch( reg->Register.File ) {
725 case TGSI_FILE_OUTPUT:
726 lp_exec_mask_store(&bld->exec_mask, pred, value,
727 bld->outputs[reg->Register.Index][chan_index]);
728 break;
729
730 case TGSI_FILE_TEMPORARY: {
731 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
732 chan_index,
733 reg->Register.Indirect,
734 addr);
735 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
736 break;
737 }
738
739 case TGSI_FILE_ADDRESS:
740 lp_exec_mask_store(&bld->exec_mask, pred, value,
741 bld->addr[reg->Indirect.Index][chan_index]);
742 break;
743
744 case TGSI_FILE_PREDICATE:
745 lp_exec_mask_store(&bld->exec_mask, pred, value,
746 bld->preds[index][chan_index]);
747 break;
748
749 default:
750 assert( 0 );
751 }
752 }
753
/**
 * High-level instruction translators.
 */

/* How a texture instruction modifies the basic sample operation. */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* TXP: divide coords by src0.w */
   TEX_MODIFIER_LOD_BIAS,       /* TXB: bias from src0.w */
   TEX_MODIFIER_EXPLICIT_LOD,   /* TXL: lod from src0.w */
   TEX_MODIFIER_EXPLICIT_DERIV  /* TXD: derivatives from src1/src2 */
};
766
/**
 * Emit code for a texture sampling instruction (TEX/TXP/TXB/TXL/TXD).
 *
 * Fetches the coordinates, derivatives and lod arguments as dictated
 * by `modifier` and delegates the actual sampling to bld->sampler.
 * \param texel  receives the four resulting texel channels
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   /* no sampler generator: return undefs rather than crashing */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinates consumed, per texture target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias and explicit lod both come from src0.w */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projective texturing: multiply coords by 1/src0.w */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: explicit derivatives in src1/src2, sampler unit in src3 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives from the 2x2 quad layout */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_ddx( bld, coords[i] );
         ddy[i] = emit_ddy( bld, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
863
/**
 * Kill fragment if any of the src register values are negative.
 * Builds one comparison per distinct (unswizzled) source channel and
 * ANDs them into the fragment mask.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel "keep" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask)
      lp_build_mask_update(bld->mask, mask);
}
913
/**
 * Predicated fragment kill.
 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 */
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst)
{
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      /* kill exactly the channels that are currently executing */
      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      /* no active control flow: kill everything */
      mask = bld->base.zero;
   }

   lp_build_mask_update(bld->mask, mask);
}
939
940 static void
941 emit_declaration(
942 struct lp_build_tgsi_soa_context *bld,
943 const struct tgsi_full_declaration *decl)
944 {
945 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
946
947 unsigned first = decl->Range.First;
948 unsigned last = decl->Range.Last;
949 unsigned idx, i;
950
951 for (idx = first; idx <= last; ++idx) {
952 switch (decl->Declaration.File) {
953 case TGSI_FILE_TEMPORARY:
954 assert(idx < LP_MAX_TGSI_TEMPS);
955 if (bld->has_indirect_addressing) {
956 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
957 last*4 + 4, 0);
958 bld->temps_array = lp_build_array_alloca(bld->base.builder,
959 vec_type, array_size, "");
960 } else {
961 for (i = 0; i < NUM_CHANNELS; i++)
962 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
963 vec_type, "");
964 }
965 break;
966
967 case TGSI_FILE_OUTPUT:
968 for (i = 0; i < NUM_CHANNELS; i++)
969 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
970 vec_type, "");
971 break;
972
973 case TGSI_FILE_ADDRESS:
974 assert(idx < LP_MAX_TGSI_ADDRS);
975 for (i = 0; i < NUM_CHANNELS; i++)
976 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
977 vec_type, "");
978 break;
979
980 case TGSI_FILE_PREDICATE:
981 assert(idx < LP_MAX_TGSI_PREDS);
982 for (i = 0; i < NUM_CHANNELS; i++)
983 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
984 vec_type, "");
985 break;
986
987 default:
988 /* don't need to declare other vars */
989 break;
990 }
991 }
992 }
993
994
995 /**
996 * Emit LLVM for one TGSI instruction.
997 * \param return TRUE for success, FALSE otherwise
998 */
999 static boolean
1000 emit_instruction(
1001 struct lp_build_tgsi_soa_context *bld,
1002 const struct tgsi_full_instruction *inst,
1003 const struct tgsi_opcode_info *info,
1004 int *pc)
1005 {
1006 unsigned chan_index;
1007 LLVMValueRef src0, src1, src2;
1008 LLVMValueRef tmp0, tmp1, tmp2;
1009 LLVMValueRef tmp3 = NULL;
1010 LLVMValueRef tmp4 = NULL;
1011 LLVMValueRef tmp5 = NULL;
1012 LLVMValueRef tmp6 = NULL;
1013 LLVMValueRef tmp7 = NULL;
1014 LLVMValueRef res;
1015 LLVMValueRef dst0[NUM_CHANNELS];
1016
1017 /*
1018 * Stores and write masks are handled in a general fashion after the long
1019 * instruction opcode switch statement.
1020 *
1021 * Although not stricitly necessary, we avoid generating instructions for
1022 * channels which won't be stored, in cases where's that easy. For some
1023 * complex instructions, like texture sampling, it is more convenient to
1024 * assume a full writemask and then let LLVM optimization passes eliminate
1025 * redundant code.
1026 */
1027
1028 (*pc)++;
1029
1030 assert(info->num_dst <= 1);
1031 if (info->num_dst) {
1032 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1033 dst0[chan_index] = bld->base.undef;
1034 }
1035 }
1036
1037 switch (inst->Instruction.Opcode) {
1038 case TGSI_OPCODE_ARL:
1039 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1040 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1041 tmp0 = lp_build_floor(&bld->base, tmp0);
1042 dst0[chan_index] = tmp0;
1043 }
1044 break;
1045
1046 case TGSI_OPCODE_MOV:
1047 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1048 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1049 }
1050 break;
1051
1052 case TGSI_OPCODE_LIT:
1053 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1054 dst0[CHAN_X] = bld->base.one;
1055 }
1056 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1057 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1058 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1059 }
1060 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1061 /* XMM[1] = SrcReg[0].yyyy */
1062 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1063 /* XMM[1] = max(XMM[1], 0) */
1064 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1065 /* XMM[2] = SrcReg[0].wwww */
1066 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1067 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1068 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1069 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1070 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1071 }
1072 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1073 dst0[CHAN_W] = bld->base.one;
1074 }
1075 break;
1076
1077 case TGSI_OPCODE_RCP:
1078 /* TGSI_OPCODE_RECIP */
1079 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1080 res = lp_build_rcp(&bld->base, src0);
1081 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1082 dst0[chan_index] = res;
1083 }
1084 break;
1085
1086 case TGSI_OPCODE_RSQ:
1087 /* TGSI_OPCODE_RECIPSQRT */
1088 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1089 src0 = lp_build_abs(&bld->base, src0);
1090 res = lp_build_rsqrt(&bld->base, src0);
1091 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1092 dst0[chan_index] = res;
1093 }
1094 break;
1095
1096 case TGSI_OPCODE_EXP:
1097 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1098 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1099 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1100 LLVMValueRef *p_exp2_int_part = NULL;
1101 LLVMValueRef *p_frac_part = NULL;
1102 LLVMValueRef *p_exp2 = NULL;
1103
1104 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1105
1106 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1107 p_exp2_int_part = &tmp0;
1108 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1109 p_frac_part = &tmp1;
1110 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1111 p_exp2 = &tmp2;
1112
1113 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1114
1115 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1116 dst0[CHAN_X] = tmp0;
1117 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1118 dst0[CHAN_Y] = tmp1;
1119 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1120 dst0[CHAN_Z] = tmp2;
1121 }
1122 /* dst.w = 1.0 */
1123 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1124 dst0[CHAN_W] = bld->base.one;
1125 }
1126 break;
1127
1128 case TGSI_OPCODE_LOG:
1129 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1130 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1131 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1132 LLVMValueRef *p_floor_log2 = NULL;
1133 LLVMValueRef *p_exp = NULL;
1134 LLVMValueRef *p_log2 = NULL;
1135
1136 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1137 src0 = lp_build_abs( &bld->base, src0 );
1138
1139 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1140 p_floor_log2 = &tmp0;
1141 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1142 p_exp = &tmp1;
1143 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1144 p_log2 = &tmp2;
1145
1146 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1147
1148 /* dst.x = floor(lg2(abs(src.x))) */
1149 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1150 dst0[CHAN_X] = tmp0;
1151 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1152 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1153 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1154 }
1155 /* dst.z = lg2(abs(src.x)) */
1156 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1157 dst0[CHAN_Z] = tmp2;
1158 }
1159 /* dst.w = 1.0 */
1160 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1161 dst0[CHAN_W] = bld->base.one;
1162 }
1163 break;
1164
1165 case TGSI_OPCODE_MUL:
1166 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1167 src0 = emit_fetch( bld, inst, 0, chan_index );
1168 src1 = emit_fetch( bld, inst, 1, chan_index );
1169 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1170 }
1171 break;
1172
1173 case TGSI_OPCODE_ADD:
1174 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1175 src0 = emit_fetch( bld, inst, 0, chan_index );
1176 src1 = emit_fetch( bld, inst, 1, chan_index );
1177 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1178 }
1179 break;
1180
1181 case TGSI_OPCODE_DP3:
1182 /* TGSI_OPCODE_DOT3 */
1183 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1184 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1185 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1186 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1187 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1188 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1189 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1190 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1191 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1192 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1193 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1194 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1195 dst0[chan_index] = tmp0;
1196 }
1197 break;
1198
1199 case TGSI_OPCODE_DP4:
1200 /* TGSI_OPCODE_DOT4 */
1201 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1202 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1203 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1204 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1205 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1206 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1207 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1208 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1209 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1210 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1211 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1212 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1213 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1214 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1215 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1216 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1217 dst0[chan_index] = tmp0;
1218 }
1219 break;
1220
1221 case TGSI_OPCODE_DST:
1222 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1223 dst0[CHAN_X] = bld->base.one;
1224 }
1225 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1226 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1227 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1228 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1229 }
1230 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1231 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1232 }
1233 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1234 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1235 }
1236 break;
1237
1238 case TGSI_OPCODE_MIN:
1239 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1240 src0 = emit_fetch( bld, inst, 0, chan_index );
1241 src1 = emit_fetch( bld, inst, 1, chan_index );
1242 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1243 }
1244 break;
1245
1246 case TGSI_OPCODE_MAX:
1247 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1248 src0 = emit_fetch( bld, inst, 0, chan_index );
1249 src1 = emit_fetch( bld, inst, 1, chan_index );
1250 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1251 }
1252 break;
1253
1254 case TGSI_OPCODE_SLT:
1255 /* TGSI_OPCODE_SETLT */
1256 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1257 src0 = emit_fetch( bld, inst, 0, chan_index );
1258 src1 = emit_fetch( bld, inst, 1, chan_index );
1259 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1260 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1261 }
1262 break;
1263
1264 case TGSI_OPCODE_SGE:
1265 /* TGSI_OPCODE_SETGE */
1266 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1267 src0 = emit_fetch( bld, inst, 0, chan_index );
1268 src1 = emit_fetch( bld, inst, 1, chan_index );
1269 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1270 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1271 }
1272 break;
1273
1274 case TGSI_OPCODE_MAD:
1275 /* TGSI_OPCODE_MADD */
1276 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1277 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1278 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1279 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1280 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1281 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1282 dst0[chan_index] = tmp0;
1283 }
1284 break;
1285
1286 case TGSI_OPCODE_SUB:
1287 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1288 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1289 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1290 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1291 }
1292 break;
1293
1294 case TGSI_OPCODE_LRP:
1295 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1296 src0 = emit_fetch( bld, inst, 0, chan_index );
1297 src1 = emit_fetch( bld, inst, 1, chan_index );
1298 src2 = emit_fetch( bld, inst, 2, chan_index );
1299 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1300 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1301 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1302 }
1303 break;
1304
1305 case TGSI_OPCODE_CND:
1306 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1307 src0 = emit_fetch( bld, inst, 0, chan_index );
1308 src1 = emit_fetch( bld, inst, 1, chan_index );
1309 src2 = emit_fetch( bld, inst, 2, chan_index );
1310 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1311 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1312 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1313 }
1314 break;
1315
1316 case TGSI_OPCODE_DP2A:
1317 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1318 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1319 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1320 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1321 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1322 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1323 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1324 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1325 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1326 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1327 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1328 }
1329 break;
1330
1331 case TGSI_OPCODE_FRC:
1332 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1333 src0 = emit_fetch( bld, inst, 0, chan_index );
1334 tmp0 = lp_build_floor(&bld->base, src0);
1335 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1336 dst0[chan_index] = tmp0;
1337 }
1338 break;
1339
1340 case TGSI_OPCODE_CLAMP:
1341 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1342 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1343 src1 = emit_fetch( bld, inst, 1, chan_index );
1344 src2 = emit_fetch( bld, inst, 2, chan_index );
1345 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1346 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1347 dst0[chan_index] = tmp0;
1348 }
1349 break;
1350
1351 case TGSI_OPCODE_FLR:
1352 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1353 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1354 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1355 }
1356 break;
1357
1358 case TGSI_OPCODE_ROUND:
1359 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1360 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1361 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1362 }
1363 break;
1364
1365 case TGSI_OPCODE_EX2: {
1366 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1367 tmp0 = lp_build_exp2( &bld->base, tmp0);
1368 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1369 dst0[chan_index] = tmp0;
1370 }
1371 break;
1372 }
1373
1374 case TGSI_OPCODE_LG2:
1375 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1376 tmp0 = lp_build_log2( &bld->base, tmp0);
1377 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1378 dst0[chan_index] = tmp0;
1379 }
1380 break;
1381
1382 case TGSI_OPCODE_POW:
1383 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1384 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1385 res = lp_build_pow( &bld->base, src0, src1 );
1386 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1387 dst0[chan_index] = res;
1388 }
1389 break;
1390
1391 case TGSI_OPCODE_XPD:
1392 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1393 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1394 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1395 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1396 }
1397 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1398 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1399 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1400 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1401 }
1402 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1403 tmp2 = tmp0;
1404 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1405 tmp5 = tmp3;
1406 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1407 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1408 dst0[CHAN_X] = tmp2;
1409 }
1410 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1411 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1412 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1413 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1414 }
1415 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1416 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1417 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1418 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1419 dst0[CHAN_Y] = tmp3;
1420 }
1421 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1422 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1423 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1424 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1425 dst0[CHAN_Z] = tmp5;
1426 }
1427 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1428 dst0[CHAN_W] = bld->base.one;
1429 }
1430 break;
1431
1432 case TGSI_OPCODE_ABS:
1433 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1434 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1435 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1436 }
1437 break;
1438
1439 case TGSI_OPCODE_RCC:
1440 /* deprecated? */
1441 assert(0);
1442 return FALSE;
1443
1444 case TGSI_OPCODE_DPH:
1445 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1446 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1447 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1448 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1449 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1450 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1451 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1452 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1453 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1454 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1455 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1456 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1457 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1458 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1459 dst0[chan_index] = tmp0;
1460 }
1461 break;
1462
1463 case TGSI_OPCODE_COS:
1464 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1465 tmp0 = lp_build_cos( &bld->base, tmp0 );
1466 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1467 dst0[chan_index] = tmp0;
1468 }
1469 break;
1470
1471 case TGSI_OPCODE_DDX:
1472 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1473 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1474 }
1475 break;
1476
1477 case TGSI_OPCODE_DDY:
1478 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1479 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1480 }
1481 break;
1482
1483 case TGSI_OPCODE_KILP:
1484 /* predicated kill */
1485 emit_kilp( bld, inst );
1486 break;
1487
1488 case TGSI_OPCODE_KIL:
1489 /* conditional kill */
1490 emit_kil( bld, inst );
1491 break;
1492
1493 case TGSI_OPCODE_PK2H:
1494 return FALSE;
1495 break;
1496
1497 case TGSI_OPCODE_PK2US:
1498 return FALSE;
1499 break;
1500
1501 case TGSI_OPCODE_PK4B:
1502 return FALSE;
1503 break;
1504
1505 case TGSI_OPCODE_PK4UB:
1506 return FALSE;
1507 break;
1508
1509 case TGSI_OPCODE_RFL:
1510 return FALSE;
1511 break;
1512
1513 case TGSI_OPCODE_SEQ:
1514 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1515 src0 = emit_fetch( bld, inst, 0, chan_index );
1516 src1 = emit_fetch( bld, inst, 1, chan_index );
1517 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1518 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1519 }
1520 break;
1521
1522 case TGSI_OPCODE_SFL:
1523 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1524 dst0[chan_index] = bld->base.zero;
1525 }
1526 break;
1527
1528 case TGSI_OPCODE_SGT:
1529 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1530 src0 = emit_fetch( bld, inst, 0, chan_index );
1531 src1 = emit_fetch( bld, inst, 1, chan_index );
1532 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1533 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1534 }
1535 break;
1536
1537 case TGSI_OPCODE_SIN:
1538 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1539 tmp0 = lp_build_sin( &bld->base, tmp0 );
1540 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1541 dst0[chan_index] = tmp0;
1542 }
1543 break;
1544
1545 case TGSI_OPCODE_SLE:
1546 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1547 src0 = emit_fetch( bld, inst, 0, chan_index );
1548 src1 = emit_fetch( bld, inst, 1, chan_index );
1549 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1550 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1551 }
1552 break;
1553
1554 case TGSI_OPCODE_SNE:
1555 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1556 src0 = emit_fetch( bld, inst, 0, chan_index );
1557 src1 = emit_fetch( bld, inst, 1, chan_index );
1558 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1559 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1560 }
1561 break;
1562
1563 case TGSI_OPCODE_STR:
1564 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1565 dst0[chan_index] = bld->base.one;
1566 }
1567 break;
1568
1569 case TGSI_OPCODE_TEX:
1570 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1571 break;
1572
1573 case TGSI_OPCODE_TXD:
1574 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1575 break;
1576
1577 case TGSI_OPCODE_UP2H:
1578 /* deprecated */
1579 assert (0);
1580 return FALSE;
1581 break;
1582
1583 case TGSI_OPCODE_UP2US:
1584 /* deprecated */
1585 assert(0);
1586 return FALSE;
1587 break;
1588
1589 case TGSI_OPCODE_UP4B:
1590 /* deprecated */
1591 assert(0);
1592 return FALSE;
1593 break;
1594
1595 case TGSI_OPCODE_UP4UB:
1596 /* deprecated */
1597 assert(0);
1598 return FALSE;
1599 break;
1600
1601 case TGSI_OPCODE_X2D:
1602 /* deprecated? */
1603 assert(0);
1604 return FALSE;
1605 break;
1606
1607 case TGSI_OPCODE_ARA:
1608 /* deprecated */
1609 assert(0);
1610 return FALSE;
1611 break;
1612
1613 case TGSI_OPCODE_ARR:
1614 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1615 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1616 tmp0 = lp_build_round(&bld->base, tmp0);
1617 dst0[chan_index] = tmp0;
1618 }
1619 break;
1620
1621 case TGSI_OPCODE_BRA:
1622 /* deprecated */
1623 assert(0);
1624 return FALSE;
1625 break;
1626
1627 case TGSI_OPCODE_CAL:
1628 lp_exec_mask_call(&bld->exec_mask,
1629 inst->Label.Label,
1630 pc);
1631
1632 break;
1633
1634 case TGSI_OPCODE_RET:
1635 lp_exec_mask_ret(&bld->exec_mask, pc);
1636 break;
1637
1638 case TGSI_OPCODE_END:
1639 *pc = -1;
1640 break;
1641
1642 case TGSI_OPCODE_SSG:
1643 /* TGSI_OPCODE_SGN */
1644 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1645 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1646 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1647 }
1648 break;
1649
1650 case TGSI_OPCODE_CMP:
1651 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1652 src0 = emit_fetch( bld, inst, 0, chan_index );
1653 src1 = emit_fetch( bld, inst, 1, chan_index );
1654 src2 = emit_fetch( bld, inst, 2, chan_index );
1655 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1656 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1657 }
1658 break;
1659
1660 case TGSI_OPCODE_SCS:
1661 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1662 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1663 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1664 }
1665 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1666 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1667 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1668 }
1669 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1670 dst0[CHAN_Z] = bld->base.zero;
1671 }
1672 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1673 dst0[CHAN_W] = bld->base.one;
1674 }
1675 break;
1676
1677 case TGSI_OPCODE_TXB:
1678 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1679 break;
1680
1681 case TGSI_OPCODE_NRM:
1682 /* fall-through */
1683 case TGSI_OPCODE_NRM4:
1684 /* 3 or 4-component normalization */
1685 {
1686 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1687
1688 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1689 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1690 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1691 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1692
1693 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1694
1695 /* xmm4 = src.x */
1696 /* xmm0 = src.x * src.x */
1697 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1698 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1699 tmp4 = tmp0;
1700 }
1701 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1702
1703 /* xmm5 = src.y */
1704 /* xmm0 = xmm0 + src.y * src.y */
1705 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1706 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1707 tmp5 = tmp1;
1708 }
1709 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1710 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1711
1712 /* xmm6 = src.z */
1713 /* xmm0 = xmm0 + src.z * src.z */
1714 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1715 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1716 tmp6 = tmp1;
1717 }
1718 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1719 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1720
1721 if (dims == 4) {
1722 /* xmm7 = src.w */
1723 /* xmm0 = xmm0 + src.w * src.w */
1724 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1725 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1726 tmp7 = tmp1;
1727 }
1728 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1729 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1730 }
1731
1732 /* xmm1 = 1 / sqrt(xmm0) */
1733 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1734
1735 /* dst.x = xmm1 * src.x */
1736 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1737 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1738 }
1739
1740 /* dst.y = xmm1 * src.y */
1741 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1742 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1743 }
1744
1745 /* dst.z = xmm1 * src.z */
1746 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1747 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1748 }
1749
1750 /* dst.w = xmm1 * src.w */
1751 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1752 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1753 }
1754 }
1755
1756 /* dst.w = 1.0 */
1757 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1758 dst0[CHAN_W] = bld->base.one;
1759 }
1760 }
1761 break;
1762
1763 case TGSI_OPCODE_DIV:
1764 /* deprecated */
1765 assert( 0 );
1766 return FALSE;
1767 break;
1768
1769 case TGSI_OPCODE_DP2:
1770 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1771 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1772 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1773 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1774 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1775 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1776 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1777 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1778 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1779 }
1780 break;
1781
1782 case TGSI_OPCODE_TXL:
1783 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1784 break;
1785
1786 case TGSI_OPCODE_TXP:
1787 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1788 break;
1789
1790 case TGSI_OPCODE_BRK:
1791 lp_exec_break(&bld->exec_mask);
1792 break;
1793
1794 case TGSI_OPCODE_IF:
1795 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1796 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1797 tmp0, bld->base.zero);
1798 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1799 break;
1800
1801 case TGSI_OPCODE_BGNLOOP:
1802 lp_exec_bgnloop(&bld->exec_mask);
1803 break;
1804
1805 case TGSI_OPCODE_BGNSUB:
1806 lp_exec_mask_bgnsub(&bld->exec_mask);
1807 break;
1808
1809 case TGSI_OPCODE_ELSE:
1810 lp_exec_mask_cond_invert(&bld->exec_mask);
1811 break;
1812
1813 case TGSI_OPCODE_ENDIF:
1814 lp_exec_mask_cond_pop(&bld->exec_mask);
1815 break;
1816
1817 case TGSI_OPCODE_ENDLOOP:
1818 lp_exec_endloop(&bld->exec_mask);
1819 break;
1820
1821 case TGSI_OPCODE_ENDSUB:
1822 lp_exec_mask_endsub(&bld->exec_mask, pc);
1823 break;
1824
1825 case TGSI_OPCODE_PUSHA:
1826 /* deprecated? */
1827 assert(0);
1828 return FALSE;
1829 break;
1830
1831 case TGSI_OPCODE_POPA:
1832 /* deprecated? */
1833 assert(0);
1834 return FALSE;
1835 break;
1836
1837 case TGSI_OPCODE_CEIL:
1838 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1839 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1840 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1841 }
1842 break;
1843
1844 case TGSI_OPCODE_I2F:
1845 /* deprecated? */
1846 assert(0);
1847 return FALSE;
1848 break;
1849
1850 case TGSI_OPCODE_NOT:
1851 /* deprecated? */
1852 assert(0);
1853 return FALSE;
1854 break;
1855
1856 case TGSI_OPCODE_TRUNC:
1857 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1858 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1859 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1860 }
1861 break;
1862
1863 case TGSI_OPCODE_SHL:
1864 /* deprecated? */
1865 assert(0);
1866 return FALSE;
1867 break;
1868
1869 case TGSI_OPCODE_ISHR:
1870 /* deprecated? */
1871 assert(0);
1872 return FALSE;
1873 break;
1874
1875 case TGSI_OPCODE_AND:
1876 /* deprecated? */
1877 assert(0);
1878 return FALSE;
1879 break;
1880
1881 case TGSI_OPCODE_OR:
1882 /* deprecated? */
1883 assert(0);
1884 return FALSE;
1885 break;
1886
1887 case TGSI_OPCODE_MOD:
1888 /* deprecated? */
1889 assert(0);
1890 return FALSE;
1891 break;
1892
1893 case TGSI_OPCODE_XOR:
1894 /* deprecated? */
1895 assert(0);
1896 return FALSE;
1897 break;
1898
1899 case TGSI_OPCODE_SAD:
1900 /* deprecated? */
1901 assert(0);
1902 return FALSE;
1903 break;
1904
1905 case TGSI_OPCODE_TXF:
1906 /* deprecated? */
1907 assert(0);
1908 return FALSE;
1909 break;
1910
1911 case TGSI_OPCODE_TXQ:
1912 /* deprecated? */
1913 assert(0);
1914 return FALSE;
1915 break;
1916
1917 case TGSI_OPCODE_CONT:
1918 lp_exec_continue(&bld->exec_mask);
1919 break;
1920
1921 case TGSI_OPCODE_EMIT:
1922 return FALSE;
1923 break;
1924
1925 case TGSI_OPCODE_ENDPRIM:
1926 return FALSE;
1927 break;
1928
1929 case TGSI_OPCODE_NOP:
1930 break;
1931
1932 default:
1933 return FALSE;
1934 }
1935
1936 if(info->num_dst) {
1937 LLVMValueRef pred[NUM_CHANNELS];
1938
1939 emit_fetch_predicate( bld, inst, pred );
1940
1941 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1942 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1943 }
1944 }
1945
1946 return TRUE;
1947 }
1948
1949
1950 void
1951 lp_build_tgsi_soa(LLVMBuilderRef builder,
1952 const struct tgsi_token *tokens,
1953 struct lp_type type,
1954 struct lp_build_mask_context *mask,
1955 LLVMValueRef consts_ptr,
1956 const LLVMValueRef *pos,
1957 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1958 LLVMValueRef (*outputs)[NUM_CHANNELS],
1959 struct lp_build_sampler_soa *sampler,
1960 const struct tgsi_shader_info *info)
1961 {
1962 struct lp_build_tgsi_soa_context bld;
1963 struct tgsi_parse_context parse;
1964 uint num_immediates = 0;
1965 uint num_instructions = 0;
1966 unsigned i;
1967 int pc = 0;
1968
1969 /* Setup build context */
1970 memset(&bld, 0, sizeof bld);
1971 lp_build_context_init(&bld.base, builder, type);
1972 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1973 bld.mask = mask;
1974 bld.pos = pos;
1975 bld.inputs = inputs;
1976 bld.outputs = outputs;
1977 bld.consts_ptr = consts_ptr;
1978 bld.sampler = sampler;
1979 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1980 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1981 bld.instructions = (struct tgsi_full_instruction *)
1982 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
1983 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1984
1985 if (!bld.instructions) {
1986 return;
1987 }
1988
1989 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1990
1991 tgsi_parse_init( &parse, tokens );
1992
1993 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1994 tgsi_parse_token( &parse );
1995
1996 switch( parse.FullToken.Token.Type ) {
1997 case TGSI_TOKEN_TYPE_DECLARATION:
1998 /* Inputs already interpolated */
1999 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2000 break;
2001
2002 case TGSI_TOKEN_TYPE_INSTRUCTION:
2003 {
2004 /* save expanded instruction */
2005 if (num_instructions == bld.max_instructions) {
2006 bld.instructions = REALLOC(bld.instructions,
2007 bld.max_instructions
2008 * sizeof(struct tgsi_full_instruction),
2009 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2010 * sizeof(struct tgsi_full_instruction));
2011 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2012 }
2013
2014 memcpy(bld.instructions + num_instructions,
2015 &parse.FullToken.FullInstruction,
2016 sizeof(bld.instructions[0]));
2017
2018 num_instructions++;
2019 }
2020
2021 break;
2022
2023 case TGSI_TOKEN_TYPE_IMMEDIATE:
2024 /* simply copy the immediate values into the next immediates[] slot */
2025 {
2026 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2027 assert(size <= 4);
2028 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2029 for( i = 0; i < size; ++i )
2030 bld.immediates[num_immediates][i] =
2031 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2032 for( i = size; i < 4; ++i )
2033 bld.immediates[num_immediates][i] = bld.base.undef;
2034 num_immediates++;
2035 }
2036 break;
2037
2038 case TGSI_TOKEN_TYPE_PROPERTY:
2039 break;
2040
2041 default:
2042 assert( 0 );
2043 }
2044 }
2045
2046 while (pc != -1) {
2047 struct tgsi_full_instruction *instr = bld.instructions + pc;
2048 const struct tgsi_opcode_info *opcode_info =
2049 tgsi_get_opcode_info(instr->Instruction.Opcode);
2050 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2051 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2052 opcode_info->mnemonic);
2053 }
2054
2055 if (0) {
2056 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2057 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2058 debug_printf("11111111111111111111111111111 \n");
2059 tgsi_dump(tokens, 0);
2060 lp_debug_dump_value(function);
2061 debug_printf("2222222222222222222222222222 \n");
2062 }
2063 tgsi_parse_free( &parse );
2064
2065 if (0) {
2066 LLVMModuleRef module = LLVMGetGlobalParent(
2067 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2068 LLVMDumpModule(module);
2069
2070 }
2071
2072 FREE( bld.instructions );
2073 }
2074