gallivm: Factor out the quad derivative code into a single place. Fix ddy.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
/* Iterate CHAN over all four vector channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Test whether channel CHAN is enabled in the first destination's writemask. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst[0]'s writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* TGSI channel indices */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Default size of the instruction buffer (see max_instructions below) */
#define LP_MAX_INSTRUCTIONS 256
81
82
/**
 * Execution mask state for structured control flow (IF/ELSE, loops,
 * subroutines).  All masks are integer vectors with all-ones meaning
 * "channel active".
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any condition/loop/call nesting is active */
   boolean has_mask;

   /* LLVM type of all the mask vectors below */
   LLVMTypeRef int_vec_type;

   /* IF/ELSE nesting: stack of saved condition masks */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* innermost loop state */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   /* alloca carrying break_mask across loop iterations */
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* subroutine calls: channels that already executed RET are cleared */
   LLVMValueRef ret_mask;
   struct {
      int pc;                 /* return address (instruction index) */
      LLVMValueRef ret_mask;  /* caller's ret_mask */
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* combined mask: cond & cont & break (& ret when inside a call) */
   LLVMValueRef exec_mask;
};
115
/**
 * Context for translating one TGSI shader to LLVM IR in SoA layout.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   /* pointer to the constant buffer (scalars, broadcast on fetch) */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* allocas, one per output register channel */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* code generator callback for texture sampling instructions */
   const struct lp_build_sampler_soa *sampler;

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];  /* allocas */
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];   /* allocas */
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];  /* allocas */

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   /* fragment live-mask, updated by KIL/KILP */
   struct lp_build_mask_context *mask;
   /* control-flow execution mask */
   struct lp_exec_mask exec_mask;

   /* buffered instruction list and its allocated size */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
146
147 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
148 {
149 mask->bld = bld;
150 mask->has_mask = FALSE;
151 mask->cond_stack_size = 0;
152 mask->loop_stack_size = 0;
153 mask->call_stack_size = 0;
154
155 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
156 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
157 LLVMConstAllOnes(mask->int_vec_type);
158 }
159
/**
 * Recompute exec_mask as the conjunction of all active mask sources
 * and refresh has_mask.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   /* inside a subroutine, channels that already returned are masked off */
   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}
188
189 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
190 LLVMValueRef val)
191 {
192 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
193 if (mask->cond_stack_size == 0) {
194 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
195 }
196 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
197 assert(LLVMTypeOf(val) == mask->int_vec_type);
198 mask->cond_mask = val;
199
200 lp_exec_mask_update(mask);
201 }
202
203 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
204 {
205 LLVMValueRef prev_mask;
206 LLVMValueRef inv_mask;
207
208 assert(mask->cond_stack_size);
209 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
210 if (mask->cond_stack_size == 1) {
211 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
212 }
213
214 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
215
216 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
217 inv_mask,
218 prev_mask, "");
219 lp_exec_mask_update(mask);
220 }
221
222 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
223 {
224 assert(mask->cond_stack_size);
225 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
226 lp_exec_mask_update(mask);
227 }
228
/**
 * BGNLOOP: push the enclosing loop's state, then open a new basic
 * block for the loop body.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* entering the outermost loop: state must be pristine */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask lives in memory so it survives across loop iterations */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* open the loop-body block and branch into it */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload break_mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
257
258 static void lp_exec_break(struct lp_exec_mask *mask)
259 {
260 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
261 mask->exec_mask,
262 "break");
263
264 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
265 mask->break_mask,
266 exec_mask, "break_full");
267
268 lp_exec_mask_update(mask);
269 }
270
271 static void lp_exec_continue(struct lp_exec_mask *mask)
272 {
273 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
274 mask->exec_mask,
275 "");
276
277 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
278 mask->cont_mask,
279 exec_mask, "");
280
281 lp_exec_mask_update(mask);
282 }
283
284
/**
 * ENDLOOP: branch back to the loop header while any channel is still
 * active, then pop the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* scalar integer wide enough to hold the entire mask vector's bits */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. at least one channel still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop again while any channel is active, otherwise fall through */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the enclosing loop's state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
330
/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * 'pred' is an optional additional per-channel predicate; may be NULL.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      /* load-select-store keeps the old destination value where pred is 0 */
      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* no mask active: plain unconditional store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}
362
363 static void lp_exec_mask_call(struct lp_exec_mask *mask,
364 int func,
365 int *pc)
366 {
367 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
368 mask->call_stack[mask->call_stack_size].pc = *pc;
369 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
370 mask->call_stack_size++;
371 *pc = func;
372 }
373
374 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
375 {
376 LLVMValueRef exec_mask;
377
378 if (mask->call_stack_size == 0) {
379 /* returning from main() */
380 *pc = -1;
381 return;
382 }
383 exec_mask = LLVMBuildNot(mask->bld->builder,
384 mask->exec_mask,
385 "ret");
386
387 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
388 mask->ret_mask,
389 exec_mask, "ret_full");
390
391 lp_exec_mask_update(mask);
392 }
393
/* BGNSUB: nothing to do -- subroutines are entered via lp_exec_mask_call. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
397
398 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
399 {
400 assert(mask->call_stack_size);
401 mask->call_stack_size--;
402 *pc = mask->call_stack[mask->call_stack_size].pc;
403 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
404 lp_exec_mask_update(mask);
405 }
406
407 static LLVMValueRef
408 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
409 unsigned index,
410 unsigned chan,
411 boolean is_indirect,
412 LLVMValueRef addr)
413 {
414 assert(chan < 4);
415 if (!bld->has_indirect_addressing) {
416 return bld->temps[index][chan];
417 } else {
418 LLVMValueRef lindex =
419 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
420 if (is_indirect)
421 lindex = lp_build_add(&bld->base, lindex, addr);
422 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
423 }
424 }
425
426 /**
427 * Register fetch.
428 */
429 static LLVMValueRef
430 emit_fetch(
431 struct lp_build_tgsi_soa_context *bld,
432 const struct tgsi_full_instruction *inst,
433 unsigned index,
434 const unsigned chan_index )
435 {
436 const struct tgsi_full_src_register *reg = &inst->Src[index];
437 const unsigned swizzle =
438 tgsi_util_get_full_src_register_swizzle(reg, chan_index);
439 LLVMValueRef res;
440 LLVMValueRef addr = NULL;
441
442 if (swizzle > 3) {
443 assert(0 && "invalid swizzle in emit_fetch()");
444 return bld->base.undef;
445 }
446
447 if (reg->Register.Indirect) {
448 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
449 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
450 addr = LLVMBuildLoad(bld->base.builder,
451 bld->addr[reg->Indirect.Index][swizzle],
452 "");
453 /* for indexing we want integers */
454 addr = LLVMBuildFPToSI(bld->base.builder, addr,
455 int_vec_type, "");
456 addr = LLVMBuildExtractElement(bld->base.builder,
457 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
458 "");
459 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
460 }
461
462 switch (reg->Register.File) {
463 case TGSI_FILE_CONSTANT:
464 {
465 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
466 reg->Register.Index*4 + swizzle, 0);
467 LLVMValueRef scalar, scalar_ptr;
468
469 if (reg->Register.Indirect) {
470 /*lp_build_printf(bld->base.builder,
471 "\taddr = %d\n", addr);*/
472 index = lp_build_add(&bld->base, index, addr);
473 }
474 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
475 &index, 1, "");
476 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
477
478 res = lp_build_broadcast_scalar(&bld->base, scalar);
479 }
480 break;
481
482 case TGSI_FILE_IMMEDIATE:
483 res = bld->immediates[reg->Register.Index][swizzle];
484 assert(res);
485 break;
486
487 case TGSI_FILE_INPUT:
488 res = bld->inputs[reg->Register.Index][swizzle];
489 assert(res);
490 break;
491
492 case TGSI_FILE_TEMPORARY:
493 {
494 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
495 swizzle,
496 reg->Register.Indirect,
497 addr);
498 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
499 if(!res)
500 return bld->base.undef;
501 }
502 break;
503
504 default:
505 assert(0 && "invalid src register in emit_fetch()");
506 return bld->base.undef;
507 }
508
509 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
510 case TGSI_UTIL_SIGN_CLEAR:
511 res = lp_build_abs( &bld->base, res );
512 break;
513
514 case TGSI_UTIL_SIGN_SET:
515 /* TODO: Use bitwese OR for floating point */
516 res = lp_build_abs( &bld->base, res );
517 res = LLVMBuildNeg( bld->base.builder, res, "" );
518 break;
519
520 case TGSI_UTIL_SIGN_TOGGLE:
521 res = LLVMBuildNeg( bld->base.builder, res, "" );
522 break;
523
524 case TGSI_UTIL_SIGN_KEEP:
525 break;
526 }
527
528 return res;
529 }
530
531
532 /**
533 * Register fetch with derivatives.
534 */
535 static void
536 emit_fetch_deriv(
537 struct lp_build_tgsi_soa_context *bld,
538 const struct tgsi_full_instruction *inst,
539 unsigned index,
540 const unsigned chan_index,
541 LLVMValueRef *res,
542 LLVMValueRef *ddx,
543 LLVMValueRef *ddy)
544 {
545 LLVMValueRef src;
546
547 src = emit_fetch(bld, inst, index, chan_index);
548
549 if(res)
550 *res = src;
551
552 /* TODO: use interpolation coeffs for inputs */
553
554 if(ddx)
555 *ddx = lp_build_ddx(&bld->base, src);
556
557 if(ddy)
558 *ddy = lp_build_ddy(&bld->base, src);
559 }
560
561
/**
 * Fetch the per-channel predicate masks for a predicated instruction.
 *
 * On return, pred[chan] is either NULL (instruction not predicated) or
 * an integer mask vector: ~0 where the predicate passes, 0 where it fails.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* not predicated: no masks */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
627
628
629 /**
630 * Register store.
631 */
632 static void
633 emit_store(
634 struct lp_build_tgsi_soa_context *bld,
635 const struct tgsi_full_instruction *inst,
636 unsigned index,
637 unsigned chan_index,
638 LLVMValueRef pred,
639 LLVMValueRef value)
640 {
641 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
642 LLVMValueRef addr = NULL;
643
644 switch( inst->Instruction.Saturate ) {
645 case TGSI_SAT_NONE:
646 break;
647
648 case TGSI_SAT_ZERO_ONE:
649 value = lp_build_max(&bld->base, value, bld->base.zero);
650 value = lp_build_min(&bld->base, value, bld->base.one);
651 break;
652
653 case TGSI_SAT_MINUS_PLUS_ONE:
654 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
655 value = lp_build_min(&bld->base, value, bld->base.one);
656 break;
657
658 default:
659 assert(0);
660 }
661
662 if (reg->Register.Indirect) {
663 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
664 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
665 addr = LLVMBuildLoad(bld->base.builder,
666 bld->addr[reg->Indirect.Index][swizzle],
667 "");
668 /* for indexing we want integers */
669 addr = LLVMBuildFPToSI(bld->base.builder, addr,
670 int_vec_type, "");
671 addr = LLVMBuildExtractElement(bld->base.builder,
672 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
673 "");
674 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
675 }
676
677 switch( reg->Register.File ) {
678 case TGSI_FILE_OUTPUT:
679 lp_exec_mask_store(&bld->exec_mask, pred, value,
680 bld->outputs[reg->Register.Index][chan_index]);
681 break;
682
683 case TGSI_FILE_TEMPORARY: {
684 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
685 chan_index,
686 reg->Register.Indirect,
687 addr);
688 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
689 break;
690 }
691
692 case TGSI_FILE_ADDRESS:
693 lp_exec_mask_store(&bld->exec_mask, pred, value,
694 bld->addr[reg->Indirect.Index][chan_index]);
695 break;
696
697 case TGSI_FILE_PREDICATE:
698 lp_exec_mask_store(&bld->exec_mask, pred, value,
699 bld->preds[index][chan_index]);
700 break;
701
702 default:
703 assert( 0 );
704 }
705 }
706
707
/**
 * High-level instruction translators.
 */

/* How a texture instruction derives its LOD / derivatives (see emit_tex) */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* coords divided by src0.w */
   TEX_MODIFIER_LOD_BIAS,       /* src0.w supplies an LOD bias */
   TEX_MODIFIER_EXPLICIT_LOD,   /* src0.w supplies the LOD */
   TEX_MODIFIER_EXPLICIT_DERIV  /* derivatives supplied in src1/src2 */
};
719
/**
 * Generate code to sample a texture, writing the four resulting texel
 * channels to texel[0..3].
 *
 * \param modifier  how coordinates/LOD are interpreted (see tex_modifier)
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator: return undefs rather than crash */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* coordinate count depends on the texture target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias or explicit LOD comes in src0.w */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projective texturing: multiply coords by 1/src0.w */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      /* derivatives given explicitly: ddx in src1, ddy in src2,
       * sampler unit in src3 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* compute derivatives from the coordinates; unit is in src1 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   /* delegate the actual sampling to the sampler code generator */
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
816
817
/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel "keep fragment" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   /* clear the live-mask bits of any killed channels */
   if(mask)
      lp_build_mask_update(bld->mask, mask);
}
866
867
868 /**
869 * Predicated fragment kill.
870 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
871 * The only predication is the execution mask which will apply if
872 * we're inside a loop or conditional.
873 */
874 static void
875 emit_kilp(struct lp_build_tgsi_soa_context *bld,
876 const struct tgsi_full_instruction *inst)
877 {
878 LLVMValueRef mask;
879
880 /* For those channels which are "alive", disable fragment shader
881 * execution.
882 */
883 if (bld->exec_mask.has_mask) {
884 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
885 }
886 else {
887 mask = bld->base.zero;
888 }
889
890 lp_build_mask_update(bld->mask, mask);
891 }
892
893 static void
894 emit_declaration(
895 struct lp_build_tgsi_soa_context *bld,
896 const struct tgsi_full_declaration *decl)
897 {
898 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
899
900 unsigned first = decl->Range.First;
901 unsigned last = decl->Range.Last;
902 unsigned idx, i;
903
904 for (idx = first; idx <= last; ++idx) {
905 switch (decl->Declaration.File) {
906 case TGSI_FILE_TEMPORARY:
907 assert(idx < LP_MAX_TGSI_TEMPS);
908 if (bld->has_indirect_addressing) {
909 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
910 last*4 + 4, 0);
911 bld->temps_array = lp_build_array_alloca(bld->base.builder,
912 vec_type, array_size, "");
913 } else {
914 for (i = 0; i < NUM_CHANNELS; i++)
915 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
916 vec_type, "");
917 }
918 break;
919
920 case TGSI_FILE_OUTPUT:
921 for (i = 0; i < NUM_CHANNELS; i++)
922 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
923 vec_type, "");
924 break;
925
926 case TGSI_FILE_ADDRESS:
927 assert(idx < LP_MAX_TGSI_ADDRS);
928 for (i = 0; i < NUM_CHANNELS; i++)
929 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
930 vec_type, "");
931 break;
932
933 case TGSI_FILE_PREDICATE:
934 assert(idx < LP_MAX_TGSI_PREDS);
935 for (i = 0; i < NUM_CHANNELS; i++)
936 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
937 vec_type, "");
938 break;
939
940 default:
941 /* don't need to declare other vars */
942 break;
943 }
944 }
945 }
946
947
948 /**
949 * Emit LLVM for one TGSI instruction.
950 * \param return TRUE for success, FALSE otherwise
951 */
952 static boolean
953 emit_instruction(
954 struct lp_build_tgsi_soa_context *bld,
955 const struct tgsi_full_instruction *inst,
956 const struct tgsi_opcode_info *info,
957 int *pc)
958 {
959 unsigned chan_index;
960 LLVMValueRef src0, src1, src2;
961 LLVMValueRef tmp0, tmp1, tmp2;
962 LLVMValueRef tmp3 = NULL;
963 LLVMValueRef tmp4 = NULL;
964 LLVMValueRef tmp5 = NULL;
965 LLVMValueRef tmp6 = NULL;
966 LLVMValueRef tmp7 = NULL;
967 LLVMValueRef res;
968 LLVMValueRef dst0[NUM_CHANNELS];
969
970 /*
971 * Stores and write masks are handled in a general fashion after the long
972 * instruction opcode switch statement.
973 *
974 * Although not stricitly necessary, we avoid generating instructions for
975 * channels which won't be stored, in cases where's that easy. For some
976 * complex instructions, like texture sampling, it is more convenient to
977 * assume a full writemask and then let LLVM optimization passes eliminate
978 * redundant code.
979 */
980
981 (*pc)++;
982
983 assert(info->num_dst <= 1);
984 if (info->num_dst) {
985 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
986 dst0[chan_index] = bld->base.undef;
987 }
988 }
989
990 switch (inst->Instruction.Opcode) {
991 case TGSI_OPCODE_ARL:
992 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
993 tmp0 = emit_fetch( bld, inst, 0, chan_index );
994 tmp0 = lp_build_floor(&bld->base, tmp0);
995 dst0[chan_index] = tmp0;
996 }
997 break;
998
999 case TGSI_OPCODE_MOV:
1000 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1001 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1002 }
1003 break;
1004
1005 case TGSI_OPCODE_LIT:
1006 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1007 dst0[CHAN_X] = bld->base.one;
1008 }
1009 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1010 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1011 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1012 }
1013 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1014 /* XMM[1] = SrcReg[0].yyyy */
1015 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1016 /* XMM[1] = max(XMM[1], 0) */
1017 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1018 /* XMM[2] = SrcReg[0].wwww */
1019 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1020 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1021 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1022 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1023 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1024 }
1025 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1026 dst0[CHAN_W] = bld->base.one;
1027 }
1028 break;
1029
1030 case TGSI_OPCODE_RCP:
1031 /* TGSI_OPCODE_RECIP */
1032 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1033 res = lp_build_rcp(&bld->base, src0);
1034 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1035 dst0[chan_index] = res;
1036 }
1037 break;
1038
1039 case TGSI_OPCODE_RSQ:
1040 /* TGSI_OPCODE_RECIPSQRT */
1041 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1042 src0 = lp_build_abs(&bld->base, src0);
1043 res = lp_build_rsqrt(&bld->base, src0);
1044 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1045 dst0[chan_index] = res;
1046 }
1047 break;
1048
1049 case TGSI_OPCODE_EXP:
1050 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1051 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1052 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1053 LLVMValueRef *p_exp2_int_part = NULL;
1054 LLVMValueRef *p_frac_part = NULL;
1055 LLVMValueRef *p_exp2 = NULL;
1056
1057 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1058
1059 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1060 p_exp2_int_part = &tmp0;
1061 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1062 p_frac_part = &tmp1;
1063 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1064 p_exp2 = &tmp2;
1065
1066 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1067
1068 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1069 dst0[CHAN_X] = tmp0;
1070 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1071 dst0[CHAN_Y] = tmp1;
1072 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1073 dst0[CHAN_Z] = tmp2;
1074 }
1075 /* dst.w = 1.0 */
1076 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1077 dst0[CHAN_W] = bld->base.one;
1078 }
1079 break;
1080
1081 case TGSI_OPCODE_LOG:
1082 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1083 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1084 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1085 LLVMValueRef *p_floor_log2 = NULL;
1086 LLVMValueRef *p_exp = NULL;
1087 LLVMValueRef *p_log2 = NULL;
1088
1089 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1090 src0 = lp_build_abs( &bld->base, src0 );
1091
1092 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1093 p_floor_log2 = &tmp0;
1094 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1095 p_exp = &tmp1;
1096 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1097 p_log2 = &tmp2;
1098
1099 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1100
1101 /* dst.x = floor(lg2(abs(src.x))) */
1102 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1103 dst0[CHAN_X] = tmp0;
1104 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1105 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1106 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1107 }
1108 /* dst.z = lg2(abs(src.x)) */
1109 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1110 dst0[CHAN_Z] = tmp2;
1111 }
1112 /* dst.w = 1.0 */
1113 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1114 dst0[CHAN_W] = bld->base.one;
1115 }
1116 break;
1117
1118 case TGSI_OPCODE_MUL:
1119 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1120 src0 = emit_fetch( bld, inst, 0, chan_index );
1121 src1 = emit_fetch( bld, inst, 1, chan_index );
1122 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1123 }
1124 break;
1125
1126 case TGSI_OPCODE_ADD:
1127 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1128 src0 = emit_fetch( bld, inst, 0, chan_index );
1129 src1 = emit_fetch( bld, inst, 1, chan_index );
1130 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1131 }
1132 break;
1133
1134 case TGSI_OPCODE_DP3:
1135 /* TGSI_OPCODE_DOT3 */
1136 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1137 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1138 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1139 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1140 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1141 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1142 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1143 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1144 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1145 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1146 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1147 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1148 dst0[chan_index] = tmp0;
1149 }
1150 break;
1151
1152 case TGSI_OPCODE_DP4:
1153 /* TGSI_OPCODE_DOT4 */
1154 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1155 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1156 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1157 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1158 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1159 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1160 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1161 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1162 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1163 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1164 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1165 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1166 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1167 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1168 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1169 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1170 dst0[chan_index] = tmp0;
1171 }
1172 break;
1173
1174 case TGSI_OPCODE_DST:
1175 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1176 dst0[CHAN_X] = bld->base.one;
1177 }
1178 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1179 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1180 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1181 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1182 }
1183 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1184 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1185 }
1186 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1187 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1188 }
1189 break;
1190
1191 case TGSI_OPCODE_MIN:
1192 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1193 src0 = emit_fetch( bld, inst, 0, chan_index );
1194 src1 = emit_fetch( bld, inst, 1, chan_index );
1195 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1196 }
1197 break;
1198
1199 case TGSI_OPCODE_MAX:
1200 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1201 src0 = emit_fetch( bld, inst, 0, chan_index );
1202 src1 = emit_fetch( bld, inst, 1, chan_index );
1203 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1204 }
1205 break;
1206
1207 case TGSI_OPCODE_SLT:
1208 /* TGSI_OPCODE_SETLT */
1209 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1210 src0 = emit_fetch( bld, inst, 0, chan_index );
1211 src1 = emit_fetch( bld, inst, 1, chan_index );
1212 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1213 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1214 }
1215 break;
1216
1217 case TGSI_OPCODE_SGE:
1218 /* TGSI_OPCODE_SETGE */
1219 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1220 src0 = emit_fetch( bld, inst, 0, chan_index );
1221 src1 = emit_fetch( bld, inst, 1, chan_index );
1222 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1223 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1224 }
1225 break;
1226
1227 case TGSI_OPCODE_MAD:
1228 /* TGSI_OPCODE_MADD */
1229 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1230 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1231 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1232 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1233 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1234 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1235 dst0[chan_index] = tmp0;
1236 }
1237 break;
1238
1239 case TGSI_OPCODE_SUB:
1240 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1241 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1242 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1243 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1244 }
1245 break;
1246
1247 case TGSI_OPCODE_LRP:
1248 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1249 src0 = emit_fetch( bld, inst, 0, chan_index );
1250 src1 = emit_fetch( bld, inst, 1, chan_index );
1251 src2 = emit_fetch( bld, inst, 2, chan_index );
1252 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1253 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1254 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1255 }
1256 break;
1257
1258 case TGSI_OPCODE_CND:
1259 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1260 src0 = emit_fetch( bld, inst, 0, chan_index );
1261 src1 = emit_fetch( bld, inst, 1, chan_index );
1262 src2 = emit_fetch( bld, inst, 2, chan_index );
1263 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1264 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1265 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1266 }
1267 break;
1268
1269 case TGSI_OPCODE_DP2A:
1270 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1271 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1272 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1273 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1274 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1275 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1276 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1277 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1278 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1279 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1280 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1281 }
1282 break;
1283
1284 case TGSI_OPCODE_FRC:
1285 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1286 src0 = emit_fetch( bld, inst, 0, chan_index );
1287 tmp0 = lp_build_floor(&bld->base, src0);
1288 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1289 dst0[chan_index] = tmp0;
1290 }
1291 break;
1292
1293 case TGSI_OPCODE_CLAMP:
1294 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1295 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1296 src1 = emit_fetch( bld, inst, 1, chan_index );
1297 src2 = emit_fetch( bld, inst, 2, chan_index );
1298 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1299 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1300 dst0[chan_index] = tmp0;
1301 }
1302 break;
1303
1304 case TGSI_OPCODE_FLR:
1305 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1306 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1307 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1308 }
1309 break;
1310
1311 case TGSI_OPCODE_ROUND:
1312 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1313 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1314 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1315 }
1316 break;
1317
1318 case TGSI_OPCODE_EX2: {
1319 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1320 tmp0 = lp_build_exp2( &bld->base, tmp0);
1321 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1322 dst0[chan_index] = tmp0;
1323 }
1324 break;
1325 }
1326
1327 case TGSI_OPCODE_LG2:
1328 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1329 tmp0 = lp_build_log2( &bld->base, tmp0);
1330 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1331 dst0[chan_index] = tmp0;
1332 }
1333 break;
1334
1335 case TGSI_OPCODE_POW:
1336 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1337 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1338 res = lp_build_pow( &bld->base, src0, src1 );
1339 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1340 dst0[chan_index] = res;
1341 }
1342 break;
1343
1344 case TGSI_OPCODE_XPD:
1345 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1346 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1347 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1348 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1349 }
1350 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1351 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1352 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1353 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1354 }
1355 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1356 tmp2 = tmp0;
1357 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1358 tmp5 = tmp3;
1359 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1360 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1361 dst0[CHAN_X] = tmp2;
1362 }
1363 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1364 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1365 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1366 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1367 }
1368 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1369 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1370 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1371 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1372 dst0[CHAN_Y] = tmp3;
1373 }
1374 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1375 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1376 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1377 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1378 dst0[CHAN_Z] = tmp5;
1379 }
1380 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1381 dst0[CHAN_W] = bld->base.one;
1382 }
1383 break;
1384
1385 case TGSI_OPCODE_ABS:
1386 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1387 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1388 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1389 }
1390 break;
1391
1392 case TGSI_OPCODE_RCC:
1393 /* deprecated? */
1394 assert(0);
1395 return FALSE;
1396
1397 case TGSI_OPCODE_DPH:
1398 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1399 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1400 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1401 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1402 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1403 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1404 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1405 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1406 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1407 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1408 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1409 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1410 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1411 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1412 dst0[chan_index] = tmp0;
1413 }
1414 break;
1415
1416 case TGSI_OPCODE_COS:
1417 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1418 tmp0 = lp_build_cos( &bld->base, tmp0 );
1419 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1420 dst0[chan_index] = tmp0;
1421 }
1422 break;
1423
1424 case TGSI_OPCODE_DDX:
1425 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1426 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1427 }
1428 break;
1429
1430 case TGSI_OPCODE_DDY:
1431 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1432 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1433 }
1434 break;
1435
1436 case TGSI_OPCODE_KILP:
1437 /* predicated kill */
1438 emit_kilp( bld, inst );
1439 break;
1440
1441 case TGSI_OPCODE_KIL:
1442 /* conditional kill */
1443 emit_kil( bld, inst );
1444 break;
1445
1446 case TGSI_OPCODE_PK2H:
1447 return FALSE;
1448 break;
1449
1450 case TGSI_OPCODE_PK2US:
1451 return FALSE;
1452 break;
1453
1454 case TGSI_OPCODE_PK4B:
1455 return FALSE;
1456 break;
1457
1458 case TGSI_OPCODE_PK4UB:
1459 return FALSE;
1460 break;
1461
1462 case TGSI_OPCODE_RFL:
1463 return FALSE;
1464 break;
1465
1466 case TGSI_OPCODE_SEQ:
1467 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1468 src0 = emit_fetch( bld, inst, 0, chan_index );
1469 src1 = emit_fetch( bld, inst, 1, chan_index );
1470 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1471 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1472 }
1473 break;
1474
1475 case TGSI_OPCODE_SFL:
1476 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1477 dst0[chan_index] = bld->base.zero;
1478 }
1479 break;
1480
1481 case TGSI_OPCODE_SGT:
1482 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1483 src0 = emit_fetch( bld, inst, 0, chan_index );
1484 src1 = emit_fetch( bld, inst, 1, chan_index );
1485 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1486 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1487 }
1488 break;
1489
1490 case TGSI_OPCODE_SIN:
1491 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1492 tmp0 = lp_build_sin( &bld->base, tmp0 );
1493 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1494 dst0[chan_index] = tmp0;
1495 }
1496 break;
1497
1498 case TGSI_OPCODE_SLE:
1499 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1500 src0 = emit_fetch( bld, inst, 0, chan_index );
1501 src1 = emit_fetch( bld, inst, 1, chan_index );
1502 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1503 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1504 }
1505 break;
1506
1507 case TGSI_OPCODE_SNE:
1508 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1509 src0 = emit_fetch( bld, inst, 0, chan_index );
1510 src1 = emit_fetch( bld, inst, 1, chan_index );
1511 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1512 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1513 }
1514 break;
1515
1516 case TGSI_OPCODE_STR:
1517 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1518 dst0[chan_index] = bld->base.one;
1519 }
1520 break;
1521
1522 case TGSI_OPCODE_TEX:
1523 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1524 break;
1525
1526 case TGSI_OPCODE_TXD:
1527 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1528 break;
1529
1530 case TGSI_OPCODE_UP2H:
1531 /* deprecated */
1532 assert (0);
1533 return FALSE;
1534 break;
1535
1536 case TGSI_OPCODE_UP2US:
1537 /* deprecated */
1538 assert(0);
1539 return FALSE;
1540 break;
1541
1542 case TGSI_OPCODE_UP4B:
1543 /* deprecated */
1544 assert(0);
1545 return FALSE;
1546 break;
1547
1548 case TGSI_OPCODE_UP4UB:
1549 /* deprecated */
1550 assert(0);
1551 return FALSE;
1552 break;
1553
1554 case TGSI_OPCODE_X2D:
1555 /* deprecated? */
1556 assert(0);
1557 return FALSE;
1558 break;
1559
1560 case TGSI_OPCODE_ARA:
1561 /* deprecated */
1562 assert(0);
1563 return FALSE;
1564 break;
1565
1566 case TGSI_OPCODE_ARR:
1567 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1568 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1569 tmp0 = lp_build_round(&bld->base, tmp0);
1570 dst0[chan_index] = tmp0;
1571 }
1572 break;
1573
1574 case TGSI_OPCODE_BRA:
1575 /* deprecated */
1576 assert(0);
1577 return FALSE;
1578 break;
1579
1580 case TGSI_OPCODE_CAL:
1581 lp_exec_mask_call(&bld->exec_mask,
1582 inst->Label.Label,
1583 pc);
1584
1585 break;
1586
1587 case TGSI_OPCODE_RET:
1588 lp_exec_mask_ret(&bld->exec_mask, pc);
1589 break;
1590
1591 case TGSI_OPCODE_END:
1592 *pc = -1;
1593 break;
1594
1595 case TGSI_OPCODE_SSG:
1596 /* TGSI_OPCODE_SGN */
1597 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1598 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1599 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1600 }
1601 break;
1602
1603 case TGSI_OPCODE_CMP:
1604 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1605 src0 = emit_fetch( bld, inst, 0, chan_index );
1606 src1 = emit_fetch( bld, inst, 1, chan_index );
1607 src2 = emit_fetch( bld, inst, 2, chan_index );
1608 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1609 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1610 }
1611 break;
1612
1613 case TGSI_OPCODE_SCS:
1614 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1615 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1616 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1617 }
1618 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1619 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1620 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1621 }
1622 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1623 dst0[CHAN_Z] = bld->base.zero;
1624 }
1625 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1626 dst0[CHAN_W] = bld->base.one;
1627 }
1628 break;
1629
1630 case TGSI_OPCODE_TXB:
1631 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1632 break;
1633
1634 case TGSI_OPCODE_NRM:
1635 /* fall-through */
1636 case TGSI_OPCODE_NRM4:
1637 /* 3 or 4-component normalization */
1638 {
1639 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1640
1641 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1642 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1643 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1644 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1645
1646 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1647
1648 /* xmm4 = src.x */
1649 /* xmm0 = src.x * src.x */
1650 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1651 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1652 tmp4 = tmp0;
1653 }
1654 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1655
1656 /* xmm5 = src.y */
1657 /* xmm0 = xmm0 + src.y * src.y */
1658 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1659 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1660 tmp5 = tmp1;
1661 }
1662 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1663 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1664
1665 /* xmm6 = src.z */
1666 /* xmm0 = xmm0 + src.z * src.z */
1667 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1668 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1669 tmp6 = tmp1;
1670 }
1671 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1672 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1673
1674 if (dims == 4) {
1675 /* xmm7 = src.w */
1676 /* xmm0 = xmm0 + src.w * src.w */
1677 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1678 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1679 tmp7 = tmp1;
1680 }
1681 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1682 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1683 }
1684
1685 /* xmm1 = 1 / sqrt(xmm0) */
1686 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1687
1688 /* dst.x = xmm1 * src.x */
1689 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1690 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1691 }
1692
1693 /* dst.y = xmm1 * src.y */
1694 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1695 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1696 }
1697
1698 /* dst.z = xmm1 * src.z */
1699 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1700 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1701 }
1702
1703 /* dst.w = xmm1 * src.w */
1704 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1705 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1706 }
1707 }
1708
1709 /* dst.w = 1.0 */
1710 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1711 dst0[CHAN_W] = bld->base.one;
1712 }
1713 }
1714 break;
1715
1716 case TGSI_OPCODE_DIV:
1717 /* deprecated */
1718 assert( 0 );
1719 return FALSE;
1720 break;
1721
1722 case TGSI_OPCODE_DP2:
1723 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1724 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1725 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1726 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1727 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1728 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1729 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1730 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1731 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1732 }
1733 break;
1734
1735 case TGSI_OPCODE_TXL:
1736 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1737 break;
1738
1739 case TGSI_OPCODE_TXP:
1740 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1741 break;
1742
1743 case TGSI_OPCODE_BRK:
1744 lp_exec_break(&bld->exec_mask);
1745 break;
1746
1747 case TGSI_OPCODE_IF:
1748 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1749 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1750 tmp0, bld->base.zero);
1751 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1752 break;
1753
1754 case TGSI_OPCODE_BGNLOOP:
1755 lp_exec_bgnloop(&bld->exec_mask);
1756 break;
1757
1758 case TGSI_OPCODE_BGNSUB:
1759 lp_exec_mask_bgnsub(&bld->exec_mask);
1760 break;
1761
1762 case TGSI_OPCODE_ELSE:
1763 lp_exec_mask_cond_invert(&bld->exec_mask);
1764 break;
1765
1766 case TGSI_OPCODE_ENDIF:
1767 lp_exec_mask_cond_pop(&bld->exec_mask);
1768 break;
1769
1770 case TGSI_OPCODE_ENDLOOP:
1771 lp_exec_endloop(&bld->exec_mask);
1772 break;
1773
1774 case TGSI_OPCODE_ENDSUB:
1775 lp_exec_mask_endsub(&bld->exec_mask, pc);
1776 break;
1777
1778 case TGSI_OPCODE_PUSHA:
1779 /* deprecated? */
1780 assert(0);
1781 return FALSE;
1782 break;
1783
1784 case TGSI_OPCODE_POPA:
1785 /* deprecated? */
1786 assert(0);
1787 return FALSE;
1788 break;
1789
1790 case TGSI_OPCODE_CEIL:
1791 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1792 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1793 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1794 }
1795 break;
1796
1797 case TGSI_OPCODE_I2F:
1798 /* deprecated? */
1799 assert(0);
1800 return FALSE;
1801 break;
1802
1803 case TGSI_OPCODE_NOT:
1804 /* deprecated? */
1805 assert(0);
1806 return FALSE;
1807 break;
1808
1809 case TGSI_OPCODE_TRUNC:
1810 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1811 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1812 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1813 }
1814 break;
1815
1816 case TGSI_OPCODE_SHL:
1817 /* deprecated? */
1818 assert(0);
1819 return FALSE;
1820 break;
1821
1822 case TGSI_OPCODE_ISHR:
1823 /* deprecated? */
1824 assert(0);
1825 return FALSE;
1826 break;
1827
1828 case TGSI_OPCODE_AND:
1829 /* deprecated? */
1830 assert(0);
1831 return FALSE;
1832 break;
1833
1834 case TGSI_OPCODE_OR:
1835 /* deprecated? */
1836 assert(0);
1837 return FALSE;
1838 break;
1839
1840 case TGSI_OPCODE_MOD:
1841 /* deprecated? */
1842 assert(0);
1843 return FALSE;
1844 break;
1845
1846 case TGSI_OPCODE_XOR:
1847 /* deprecated? */
1848 assert(0);
1849 return FALSE;
1850 break;
1851
1852 case TGSI_OPCODE_SAD:
1853 /* deprecated? */
1854 assert(0);
1855 return FALSE;
1856 break;
1857
1858 case TGSI_OPCODE_TXF:
1859 /* deprecated? */
1860 assert(0);
1861 return FALSE;
1862 break;
1863
1864 case TGSI_OPCODE_TXQ:
1865 /* deprecated? */
1866 assert(0);
1867 return FALSE;
1868 break;
1869
1870 case TGSI_OPCODE_CONT:
1871 lp_exec_continue(&bld->exec_mask);
1872 break;
1873
1874 case TGSI_OPCODE_EMIT:
1875 return FALSE;
1876 break;
1877
1878 case TGSI_OPCODE_ENDPRIM:
1879 return FALSE;
1880 break;
1881
1882 case TGSI_OPCODE_NOP:
1883 break;
1884
1885 default:
1886 return FALSE;
1887 }
1888
1889 if(info->num_dst) {
1890 LLVMValueRef pred[NUM_CHANNELS];
1891
1892 emit_fetch_predicate( bld, inst, pred );
1893
1894 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1895 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1896 }
1897 }
1898
1899 return TRUE;
1900 }
1901
1902
1903 void
1904 lp_build_tgsi_soa(LLVMBuilderRef builder,
1905 const struct tgsi_token *tokens,
1906 struct lp_type type,
1907 struct lp_build_mask_context *mask,
1908 LLVMValueRef consts_ptr,
1909 const LLVMValueRef *pos,
1910 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1911 LLVMValueRef (*outputs)[NUM_CHANNELS],
1912 struct lp_build_sampler_soa *sampler,
1913 const struct tgsi_shader_info *info)
1914 {
1915 struct lp_build_tgsi_soa_context bld;
1916 struct tgsi_parse_context parse;
1917 uint num_immediates = 0;
1918 uint num_instructions = 0;
1919 unsigned i;
1920 int pc = 0;
1921
1922 /* Setup build context */
1923 memset(&bld, 0, sizeof bld);
1924 lp_build_context_init(&bld.base, builder, type);
1925 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1926 bld.mask = mask;
1927 bld.pos = pos;
1928 bld.inputs = inputs;
1929 bld.outputs = outputs;
1930 bld.consts_ptr = consts_ptr;
1931 bld.sampler = sampler;
1932 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1933 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1934 bld.instructions = (struct tgsi_full_instruction *)
1935 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
1936 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1937
1938 if (!bld.instructions) {
1939 return;
1940 }
1941
1942 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1943
1944 tgsi_parse_init( &parse, tokens );
1945
1946 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1947 tgsi_parse_token( &parse );
1948
1949 switch( parse.FullToken.Token.Type ) {
1950 case TGSI_TOKEN_TYPE_DECLARATION:
1951 /* Inputs already interpolated */
1952 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1953 break;
1954
1955 case TGSI_TOKEN_TYPE_INSTRUCTION:
1956 {
1957 /* save expanded instruction */
1958 if (num_instructions == bld.max_instructions) {
1959 bld.instructions = REALLOC(bld.instructions,
1960 bld.max_instructions
1961 * sizeof(struct tgsi_full_instruction),
1962 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1963 * sizeof(struct tgsi_full_instruction));
1964 bld.max_instructions += LP_MAX_INSTRUCTIONS;
1965 }
1966
1967 memcpy(bld.instructions + num_instructions,
1968 &parse.FullToken.FullInstruction,
1969 sizeof(bld.instructions[0]));
1970
1971 num_instructions++;
1972 }
1973
1974 break;
1975
1976 case TGSI_TOKEN_TYPE_IMMEDIATE:
1977 /* simply copy the immediate values into the next immediates[] slot */
1978 {
1979 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1980 assert(size <= 4);
1981 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1982 for( i = 0; i < size; ++i )
1983 bld.immediates[num_immediates][i] =
1984 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1985 for( i = size; i < 4; ++i )
1986 bld.immediates[num_immediates][i] = bld.base.undef;
1987 num_immediates++;
1988 }
1989 break;
1990
1991 case TGSI_TOKEN_TYPE_PROPERTY:
1992 break;
1993
1994 default:
1995 assert( 0 );
1996 }
1997 }
1998
1999 while (pc != -1) {
2000 struct tgsi_full_instruction *instr = bld.instructions + pc;
2001 const struct tgsi_opcode_info *opcode_info =
2002 tgsi_get_opcode_info(instr->Instruction.Opcode);
2003 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2004 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2005 opcode_info->mnemonic);
2006 }
2007
2008 if (0) {
2009 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2010 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2011 debug_printf("11111111111111111111111111111 \n");
2012 tgsi_dump(tokens, 0);
2013 lp_debug_dump_value(function);
2014 debug_printf("2222222222222222222222222222 \n");
2015 }
2016 tgsi_parse_free( &parse );
2017
2018 if (0) {
2019 LLVMModuleRef module = LLVMGetGlobalParent(
2020 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2021 LLVMDumpModule(module);
2022
2023 }
2024
2025 FREE( bld.instructions );
2026 }
2027