gallivm: rename a var to avoid compiler warnings
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
/* Channel indices of a four-component register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Instruction-count constant; its user is not in this part of the file. */
#define LP_MAX_INSTRUCTIONS 256

/* Loop header that walks CHAN over every channel index. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when channel CHAN is set in the write mask of INST's Dst[0]. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* `if` header guarding a statement on IS_DST0_CHANNEL_ENABLED. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/*
 * Loop header that visits only the channels enabled in Dst[0]'s write mask.
 * Expands to `for (...) if (...)`, so an `else` placed directly after the
 * controlled statement would bind to the hidden `if`.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
82
83
84 struct lp_exec_mask {
85 struct lp_build_context *bld;
86
87 boolean has_mask;
88
89 LLVMTypeRef int_vec_type;
90
91 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
92 int cond_stack_size;
93 LLVMValueRef cond_mask;
94
95 LLVMBasicBlockRef loop_block;
96 LLVMValueRef cont_mask;
97 LLVMValueRef break_mask;
98 LLVMValueRef break_var;
99 struct {
100 LLVMBasicBlockRef loop_block;
101 LLVMValueRef cont_mask;
102 LLVMValueRef break_mask;
103 LLVMValueRef break_var;
104 } loop_stack[LP_MAX_TGSI_NESTING];
105 int loop_stack_size;
106
107 LLVMValueRef ret_mask;
108 struct {
109 int pc;
110 LLVMValueRef ret_mask;
111 } call_stack[LP_MAX_TGSI_NESTING];
112 int call_stack_size;
113
114 LLVMValueRef exec_mask;
115 };
116
117 struct lp_build_tgsi_soa_context
118 {
119 struct lp_build_context base;
120
121 /* Builder for integer masks and indices */
122 struct lp_build_context int_bld;
123
124 LLVMValueRef consts_ptr;
125 const LLVMValueRef *pos;
126 const LLVMValueRef (*inputs)[NUM_CHANNELS];
127 LLVMValueRef (*outputs)[NUM_CHANNELS];
128
129 const struct lp_build_sampler_soa *sampler;
130
131 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
132 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
133 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
134 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
135
136 /* we allocate an array of temps if we have indirect
137 * addressing and then the temps above is unused */
138 LLVMValueRef temps_array;
139 boolean has_indirect_addressing;
140
141 struct lp_build_mask_context *mask;
142 struct lp_exec_mask exec_mask;
143
144 struct tgsi_full_instruction *instructions;
145 uint max_instructions;
146 };
147
148 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
149 {
150 mask->bld = bld;
151 mask->has_mask = FALSE;
152 mask->cond_stack_size = 0;
153 mask->loop_stack_size = 0;
154 mask->call_stack_size = 0;
155
156 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
157 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
158 LLVMConstAllOnes(mask->int_vec_type);
159 }
160
161 static void lp_exec_mask_update(struct lp_exec_mask *mask)
162 {
163 if (mask->loop_stack_size) {
164 /*for loops we need to update the entire mask at runtime */
165 LLVMValueRef tmp;
166 assert(mask->break_mask);
167 tmp = LLVMBuildAnd(mask->bld->builder,
168 mask->cont_mask,
169 mask->break_mask,
170 "maskcb");
171 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
172 mask->cond_mask,
173 tmp,
174 "maskfull");
175 } else
176 mask->exec_mask = mask->cond_mask;
177
178 if (mask->call_stack_size) {
179 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
180 mask->exec_mask,
181 mask->ret_mask,
182 "callmask");
183 }
184
185 mask->has_mask = (mask->cond_stack_size > 0 ||
186 mask->loop_stack_size > 0 ||
187 mask->call_stack_size > 0);
188 }
189
190 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
191 LLVMValueRef val)
192 {
193 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
194 if (mask->cond_stack_size == 0) {
195 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
196 }
197 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
198 assert(LLVMTypeOf(val) == mask->int_vec_type);
199 mask->cond_mask = val;
200
201 lp_exec_mask_update(mask);
202 }
203
204 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
205 {
206 LLVMValueRef prev_mask;
207 LLVMValueRef inv_mask;
208
209 assert(mask->cond_stack_size);
210 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
211 if (mask->cond_stack_size == 1) {
212 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
213 }
214
215 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
216
217 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
218 inv_mask,
219 prev_mask, "");
220 lp_exec_mask_update(mask);
221 }
222
223 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
224 {
225 assert(mask->cond_stack_size);
226 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
227 lp_exec_mask_update(mask);
228 }
229
230 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
231 {
232 if (mask->loop_stack_size == 0) {
233 assert(mask->loop_block == NULL);
234 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
235 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
236 assert(mask->break_var == NULL);
237 }
238
239 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
240
241 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
242 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
243 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
244 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
245 ++mask->loop_stack_size;
246
247 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
248 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
249
250 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
251 LLVMBuildBr(mask->bld->builder, mask->loop_block);
252 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
253
254 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
255
256 lp_exec_mask_update(mask);
257 }
258
259 static void lp_exec_break(struct lp_exec_mask *mask)
260 {
261 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
262 mask->exec_mask,
263 "break");
264
265 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
266 mask->break_mask,
267 exec_mask, "break_full");
268
269 lp_exec_mask_update(mask);
270 }
271
272 static void lp_exec_continue(struct lp_exec_mask *mask)
273 {
274 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
275 mask->exec_mask,
276 "");
277
278 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
279 mask->cont_mask,
280 exec_mask, "");
281
282 lp_exec_mask_update(mask);
283 }
284
285
286 static void lp_exec_endloop(struct lp_exec_mask *mask)
287 {
288 LLVMBasicBlockRef endloop;
289 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
290 mask->bld->type.length);
291 LLVMValueRef i1cond;
292
293 assert(mask->break_mask);
294
295 /*
296 * Restore the cont_mask, but don't pop
297 */
298 assert(mask->loop_stack_size);
299 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
300 lp_exec_mask_update(mask);
301
302 /*
303 * Unlike the continue mask, the break_mask must be preserved across loop
304 * iterations
305 */
306 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
307
308 /* i1cond = (mask == 0) */
309 i1cond = LLVMBuildICmp(
310 mask->bld->builder,
311 LLVMIntNE,
312 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
313 LLVMConstNull(reg_type), "");
314
315 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
316
317 LLVMBuildCondBr(mask->bld->builder,
318 i1cond, mask->loop_block, endloop);
319
320 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
321
322 assert(mask->loop_stack_size);
323 --mask->loop_stack_size;
324 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
325 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
326 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
327 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
328
329 lp_exec_mask_update(mask);
330 }
331
332 /* stores val into an address pointed to by dst.
333 * mask->exec_mask is used to figure out which bits of val
334 * should be stored into the address
335 * (0 means don't store this bit, 1 means do store).
336 */
337 static void lp_exec_mask_store(struct lp_exec_mask *mask,
338 LLVMValueRef pred,
339 LLVMValueRef val,
340 LLVMValueRef dst)
341 {
342 /* Mix the predicate and execution mask */
343 if (mask->has_mask) {
344 if (pred) {
345 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
346 } else {
347 pred = mask->exec_mask;
348 }
349 }
350
351 if (pred) {
352 LLVMValueRef real_val, dst_val;
353
354 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
355 real_val = lp_build_select(mask->bld,
356 pred,
357 val, dst_val);
358
359 LLVMBuildStore(mask->bld->builder, real_val, dst);
360 } else
361 LLVMBuildStore(mask->bld->builder, val, dst);
362 }
363
364 static void lp_exec_mask_call(struct lp_exec_mask *mask,
365 int func,
366 int *pc)
367 {
368 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
369 mask->call_stack[mask->call_stack_size].pc = *pc;
370 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
371 mask->call_stack_size++;
372 *pc = func;
373 }
374
375 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
376 {
377 LLVMValueRef exec_mask;
378
379 if (mask->call_stack_size == 0) {
380 /* returning from main() */
381 *pc = -1;
382 return;
383 }
384 exec_mask = LLVMBuildNot(mask->bld->builder,
385 mask->exec_mask,
386 "ret");
387
388 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
389 mask->ret_mask,
390 exec_mask, "ret_full");
391
392 lp_exec_mask_update(mask);
393 }
394
/**
 * BGNSUB: nothing to do; the call state is set up by lp_exec_mask_call().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
398
399 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
400 {
401 assert(mask->call_stack_size);
402 mask->call_stack_size--;
403 *pc = mask->call_stack[mask->call_stack_size].pc;
404 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
405 lp_exec_mask_update(mask);
406 }
407
408 static LLVMValueRef
409 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
410 unsigned index,
411 unsigned chan,
412 boolean is_indirect,
413 LLVMValueRef addr)
414 {
415 assert(chan < 4);
416 if (!bld->has_indirect_addressing) {
417 return bld->temps[index][chan];
418 } else {
419 LLVMValueRef lindex =
420 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
421 if (is_indirect)
422 lindex = lp_build_add(&bld->base, lindex, addr);
423 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
424 }
425 }
426
427
428 /**
429 * Gather vector.
430 * XXX the lp_build_gather() function should be capable of doing this
431 * with a little work.
432 */
433 static LLVMValueRef
434 build_gather(struct lp_build_tgsi_soa_context *bld,
435 LLVMValueRef base_ptr,
436 LLVMValueRef indexes)
437 {
438 LLVMValueRef res = bld->base.undef;
439 unsigned i;
440
441 /*
442 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
443 */
444 for (i = 0; i < bld->base.type.length; i++) {
445 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
446 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
447 indexes, ii, "");
448 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
449 &index, 1, "");
450 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
451
452 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
453 }
454
455 return res;
456 }
457
458
459 /**
460 * Register fetch.
461 */
462 static LLVMValueRef
463 emit_fetch(
464 struct lp_build_tgsi_soa_context *bld,
465 const struct tgsi_full_instruction *inst,
466 unsigned src_op,
467 const unsigned chan_index )
468 {
469 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
470 const unsigned swizzle =
471 tgsi_util_get_full_src_register_swizzle(reg, chan_index);
472 LLVMValueRef res;
473 LLVMValueRef addr_vec = NULL;
474
475 if (swizzle > 3) {
476 assert(0 && "invalid swizzle in emit_fetch()");
477 return bld->base.undef;
478 }
479
480 if (reg->Register.Indirect) {
481 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
482 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
483
484 LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
485
486 assert(bld->has_indirect_addressing);
487
488 addr_vec = LLVMBuildLoad(bld->base.builder,
489 bld->addr[reg->Indirect.Index][swizzle],
490 "load addr");
491
492 /* for indexing we want integers */
493 addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
494 int_vec_type, "");
495
496 /* addr_vec = addr_vec * 4 */
497 addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
498 }
499
500 switch (reg->Register.File) {
501 case TGSI_FILE_CONSTANT:
502 {
503 if (reg->Register.Indirect) {
504 LLVMValueRef index_vec; /* index into the const buffer */
505
506 /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
507 index_vec = lp_build_const_int_vec(bld->int_bld.type,
508 reg->Register.Index * 4 + swizzle);
509
510 /* index_vec = index_vec + addr_vec */
511 index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
512
513 /* Gather values from the constant buffer */
514 res = build_gather(bld, bld->consts_ptr, index_vec);
515 }
516 else {
517 LLVMValueRef index; /* index into the const buffer */
518 LLVMValueRef scalar, scalar_ptr;
519
520 index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
521
522 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
523 &index, 1, "");
524 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
525
526 res = lp_build_broadcast_scalar(&bld->base, scalar);
527 }
528 }
529 break;
530
531 case TGSI_FILE_IMMEDIATE:
532 res = bld->immediates[reg->Register.Index][swizzle];
533 assert(res);
534 break;
535
536 case TGSI_FILE_INPUT:
537 res = bld->inputs[reg->Register.Index][swizzle];
538 assert(res);
539 break;
540
541 case TGSI_FILE_TEMPORARY:
542 {
543 LLVMValueRef addr = NULL;
544 LLVMValueRef temp_ptr;
545
546 if (reg->Register.Indirect) {
547 LLVMValueRef zero = lp_build_const_int32(0);
548 addr = LLVMBuildExtractElement(bld->base.builder,
549 addr_vec, zero, "");
550 }
551
552 temp_ptr = get_temp_ptr(bld, reg->Register.Index,
553 swizzle,
554 reg->Register.Indirect,
555 addr);
556 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
557 if(!res)
558 return bld->base.undef;
559 }
560 break;
561
562 default:
563 assert(0 && "invalid src register in emit_fetch()");
564 return bld->base.undef;
565 }
566
567 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
568 case TGSI_UTIL_SIGN_CLEAR:
569 res = lp_build_abs( &bld->base, res );
570 break;
571
572 case TGSI_UTIL_SIGN_SET:
573 /* TODO: Use bitwese OR for floating point */
574 res = lp_build_abs( &bld->base, res );
575 res = LLVMBuildNeg( bld->base.builder, res, "" );
576 break;
577
578 case TGSI_UTIL_SIGN_TOGGLE:
579 res = LLVMBuildNeg( bld->base.builder, res, "" );
580 break;
581
582 case TGSI_UTIL_SIGN_KEEP:
583 break;
584 }
585
586 return res;
587 }
588
589
590 /**
591 * Register fetch with derivatives.
592 */
593 static void
594 emit_fetch_deriv(
595 struct lp_build_tgsi_soa_context *bld,
596 const struct tgsi_full_instruction *inst,
597 unsigned index,
598 const unsigned chan_index,
599 LLVMValueRef *res,
600 LLVMValueRef *ddx,
601 LLVMValueRef *ddy)
602 {
603 LLVMValueRef src;
604
605 src = emit_fetch(bld, inst, index, chan_index);
606
607 if(res)
608 *res = src;
609
610 /* TODO: use interpolation coeffs for inputs */
611
612 if(ddx)
613 *ddx = lp_build_ddx(&bld->base, src);
614
615 if(ddy)
616 *ddy = lp_build_ddy(&bld->base, src);
617 }
618
619
620 /**
621 * Predicate.
622 */
623 static void
624 emit_fetch_predicate(
625 struct lp_build_tgsi_soa_context *bld,
626 const struct tgsi_full_instruction *inst,
627 LLVMValueRef *pred)
628 {
629 unsigned index;
630 unsigned char swizzles[4];
631 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
632 LLVMValueRef value;
633 unsigned chan;
634
635 if (!inst->Instruction.Predicate) {
636 FOR_EACH_CHANNEL( chan ) {
637 pred[chan] = NULL;
638 }
639 return;
640 }
641
642 swizzles[0] = inst->Predicate.SwizzleX;
643 swizzles[1] = inst->Predicate.SwizzleY;
644 swizzles[2] = inst->Predicate.SwizzleZ;
645 swizzles[3] = inst->Predicate.SwizzleW;
646
647 index = inst->Predicate.Index;
648 assert(index < LP_MAX_TGSI_PREDS);
649
650 FOR_EACH_CHANNEL( chan ) {
651 unsigned swizzle = swizzles[chan];
652
653 /*
654 * Only fetch the predicate register channels that are actually listed
655 * in the swizzles
656 */
657 if (!unswizzled[swizzle]) {
658 value = LLVMBuildLoad(bld->base.builder,
659 bld->preds[index][swizzle], "");
660
661 /*
662 * Convert the value to an integer mask.
663 *
664 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
665 * is needlessly causing two comparisons due to storing the intermediate
666 * result as float vector instead of an integer mask vector.
667 */
668 value = lp_build_compare(bld->base.builder,
669 bld->base.type,
670 PIPE_FUNC_NOTEQUAL,
671 value,
672 bld->base.zero);
673 if (inst->Predicate.Negate) {
674 value = LLVMBuildNot(bld->base.builder, value, "");
675 }
676
677 unswizzled[swizzle] = value;
678 } else {
679 value = unswizzled[swizzle];
680 }
681
682 pred[chan] = value;
683 }
684 }
685
686
687 /**
688 * Register store.
689 */
690 static void
691 emit_store(
692 struct lp_build_tgsi_soa_context *bld,
693 const struct tgsi_full_instruction *inst,
694 unsigned index,
695 unsigned chan_index,
696 LLVMValueRef pred,
697 LLVMValueRef value)
698 {
699 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
700 LLVMValueRef addr = NULL;
701
702 switch( inst->Instruction.Saturate ) {
703 case TGSI_SAT_NONE:
704 break;
705
706 case TGSI_SAT_ZERO_ONE:
707 value = lp_build_max(&bld->base, value, bld->base.zero);
708 value = lp_build_min(&bld->base, value, bld->base.one);
709 break;
710
711 case TGSI_SAT_MINUS_PLUS_ONE:
712 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
713 value = lp_build_min(&bld->base, value, bld->base.one);
714 break;
715
716 default:
717 assert(0);
718 }
719
720 if (reg->Register.Indirect) {
721 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
722 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
723 addr = LLVMBuildLoad(bld->base.builder,
724 bld->addr[reg->Indirect.Index][swizzle],
725 "");
726 /* for indexing we want integers */
727 addr = LLVMBuildFPToSI(bld->base.builder, addr,
728 int_vec_type, "");
729 addr = LLVMBuildExtractElement(bld->base.builder,
730 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
731 "");
732 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
733 }
734
735 switch( reg->Register.File ) {
736 case TGSI_FILE_OUTPUT:
737 lp_exec_mask_store(&bld->exec_mask, pred, value,
738 bld->outputs[reg->Register.Index][chan_index]);
739 break;
740
741 case TGSI_FILE_TEMPORARY: {
742 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
743 chan_index,
744 reg->Register.Indirect,
745 addr);
746 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
747 break;
748 }
749
750 case TGSI_FILE_ADDRESS:
751 lp_exec_mask_store(&bld->exec_mask, pred, value,
752 bld->addr[reg->Indirect.Index][chan_index]);
753 break;
754
755 case TGSI_FILE_PREDICATE:
756 lp_exec_mask_store(&bld->exec_mask, pred, value,
757 bld->preds[index][chan_index]);
758 break;
759
760 default:
761 assert( 0 );
762 }
763 }
764
765
766 /**
767 * High-level instruction translators.
768 */
769
/* Variant of texture fetch requested from emit_tex(). */
enum tex_modifier {
   TEX_MODIFIER_NONE           = 0,  /* plain fetch */
   TEX_MODIFIER_PROJECTED      = 1,  /* coords are multiplied by 1/src0.w */
   TEX_MODIFIER_LOD_BIAS       = 2,  /* src0.w is passed as the LOD bias */
   TEX_MODIFIER_EXPLICIT_LOD   = 3,  /* src0.w is passed as the explicit LOD */
   TEX_MODIFIER_EXPLICIT_DERIV = 4   /* ddx/ddy come from src1/src2 */
};
777
778 static void
779 emit_tex( struct lp_build_tgsi_soa_context *bld,
780 const struct tgsi_full_instruction *inst,
781 enum tex_modifier modifier,
782 LLVMValueRef *texel)
783 {
784 unsigned unit;
785 LLVMValueRef lod_bias, explicit_lod;
786 LLVMValueRef oow = NULL;
787 LLVMValueRef coords[3];
788 LLVMValueRef ddx[3];
789 LLVMValueRef ddy[3];
790 unsigned num_coords;
791 unsigned i;
792
793 if (!bld->sampler) {
794 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
795 for (i = 0; i < 4; i++) {
796 texel[i] = bld->base.undef;
797 }
798 return;
799 }
800
801 switch (inst->Texture.Texture) {
802 case TGSI_TEXTURE_1D:
803 num_coords = 1;
804 break;
805 case TGSI_TEXTURE_2D:
806 case TGSI_TEXTURE_RECT:
807 num_coords = 2;
808 break;
809 case TGSI_TEXTURE_SHADOW1D:
810 case TGSI_TEXTURE_SHADOW2D:
811 case TGSI_TEXTURE_SHADOWRECT:
812 case TGSI_TEXTURE_3D:
813 case TGSI_TEXTURE_CUBE:
814 num_coords = 3;
815 break;
816 default:
817 assert(0);
818 return;
819 }
820
821 if (modifier == TEX_MODIFIER_LOD_BIAS) {
822 lod_bias = emit_fetch( bld, inst, 0, 3 );
823 explicit_lod = NULL;
824 }
825 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
826 lod_bias = NULL;
827 explicit_lod = emit_fetch( bld, inst, 0, 3 );
828 }
829 else {
830 lod_bias = NULL;
831 explicit_lod = NULL;
832 }
833
834 if (modifier == TEX_MODIFIER_PROJECTED) {
835 oow = emit_fetch( bld, inst, 0, 3 );
836 oow = lp_build_rcp(&bld->base, oow);
837 }
838
839 for (i = 0; i < num_coords; i++) {
840 coords[i] = emit_fetch( bld, inst, 0, i );
841 if (modifier == TEX_MODIFIER_PROJECTED)
842 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
843 }
844 for (i = num_coords; i < 3; i++) {
845 coords[i] = bld->base.undef;
846 }
847
848 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
849 for (i = 0; i < num_coords; i++) {
850 ddx[i] = emit_fetch( bld, inst, 1, i );
851 ddy[i] = emit_fetch( bld, inst, 2, i );
852 }
853 unit = inst->Src[3].Register.Index;
854 } else {
855 for (i = 0; i < num_coords; i++) {
856 ddx[i] = lp_build_ddx( &bld->base, coords[i] );
857 ddy[i] = lp_build_ddy( &bld->base, coords[i] );
858 }
859 unit = inst->Src[1].Register.Index;
860 }
861 for (i = num_coords; i < 3; i++) {
862 ddx[i] = bld->base.undef;
863 ddy[i] = bld->base.undef;
864 }
865
866 bld->sampler->emit_fetch_texel(bld->sampler,
867 bld->base.builder,
868 bld->base.type,
869 unit, num_coords, coords,
870 ddx, ddy,
871 lod_bias, explicit_lod,
872 texel);
873 }
874
875
876 /**
877 * Kill fragment if any of the src register values are negative.
878 */
879 static void
880 emit_kil(
881 struct lp_build_tgsi_soa_context *bld,
882 const struct tgsi_full_instruction *inst )
883 {
884 const struct tgsi_full_src_register *reg = &inst->Src[0];
885 LLVMValueRef terms[NUM_CHANNELS];
886 LLVMValueRef mask;
887 unsigned chan_index;
888
889 memset(&terms, 0, sizeof terms);
890
891 FOR_EACH_CHANNEL( chan_index ) {
892 unsigned swizzle;
893
894 /* Unswizzle channel */
895 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
896
897 /* Check if the component has not been already tested. */
898 assert(swizzle < NUM_CHANNELS);
899 if( !terms[swizzle] )
900 /* TODO: change the comparison operator instead of setting the sign */
901 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
902 }
903
904 mask = NULL;
905 FOR_EACH_CHANNEL( chan_index ) {
906 if(terms[chan_index]) {
907 LLVMValueRef chan_mask;
908
909 /*
910 * If term < 0 then mask = 0 else mask = ~0.
911 */
912 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
913
914 if(mask)
915 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
916 else
917 mask = chan_mask;
918 }
919 }
920
921 if(mask)
922 lp_build_mask_update(bld->mask, mask);
923 }
924
925
926 /**
927 * Predicated fragment kill.
928 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
929 * The only predication is the execution mask which will apply if
930 * we're inside a loop or conditional.
931 */
932 static void
933 emit_kilp(struct lp_build_tgsi_soa_context *bld,
934 const struct tgsi_full_instruction *inst)
935 {
936 LLVMValueRef mask;
937
938 /* For those channels which are "alive", disable fragment shader
939 * execution.
940 */
941 if (bld->exec_mask.has_mask) {
942 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
943 }
944 else {
945 mask = bld->base.zero;
946 }
947
948 lp_build_mask_update(bld->mask, mask);
949 }
950
951 static void
952 emit_declaration(
953 struct lp_build_tgsi_soa_context *bld,
954 const struct tgsi_full_declaration *decl)
955 {
956 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
957
958 unsigned first = decl->Range.First;
959 unsigned last = decl->Range.Last;
960 unsigned idx, i;
961
962 for (idx = first; idx <= last; ++idx) {
963 switch (decl->Declaration.File) {
964 case TGSI_FILE_TEMPORARY:
965 assert(idx < LP_MAX_TGSI_TEMPS);
966 if (bld->has_indirect_addressing) {
967 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
968 last*4 + 4, 0);
969 bld->temps_array = lp_build_array_alloca(bld->base.builder,
970 vec_type, array_size, "");
971 } else {
972 for (i = 0; i < NUM_CHANNELS; i++)
973 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
974 vec_type, "");
975 }
976 break;
977
978 case TGSI_FILE_OUTPUT:
979 for (i = 0; i < NUM_CHANNELS; i++)
980 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
981 vec_type, "");
982 break;
983
984 case TGSI_FILE_ADDRESS:
985 assert(idx < LP_MAX_TGSI_ADDRS);
986 for (i = 0; i < NUM_CHANNELS; i++)
987 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
988 vec_type, "");
989 break;
990
991 case TGSI_FILE_PREDICATE:
992 assert(idx < LP_MAX_TGSI_PREDS);
993 for (i = 0; i < NUM_CHANNELS; i++)
994 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
995 vec_type, "");
996 break;
997
998 default:
999 /* don't need to declare other vars */
1000 break;
1001 }
1002 }
1003 }
1004
1005
1006 /**
1007 * Emit LLVM for one TGSI instruction.
1008 * \param return TRUE for success, FALSE otherwise
1009 */
1010 static boolean
1011 emit_instruction(
1012 struct lp_build_tgsi_soa_context *bld,
1013 const struct tgsi_full_instruction *inst,
1014 const struct tgsi_opcode_info *info,
1015 int *pc)
1016 {
1017 unsigned chan_index;
1018 LLVMValueRef src0, src1, src2;
1019 LLVMValueRef tmp0, tmp1, tmp2;
1020 LLVMValueRef tmp3 = NULL;
1021 LLVMValueRef tmp4 = NULL;
1022 LLVMValueRef tmp5 = NULL;
1023 LLVMValueRef tmp6 = NULL;
1024 LLVMValueRef tmp7 = NULL;
1025 LLVMValueRef res;
1026 LLVMValueRef dst0[NUM_CHANNELS];
1027
1028 /*
1029 * Stores and write masks are handled in a general fashion after the long
1030 * instruction opcode switch statement.
1031 *
1032 * Although not strictly necessary, we avoid generating instructions for
1033 * channels which won't be stored, in cases where that's easy. For some
1034 * complex instructions, like texture sampling, it is more convenient to
1035 * assume a full writemask and then let LLVM optimization passes eliminate
1036 * redundant code.
1037 */
1038
1039 (*pc)++;
1040
1041 assert(info->num_dst <= 1);
1042 if (info->num_dst) {
1043 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1044 dst0[chan_index] = bld->base.undef;
1045 }
1046 }
1047
1048 switch (inst->Instruction.Opcode) {
1049 case TGSI_OPCODE_ARL:
1050 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1051 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1052 tmp0 = lp_build_floor(&bld->base, tmp0);
1053 dst0[chan_index] = tmp0;
1054 }
1055 break;
1056
1057 case TGSI_OPCODE_MOV:
1058 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1059 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1060 }
1061 break;
1062
1063 case TGSI_OPCODE_LIT:
1064 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1065 dst0[CHAN_X] = bld->base.one;
1066 }
1067 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1068 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1069 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1070 }
1071 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1072 /* XMM[1] = SrcReg[0].yyyy */
1073 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1074 /* XMM[1] = max(XMM[1], 0) */
1075 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1076 /* XMM[2] = SrcReg[0].wwww */
1077 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1078 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1079 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1080 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1081 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1082 }
1083 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1084 dst0[CHAN_W] = bld->base.one;
1085 }
1086 break;
1087
1088 case TGSI_OPCODE_RCP:
1089 /* TGSI_OPCODE_RECIP */
1090 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1091 res = lp_build_rcp(&bld->base, src0);
1092 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1093 dst0[chan_index] = res;
1094 }
1095 break;
1096
1097 case TGSI_OPCODE_RSQ:
1098 /* TGSI_OPCODE_RECIPSQRT */
1099 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1100 src0 = lp_build_abs(&bld->base, src0);
1101 res = lp_build_rsqrt(&bld->base, src0);
1102 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1103 dst0[chan_index] = res;
1104 }
1105 break;
1106
1107 case TGSI_OPCODE_EXP:
1108 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1109 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1110 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1111 LLVMValueRef *p_exp2_int_part = NULL;
1112 LLVMValueRef *p_frac_part = NULL;
1113 LLVMValueRef *p_exp2 = NULL;
1114
1115 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1116
1117 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1118 p_exp2_int_part = &tmp0;
1119 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1120 p_frac_part = &tmp1;
1121 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1122 p_exp2 = &tmp2;
1123
1124 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1125
1126 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1127 dst0[CHAN_X] = tmp0;
1128 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1129 dst0[CHAN_Y] = tmp1;
1130 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1131 dst0[CHAN_Z] = tmp2;
1132 }
1133 /* dst.w = 1.0 */
1134 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1135 dst0[CHAN_W] = bld->base.one;
1136 }
1137 break;
1138
1139 case TGSI_OPCODE_LOG:
1140 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1141 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1142 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1143 LLVMValueRef *p_floor_log2 = NULL;
1144 LLVMValueRef *p_exp = NULL;
1145 LLVMValueRef *p_log2 = NULL;
1146
1147 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1148 src0 = lp_build_abs( &bld->base, src0 );
1149
1150 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1151 p_floor_log2 = &tmp0;
1152 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1153 p_exp = &tmp1;
1154 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1155 p_log2 = &tmp2;
1156
1157 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1158
1159 /* dst.x = floor(lg2(abs(src.x))) */
1160 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1161 dst0[CHAN_X] = tmp0;
1162 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1163 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1164 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1165 }
1166 /* dst.z = lg2(abs(src.x)) */
1167 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1168 dst0[CHAN_Z] = tmp2;
1169 }
1170 /* dst.w = 1.0 */
1171 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1172 dst0[CHAN_W] = bld->base.one;
1173 }
1174 break;
1175
1176 case TGSI_OPCODE_MUL:
1177 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1178 src0 = emit_fetch( bld, inst, 0, chan_index );
1179 src1 = emit_fetch( bld, inst, 1, chan_index );
1180 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1181 }
1182 break;
1183
1184 case TGSI_OPCODE_ADD:
1185 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1186 src0 = emit_fetch( bld, inst, 0, chan_index );
1187 src1 = emit_fetch( bld, inst, 1, chan_index );
1188 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1189 }
1190 break;
1191
1192 case TGSI_OPCODE_DP3:
1193 /* TGSI_OPCODE_DOT3 */
1194 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1195 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1196 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1197 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1198 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1199 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1200 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1201 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1202 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1203 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1204 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1205 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1206 dst0[chan_index] = tmp0;
1207 }
1208 break;
1209
1210 case TGSI_OPCODE_DP4:
1211 /* TGSI_OPCODE_DOT4 */
1212 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1213 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1214 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1215 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1216 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1217 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1218 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1219 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1220 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1221 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1222 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1223 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1224 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1225 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1226 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1227 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1228 dst0[chan_index] = tmp0;
1229 }
1230 break;
1231
1232 case TGSI_OPCODE_DST:
1233 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1234 dst0[CHAN_X] = bld->base.one;
1235 }
1236 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1237 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1238 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1239 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1240 }
1241 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1242 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1243 }
1244 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1245 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1246 }
1247 break;
1248
1249 case TGSI_OPCODE_MIN:
1250 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1251 src0 = emit_fetch( bld, inst, 0, chan_index );
1252 src1 = emit_fetch( bld, inst, 1, chan_index );
1253 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1254 }
1255 break;
1256
1257 case TGSI_OPCODE_MAX:
1258 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1259 src0 = emit_fetch( bld, inst, 0, chan_index );
1260 src1 = emit_fetch( bld, inst, 1, chan_index );
1261 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1262 }
1263 break;
1264
1265 case TGSI_OPCODE_SLT:
1266 /* TGSI_OPCODE_SETLT */
1267 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1268 src0 = emit_fetch( bld, inst, 0, chan_index );
1269 src1 = emit_fetch( bld, inst, 1, chan_index );
1270 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1271 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1272 }
1273 break;
1274
1275 case TGSI_OPCODE_SGE:
1276 /* TGSI_OPCODE_SETGE */
1277 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1278 src0 = emit_fetch( bld, inst, 0, chan_index );
1279 src1 = emit_fetch( bld, inst, 1, chan_index );
1280 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1281 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1282 }
1283 break;
1284
1285 case TGSI_OPCODE_MAD:
1286 /* TGSI_OPCODE_MADD */
1287 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1288 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1289 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1290 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1291 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1292 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1293 dst0[chan_index] = tmp0;
1294 }
1295 break;
1296
1297 case TGSI_OPCODE_SUB:
1298 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1299 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1300 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1301 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1302 }
1303 break;
1304
1305 case TGSI_OPCODE_LRP:
1306 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1307 src0 = emit_fetch( bld, inst, 0, chan_index );
1308 src1 = emit_fetch( bld, inst, 1, chan_index );
1309 src2 = emit_fetch( bld, inst, 2, chan_index );
1310 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1311 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1312 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1313 }
1314 break;
1315
1316 case TGSI_OPCODE_CND:
1317 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1318 src0 = emit_fetch( bld, inst, 0, chan_index );
1319 src1 = emit_fetch( bld, inst, 1, chan_index );
1320 src2 = emit_fetch( bld, inst, 2, chan_index );
1321 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1322 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1323 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1324 }
1325 break;
1326
1327 case TGSI_OPCODE_DP2A:
1328 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1329 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1330 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1331 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1332 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1333 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1334 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1335 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1336 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1337 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1338 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1339 }
1340 break;
1341
1342 case TGSI_OPCODE_FRC:
1343 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1344 src0 = emit_fetch( bld, inst, 0, chan_index );
1345 tmp0 = lp_build_floor(&bld->base, src0);
1346 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1347 dst0[chan_index] = tmp0;
1348 }
1349 break;
1350
1351 case TGSI_OPCODE_CLAMP:
1352 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1353 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1354 src1 = emit_fetch( bld, inst, 1, chan_index );
1355 src2 = emit_fetch( bld, inst, 2, chan_index );
1356 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1357 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1358 dst0[chan_index] = tmp0;
1359 }
1360 break;
1361
1362 case TGSI_OPCODE_FLR:
1363 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1364 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1365 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1366 }
1367 break;
1368
1369 case TGSI_OPCODE_ROUND:
1370 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1371 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1372 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1373 }
1374 break;
1375
1376 case TGSI_OPCODE_EX2: {
1377 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1378 tmp0 = lp_build_exp2( &bld->base, tmp0);
1379 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1380 dst0[chan_index] = tmp0;
1381 }
1382 break;
1383 }
1384
1385 case TGSI_OPCODE_LG2:
1386 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1387 tmp0 = lp_build_log2( &bld->base, tmp0);
1388 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1389 dst0[chan_index] = tmp0;
1390 }
1391 break;
1392
1393 case TGSI_OPCODE_POW:
1394 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1395 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1396 res = lp_build_pow( &bld->base, src0, src1 );
1397 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1398 dst0[chan_index] = res;
1399 }
1400 break;
1401
1402 case TGSI_OPCODE_XPD:
1403 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1404 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1405 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1406 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1407 }
1408 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1409 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1410 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1411 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1412 }
1413 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1414 tmp2 = tmp0;
1415 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1416 tmp5 = tmp3;
1417 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1418 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1419 dst0[CHAN_X] = tmp2;
1420 }
1421 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1422 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1423 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1424 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1425 }
1426 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1427 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1428 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1429 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1430 dst0[CHAN_Y] = tmp3;
1431 }
1432 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1433 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1434 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1435 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1436 dst0[CHAN_Z] = tmp5;
1437 }
1438 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1439 dst0[CHAN_W] = bld->base.one;
1440 }
1441 break;
1442
1443 case TGSI_OPCODE_ABS:
1444 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1445 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1446 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1447 }
1448 break;
1449
1450 case TGSI_OPCODE_RCC:
1451 /* deprecated? */
1452 assert(0);
1453 return FALSE;
1454
1455 case TGSI_OPCODE_DPH:
1456 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1457 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1458 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1459 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1460 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1461 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1462 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1463 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1464 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1465 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1466 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1467 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1468 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1469 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1470 dst0[chan_index] = tmp0;
1471 }
1472 break;
1473
1474 case TGSI_OPCODE_COS:
1475 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1476 tmp0 = lp_build_cos( &bld->base, tmp0 );
1477 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1478 dst0[chan_index] = tmp0;
1479 }
1480 break;
1481
1482 case TGSI_OPCODE_DDX:
1483 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1484 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1485 }
1486 break;
1487
1488 case TGSI_OPCODE_DDY:
1489 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1490 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1491 }
1492 break;
1493
1494 case TGSI_OPCODE_KILP:
1495 /* predicated kill */
1496 emit_kilp( bld, inst );
1497 break;
1498
1499 case TGSI_OPCODE_KIL:
1500 /* conditional kill */
1501 emit_kil( bld, inst );
1502 break;
1503
1504 case TGSI_OPCODE_PK2H:
1505 return FALSE;
1506 break;
1507
1508 case TGSI_OPCODE_PK2US:
1509 return FALSE;
1510 break;
1511
1512 case TGSI_OPCODE_PK4B:
1513 return FALSE;
1514 break;
1515
1516 case TGSI_OPCODE_PK4UB:
1517 return FALSE;
1518 break;
1519
1520 case TGSI_OPCODE_RFL:
1521 return FALSE;
1522 break;
1523
1524 case TGSI_OPCODE_SEQ:
1525 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1526 src0 = emit_fetch( bld, inst, 0, chan_index );
1527 src1 = emit_fetch( bld, inst, 1, chan_index );
1528 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1529 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1530 }
1531 break;
1532
1533 case TGSI_OPCODE_SFL:
1534 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1535 dst0[chan_index] = bld->base.zero;
1536 }
1537 break;
1538
1539 case TGSI_OPCODE_SGT:
1540 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1541 src0 = emit_fetch( bld, inst, 0, chan_index );
1542 src1 = emit_fetch( bld, inst, 1, chan_index );
1543 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1544 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1545 }
1546 break;
1547
1548 case TGSI_OPCODE_SIN:
1549 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1550 tmp0 = lp_build_sin( &bld->base, tmp0 );
1551 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1552 dst0[chan_index] = tmp0;
1553 }
1554 break;
1555
1556 case TGSI_OPCODE_SLE:
1557 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1558 src0 = emit_fetch( bld, inst, 0, chan_index );
1559 src1 = emit_fetch( bld, inst, 1, chan_index );
1560 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1561 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1562 }
1563 break;
1564
1565 case TGSI_OPCODE_SNE:
1566 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1567 src0 = emit_fetch( bld, inst, 0, chan_index );
1568 src1 = emit_fetch( bld, inst, 1, chan_index );
1569 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1570 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1571 }
1572 break;
1573
1574 case TGSI_OPCODE_STR:
1575 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1576 dst0[chan_index] = bld->base.one;
1577 }
1578 break;
1579
1580 case TGSI_OPCODE_TEX:
1581 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1582 break;
1583
1584 case TGSI_OPCODE_TXD:
1585 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1586 break;
1587
1588 case TGSI_OPCODE_UP2H:
1589 /* deprecated */
1590 assert (0);
1591 return FALSE;
1592 break;
1593
1594 case TGSI_OPCODE_UP2US:
1595 /* deprecated */
1596 assert(0);
1597 return FALSE;
1598 break;
1599
1600 case TGSI_OPCODE_UP4B:
1601 /* deprecated */
1602 assert(0);
1603 return FALSE;
1604 break;
1605
1606 case TGSI_OPCODE_UP4UB:
1607 /* deprecated */
1608 assert(0);
1609 return FALSE;
1610 break;
1611
1612 case TGSI_OPCODE_X2D:
1613 /* deprecated? */
1614 assert(0);
1615 return FALSE;
1616 break;
1617
1618 case TGSI_OPCODE_ARA:
1619 /* deprecated */
1620 assert(0);
1621 return FALSE;
1622 break;
1623
1624 case TGSI_OPCODE_ARR:
1625 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1626 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1627 tmp0 = lp_build_round(&bld->base, tmp0);
1628 dst0[chan_index] = tmp0;
1629 }
1630 break;
1631
1632 case TGSI_OPCODE_BRA:
1633 /* deprecated */
1634 assert(0);
1635 return FALSE;
1636 break;
1637
1638 case TGSI_OPCODE_CAL:
1639 lp_exec_mask_call(&bld->exec_mask,
1640 inst->Label.Label,
1641 pc);
1642
1643 break;
1644
1645 case TGSI_OPCODE_RET:
1646 lp_exec_mask_ret(&bld->exec_mask, pc);
1647 break;
1648
1649 case TGSI_OPCODE_END:
1650 *pc = -1;
1651 break;
1652
1653 case TGSI_OPCODE_SSG:
1654 /* TGSI_OPCODE_SGN */
1655 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1656 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1657 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1658 }
1659 break;
1660
1661 case TGSI_OPCODE_CMP:
1662 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1663 src0 = emit_fetch( bld, inst, 0, chan_index );
1664 src1 = emit_fetch( bld, inst, 1, chan_index );
1665 src2 = emit_fetch( bld, inst, 2, chan_index );
1666 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1667 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1668 }
1669 break;
1670
1671 case TGSI_OPCODE_SCS:
1672 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1673 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1674 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1675 }
1676 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1677 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1678 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1679 }
1680 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1681 dst0[CHAN_Z] = bld->base.zero;
1682 }
1683 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1684 dst0[CHAN_W] = bld->base.one;
1685 }
1686 break;
1687
1688 case TGSI_OPCODE_TXB:
1689 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1690 break;
1691
1692 case TGSI_OPCODE_NRM:
1693 /* fall-through */
1694 case TGSI_OPCODE_NRM4:
1695 /* 3 or 4-component normalization */
1696 {
1697 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1698
1699 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1700 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1701 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1702 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1703
1704 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1705
1706 /* xmm4 = src.x */
1707 /* xmm0 = src.x * src.x */
1708 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1709 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1710 tmp4 = tmp0;
1711 }
1712 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1713
1714 /* xmm5 = src.y */
1715 /* xmm0 = xmm0 + src.y * src.y */
1716 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1717 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1718 tmp5 = tmp1;
1719 }
1720 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1721 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1722
1723 /* xmm6 = src.z */
1724 /* xmm0 = xmm0 + src.z * src.z */
1725 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1726 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1727 tmp6 = tmp1;
1728 }
1729 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1730 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1731
1732 if (dims == 4) {
1733 /* xmm7 = src.w */
1734 /* xmm0 = xmm0 + src.w * src.w */
1735 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1736 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1737 tmp7 = tmp1;
1738 }
1739 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1740 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1741 }
1742
1743 /* xmm1 = 1 / sqrt(xmm0) */
1744 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1745
1746 /* dst.x = xmm1 * src.x */
1747 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1748 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1749 }
1750
1751 /* dst.y = xmm1 * src.y */
1752 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1753 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1754 }
1755
1756 /* dst.z = xmm1 * src.z */
1757 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1758 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1759 }
1760
1761 /* dst.w = xmm1 * src.w */
1762 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1763 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1764 }
1765 }
1766
1767 /* dst.w = 1.0 */
1768 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1769 dst0[CHAN_W] = bld->base.one;
1770 }
1771 }
1772 break;
1773
1774 case TGSI_OPCODE_DIV:
1775 /* deprecated */
1776 assert( 0 );
1777 return FALSE;
1778 break;
1779
1780 case TGSI_OPCODE_DP2:
1781 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1782 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1783 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1784 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1785 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1786 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1787 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1788 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1789 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1790 }
1791 break;
1792
1793 case TGSI_OPCODE_TXL:
1794 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1795 break;
1796
1797 case TGSI_OPCODE_TXP:
1798 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1799 break;
1800
1801 case TGSI_OPCODE_BRK:
1802 lp_exec_break(&bld->exec_mask);
1803 break;
1804
1805 case TGSI_OPCODE_IF:
1806 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1807 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1808 tmp0, bld->base.zero);
1809 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1810 break;
1811
1812 case TGSI_OPCODE_BGNLOOP:
1813 lp_exec_bgnloop(&bld->exec_mask);
1814 break;
1815
1816 case TGSI_OPCODE_BGNSUB:
1817 lp_exec_mask_bgnsub(&bld->exec_mask);
1818 break;
1819
1820 case TGSI_OPCODE_ELSE:
1821 lp_exec_mask_cond_invert(&bld->exec_mask);
1822 break;
1823
1824 case TGSI_OPCODE_ENDIF:
1825 lp_exec_mask_cond_pop(&bld->exec_mask);
1826 break;
1827
1828 case TGSI_OPCODE_ENDLOOP:
1829 lp_exec_endloop(&bld->exec_mask);
1830 break;
1831
1832 case TGSI_OPCODE_ENDSUB:
1833 lp_exec_mask_endsub(&bld->exec_mask, pc);
1834 break;
1835
1836 case TGSI_OPCODE_PUSHA:
1837 /* deprecated? */
1838 assert(0);
1839 return FALSE;
1840 break;
1841
1842 case TGSI_OPCODE_POPA:
1843 /* deprecated? */
1844 assert(0);
1845 return FALSE;
1846 break;
1847
1848 case TGSI_OPCODE_CEIL:
1849 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1850 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1851 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1852 }
1853 break;
1854
1855 case TGSI_OPCODE_I2F:
1856 /* deprecated? */
1857 assert(0);
1858 return FALSE;
1859 break;
1860
1861 case TGSI_OPCODE_NOT:
1862 /* deprecated? */
1863 assert(0);
1864 return FALSE;
1865 break;
1866
1867 case TGSI_OPCODE_TRUNC:
1868 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1869 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1870 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1871 }
1872 break;
1873
1874 case TGSI_OPCODE_SHL:
1875 /* deprecated? */
1876 assert(0);
1877 return FALSE;
1878 break;
1879
1880 case TGSI_OPCODE_ISHR:
1881 /* deprecated? */
1882 assert(0);
1883 return FALSE;
1884 break;
1885
1886 case TGSI_OPCODE_AND:
1887 /* deprecated? */
1888 assert(0);
1889 return FALSE;
1890 break;
1891
1892 case TGSI_OPCODE_OR:
1893 /* deprecated? */
1894 assert(0);
1895 return FALSE;
1896 break;
1897
1898 case TGSI_OPCODE_MOD:
1899 /* deprecated? */
1900 assert(0);
1901 return FALSE;
1902 break;
1903
1904 case TGSI_OPCODE_XOR:
1905 /* deprecated? */
1906 assert(0);
1907 return FALSE;
1908 break;
1909
1910 case TGSI_OPCODE_SAD:
1911 /* deprecated? */
1912 assert(0);
1913 return FALSE;
1914 break;
1915
1916 case TGSI_OPCODE_TXF:
1917 /* deprecated? */
1918 assert(0);
1919 return FALSE;
1920 break;
1921
1922 case TGSI_OPCODE_TXQ:
1923 /* deprecated? */
1924 assert(0);
1925 return FALSE;
1926 break;
1927
1928 case TGSI_OPCODE_CONT:
1929 lp_exec_continue(&bld->exec_mask);
1930 break;
1931
1932 case TGSI_OPCODE_EMIT:
1933 return FALSE;
1934 break;
1935
1936 case TGSI_OPCODE_ENDPRIM:
1937 return FALSE;
1938 break;
1939
1940 case TGSI_OPCODE_NOP:
1941 break;
1942
1943 default:
1944 return FALSE;
1945 }
1946
1947 if(info->num_dst) {
1948 LLVMValueRef pred[NUM_CHANNELS];
1949
1950 emit_fetch_predicate( bld, inst, pred );
1951
1952 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1953 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1954 }
1955 }
1956
1957 return TRUE;
1958 }
1959
1960
1961 void
1962 lp_build_tgsi_soa(LLVMBuilderRef builder,
1963 const struct tgsi_token *tokens,
1964 struct lp_type type,
1965 struct lp_build_mask_context *mask,
1966 LLVMValueRef consts_ptr,
1967 const LLVMValueRef *pos,
1968 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1969 LLVMValueRef (*outputs)[NUM_CHANNELS],
1970 struct lp_build_sampler_soa *sampler,
1971 const struct tgsi_shader_info *info)
1972 {
1973 struct lp_build_tgsi_soa_context bld;
1974 struct tgsi_parse_context parse;
1975 uint num_immediates = 0;
1976 uint num_instructions = 0;
1977 unsigned i;
1978 int pc = 0;
1979
1980 /* Setup build context */
1981 memset(&bld, 0, sizeof bld);
1982 lp_build_context_init(&bld.base, builder, type);
1983 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1984 bld.mask = mask;
1985 bld.pos = pos;
1986 bld.inputs = inputs;
1987 bld.outputs = outputs;
1988 bld.consts_ptr = consts_ptr;
1989 bld.sampler = sampler;
1990 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1991 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1992 bld.instructions = (struct tgsi_full_instruction *)
1993 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
1994 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1995
1996 if (!bld.instructions) {
1997 return;
1998 }
1999
2000 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2001
2002 tgsi_parse_init( &parse, tokens );
2003
2004 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2005 tgsi_parse_token( &parse );
2006
2007 switch( parse.FullToken.Token.Type ) {
2008 case TGSI_TOKEN_TYPE_DECLARATION:
2009 /* Inputs already interpolated */
2010 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2011 break;
2012
2013 case TGSI_TOKEN_TYPE_INSTRUCTION:
2014 {
2015 /* save expanded instruction */
2016 if (num_instructions == bld.max_instructions) {
2017 bld.instructions = REALLOC(bld.instructions,
2018 bld.max_instructions
2019 * sizeof(struct tgsi_full_instruction),
2020 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2021 * sizeof(struct tgsi_full_instruction));
2022 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2023 }
2024
2025 memcpy(bld.instructions + num_instructions,
2026 &parse.FullToken.FullInstruction,
2027 sizeof(bld.instructions[0]));
2028
2029 num_instructions++;
2030 }
2031
2032 break;
2033
2034 case TGSI_TOKEN_TYPE_IMMEDIATE:
2035 /* simply copy the immediate values into the next immediates[] slot */
2036 {
2037 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2038 assert(size <= 4);
2039 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2040 for( i = 0; i < size; ++i )
2041 bld.immediates[num_immediates][i] =
2042 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2043 for( i = size; i < 4; ++i )
2044 bld.immediates[num_immediates][i] = bld.base.undef;
2045 num_immediates++;
2046 }
2047 break;
2048
2049 case TGSI_TOKEN_TYPE_PROPERTY:
2050 break;
2051
2052 default:
2053 assert( 0 );
2054 }
2055 }
2056
2057 while (pc != -1) {
2058 struct tgsi_full_instruction *instr = bld.instructions + pc;
2059 const struct tgsi_opcode_info *opcode_info =
2060 tgsi_get_opcode_info(instr->Instruction.Opcode);
2061 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2062 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2063 opcode_info->mnemonic);
2064 }
2065
2066 if (0) {
2067 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2068 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2069 debug_printf("11111111111111111111111111111 \n");
2070 tgsi_dump(tokens, 0);
2071 lp_debug_dump_value(function);
2072 debug_printf("2222222222222222222222222222 \n");
2073 }
2074 tgsi_parse_free( &parse );
2075
2076 if (0) {
2077 LLVMModuleRef module = LLVMGetGlobalParent(
2078 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2079 LLVMDumpModule(module);
2080
2081 }
2082
2083 FREE( bld.instructions );
2084 }
2085