gallivm/llvmpipe: add const qualifiers
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_tgsi.h"
57 #include "lp_bld_limits.h"
58 #include "lp_bld_debug.h"
59
60
61 #define FOR_EACH_CHANNEL( CHAN )\
62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
63
64 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
66
67 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
69
70 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
71 FOR_EACH_CHANNEL( CHAN )\
72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
73
74 #define CHAN_X 0
75 #define CHAN_Y 1
76 #define CHAN_Z 2
77 #define CHAN_W 3
78
79 #define QUAD_TOP_LEFT 0
80 #define QUAD_TOP_RIGHT 1
81 #define QUAD_BOTTOM_LEFT 2
82 #define QUAD_BOTTOM_RIGHT 3
83
84
85 struct lp_exec_mask {
86 struct lp_build_context *bld;
87
88 boolean has_mask;
89
90 LLVMTypeRef int_vec_type;
91
92 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
93 int cond_stack_size;
94 LLVMValueRef cond_mask;
95
96 LLVMBasicBlockRef loop_block;
97 LLVMValueRef cont_mask;
98 LLVMValueRef break_mask;
99 LLVMValueRef break_var;
100 struct {
101 LLVMBasicBlockRef loop_block;
102 LLVMValueRef cont_mask;
103 LLVMValueRef break_mask;
104 LLVMValueRef break_var;
105 } loop_stack[LP_MAX_TGSI_NESTING];
106 int loop_stack_size;
107
108 LLVMValueRef exec_mask;
109 };
110
111 struct lp_build_tgsi_soa_context
112 {
113 struct lp_build_context base;
114
115 /* Builder for integer masks and indices */
116 struct lp_build_context int_bld;
117
118 LLVMValueRef consts_ptr;
119 const LLVMValueRef *pos;
120 const LLVMValueRef (*inputs)[NUM_CHANNELS];
121 LLVMValueRef (*outputs)[NUM_CHANNELS];
122
123 const struct lp_build_sampler_soa *sampler;
124
125 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
126 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
127 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
128 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
129
   130    /* If the shader uses indirect addressing we allocate a single array
   131     * of temps instead, and the temps[] array above goes unused. */
132 LLVMValueRef temps_array;
133 boolean has_indirect_addressing;
134
135 struct lp_build_mask_context *mask;
136 struct lp_exec_mask exec_mask;
137 };
138
139 static const unsigned char
140 swizzle_left[4] = {
141 QUAD_TOP_LEFT, QUAD_TOP_LEFT,
142 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
143 };
144
145 static const unsigned char
146 swizzle_right[4] = {
147 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
148 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
149 };
150
151 static const unsigned char
152 swizzle_top[4] = {
153 QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
154 QUAD_TOP_LEFT, QUAD_TOP_RIGHT
155 };
156
157 static const unsigned char
158 swizzle_bottom[4] = {
159 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
160 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
161 };
162
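/* Initialize the execution-mask helper: no conditionals or loops are active
 * yet, and all masks start out as all-ones (every lane executing). */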
163 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
164 {
165 mask->bld = bld;
166 mask->has_mask = FALSE;
167 mask->cond_stack_size = 0;
168 mask->loop_stack_size = 0;
169
170 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
171 mask->break_mask = mask->cont_mask = mask->cond_mask =
172 LLVMConstAllOnes(mask->int_vec_type);
173 }
174
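/* Recompute exec_mask from the current condition and loop masks.  Inside a
 * loop the break and continue masks are ANDed in; otherwise only the
 * condition mask applies. */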
175 static void lp_exec_mask_update(struct lp_exec_mask *mask)
176 {
177 if (mask->loop_stack_size) {
   178       /* for loops we need to update the entire mask at runtime */
179 LLVMValueRef tmp;
180 assert(mask->break_mask);
181 tmp = LLVMBuildAnd(mask->bld->builder,
182 mask->cont_mask,
183 mask->break_mask,
184 "maskcb");
185 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
186 mask->cond_mask,
187 tmp,
188 "maskfull");
189 } else
190 mask->exec_mask = mask->cond_mask;
191
192
193 mask->has_mask = (mask->cond_stack_size > 0 ||
194 mask->loop_stack_size > 0);
195 }
196
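/* IF: push the active condition mask onto the stack and make the new
 * condition the active one. */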
197 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
198 LLVMValueRef val)
199 {
200 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
201 if (mask->cond_stack_size == 0) {
202 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
203 }
204 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
205 assert(LLVMTypeOf(val) == mask->int_vec_type);
206 mask->cond_mask = val;
207
208 lp_exec_mask_update(mask);
209 }
210
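/* ELSE: invert the active condition mask, restricted to the mask that was
 * active when the matching IF was entered. */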
211 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
212 {
213 LLVMValueRef prev_mask;
214 LLVMValueRef inv_mask;
215
216 assert(mask->cond_stack_size);
217 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
218 if (mask->cond_stack_size == 1) {
219 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
220 }
221
222 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
223
224 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
225 inv_mask,
226 prev_mask, "");
227 lp_exec_mask_update(mask);
228 }
229
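/* ENDIF: restore the condition mask that was active before the matching IF. */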
230 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
231 {
232 assert(mask->cond_stack_size);
233 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
234 lp_exec_mask_update(mask);
235 }
236
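/* BGNLOOP: save the enclosing loop's state, spill the break mask to a
 * variable so it survives the loop back-edge, and start a new "bgnloop"
 * basic block. */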
237 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
238 {
239 if (mask->loop_stack_size == 0) {
240 assert(mask->loop_block == NULL);
241 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
242 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
243 assert(mask->break_var == NULL);
244 }
245
246 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
247
248 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
249 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
250 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
251 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
252 ++mask->loop_stack_size;
253
254 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
255 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
256
257 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
258 LLVMBuildBr(mask->bld->builder, mask->loop_block);
259 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
260
261 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
262
263 lp_exec_mask_update(mask);
264 }
265
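/* BRK: clear the break mask for every lane that is currently executing. */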
266 static void lp_exec_break(struct lp_exec_mask *mask)
267 {
268 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
269 mask->exec_mask,
270 "break");
271
272 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
273 mask->break_mask,
274 exec_mask, "break_full");
275
276 lp_exec_mask_update(mask);
277 }
278
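/* CONT: clear the continue mask for every lane that is currently executing. */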
279 static void lp_exec_continue(struct lp_exec_mask *mask)
280 {
281 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
282 mask->exec_mask,
283 "");
284
285 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
286 mask->cont_mask,
287 exec_mask, "");
288
289 lp_exec_mask_update(mask);
290 }
291
292
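/* ENDLOOP: restore the continue mask, store the break mask for the next
 * iteration, branch back to the loop header while any lane is still active,
 * then pop the enclosing loop's state. */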
293 static void lp_exec_endloop(struct lp_exec_mask *mask)
294 {
295 LLVMBasicBlockRef endloop;
296 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
297 mask->bld->type.length);
298 LLVMValueRef i1cond;
299
300 assert(mask->break_mask);
301
302 /*
303 * Restore the cont_mask, but don't pop
304 */
305 assert(mask->loop_stack_size);
306 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
307 lp_exec_mask_update(mask);
308
309 /*
310 * Unlike the continue mask, the break_mask must be preserved across loop
311 * iterations
312 */
313 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
314
   315    /* i1cond = (exec_mask != 0), i.e. at least one lane is still running */
316 i1cond = LLVMBuildICmp(
317 mask->bld->builder,
318 LLVMIntNE,
319 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
320 LLVMConstNull(reg_type), "");
321
322 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
323
324 LLVMBuildCondBr(mask->bld->builder,
325 i1cond, mask->loop_block, endloop);
326
327 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
328
329 assert(mask->loop_stack_size);
330 --mask->loop_stack_size;
331 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
332 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
333 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
334 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
335
336 lp_exec_mask_update(mask);
337 }
338
   339 /* Store val into the address pointed to by dst.
   340  * mask->exec_mask (optionally combined with pred) selects which channels
   341  * of val are written: a zero lane keeps the old value at dst, an
   342  * all-ones lane takes the new value.
   343  */
344 static void lp_exec_mask_store(struct lp_exec_mask *mask,
345 LLVMValueRef pred,
346 LLVMValueRef val,
347 LLVMValueRef dst)
348 {
349 /* Mix the predicate and execution mask */
350 if (mask->has_mask) {
351 if (pred) {
352 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
353 } else {
354 pred = mask->exec_mask;
355 }
356 }
357
358 if (pred) {
359 LLVMValueRef real_val, dst_val;
360
361 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
362 real_val = lp_build_select(mask->bld,
363 pred,
364 val, dst_val);
365
366 LLVMBuildStore(mask->bld->builder, real_val, dst);
367 } else
368 LLVMBuildStore(mask->bld->builder, val, dst);
369 }
370
371
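/* Approximate ddx within a 2x2 quad: replicate the left and right columns
 * and subtract (right minus left). */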
372 static LLVMValueRef
373 emit_ddx(struct lp_build_tgsi_soa_context *bld,
374 LLVMValueRef src)
375 {
376 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
377 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
378 return lp_build_sub(&bld->base, src_right, src_left);
379 }
380
381
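/* Approximate ddy within a 2x2 quad: replicate the top and bottom rows
 * and subtract (top minus bottom). */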
382 static LLVMValueRef
383 emit_ddy(struct lp_build_tgsi_soa_context *bld,
384 LLVMValueRef src)
385 {
386 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
387 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
388 return lp_build_sub(&bld->base, src_top, src_bottom);
389 }
390
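/* Return a pointer to temporary register (index, swizzle).  Without indirect
 * addressing this is the per-channel alloca; with it, the temps live in a
 * single array indexed as index*4 + swizzle (plus addr when indirect). */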
391 static LLVMValueRef
392 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
393 unsigned index,
394 unsigned swizzle,
395 boolean is_indirect,
396 LLVMValueRef addr)
397 {
398 if (!bld->has_indirect_addressing) {
399 return bld->temps[index][swizzle];
400 } else {
401 LLVMValueRef lindex =
402 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
403 if (is_indirect)
404 lindex = lp_build_add(&bld->base, lindex, addr);
405 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
406 }
407 }
408
409 /**
410 * Register fetch.
411 */
412 static LLVMValueRef
413 emit_fetch(
414 struct lp_build_tgsi_soa_context *bld,
415 const struct tgsi_full_instruction *inst,
416 unsigned index,
417 const unsigned chan_index )
418 {
419 const struct tgsi_full_src_register *reg = &inst->Src[index];
420 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
421 LLVMValueRef res;
422 LLVMValueRef addr;
423
424 switch (swizzle) {
425 case TGSI_SWIZZLE_X:
426 case TGSI_SWIZZLE_Y:
427 case TGSI_SWIZZLE_Z:
428 case TGSI_SWIZZLE_W:
429
430 if (reg->Register.Indirect) {
431 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
432 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
433 addr = LLVMBuildLoad(bld->base.builder,
434 bld->addr[reg->Indirect.Index][swizzle],
435 "");
436 /* for indexing we want integers */
437 addr = LLVMBuildFPToSI(bld->base.builder, addr,
438 int_vec_type, "");
439 addr = LLVMBuildExtractElement(bld->base.builder,
440 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
441 "");
442 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
443 }
444
445 switch (reg->Register.File) {
446 case TGSI_FILE_CONSTANT: {
447 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
448 LLVMValueRef scalar, scalar_ptr;
449
450 if (reg->Register.Indirect) {
451 /*lp_build_printf(bld->base.builder,
452 "\taddr = %d\n", addr);*/
453 index = lp_build_add(&bld->base, index, addr);
454 }
455 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
456 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
457
458 res = lp_build_broadcast_scalar(&bld->base, scalar);
459 break;
460 }
461
462 case TGSI_FILE_IMMEDIATE:
463 res = bld->immediates[reg->Register.Index][swizzle];
464 assert(res);
465 break;
466
467 case TGSI_FILE_INPUT:
468 res = bld->inputs[reg->Register.Index][swizzle];
469 assert(res);
470 break;
471
472 case TGSI_FILE_TEMPORARY: {
473 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
474 swizzle,
475 reg->Register.Indirect,
476 addr);
477 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
478 if(!res)
479 return bld->base.undef;
480 break;
481 }
482
483 default:
484 assert( 0 );
485 return bld->base.undef;
486 }
487 break;
488
489 default:
490 assert( 0 );
491 return bld->base.undef;
492 }
493
494 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
495 case TGSI_UTIL_SIGN_CLEAR:
496 res = lp_build_abs( &bld->base, res );
497 break;
498
499 case TGSI_UTIL_SIGN_SET:
   500       /* TODO: Use bitwise OR for floating point */
501 res = lp_build_abs( &bld->base, res );
502 res = LLVMBuildNeg( bld->base.builder, res, "" );
503 break;
504
505 case TGSI_UTIL_SIGN_TOGGLE:
506 res = LLVMBuildNeg( bld->base.builder, res, "" );
507 break;
508
509 case TGSI_UTIL_SIGN_KEEP:
510 break;
511 }
512
513 return res;
514 }
515
516
517 /**
518 * Register fetch with derivatives.
519 */
520 static void
521 emit_fetch_deriv(
522 struct lp_build_tgsi_soa_context *bld,
523 const struct tgsi_full_instruction *inst,
524 unsigned index,
525 const unsigned chan_index,
526 LLVMValueRef *res,
527 LLVMValueRef *ddx,
528 LLVMValueRef *ddy)
529 {
530 LLVMValueRef src;
531
532 src = emit_fetch(bld, inst, index, chan_index);
533
534 if(res)
535 *res = src;
536
537 /* TODO: use interpolation coeffs for inputs */
538
539 if(ddx)
540 *ddx = emit_ddx(bld, src);
541
542 if(ddy)
543 *ddy = emit_ddy(bld, src);
544 }
545
546
547 /**
548 * Predicate.
549 */
550 static void
551 emit_fetch_predicate(
552 struct lp_build_tgsi_soa_context *bld,
553 const struct tgsi_full_instruction *inst,
554 LLVMValueRef *pred)
555 {
556 unsigned index;
557 unsigned char swizzles[4];
558 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
559 LLVMValueRef value;
560 unsigned chan;
561
562 if (!inst->Instruction.Predicate) {
563 FOR_EACH_CHANNEL( chan ) {
564 pred[chan] = NULL;
565 }
566 return;
567 }
568
569 swizzles[0] = inst->Predicate.SwizzleX;
570 swizzles[1] = inst->Predicate.SwizzleY;
571 swizzles[2] = inst->Predicate.SwizzleZ;
572 swizzles[3] = inst->Predicate.SwizzleW;
573
574 index = inst->Predicate.Index;
575 assert(index < LP_MAX_TGSI_PREDS);
576
577 FOR_EACH_CHANNEL( chan ) {
578 unsigned swizzle = swizzles[chan];
579
580 /*
581 * Only fetch the predicate register channels that are actually listed
582 * in the swizzles
583 */
584 if (!unswizzled[swizzle]) {
585 value = LLVMBuildLoad(bld->base.builder,
586 bld->preds[index][swizzle], "");
587
588 /*
589 * Convert the value to an integer mask.
590 *
   591        * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
   592        * needlessly causes two comparisons here, because the intermediate
   593        * result is stored as a float vector instead of an integer mask vector.
594 */
595 value = lp_build_compare(bld->base.builder,
596 bld->base.type,
597 PIPE_FUNC_NOTEQUAL,
598 value,
599 bld->base.zero);
600 if (inst->Predicate.Negate) {
601 value = LLVMBuildNot(bld->base.builder, value, "");
602 }
603
604 unswizzled[swizzle] = value;
605 } else {
606 value = unswizzled[swizzle];
607 }
608
609 pred[chan] = value;
610 }
611 }
612
613
614 /**
615 * Register store.
616 */
617 static void
618 emit_store(
619 struct lp_build_tgsi_soa_context *bld,
620 const struct tgsi_full_instruction *inst,
621 unsigned index,
622 unsigned chan_index,
623 LLVMValueRef pred,
624 LLVMValueRef value)
625 {
626 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
627 LLVMValueRef addr;
628
629 switch( inst->Instruction.Saturate ) {
630 case TGSI_SAT_NONE:
631 break;
632
633 case TGSI_SAT_ZERO_ONE:
634 value = lp_build_max(&bld->base, value, bld->base.zero);
635 value = lp_build_min(&bld->base, value, bld->base.one);
636 break;
637
638 case TGSI_SAT_MINUS_PLUS_ONE:
639 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
640 value = lp_build_min(&bld->base, value, bld->base.one);
641 break;
642
643 default:
644 assert(0);
645 }
646
647 if (reg->Register.Indirect) {
648 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
649 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
650 addr = LLVMBuildLoad(bld->base.builder,
651 bld->addr[reg->Indirect.Index][swizzle],
652 "");
653 /* for indexing we want integers */
654 addr = LLVMBuildFPToSI(bld->base.builder, addr,
655 int_vec_type, "");
656 addr = LLVMBuildExtractElement(bld->base.builder,
657 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
658 "");
659 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
660 }
661
662 switch( reg->Register.File ) {
663 case TGSI_FILE_OUTPUT:
664 lp_exec_mask_store(&bld->exec_mask, pred, value,
665 bld->outputs[reg->Register.Index][chan_index]);
666 break;
667
668 case TGSI_FILE_TEMPORARY: {
669 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
670 chan_index,
671 reg->Register.Indirect,
672 addr);
673 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
674 break;
675 }
676
677 case TGSI_FILE_ADDRESS:
678 lp_exec_mask_store(&bld->exec_mask, pred, value,
679 bld->addr[reg->Indirect.Index][chan_index]);
680 break;
681
682 case TGSI_FILE_PREDICATE:
683 lp_exec_mask_store(&bld->exec_mask, pred, value,
684 bld->preds[index][chan_index]);
685 break;
686
687 default:
688 assert( 0 );
689 }
690 }
691
692
693 /**
694 * High-level instruction translators.
695 */
696
697 enum tex_modifier {
698 TEX_MODIFIER_NONE = 0,
699 TEX_MODIFIER_PROJECTED,
700 TEX_MODIFIER_LOD_BIAS,
701 TEX_MODIFIER_EXPLICIT_LOD,
702 TEX_MODIFIER_EXPLICIT_DERIV
703 };
704
705 static void
706 emit_tex( struct lp_build_tgsi_soa_context *bld,
707 const struct tgsi_full_instruction *inst,
708 enum tex_modifier modifier,
709 LLVMValueRef *texel)
710 {
711 unsigned unit;
712 LLVMValueRef lod_bias, explicit_lod;
713 LLVMValueRef oow = NULL;
714 LLVMValueRef coords[3];
715 LLVMValueRef ddx[3];
716 LLVMValueRef ddy[3];
717 unsigned num_coords;
718 unsigned i;
719
720 if (!bld->sampler) {
721 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
722 for (i = 0; i < 4; i++) {
723 texel[i] = bld->base.undef;
724 }
725 return;
726 }
727
728 switch (inst->Texture.Texture) {
729 case TGSI_TEXTURE_1D:
730 num_coords = 1;
731 break;
732 case TGSI_TEXTURE_2D:
733 case TGSI_TEXTURE_RECT:
734 num_coords = 2;
735 break;
736 case TGSI_TEXTURE_SHADOW1D:
737 case TGSI_TEXTURE_SHADOW2D:
738 case TGSI_TEXTURE_SHADOWRECT:
739 case TGSI_TEXTURE_3D:
740 case TGSI_TEXTURE_CUBE:
741 num_coords = 3;
742 break;
743 default:
744 assert(0);
745 return;
746 }
747
748 if (modifier == TEX_MODIFIER_LOD_BIAS) {
749 lod_bias = emit_fetch( bld, inst, 0, 3 );
750 explicit_lod = NULL;
751 }
752 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
753 lod_bias = NULL;
754 explicit_lod = emit_fetch( bld, inst, 0, 3 );
755 }
756 else {
757 lod_bias = NULL;
758 explicit_lod = NULL;
759 }
760
761 if (modifier == TEX_MODIFIER_PROJECTED) {
762 oow = emit_fetch( bld, inst, 0, 3 );
763 oow = lp_build_rcp(&bld->base, oow);
764 }
765
766 for (i = 0; i < num_coords; i++) {
767 coords[i] = emit_fetch( bld, inst, 0, i );
768 if (modifier == TEX_MODIFIER_PROJECTED)
769 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
770 }
771 for (i = num_coords; i < 3; i++) {
772 coords[i] = bld->base.undef;
773 }
774
775 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
776 for (i = 0; i < num_coords; i++) {
777 ddx[i] = emit_fetch( bld, inst, 1, i );
778 ddy[i] = emit_fetch( bld, inst, 2, i );
779 }
780 unit = inst->Src[3].Register.Index;
781 } else {
782 for (i = 0; i < num_coords; i++) {
783 ddx[i] = emit_ddx( bld, coords[i] );
784 ddy[i] = emit_ddy( bld, coords[i] );
785 }
786 unit = inst->Src[1].Register.Index;
787 }
788 for (i = num_coords; i < 3; i++) {
789 ddx[i] = bld->base.undef;
790 ddy[i] = bld->base.undef;
791 }
792
793 bld->sampler->emit_fetch_texel(bld->sampler,
794 bld->base.builder,
795 bld->base.type,
796 unit, num_coords, coords,
797 ddx, ddy,
798 lod_bias, explicit_lod,
799 texel);
800 }
801
802
803 /**
804 * Kill fragment if any of the src register values are negative.
805 */
806 static void
807 emit_kil(
808 struct lp_build_tgsi_soa_context *bld,
809 const struct tgsi_full_instruction *inst )
810 {
811 const struct tgsi_full_src_register *reg = &inst->Src[0];
812 LLVMValueRef terms[NUM_CHANNELS];
813 LLVMValueRef mask;
814 unsigned chan_index;
815
816 memset(&terms, 0, sizeof terms);
817
818 FOR_EACH_CHANNEL( chan_index ) {
819 unsigned swizzle;
820
821 /* Unswizzle channel */
822 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
823
   824       /* Check if the component has not already been tested. */
825 assert(swizzle < NUM_CHANNELS);
826 if( !terms[swizzle] )
827 /* TODO: change the comparison operator instead of setting the sign */
828 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
829 }
830
831 mask = NULL;
832 FOR_EACH_CHANNEL( chan_index ) {
833 if(terms[chan_index]) {
834 LLVMValueRef chan_mask;
835
836 /*
837 * If term < 0 then mask = 0 else mask = ~0.
838 */
839 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
840
841 if(mask)
842 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
843 else
844 mask = chan_mask;
845 }
846 }
847
848 if(mask)
849 lp_build_mask_update(bld->mask, mask);
850 }
851
852
853 /**
854 * Predicated fragment kill.
855 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
856 * The only predication is the execution mask which will apply if
857 * we're inside a loop or conditional.
858 */
859 static void
860 emit_kilp(struct lp_build_tgsi_soa_context *bld,
861 const struct tgsi_full_instruction *inst)
862 {
863 LLVMValueRef mask;
864
865 /* For those channels which are "alive", disable fragment shader
866 * execution.
867 */
868 if (bld->exec_mask.has_mask) {
869 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
870 }
871 else {
872 mask = bld->base.zero;
873 }
874
875 lp_build_mask_update(bld->mask, mask);
876 }
877
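/**
 * Allocate storage for the register ranges named by a TGSI declaration.
 * Temporaries become a single array alloca when indirect addressing is used.
 */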
878 static void
879 emit_declaration(
880 struct lp_build_tgsi_soa_context *bld,
881 const struct tgsi_full_declaration *decl)
882 {
883 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
884
885 unsigned first = decl->Range.First;
886 unsigned last = decl->Range.Last;
887 unsigned idx, i;
888
889 for (idx = first; idx <= last; ++idx) {
890 switch (decl->Declaration.File) {
891 case TGSI_FILE_TEMPORARY:
892 assert(idx < LP_MAX_TGSI_TEMPS);
893 if (bld->has_indirect_addressing) {
894 LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
895 last*4 + 4, 0);
896 bld->temps_array = lp_build_array_alloca(bld->base.builder,
897 vec_type, val, "");
898 } else {
899 for (i = 0; i < NUM_CHANNELS; i++)
900 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
901 vec_type, "");
902 }
903 break;
904
905 case TGSI_FILE_OUTPUT:
906 for (i = 0; i < NUM_CHANNELS; i++)
907 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
908 vec_type, "");
909 break;
910
911 case TGSI_FILE_ADDRESS:
912 assert(idx < LP_MAX_TGSI_ADDRS);
913 for (i = 0; i < NUM_CHANNELS; i++)
914 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
915 vec_type, "");
916 break;
917
918 case TGSI_FILE_PREDICATE:
919 assert(idx < LP_MAX_TGSI_PREDS);
920 for (i = 0; i < NUM_CHANNELS; i++)
921 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
922 vec_type, "");
923 break;
924
925 default:
926 /* don't need to declare other vars */
927 break;
928 }
929 }
930 }
931
932
933 /**
934 * Emit LLVM for one TGSI instruction.
   935  * \return TRUE for success, FALSE otherwise
936 */
937 static boolean
938 emit_instruction(
939 struct lp_build_tgsi_soa_context *bld,
940 const struct tgsi_full_instruction *inst,
941 const struct tgsi_opcode_info *info)
942 {
943 unsigned chan_index;
944 LLVMValueRef src0, src1, src2;
945 LLVMValueRef tmp0, tmp1, tmp2;
946 LLVMValueRef tmp3 = NULL;
947 LLVMValueRef tmp4 = NULL;
948 LLVMValueRef tmp5 = NULL;
949 LLVMValueRef tmp6 = NULL;
950 LLVMValueRef tmp7 = NULL;
951 LLVMValueRef res;
952 LLVMValueRef dst0[NUM_CHANNELS];
953
954 /*
955 * Stores and write masks are handled in a general fashion after the long
956 * instruction opcode switch statement.
957 *
   958     * Although not strictly necessary, we avoid generating instructions for
   959     * channels which won't be stored, in cases where that's easy. For some
960 * complex instructions, like texture sampling, it is more convenient to
961 * assume a full writemask and then let LLVM optimization passes eliminate
962 * redundant code.
963 */
964
965 assert(info->num_dst <= 1);
966 if (info->num_dst) {
967 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
968 dst0[chan_index] = bld->base.undef;
969 }
970 }
971
972 switch (inst->Instruction.Opcode) {
973 case TGSI_OPCODE_ARL:
974 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
975 tmp0 = emit_fetch( bld, inst, 0, chan_index );
976 tmp0 = lp_build_floor(&bld->base, tmp0);
977 dst0[chan_index] = tmp0;
978 }
979 break;
980
981 case TGSI_OPCODE_MOV:
982 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
983 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
984 }
985 break;
986
987 case TGSI_OPCODE_LIT:
988 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
989 dst0[CHAN_X] = bld->base.one;
990 }
991 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
992 src0 = emit_fetch( bld, inst, 0, CHAN_X );
993 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
994 }
995 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
996 /* XMM[1] = SrcReg[0].yyyy */
997 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
998 /* XMM[1] = max(XMM[1], 0) */
999 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1000 /* XMM[2] = SrcReg[0].wwww */
1001 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1002 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1003 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1004 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1005 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1006 }
1007 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1008 dst0[CHAN_W] = bld->base.one;
1009 }
1010 break;
1011
1012 case TGSI_OPCODE_RCP:
1013 /* TGSI_OPCODE_RECIP */
1014 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1015 res = lp_build_rcp(&bld->base, src0);
1016 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1017 dst0[chan_index] = res;
1018 }
1019 break;
1020
1021 case TGSI_OPCODE_RSQ:
1022 /* TGSI_OPCODE_RECIPSQRT */
1023 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1024 src0 = lp_build_abs(&bld->base, src0);
1025 res = lp_build_rsqrt(&bld->base, src0);
1026 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1027 dst0[chan_index] = res;
1028 }
1029 break;
1030
1031 case TGSI_OPCODE_EXP:
1032 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1033 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1034 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1035 LLVMValueRef *p_exp2_int_part = NULL;
1036 LLVMValueRef *p_frac_part = NULL;
1037 LLVMValueRef *p_exp2 = NULL;
1038
1039 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1040
1041 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1042 p_exp2_int_part = &tmp0;
1043 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1044 p_frac_part = &tmp1;
1045 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1046 p_exp2 = &tmp2;
1047
1048 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1049
1050 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1051 dst0[CHAN_X] = tmp0;
1052 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1053 dst0[CHAN_Y] = tmp1;
1054 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1055 dst0[CHAN_Z] = tmp2;
1056 }
1057 /* dst.w = 1.0 */
1058 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1059 dst0[CHAN_W] = bld->base.one;
1060 }
1061 break;
1062
1063 case TGSI_OPCODE_LOG:
1064 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1065 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1066 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1067 LLVMValueRef *p_floor_log2 = NULL;
1068 LLVMValueRef *p_exp = NULL;
1069 LLVMValueRef *p_log2 = NULL;
1070
1071 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1072 src0 = lp_build_abs( &bld->base, src0 );
1073
1074 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1075 p_floor_log2 = &tmp0;
1076 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1077 p_exp = &tmp1;
1078 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1079 p_log2 = &tmp2;
1080
1081 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1082
1083 /* dst.x = floor(lg2(abs(src.x))) */
1084 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1085 dst0[CHAN_X] = tmp0;
1086 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1087 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1088 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1089 }
1090 /* dst.z = lg2(abs(src.x)) */
1091 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1092 dst0[CHAN_Z] = tmp2;
1093 }
1094 /* dst.w = 1.0 */
1095 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1096 dst0[CHAN_W] = bld->base.one;
1097 }
1098 break;
1099
1100 case TGSI_OPCODE_MUL:
1101 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1102 src0 = emit_fetch( bld, inst, 0, chan_index );
1103 src1 = emit_fetch( bld, inst, 1, chan_index );
1104 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1105 }
1106 break;
1107
1108 case TGSI_OPCODE_ADD:
1109 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1110 src0 = emit_fetch( bld, inst, 0, chan_index );
1111 src1 = emit_fetch( bld, inst, 1, chan_index );
1112 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1113 }
1114 break;
1115
1116 case TGSI_OPCODE_DP3:
1117 /* TGSI_OPCODE_DOT3 */
1118 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1119 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1120 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1121 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1122 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1123 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1124 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1125 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1126 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1127 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1128 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1129 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1130 dst0[chan_index] = tmp0;
1131 }
1132 break;
1133
1134 case TGSI_OPCODE_DP4:
1135 /* TGSI_OPCODE_DOT4 */
1136 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1137 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1138 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1139 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1140 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1141 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1142 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1143 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1144 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1145 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1146 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1147 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1148 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1149 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1150 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1151 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1152 dst0[chan_index] = tmp0;
1153 }
1154 break;
1155
1156 case TGSI_OPCODE_DST:
1157 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1158 dst0[CHAN_X] = bld->base.one;
1159 }
1160 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1161 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1162 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1163 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1164 }
1165 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1166 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1167 }
1168 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1169 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1170 }
1171 break;
1172
1173 case TGSI_OPCODE_MIN:
1174 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1175 src0 = emit_fetch( bld, inst, 0, chan_index );
1176 src1 = emit_fetch( bld, inst, 1, chan_index );
1177 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1178 }
1179 break;
1180
1181 case TGSI_OPCODE_MAX:
1182 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1183 src0 = emit_fetch( bld, inst, 0, chan_index );
1184 src1 = emit_fetch( bld, inst, 1, chan_index );
1185 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1186 }
1187 break;
1188
1189 case TGSI_OPCODE_SLT:
1190 /* TGSI_OPCODE_SETLT */
1191 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1192 src0 = emit_fetch( bld, inst, 0, chan_index );
1193 src1 = emit_fetch( bld, inst, 1, chan_index );
1194 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1195 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1196 }
1197 break;
1198
1199 case TGSI_OPCODE_SGE:
1200 /* TGSI_OPCODE_SETGE */
1201 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1202 src0 = emit_fetch( bld, inst, 0, chan_index );
1203 src1 = emit_fetch( bld, inst, 1, chan_index );
1204 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1205 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1206 }
1207 break;
1208
1209 case TGSI_OPCODE_MAD:
1210 /* TGSI_OPCODE_MADD */
1211 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1212 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1213 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1214 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1215 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1216 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1217 dst0[chan_index] = tmp0;
1218 }
1219 break;
1220
1221 case TGSI_OPCODE_SUB:
1222 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1223 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1224 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1225 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1226 }
1227 break;
1228
1229 case TGSI_OPCODE_LRP:
1230 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1231 src0 = emit_fetch( bld, inst, 0, chan_index );
1232 src1 = emit_fetch( bld, inst, 1, chan_index );
1233 src2 = emit_fetch( bld, inst, 2, chan_index );
1234 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1235 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1236 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1237 }
1238 break;
1239
1240 case TGSI_OPCODE_CND:
1241 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1242 src0 = emit_fetch( bld, inst, 0, chan_index );
1243 src1 = emit_fetch( bld, inst, 1, chan_index );
1244 src2 = emit_fetch( bld, inst, 2, chan_index );
1245 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1246 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1247 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1248 }
1249 break;
1250
1251 case TGSI_OPCODE_DP2A:
1252 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1253 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1254 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1255 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1256 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1257 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1258 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1259 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1260 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1261 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1262 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1263 }
1264 break;
1265
1266 case TGSI_OPCODE_FRC:
1267 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1268 src0 = emit_fetch( bld, inst, 0, chan_index );
1269 tmp0 = lp_build_floor(&bld->base, src0);
1270 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1271 dst0[chan_index] = tmp0;
1272 }
1273 break;
1274
1275 case TGSI_OPCODE_CLAMP:
1276 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1277 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1278 src1 = emit_fetch( bld, inst, 1, chan_index );
1279 src2 = emit_fetch( bld, inst, 2, chan_index );
1280 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1281 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1282 dst0[chan_index] = tmp0;
1283 }
1284 break;
1285
1286 case TGSI_OPCODE_FLR:
1287 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1288 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1289 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1290 }
1291 break;
1292
1293 case TGSI_OPCODE_ROUND:
1294 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1295 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1296 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1297 }
1298 break;
1299
1300 case TGSI_OPCODE_EX2: {
1301 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1302 tmp0 = lp_build_exp2( &bld->base, tmp0);
1303 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1304 dst0[chan_index] = tmp0;
1305 }
1306 break;
1307 }
1308
1309 case TGSI_OPCODE_LG2:
1310 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1311 tmp0 = lp_build_log2( &bld->base, tmp0);
1312 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1313 dst0[chan_index] = tmp0;
1314 }
1315 break;
1316
1317 case TGSI_OPCODE_POW:
1318 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1319 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1320 res = lp_build_pow( &bld->base, src0, src1 );
1321 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1322 dst0[chan_index] = res;
1323 }
1324 break;
1325
1326 case TGSI_OPCODE_XPD:
1327 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1328 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1329 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1330 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1331 }
1332 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1333 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1334 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1335 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1336 }
1337 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1338 tmp2 = tmp0;
1339 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1340 tmp5 = tmp3;
1341 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1342 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1343 dst0[CHAN_X] = tmp2;
1344 }
1345 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1346 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1347 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1348 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1349 }
1350 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1351 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1352 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1353 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1354 dst0[CHAN_Y] = tmp3;
1355 }
1356 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1357 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1358 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1359 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1360 dst0[CHAN_Z] = tmp5;
1361 }
1362 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1363 dst0[CHAN_W] = bld->base.one;
1364 }
1365 break;
1366
1367 case TGSI_OPCODE_ABS:
1368 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1369 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1370 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1371 }
1372 break;
1373
1374 case TGSI_OPCODE_RCC:
1375 /* deprecated? */
1376 assert(0);
1377 return FALSE;
1378
1379 case TGSI_OPCODE_DPH:
1380 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1381 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1382 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1383 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1384 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1385 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1386 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1387 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1388 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1389 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1390 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1391 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1392 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1393 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1394 dst0[chan_index] = tmp0;
1395 }
1396 break;
1397
1398 case TGSI_OPCODE_COS:
1399 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1400 tmp0 = lp_build_cos( &bld->base, tmp0 );
1401 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1402 dst0[chan_index] = tmp0;
1403 }
1404 break;
1405
1406 case TGSI_OPCODE_DDX:
1407 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1408 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1409 }
1410 break;
1411
1412 case TGSI_OPCODE_DDY:
1413 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1414 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1415 }
1416 break;
1417
1418 case TGSI_OPCODE_KILP:
1419 /* predicated kill */
1420 emit_kilp( bld, inst );
1421 break;
1422
1423 case TGSI_OPCODE_KIL:
1424 /* conditional kill */
1425 emit_kil( bld, inst );
1426 break;
1427
1428 case TGSI_OPCODE_PK2H:
1429 return FALSE;
1430 break;
1431
1432 case TGSI_OPCODE_PK2US:
1433 return FALSE;
1434 break;
1435
1436 case TGSI_OPCODE_PK4B:
1437 return FALSE;
1438 break;
1439
1440 case TGSI_OPCODE_PK4UB:
1441 return FALSE;
1442 break;
1443
1444 case TGSI_OPCODE_RFL:
1445 return FALSE;
1446 break;
1447
1448 case TGSI_OPCODE_SEQ:
1449 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1450 src0 = emit_fetch( bld, inst, 0, chan_index );
1451 src1 = emit_fetch( bld, inst, 1, chan_index );
1452 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1453 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1454 }
1455 break;
1456
1457 case TGSI_OPCODE_SFL:
1458 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1459 dst0[chan_index] = bld->base.zero;
1460 }
1461 break;
1462
1463 case TGSI_OPCODE_SGT:
1464 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1465 src0 = emit_fetch( bld, inst, 0, chan_index );
1466 src1 = emit_fetch( bld, inst, 1, chan_index );
1467 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1468 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1469 }
1470 break;
1471
1472 case TGSI_OPCODE_SIN:
1473 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1474 tmp0 = lp_build_sin( &bld->base, tmp0 );
1475 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1476 dst0[chan_index] = tmp0;
1477 }
1478 break;
1479
1480 case TGSI_OPCODE_SLE:
1481 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1482 src0 = emit_fetch( bld, inst, 0, chan_index );
1483 src1 = emit_fetch( bld, inst, 1, chan_index );
1484 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1485 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1486 }
1487 break;
1488
1489 case TGSI_OPCODE_SNE:
1490 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1491 src0 = emit_fetch( bld, inst, 0, chan_index );
1492 src1 = emit_fetch( bld, inst, 1, chan_index );
1493 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1494 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1495 }
1496 break;
1497
1498 case TGSI_OPCODE_STR:
1499 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1500 dst0[chan_index] = bld->base.one;
1501 }
1502 break;
1503
1504 case TGSI_OPCODE_TEX:
1505 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1506 break;
1507
1508 case TGSI_OPCODE_TXD:
1509 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1510 break;
1511
1512 case TGSI_OPCODE_UP2H:
1513 /* deprecated */
1514 assert (0);
1515 return FALSE;
1516 break;
1517
1518 case TGSI_OPCODE_UP2US:
1519 /* deprecated */
1520 assert(0);
1521 return FALSE;
1522 break;
1523
1524 case TGSI_OPCODE_UP4B:
1525 /* deprecated */
1526 assert(0);
1527 return FALSE;
1528 break;
1529
1530 case TGSI_OPCODE_UP4UB:
1531 /* deprecated */
1532 assert(0);
1533 return FALSE;
1534 break;
1535
1536 case TGSI_OPCODE_X2D:
1537 /* deprecated? */
1538 assert(0);
1539 return FALSE;
1540 break;
1541
1542 case TGSI_OPCODE_ARA:
1543 /* deprecated */
1544 assert(0);
1545 return FALSE;
1546 break;
1547
1548 case TGSI_OPCODE_ARR:
1549 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1550 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1551 tmp0 = lp_build_round(&bld->base, tmp0);
1552 dst0[chan_index] = tmp0;
1553 }
1554 break;
1555
1556 case TGSI_OPCODE_BRA:
1557 /* deprecated */
1558 assert(0);
1559 return FALSE;
1560 break;
1561
1562 case TGSI_OPCODE_CAL:
1563 /* FIXME */
1564 return FALSE;
1565 break;
1566
1567 case TGSI_OPCODE_RET:
1568 /* FIXME */
1569 return FALSE;
1570 break;
1571
1572 case TGSI_OPCODE_END:
1573 break;
1574
1575 case TGSI_OPCODE_SSG:
1576 /* TGSI_OPCODE_SGN */
1577 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1578 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1579 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1580 }
1581 break;
1582
1583 case TGSI_OPCODE_CMP:
1584 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1585 src0 = emit_fetch( bld, inst, 0, chan_index );
1586 src1 = emit_fetch( bld, inst, 1, chan_index );
1587 src2 = emit_fetch( bld, inst, 2, chan_index );
1588 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1589 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1590 }
1591 break;
1592
1593 case TGSI_OPCODE_SCS:
1594 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1595 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1596 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1597 }
1598 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1599 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1600 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1601 }
1602 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1603 dst0[CHAN_Z] = bld->base.zero;
1604 }
1605 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1606 dst0[CHAN_W] = bld->base.one;
1607 }
1608 break;
1609
1610 case TGSI_OPCODE_TXB:
1611 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1612 break;
1613
1614 case TGSI_OPCODE_NRM:
1615 /* fall-through */
1616 case TGSI_OPCODE_NRM4:
1617 /* 3 or 4-component normalization */
1618 {
1619 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1620
1621 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1622 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1623 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1624 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1625
1626 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1627
1628 /* xmm4 = src.x */
1629 /* xmm0 = src.x * src.x */
1630 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1631 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1632 tmp4 = tmp0;
1633 }
1634 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1635
1636 /* xmm5 = src.y */
1637 /* xmm0 = xmm0 + src.y * src.y */
1638 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1639 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1640 tmp5 = tmp1;
1641 }
1642 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1643 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1644
1645 /* xmm6 = src.z */
1646 /* xmm0 = xmm0 + src.z * src.z */
1647 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1648 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1649 tmp6 = tmp1;
1650 }
1651 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1652 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1653
1654 if (dims == 4) {
1655 /* xmm7 = src.w */
1656 /* xmm0 = xmm0 + src.w * src.w */
1657 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1658 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1659 tmp7 = tmp1;
1660 }
1661 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1662 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1663 }
1664
1665 /* xmm1 = 1 / sqrt(xmm0) */
1666 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1667
1668 /* dst.x = xmm1 * src.x */
1669 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1670 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1671 }
1672
1673 /* dst.y = xmm1 * src.y */
1674 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1675 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1676 }
1677
1678 /* dst.z = xmm1 * src.z */
1679 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1680 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1681 }
1682
1683 /* dst.w = xmm1 * src.w */
  1684          if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4) {
1685 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1686 }
1687 }
1688
1689 /* dst.w = 1.0 */
1690 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1691 dst0[CHAN_W] = bld->base.one;
1692 }
1693 }
1694 break;
1695
1696 case TGSI_OPCODE_DIV:
1697 /* deprecated */
1698 assert( 0 );
1699 return FALSE;
1700 break;
1701
1702 case TGSI_OPCODE_DP2:
1703 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1704 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1705 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1706 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1707 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1708 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1709 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1710 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1711 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1712 }
1713 break;
1714
1715 case TGSI_OPCODE_TXL:
1716 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1717 break;
1718
1719 case TGSI_OPCODE_TXP:
1720 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1721 break;
1722
1723 case TGSI_OPCODE_BRK:
1724 lp_exec_break(&bld->exec_mask);
1725 break;
1726
1727 case TGSI_OPCODE_IF:
1728 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1729 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1730 tmp0, bld->base.zero);
1731 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1732 break;
1733
1734 case TGSI_OPCODE_BGNLOOP:
1735 lp_exec_bgnloop(&bld->exec_mask);
1736 break;
1737
1738 case TGSI_OPCODE_ELSE:
1739 lp_exec_mask_cond_invert(&bld->exec_mask);
1740 break;
1741
1742 case TGSI_OPCODE_ENDIF:
1743 lp_exec_mask_cond_pop(&bld->exec_mask);
1744 break;
1745
1746 case TGSI_OPCODE_ENDLOOP:
1747 lp_exec_endloop(&bld->exec_mask);
1748 break;
1749
1750 case TGSI_OPCODE_PUSHA:
1751 /* deprecated? */
1752 assert(0);
1753 return FALSE;
1754 break;
1755
1756 case TGSI_OPCODE_POPA:
1757 /* deprecated? */
1758 assert(0);
1759 return FALSE;
1760 break;
1761
1762 case TGSI_OPCODE_CEIL:
1763 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1764 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1765 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1766 }
1767 break;
1768
1769 case TGSI_OPCODE_I2F:
1770 /* deprecated? */
1771 assert(0);
1772 return FALSE;
1773 break;
1774
1775 case TGSI_OPCODE_NOT:
1776 /* deprecated? */
1777 assert(0);
1778 return FALSE;
1779 break;
1780
1781 case TGSI_OPCODE_TRUNC:
1782 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1783 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1784 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1785 }
1786 break;
1787
1788 case TGSI_OPCODE_SHL:
1789 /* deprecated? */
1790 assert(0);
1791 return FALSE;
1792 break;
1793
1794 case TGSI_OPCODE_ISHR:
1795 /* deprecated? */
1796 assert(0);
1797 return FALSE;
1798 break;
1799
1800 case TGSI_OPCODE_AND:
1801 /* deprecated? */
1802 assert(0);
1803 return FALSE;
1804 break;
1805
1806 case TGSI_OPCODE_OR:
1807 /* deprecated? */
1808 assert(0);
1809 return FALSE;
1810 break;
1811
1812 case TGSI_OPCODE_MOD:
1813 /* deprecated? */
1814 assert(0);
1815 return FALSE;
1816 break;
1817
1818 case TGSI_OPCODE_XOR:
1819 /* deprecated? */
1820 assert(0);
1821 return FALSE;
1822 break;
1823
1824 case TGSI_OPCODE_SAD:
1825 /* deprecated? */
1826 assert(0);
1827 return FALSE;
1828 break;
1829
1830 case TGSI_OPCODE_TXF:
1831 /* deprecated? */
1832 assert(0);
1833 return FALSE;
1834 break;
1835
1836 case TGSI_OPCODE_TXQ:
1837 /* deprecated? */
1838 assert(0);
1839 return FALSE;
1840 break;
1841
1842 case TGSI_OPCODE_CONT:
1843 lp_exec_continue(&bld->exec_mask);
1844 break;
1845
1846 case TGSI_OPCODE_EMIT:
1847 return FALSE;
1848 break;
1849
1850 case TGSI_OPCODE_ENDPRIM:
1851 return FALSE;
1852 break;
1853
1854 case TGSI_OPCODE_NOP:
1855 break;
1856
1857 default:
1858 return FALSE;
1859 }
1860
1861 if(info->num_dst) {
1862 LLVMValueRef pred[NUM_CHANNELS];
1863
1864 emit_fetch_predicate( bld, inst, pred );
1865
1866 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1867 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1868 }
1869 }
1870
1871 return TRUE;
1872 }
1873
1874
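/**
 * Main entry point: translate a complete TGSI shader into LLVM IR using a
 * structure-of-arrays layout (one vector per channel).
 */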
1875 void
1876 lp_build_tgsi_soa(LLVMBuilderRef builder,
1877 const struct tgsi_token *tokens,
1878 struct lp_type type,
1879 struct lp_build_mask_context *mask,
1880 LLVMValueRef consts_ptr,
1881 const LLVMValueRef *pos,
1882 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1883 LLVMValueRef (*outputs)[NUM_CHANNELS],
1884 struct lp_build_sampler_soa *sampler,
1885 const struct tgsi_shader_info *info)
1886 {
1887 struct lp_build_tgsi_soa_context bld;
1888 struct tgsi_parse_context parse;
1889 uint num_immediates = 0;
1890 unsigned i;
1891
1892 /* Setup build context */
1893 memset(&bld, 0, sizeof bld);
1894 lp_build_context_init(&bld.base, builder, type);
1895 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1896 bld.mask = mask;
1897 bld.pos = pos;
1898 bld.inputs = inputs;
1899 bld.outputs = outputs;
1900 bld.consts_ptr = consts_ptr;
1901 bld.sampler = sampler;
1902 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1903 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1904
1905 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1906
1907 tgsi_parse_init( &parse, tokens );
1908
1909 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1910 tgsi_parse_token( &parse );
1911
1912 switch( parse.FullToken.Token.Type ) {
1913 case TGSI_TOKEN_TYPE_DECLARATION:
1914 /* Inputs already interpolated */
1915 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1916 break;
1917
1918 case TGSI_TOKEN_TYPE_INSTRUCTION:
1919 {
1920 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1921 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
1922 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
1923 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1924 opcode_info->mnemonic);
1925 }
1926
1927 break;
1928
1929 case TGSI_TOKEN_TYPE_IMMEDIATE:
1930 /* simply copy the immediate values into the next immediates[] slot */
1931 {
1932 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1933 assert(size <= 4);
1934 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1935 for( i = 0; i < size; ++i )
1936 bld.immediates[num_immediates][i] =
1937 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1938 for( i = size; i < 4; ++i )
1939 bld.immediates[num_immediates][i] = bld.base.undef;
1940 num_immediates++;
1941 }
1942 break;
1943
1944 case TGSI_TOKEN_TYPE_PROPERTY:
1945 break;
1946
1947 default:
1948 assert( 0 );
1949 }
1950 }
1951 if (0) {
1952 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1953 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1954 debug_printf("11111111111111111111111111111 \n");
1955 tgsi_dump(tokens, 0);
1956 lp_debug_dump_value(function);
1957 debug_printf("2222222222222222222222222222 \n");
1958 }
1959 tgsi_parse_free( &parse );
1960 }
1961