Merge commit 'origin/master' into gallium-msaa
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_exec.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_tgsi.h"
57 #include "lp_bld_limits.h"
58 #include "lp_bld_debug.h"
59
60
/* Iterate CHAN over all four vector channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Test whether channel CHAN is enabled in dst register 0's writemask. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst 0's writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
   IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Positions of the four pixels of a 2x2 quad within one SoA vector. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
/**
 * Runtime execution-mask state used to predicate stores inside TGSI
 * conditionals and loops (IF/ELSE/ENDIF, BGNLOOP/ENDLOOP, BRK, CONT).
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when some cond or loop is open and stores must be predicated. */
   boolean has_mask;

   /* Integer vector type matching bld->type; masks are stored in it. */
   LLVMTypeRef int_vec_type;

   /* Saved condition masks for nested IFs; cond_mask is the current one. */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Saved break masks for nested loops; break_mask is the current one. */
   LLVMValueRef break_stack[LP_MAX_TGSI_NESTING];
   int break_stack_size;
   LLVMValueRef break_mask;

   /* Saved continue masks for nested loops; cont_mask is the current one. */
   LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING];
   int cont_stack_size;
   LLVMValueRef cont_mask;

   /* Saved loop-header basic blocks for nested loops. */
   LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;
   LLVMBasicBlockRef loop_block;


   /* Combined cond & cont & break mask; see lp_exec_mask_update(). */
   LLVMValueRef exec_mask;
};
111
/**
 * Per-shader translation context for the TGSI -> LLVM SoA code generator.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Pointer to the constant buffer (scalars, 4 per register). */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Texture sampling code generator; may be NULL (see emit_tex). */
   struct lp_build_sampler_soa *sampler;

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   /* Fragment "alive" mask, updated by KIL/KILP. */
   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;
};
135
/* Quad swizzles used to build screen-space derivatives: each table
 * replicates one column (left/right) or row (top/bottom) of the 2x2
 * pixel quad across all four lanes of the SoA vector. */
static const unsigned char
swizzle_left[4] = {
   QUAD_TOP_LEFT,    QUAD_TOP_LEFT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
};

static const unsigned char
swizzle_right[4] = {
   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
};

static const unsigned char
swizzle_top[4] = {
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT
};

static const unsigned char
swizzle_bottom[4] = {
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
};
159
160 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
161 {
162 mask->bld = bld;
163 mask->has_mask = FALSE;
164 mask->cond_stack_size = 0;
165 mask->loop_stack_size = 0;
166 mask->break_stack_size = 0;
167 mask->cont_stack_size = 0;
168
169 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
170 }
171
172 static void lp_exec_mask_update(struct lp_exec_mask *mask)
173 {
174 if (mask->loop_stack_size) {
175 /*for loops we need to update the entire mask at runtime */
176 LLVMValueRef tmp;
177 assert(mask->break_mask);
178 tmp = LLVMBuildAnd(mask->bld->builder,
179 mask->cont_mask,
180 mask->break_mask,
181 "maskcb");
182 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
183 mask->cond_mask,
184 tmp,
185 "maskfull");
186 } else
187 mask->exec_mask = mask->cond_mask;
188
189
190 mask->has_mask = (mask->cond_stack_size > 0 ||
191 mask->loop_stack_size > 0);
192 }
193
194 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
195 LLVMValueRef val)
196 {
197 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
198 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
199 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
200 mask->int_vec_type, "");
201
202 lp_exec_mask_update(mask);
203 }
204
205 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
206 {
207 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
208 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
209 mask->cond_mask, "");
210
211 /* means that we didn't have any mask before and that
212 * we were fully enabled */
213 if (mask->cond_stack_size <= 1) {
214 prev_mask = LLVMConstAllOnes(mask->int_vec_type);
215 }
216
217 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
218 inv_mask,
219 prev_mask, "");
220 lp_exec_mask_update(mask);
221 }
222
223 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
224 {
225 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
226 lp_exec_mask_update(mask);
227 }
228
229 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
230 {
231
232 if (mask->cont_stack_size == 0)
233 mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
234 if (mask->break_stack_size == 0)
235 mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
236 if (mask->cond_stack_size == 0)
237 mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
238
239 assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
240 assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING);
241 assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
242
243 mask->break_stack[mask->break_stack_size++] = mask->break_mask;
244 mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
245 mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
246 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
247 LLVMBuildBr(mask->bld->builder, mask->loop_block);
248 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
249
250 lp_exec_mask_update(mask);
251 }
252
253 static void lp_exec_break(struct lp_exec_mask *mask)
254 {
255 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
256 mask->exec_mask,
257 "break");
258
259 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
260 mask->break_mask,
261 exec_mask, "break_full");
262
263 lp_exec_mask_update(mask);
264 }
265
266 static void lp_exec_continue(struct lp_exec_mask *mask)
267 {
268 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
269 mask->exec_mask,
270 "");
271
272 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
273 mask->cont_mask,
274 exec_mask, "");
275
276 lp_exec_mask_update(mask);
277 }
278
279
/* Close the current loop: branch back to the loop header while any
 * channel's break mask is still set, otherwise fall through to a new
 * "endloop" block, then pop the saved loop state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* a scalar integer wide enough to hold the whole mask vector */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /* i1cond = (break_mask != 0), i.e. some channel is still looping
    * (the old comment claimed "mask == 0", but the predicate is IntNE) */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
   /* pop the cont mask */
   if (mask->cont_stack_size) {
      mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
   }
   /* pop the break mask */
   if (mask->break_stack_size) {
      mask->break_mask = mask->break_stack[--mask->break_stack_size];
   }

   lp_exec_mask_update(mask);
}
315
316 /* stores val into an address pointed to by dst.
317 * mask->exec_mask is used to figure out which bits of val
318 * should be stored into the address
319 * (0 means don't store this bit, 1 means do store).
320 */
321 static void lp_exec_mask_store(struct lp_exec_mask *mask,
322 LLVMValueRef val,
323 LLVMValueRef dst)
324 {
325 if (mask->has_mask) {
326 LLVMValueRef real_val, dst_val;
327
328 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
329 real_val = lp_build_select(mask->bld,
330 mask->exec_mask,
331 val, dst_val);
332
333 LLVMBuildStore(mask->bld->builder, real_val, dst);
334 } else
335 LLVMBuildStore(mask->bld->builder, val, dst);
336 }
337
338
339 static LLVMValueRef
340 emit_ddx(struct lp_build_tgsi_soa_context *bld,
341 LLVMValueRef src)
342 {
343 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
344 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
345 return lp_build_sub(&bld->base, src_right, src_left);
346 }
347
348
349 static LLVMValueRef
350 emit_ddy(struct lp_build_tgsi_soa_context *bld,
351 LLVMValueRef src)
352 {
353 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
354 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
355 return lp_build_sub(&bld->base, src_top, src_bottom);
356 }
357
358 static LLVMValueRef
359 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
360 unsigned index,
361 unsigned swizzle,
362 boolean is_indirect,
363 LLVMValueRef addr)
364 {
365 if (!bld->has_indirect_addressing) {
366 return bld->temps[index][swizzle];
367 } else {
368 LLVMValueRef lindex =
369 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
370 if (is_indirect)
371 lindex = lp_build_add(&bld->base, lindex, addr);
372 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
373 }
374 }
375
376 /**
377 * Register fetch.
378 */
379 static LLVMValueRef
380 emit_fetch(
381 struct lp_build_tgsi_soa_context *bld,
382 const struct tgsi_full_instruction *inst,
383 unsigned index,
384 const unsigned chan_index )
385 {
386 const struct tgsi_full_src_register *reg = &inst->Src[index];
387 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
388 LLVMValueRef res;
389 LLVMValueRef addr;
390
391 switch (swizzle) {
392 case TGSI_SWIZZLE_X:
393 case TGSI_SWIZZLE_Y:
394 case TGSI_SWIZZLE_Z:
395 case TGSI_SWIZZLE_W:
396
397 if (reg->Register.Indirect) {
398 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
399 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
400 addr = LLVMBuildLoad(bld->base.builder,
401 bld->addr[reg->Indirect.Index][swizzle],
402 "");
403 /* for indexing we want integers */
404 addr = LLVMBuildFPToSI(bld->base.builder, addr,
405 int_vec_type, "");
406 addr = LLVMBuildExtractElement(bld->base.builder,
407 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
408 "");
409 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
410 }
411
412 switch (reg->Register.File) {
413 case TGSI_FILE_CONSTANT: {
414 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
415 LLVMValueRef scalar, scalar_ptr;
416
417 if (reg->Register.Indirect) {
418 /*lp_build_printf(bld->base.builder,
419 "\taddr = %d\n", addr);*/
420 index = lp_build_add(&bld->base, index, addr);
421 }
422 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
423 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
424
425 res = lp_build_broadcast_scalar(&bld->base, scalar);
426 break;
427 }
428
429 case TGSI_FILE_IMMEDIATE:
430 res = bld->immediates[reg->Register.Index][swizzle];
431 assert(res);
432 break;
433
434 case TGSI_FILE_INPUT:
435 res = bld->inputs[reg->Register.Index][swizzle];
436 assert(res);
437 break;
438
439 case TGSI_FILE_TEMPORARY: {
440 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
441 swizzle,
442 reg->Register.Indirect,
443 addr);
444 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
445 if(!res)
446 return bld->base.undef;
447 break;
448 }
449
450 default:
451 assert( 0 );
452 return bld->base.undef;
453 }
454 break;
455
456 default:
457 assert( 0 );
458 return bld->base.undef;
459 }
460
461 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
462 case TGSI_UTIL_SIGN_CLEAR:
463 res = lp_build_abs( &bld->base, res );
464 break;
465
466 case TGSI_UTIL_SIGN_SET:
467 /* TODO: Use bitwese OR for floating point */
468 res = lp_build_abs( &bld->base, res );
469 res = LLVMBuildNeg( bld->base.builder, res, "" );
470 break;
471
472 case TGSI_UTIL_SIGN_TOGGLE:
473 res = LLVMBuildNeg( bld->base.builder, res, "" );
474 break;
475
476 case TGSI_UTIL_SIGN_KEEP:
477 break;
478 }
479
480 return res;
481 }
482
483
484 /**
485 * Register fetch with derivatives.
486 */
487 static void
488 emit_fetch_deriv(
489 struct lp_build_tgsi_soa_context *bld,
490 const struct tgsi_full_instruction *inst,
491 unsigned index,
492 const unsigned chan_index,
493 LLVMValueRef *res,
494 LLVMValueRef *ddx,
495 LLVMValueRef *ddy)
496 {
497 LLVMValueRef src;
498
499 src = emit_fetch(bld, inst, index, chan_index);
500
501 if(res)
502 *res = src;
503
504 /* TODO: use interpolation coeffs for inputs */
505
506 if(ddx)
507 *ddx = emit_ddx(bld, src);
508
509 if(ddy)
510 *ddy = emit_ddy(bld, src);
511 }
512
513
514 /**
515 * Register store.
516 */
517 static void
518 emit_store(
519 struct lp_build_tgsi_soa_context *bld,
520 const struct tgsi_full_instruction *inst,
521 unsigned index,
522 unsigned chan_index,
523 LLVMValueRef value)
524 {
525 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
526 LLVMValueRef addr;
527
528 switch( inst->Instruction.Saturate ) {
529 case TGSI_SAT_NONE:
530 break;
531
532 case TGSI_SAT_ZERO_ONE:
533 value = lp_build_max(&bld->base, value, bld->base.zero);
534 value = lp_build_min(&bld->base, value, bld->base.one);
535 break;
536
537 case TGSI_SAT_MINUS_PLUS_ONE:
538 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
539 value = lp_build_min(&bld->base, value, bld->base.one);
540 break;
541
542 default:
543 assert(0);
544 }
545
546 if (reg->Register.Indirect) {
547 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
548 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
549 addr = LLVMBuildLoad(bld->base.builder,
550 bld->addr[reg->Indirect.Index][swizzle],
551 "");
552 /* for indexing we want integers */
553 addr = LLVMBuildFPToSI(bld->base.builder, addr,
554 int_vec_type, "");
555 addr = LLVMBuildExtractElement(bld->base.builder,
556 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
557 "");
558 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
559 }
560
561 switch( reg->Register.File ) {
562 case TGSI_FILE_OUTPUT:
563 lp_exec_mask_store(&bld->exec_mask, value,
564 bld->outputs[reg->Register.Index][chan_index]);
565 break;
566
567 case TGSI_FILE_TEMPORARY: {
568 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
569 chan_index,
570 reg->Register.Indirect,
571 addr);
572 lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
573 break;
574 }
575
576 case TGSI_FILE_ADDRESS:
577 lp_exec_mask_store(&bld->exec_mask, value,
578 bld->addr[reg->Indirect.Index][chan_index]);
579 break;
580
581 case TGSI_FILE_PREDICATE:
582 /* FIXME */
583 break;
584
585 default:
586 assert( 0 );
587 }
588 }
589
590
/**
 * High-level instruction translators.
 */

/* Variant of the TGSI texture-sampling instruction family, selecting how
 * emit_tex() sources its lod/derivative arguments. */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,
   TEX_MODIFIER_LOD_BIAS,
   TEX_MODIFIER_EXPLICIT_LOD,
   TEX_MODIFIER_EXPLICIT_DERIV
};
602
/**
 * Emit code for a texture-sampling instruction.
 *
 * Fetches coordinates (and, per 'modifier', lod bias / explicit lod /
 * projection divisor / explicit derivatives) from the instruction's
 * sources and delegates the actual sampling to bld->sampler, writing the
 * four result channels into texel[0..3].
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   /* No sampler generator supplied: return undef for all channels. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* Number of coordinates consumed by this texture target. */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* Lod bias and explicit lod both live in src0.w (channel 3). */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* Projection: multiply coords by 1/src0.w below. */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   /* Explicit derivatives come from src1/src2 with the sampler in src3;
    * otherwise derive them from the quad, sampler in src1. */
   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_ddx( bld, coords[i] );
         ddy[i] = emit_ddy( bld, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
699
700
701 /**
702 * Kill fragment if any of the src register values are negative.
703 */
704 static void
705 emit_kil(
706 struct lp_build_tgsi_soa_context *bld,
707 const struct tgsi_full_instruction *inst )
708 {
709 const struct tgsi_full_src_register *reg = &inst->Src[0];
710 LLVMValueRef terms[NUM_CHANNELS];
711 LLVMValueRef mask;
712 unsigned chan_index;
713
714 memset(&terms, 0, sizeof terms);
715
716 FOR_EACH_CHANNEL( chan_index ) {
717 unsigned swizzle;
718
719 /* Unswizzle channel */
720 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
721
722 /* Check if the component has not been already tested. */
723 assert(swizzle < NUM_CHANNELS);
724 if( !terms[swizzle] )
725 /* TODO: change the comparison operator instead of setting the sign */
726 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
727 }
728
729 mask = NULL;
730 FOR_EACH_CHANNEL( chan_index ) {
731 if(terms[chan_index]) {
732 LLVMValueRef chan_mask;
733
734 /*
735 * If term < 0 then mask = 0 else mask = ~0.
736 */
737 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
738
739 if(mask)
740 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
741 else
742 mask = chan_mask;
743 }
744 }
745
746 if(mask)
747 lp_build_mask_update(bld->mask, mask);
748 }
749
750
751 /**
752 * Predicated fragment kill.
753 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
754 * The only predication is the execution mask which will apply if
755 * we're inside a loop or conditional.
756 */
757 static void
758 emit_kilp(struct lp_build_tgsi_soa_context *bld,
759 const struct tgsi_full_instruction *inst)
760 {
761 LLVMValueRef mask;
762
763 /* For those channels which are "alive", disable fragment shader
764 * execution.
765 */
766 if (bld->exec_mask.has_mask) {
767 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
768 }
769 else {
770 mask = bld->base.zero;
771 }
772
773 lp_build_mask_update(bld->mask, mask);
774 }
775
776 static void
777 emit_declaration(
778 struct lp_build_tgsi_soa_context *bld,
779 const struct tgsi_full_declaration *decl)
780 {
781 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
782
783 unsigned first = decl->Range.First;
784 unsigned last = decl->Range.Last;
785 unsigned idx, i;
786
787 for (idx = first; idx <= last; ++idx) {
788 switch (decl->Declaration.File) {
789 case TGSI_FILE_TEMPORARY:
790 assert(idx < LP_MAX_TGSI_TEMPS);
791 if (bld->has_indirect_addressing) {
792 LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
793 last*4 + 4, 0);
794 bld->temps_array = lp_build_array_alloca(bld->base.builder,
795 vec_type, val, "");
796 } else {
797 for (i = 0; i < NUM_CHANNELS; i++)
798 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
799 vec_type, "");
800 }
801 break;
802
803 case TGSI_FILE_OUTPUT:
804 for (i = 0; i < NUM_CHANNELS; i++)
805 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
806 vec_type, "");
807 break;
808
809 case TGSI_FILE_ADDRESS:
810 assert(idx < LP_MAX_TGSI_ADDRS);
811 for (i = 0; i < NUM_CHANNELS; i++)
812 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
813 vec_type, "");
814 break;
815
816 case TGSI_FILE_PREDICATE:
817 _debug_printf("warning: predicate registers not yet implemented\n");
818 break;
819
820 default:
821 /* don't need to declare other vars */
822 break;
823 }
824 }
825 }
826
827
828 /**
829 * Emit LLVM for one TGSI instruction.
830 * \param return TRUE for success, FALSE otherwise
831 */
832 static boolean
833 emit_instruction(
834 struct lp_build_tgsi_soa_context *bld,
835 const struct tgsi_full_instruction *inst,
836 const struct tgsi_opcode_info *info)
837 {
838 unsigned chan_index;
839 LLVMValueRef src0, src1, src2;
840 LLVMValueRef tmp0, tmp1, tmp2;
841 LLVMValueRef tmp3 = NULL;
842 LLVMValueRef tmp4 = NULL;
843 LLVMValueRef tmp5 = NULL;
844 LLVMValueRef tmp6 = NULL;
845 LLVMValueRef tmp7 = NULL;
846 LLVMValueRef res;
847 LLVMValueRef dst0[NUM_CHANNELS];
848
849 /*
850 * Stores and write masks are handled in a general fashion after the long
851 * instruction opcode switch statement.
852 *
853 * Although not stricitly necessary, we avoid generating instructions for
854 * channels which won't be stored, in cases where's that easy. For some
855 * complex instructions, like texture sampling, it is more convenient to
856 * assume a full writemask and then let LLVM optimization passes eliminate
857 * redundant code.
858 */
859
860 assert(info->num_dst <= 1);
861 if(info->num_dst) {
862 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
863 dst0[chan_index] = bld->base.undef;
864 }
865 }
866
867 switch (inst->Instruction.Opcode) {
868 case TGSI_OPCODE_ARL:
869 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
870 tmp0 = emit_fetch( bld, inst, 0, chan_index );
871 tmp0 = lp_build_floor(&bld->base, tmp0);
872 dst0[chan_index] = tmp0;
873 }
874 break;
875
876 case TGSI_OPCODE_MOV:
877 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
878 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
879 }
880 break;
881
882 case TGSI_OPCODE_LIT:
883 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
884 dst0[CHAN_X] = bld->base.one;
885 }
886 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
887 src0 = emit_fetch( bld, inst, 0, CHAN_X );
888 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
889 }
890 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
891 /* XMM[1] = SrcReg[0].yyyy */
892 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
893 /* XMM[1] = max(XMM[1], 0) */
894 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
895 /* XMM[2] = SrcReg[0].wwww */
896 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
897 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
898 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
899 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
900 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
901 }
902 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
903 dst0[CHAN_W] = bld->base.one;
904 }
905 break;
906
907 case TGSI_OPCODE_RCP:
908 /* TGSI_OPCODE_RECIP */
909 src0 = emit_fetch( bld, inst, 0, CHAN_X );
910 res = lp_build_rcp(&bld->base, src0);
911 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
912 dst0[chan_index] = res;
913 }
914 break;
915
916 case TGSI_OPCODE_RSQ:
917 /* TGSI_OPCODE_RECIPSQRT */
918 src0 = emit_fetch( bld, inst, 0, CHAN_X );
919 src0 = lp_build_abs(&bld->base, src0);
920 res = lp_build_rsqrt(&bld->base, src0);
921 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
922 dst0[chan_index] = res;
923 }
924 break;
925
926 case TGSI_OPCODE_EXP:
927 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
928 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
929 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
930 LLVMValueRef *p_exp2_int_part = NULL;
931 LLVMValueRef *p_frac_part = NULL;
932 LLVMValueRef *p_exp2 = NULL;
933
934 src0 = emit_fetch( bld, inst, 0, CHAN_X );
935
936 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
937 p_exp2_int_part = &tmp0;
938 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
939 p_frac_part = &tmp1;
940 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
941 p_exp2 = &tmp2;
942
943 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
944
945 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
946 dst0[CHAN_X] = tmp0;
947 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
948 dst0[CHAN_Y] = tmp1;
949 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
950 dst0[CHAN_Z] = tmp2;
951 }
952 /* dst.w = 1.0 */
953 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
954 dst0[CHAN_W] = bld->base.one;
955 }
956 break;
957
958 case TGSI_OPCODE_LOG:
959 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
960 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
961 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
962 LLVMValueRef *p_floor_log2 = NULL;
963 LLVMValueRef *p_exp = NULL;
964 LLVMValueRef *p_log2 = NULL;
965
966 src0 = emit_fetch( bld, inst, 0, CHAN_X );
967 src0 = lp_build_abs( &bld->base, src0 );
968
969 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
970 p_floor_log2 = &tmp0;
971 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
972 p_exp = &tmp1;
973 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
974 p_log2 = &tmp2;
975
976 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
977
978 /* dst.x = floor(lg2(abs(src.x))) */
979 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
980 dst0[CHAN_X] = tmp0;
981 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
982 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
983 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
984 }
985 /* dst.z = lg2(abs(src.x)) */
986 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
987 dst0[CHAN_Z] = tmp2;
988 }
989 /* dst.w = 1.0 */
990 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
991 dst0[CHAN_W] = bld->base.one;
992 }
993 break;
994
995 case TGSI_OPCODE_MUL:
996 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
997 src0 = emit_fetch( bld, inst, 0, chan_index );
998 src1 = emit_fetch( bld, inst, 1, chan_index );
999 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1000 }
1001 break;
1002
1003 case TGSI_OPCODE_ADD:
1004 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1005 src0 = emit_fetch( bld, inst, 0, chan_index );
1006 src1 = emit_fetch( bld, inst, 1, chan_index );
1007 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1008 }
1009 break;
1010
1011 case TGSI_OPCODE_DP3:
1012 /* TGSI_OPCODE_DOT3 */
1013 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1014 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1015 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1016 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1017 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1018 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1019 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1020 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1021 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1022 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1023 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1024 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1025 dst0[chan_index] = tmp0;
1026 }
1027 break;
1028
1029 case TGSI_OPCODE_DP4:
1030 /* TGSI_OPCODE_DOT4 */
1031 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1032 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1033 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1034 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1035 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1036 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1037 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1038 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1039 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1040 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1041 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1042 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1043 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1044 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1045 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1046 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1047 dst0[chan_index] = tmp0;
1048 }
1049 break;
1050
1051 case TGSI_OPCODE_DST:
1052 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1053 dst0[CHAN_X] = bld->base.one;
1054 }
1055 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1056 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1057 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1058 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1059 }
1060 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1061 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1062 }
1063 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1064 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1065 }
1066 break;
1067
1068 case TGSI_OPCODE_MIN:
1069 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1070 src0 = emit_fetch( bld, inst, 0, chan_index );
1071 src1 = emit_fetch( bld, inst, 1, chan_index );
1072 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1073 }
1074 break;
1075
1076 case TGSI_OPCODE_MAX:
1077 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1078 src0 = emit_fetch( bld, inst, 0, chan_index );
1079 src1 = emit_fetch( bld, inst, 1, chan_index );
1080 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1081 }
1082 break;
1083
1084 case TGSI_OPCODE_SLT:
1085 /* TGSI_OPCODE_SETLT */
1086 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1087 src0 = emit_fetch( bld, inst, 0, chan_index );
1088 src1 = emit_fetch( bld, inst, 1, chan_index );
1089 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1090 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1091 }
1092 break;
1093
1094 case TGSI_OPCODE_SGE:
1095 /* TGSI_OPCODE_SETGE */
1096 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1097 src0 = emit_fetch( bld, inst, 0, chan_index );
1098 src1 = emit_fetch( bld, inst, 1, chan_index );
1099 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1100 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1101 }
1102 break;
1103
1104 case TGSI_OPCODE_MAD:
1105 /* TGSI_OPCODE_MADD */
1106 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1107 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1108 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1109 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1110 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1111 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1112 dst0[chan_index] = tmp0;
1113 }
1114 break;
1115
1116 case TGSI_OPCODE_SUB:
1117 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1118 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1119 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1120 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1121 }
1122 break;
1123
1124 case TGSI_OPCODE_LRP:
1125 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1126 src0 = emit_fetch( bld, inst, 0, chan_index );
1127 src1 = emit_fetch( bld, inst, 1, chan_index );
1128 src2 = emit_fetch( bld, inst, 2, chan_index );
1129 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1130 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1131 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1132 }
1133 break;
1134
1135 case TGSI_OPCODE_CND:
1136 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1137 src0 = emit_fetch( bld, inst, 0, chan_index );
1138 src1 = emit_fetch( bld, inst, 1, chan_index );
1139 src2 = emit_fetch( bld, inst, 2, chan_index );
1140 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1141 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1142 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1143 }
1144 break;
1145
1146 case TGSI_OPCODE_DP2A:
1147 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1148 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1149 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1150 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1151 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1152 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1153 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1154 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1155 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1156 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1157 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1158 }
1159 break;
1160
1161 case TGSI_OPCODE_FRC:
1162 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1163 src0 = emit_fetch( bld, inst, 0, chan_index );
1164 tmp0 = lp_build_floor(&bld->base, src0);
1165 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1166 dst0[chan_index] = tmp0;
1167 }
1168 break;
1169
1170 case TGSI_OPCODE_CLAMP:
1171 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1172 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1173 src1 = emit_fetch( bld, inst, 1, chan_index );
1174 src2 = emit_fetch( bld, inst, 2, chan_index );
1175 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1176 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1177 dst0[chan_index] = tmp0;
1178 }
1179 break;
1180
1181 case TGSI_OPCODE_FLR:
1182 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1183 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1184 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1185 }
1186 break;
1187
1188 case TGSI_OPCODE_ROUND:
1189 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1190 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1191 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1192 }
1193 break;
1194
1195 case TGSI_OPCODE_EX2: {
1196 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1197 tmp0 = lp_build_exp2( &bld->base, tmp0);
1198 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1199 dst0[chan_index] = tmp0;
1200 }
1201 break;
1202 }
1203
1204 case TGSI_OPCODE_LG2:
1205 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1206 tmp0 = lp_build_log2( &bld->base, tmp0);
1207 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1208 dst0[chan_index] = tmp0;
1209 }
1210 break;
1211
1212 case TGSI_OPCODE_POW:
1213 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1214 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1215 res = lp_build_pow( &bld->base, src0, src1 );
1216 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1217 dst0[chan_index] = res;
1218 }
1219 break;
1220
1221 case TGSI_OPCODE_XPD:
1222 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1223 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1224 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1225 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1226 }
1227 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1228 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1229 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1230 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1231 }
1232 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1233 tmp2 = tmp0;
1234 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1235 tmp5 = tmp3;
1236 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1237 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1238 dst0[CHAN_X] = tmp2;
1239 }
1240 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1241 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1242 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1243 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1244 }
1245 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1246 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1247 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1248 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1249 dst0[CHAN_Y] = tmp3;
1250 }
1251 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1252 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1253 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1254 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1255 dst0[CHAN_Z] = tmp5;
1256 }
1257 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1258 dst0[CHAN_W] = bld->base.one;
1259 }
1260 break;
1261
1262 case TGSI_OPCODE_ABS:
1263 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1264 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1265 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1266 }
1267 break;
1268
1269 case TGSI_OPCODE_RCC:
1270 /* deprecated? */
1271 assert(0);
1272 return FALSE;
1273
1274 case TGSI_OPCODE_DPH:
1275 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1276 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1277 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1278 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1279 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1280 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1281 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1282 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1283 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1284 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1285 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1286 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1287 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1288 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1289 dst0[chan_index] = tmp0;
1290 }
1291 break;
1292
1293 case TGSI_OPCODE_COS:
1294 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1295 tmp0 = lp_build_cos( &bld->base, tmp0 );
1296 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1297 dst0[chan_index] = tmp0;
1298 }
1299 break;
1300
1301 case TGSI_OPCODE_DDX:
1302 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1303 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1304 }
1305 break;
1306
1307 case TGSI_OPCODE_DDY:
1308 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1309 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1310 }
1311 break;
1312
1313 case TGSI_OPCODE_KILP:
1314 /* predicated kill */
1315 emit_kilp( bld, inst );
1316 break;
1317
1318 case TGSI_OPCODE_KIL:
1319 /* conditional kill */
1320 emit_kil( bld, inst );
1321 break;
1322
1323 case TGSI_OPCODE_PK2H:
1324 return FALSE;
1325 break;
1326
1327 case TGSI_OPCODE_PK2US:
1328 return FALSE;
1329 break;
1330
1331 case TGSI_OPCODE_PK4B:
1332 return FALSE;
1333 break;
1334
1335 case TGSI_OPCODE_PK4UB:
1336 return FALSE;
1337 break;
1338
1339 case TGSI_OPCODE_RFL:
1340 return FALSE;
1341 break;
1342
1343 case TGSI_OPCODE_SEQ:
1344 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1345 src0 = emit_fetch( bld, inst, 0, chan_index );
1346 src1 = emit_fetch( bld, inst, 1, chan_index );
1347 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1348 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1349 }
1350 break;
1351
1352 case TGSI_OPCODE_SFL:
1353 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1354 dst0[chan_index] = bld->base.zero;
1355 }
1356 break;
1357
1358 case TGSI_OPCODE_SGT:
1359 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1360 src0 = emit_fetch( bld, inst, 0, chan_index );
1361 src1 = emit_fetch( bld, inst, 1, chan_index );
1362 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1363 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1364 }
1365 break;
1366
1367 case TGSI_OPCODE_SIN:
1368 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1369 tmp0 = lp_build_sin( &bld->base, tmp0 );
1370 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1371 dst0[chan_index] = tmp0;
1372 }
1373 break;
1374
1375 case TGSI_OPCODE_SLE:
1376 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1377 src0 = emit_fetch( bld, inst, 0, chan_index );
1378 src1 = emit_fetch( bld, inst, 1, chan_index );
1379 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1380 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1381 }
1382 break;
1383
1384 case TGSI_OPCODE_SNE:
1385 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1386 src0 = emit_fetch( bld, inst, 0, chan_index );
1387 src1 = emit_fetch( bld, inst, 1, chan_index );
1388 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1389 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1390 }
1391 break;
1392
1393 case TGSI_OPCODE_STR:
1394 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1395 dst0[chan_index] = bld->base.one;
1396 }
1397 break;
1398
1399 case TGSI_OPCODE_TEX:
1400 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1401 break;
1402
1403 case TGSI_OPCODE_TXD:
1404 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1405 break;
1406
1407 case TGSI_OPCODE_UP2H:
1408 /* deprecated */
1409 assert (0);
1410 return FALSE;
1411 break;
1412
1413 case TGSI_OPCODE_UP2US:
1414 /* deprecated */
1415 assert(0);
1416 return FALSE;
1417 break;
1418
1419 case TGSI_OPCODE_UP4B:
1420 /* deprecated */
1421 assert(0);
1422 return FALSE;
1423 break;
1424
1425 case TGSI_OPCODE_UP4UB:
1426 /* deprecated */
1427 assert(0);
1428 return FALSE;
1429 break;
1430
1431 case TGSI_OPCODE_X2D:
1432 /* deprecated? */
1433 assert(0);
1434 return FALSE;
1435 break;
1436
1437 case TGSI_OPCODE_ARA:
1438 /* deprecated */
1439 assert(0);
1440 return FALSE;
1441 break;
1442
1443 case TGSI_OPCODE_ARR:
1444 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1445 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1446 tmp0 = lp_build_round(&bld->base, tmp0);
1447 dst0[chan_index] = tmp0;
1448 }
1449 break;
1450
1451 case TGSI_OPCODE_BRA:
1452 /* deprecated */
1453 assert(0);
1454 return FALSE;
1455 break;
1456
1457 case TGSI_OPCODE_CAL:
1458 /* FIXME */
1459 return FALSE;
1460 break;
1461
1462 case TGSI_OPCODE_RET:
1463 /* FIXME */
1464 return FALSE;
1465 break;
1466
1467 case TGSI_OPCODE_END:
1468 break;
1469
1470 case TGSI_OPCODE_SSG:
1471 /* TGSI_OPCODE_SGN */
1472 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1473 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1474 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1475 }
1476 break;
1477
1478 case TGSI_OPCODE_CMP:
1479 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1480 src0 = emit_fetch( bld, inst, 0, chan_index );
1481 src1 = emit_fetch( bld, inst, 1, chan_index );
1482 src2 = emit_fetch( bld, inst, 2, chan_index );
1483 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1484 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1485 }
1486 break;
1487
1488 case TGSI_OPCODE_SCS:
1489 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1490 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1491 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1492 }
1493 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1494 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1495 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1496 }
1497 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1498 dst0[CHAN_Z] = bld->base.zero;
1499 }
1500 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1501 dst0[CHAN_W] = bld->base.one;
1502 }
1503 break;
1504
1505 case TGSI_OPCODE_TXB:
1506 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1507 break;
1508
1509 case TGSI_OPCODE_NRM:
1510 /* fall-through */
1511 case TGSI_OPCODE_NRM4:
1512 /* 3 or 4-component normalization */
1513 {
1514 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1515
1516 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1517 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1518 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1519 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1520
1521 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1522
1523 /* xmm4 = src.x */
1524 /* xmm0 = src.x * src.x */
1525 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1526 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1527 tmp4 = tmp0;
1528 }
1529 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1530
1531 /* xmm5 = src.y */
1532 /* xmm0 = xmm0 + src.y * src.y */
1533 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1534 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1535 tmp5 = tmp1;
1536 }
1537 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1538 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1539
1540 /* xmm6 = src.z */
1541 /* xmm0 = xmm0 + src.z * src.z */
1542 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1543 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1544 tmp6 = tmp1;
1545 }
1546 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1547 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1548
1549 if (dims == 4) {
1550 /* xmm7 = src.w */
1551 /* xmm0 = xmm0 + src.w * src.w */
1552 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1553 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1554 tmp7 = tmp1;
1555 }
1556 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1557 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1558 }
1559
1560 /* xmm1 = 1 / sqrt(xmm0) */
1561 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1562
1563 /* dst.x = xmm1 * src.x */
1564 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1565 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1566 }
1567
1568 /* dst.y = xmm1 * src.y */
1569 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1570 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1571 }
1572
1573 /* dst.z = xmm1 * src.z */
1574 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1575 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1576 }
1577
1578 /* dst.w = xmm1 * src.w */
1579 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1580 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1581 }
1582 }
1583
1584 /* dst.w = 1.0 */
1585 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1586 dst0[CHAN_W] = bld->base.one;
1587 }
1588 }
1589 break;
1590
1591 case TGSI_OPCODE_DIV:
1592 /* deprecated */
1593 assert( 0 );
1594 return FALSE;
1595 break;
1596
1597 case TGSI_OPCODE_DP2:
1598 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1599 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1600 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1601 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1602 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1603 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1604 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1605 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1606 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1607 }
1608 break;
1609
1610 case TGSI_OPCODE_TXL:
1611 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1612 break;
1613
1614 case TGSI_OPCODE_TXP:
1615 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1616 break;
1617
1618 case TGSI_OPCODE_BRK:
1619 lp_exec_break(&bld->exec_mask);
1620 break;
1621
1622 case TGSI_OPCODE_IF:
1623 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1624 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1625 tmp0, bld->base.zero);
1626 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1627 break;
1628
1629 case TGSI_OPCODE_BGNLOOP:
1630 lp_exec_bgnloop(&bld->exec_mask);
1631 break;
1632
1633 case TGSI_OPCODE_ELSE:
1634 lp_exec_mask_cond_invert(&bld->exec_mask);
1635 break;
1636
1637 case TGSI_OPCODE_ENDIF:
1638 lp_exec_mask_cond_pop(&bld->exec_mask);
1639 break;
1640
1641 case TGSI_OPCODE_ENDLOOP:
1642 lp_exec_endloop(&bld->exec_mask);
1643 break;
1644
1645 case TGSI_OPCODE_PUSHA:
1646 /* deprecated? */
1647 assert(0);
1648 return FALSE;
1649 break;
1650
1651 case TGSI_OPCODE_POPA:
1652 /* deprecated? */
1653 assert(0);
1654 return FALSE;
1655 break;
1656
1657 case TGSI_OPCODE_CEIL:
1658 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1659 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1660 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1661 }
1662 break;
1663
1664 case TGSI_OPCODE_I2F:
1665 /* deprecated? */
1666 assert(0);
1667 return FALSE;
1668 break;
1669
1670 case TGSI_OPCODE_NOT:
1671 /* deprecated? */
1672 assert(0);
1673 return FALSE;
1674 break;
1675
1676 case TGSI_OPCODE_TRUNC:
1677 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1678 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1679 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1680 }
1681 break;
1682
1683 case TGSI_OPCODE_SHL:
1684 /* deprecated? */
1685 assert(0);
1686 return FALSE;
1687 break;
1688
1689 case TGSI_OPCODE_ISHR:
1690 /* deprecated? */
1691 assert(0);
1692 return FALSE;
1693 break;
1694
1695 case TGSI_OPCODE_AND:
1696 /* deprecated? */
1697 assert(0);
1698 return FALSE;
1699 break;
1700
1701 case TGSI_OPCODE_OR:
1702 /* deprecated? */
1703 assert(0);
1704 return FALSE;
1705 break;
1706
1707 case TGSI_OPCODE_MOD:
1708 /* deprecated? */
1709 assert(0);
1710 return FALSE;
1711 break;
1712
1713 case TGSI_OPCODE_XOR:
1714 /* deprecated? */
1715 assert(0);
1716 return FALSE;
1717 break;
1718
1719 case TGSI_OPCODE_SAD:
1720 /* deprecated? */
1721 assert(0);
1722 return FALSE;
1723 break;
1724
1725 case TGSI_OPCODE_TXF:
1726 /* deprecated? */
1727 assert(0);
1728 return FALSE;
1729 break;
1730
1731 case TGSI_OPCODE_TXQ:
1732 /* deprecated? */
1733 assert(0);
1734 return FALSE;
1735 break;
1736
1737 case TGSI_OPCODE_CONT:
1738 lp_exec_continue(&bld->exec_mask);
1739 break;
1740
1741 case TGSI_OPCODE_EMIT:
1742 return FALSE;
1743 break;
1744
1745 case TGSI_OPCODE_ENDPRIM:
1746 return FALSE;
1747 break;
1748
1749 case TGSI_OPCODE_NOP:
1750 break;
1751
1752 default:
1753 return FALSE;
1754 }
1755
1756 if(info->num_dst) {
1757 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1758 emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1759 }
1760 }
1761
1762 return TRUE;
1763 }
1764
1765
1766 void
1767 lp_build_tgsi_soa(LLVMBuilderRef builder,
1768 const struct tgsi_token *tokens,
1769 struct lp_type type,
1770 struct lp_build_mask_context *mask,
1771 LLVMValueRef consts_ptr,
1772 const LLVMValueRef *pos,
1773 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1774 LLVMValueRef (*outputs)[NUM_CHANNELS],
1775 struct lp_build_sampler_soa *sampler,
1776 struct tgsi_shader_info *info)
1777 {
1778 struct lp_build_tgsi_soa_context bld;
1779 struct tgsi_parse_context parse;
1780 uint num_immediates = 0;
1781 unsigned i;
1782
1783 /* Setup build context */
1784 memset(&bld, 0, sizeof bld);
1785 lp_build_context_init(&bld.base, builder, type);
1786 bld.mask = mask;
1787 bld.pos = pos;
1788 bld.inputs = inputs;
1789 bld.outputs = outputs;
1790 bld.consts_ptr = consts_ptr;
1791 bld.sampler = sampler;
1792 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1793 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1794
1795 lp_exec_mask_init(&bld.exec_mask, &bld.base);
1796
1797 tgsi_parse_init( &parse, tokens );
1798
1799 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1800 tgsi_parse_token( &parse );
1801
1802 switch( parse.FullToken.Token.Type ) {
1803 case TGSI_TOKEN_TYPE_DECLARATION:
1804 /* Inputs already interpolated */
1805 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1806 break;
1807
1808 case TGSI_TOKEN_TYPE_INSTRUCTION:
1809 {
1810 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1811 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
1812 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
1813 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1814 opcode_info->mnemonic);
1815 }
1816
1817 break;
1818
1819 case TGSI_TOKEN_TYPE_IMMEDIATE:
1820 /* simply copy the immediate values into the next immediates[] slot */
1821 {
1822 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1823 assert(size <= 4);
1824 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1825 for( i = 0; i < size; ++i )
1826 bld.immediates[num_immediates][i] =
1827 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1828 for( i = size; i < 4; ++i )
1829 bld.immediates[num_immediates][i] = bld.base.undef;
1830 num_immediates++;
1831 }
1832 break;
1833
1834 case TGSI_TOKEN_TYPE_PROPERTY:
1835 break;
1836
1837 default:
1838 assert( 0 );
1839 }
1840 }
1841 if (0) {
1842 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1843 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1844 debug_printf("11111111111111111111111111111 \n");
1845 tgsi_dump(tokens, 0);
1846 LLVMDumpValue(function);
1847 debug_printf("2222222222222222222222222222 \n");
1848 }
1849 tgsi_parse_free( &parse );
1850 }
1851