gallivm: re-org, comments for get_temp_ptr()
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
62 #define FOR_EACH_CHANNEL( CHAN )\
63 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
64
65 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
66 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
67
68 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
69 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
70
71 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
72 FOR_EACH_CHANNEL( CHAN )\
73 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
74
75 #define CHAN_X 0
76 #define CHAN_Y 1
77 #define CHAN_Z 2
78 #define CHAN_W 3
79 #define NUM_CHANNELS 4
80
81 #define LP_MAX_INSTRUCTIONS 256
82
83
/**
 * Execution-mask state used to emulate TGSI control flow (IF/ELSE,
 * loops, subroutine calls) with per-channel masks in SoA form.
 * All masks are integer vectors: ~0 = channel active, 0 = inactive.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   boolean has_mask;           /* TRUE if any nesting construct is active */

   LLVMTypeRef int_vec_type;   /* vector type of all the masks below */

   /* IF/ELSE/ENDIF state: saved masks for nested conditionals */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;     /* mask of the innermost conditional */

   /* loop state */
   LLVMBasicBlockRef loop_block;  /* block the loop back-edge branches to */
   LLVMValueRef cont_mask;     /* channels cleared by CONT until ENDLOOP */
   LLVMValueRef break_mask;    /* channels cleared by BRK until loop exit */
   LLVMValueRef break_var;     /* alloca keeping break_mask live across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];  /* saved state of enclosing loops */
   int loop_stack_size;

   /* subroutine call state */
   LLVMValueRef ret_mask;      /* channels cleared by RET inside a subroutine */
   struct {
      int pc;                  /* return address: index into instruction array */
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   LLVMValueRef exec_mask;     /* combined cond & cont & break & ret mask */
};
116
/**
 * Context for translating a TGSI shader into LLVM IR in SoA
 * (structure-of-arrays) layout: each register channel is a vector
 * holding one element per fragment/vertex processed in parallel.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;   /* float-vector build context */

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;        /* pointer to the constant buffer */
   const LLVMValueRef *pos;        /* position values (not referenced in this chunk) */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];   /* shader input values (SSA, read-only) */
   LLVMValueRef (*outputs)[NUM_CHANNELS];        /* shader output allocas (stored through) */

   const struct lp_build_sampler_soa *sampler;   /* texture-sampling code generator */

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];  /* immediate values */
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];  /* per-channel temp allocas */
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];   /* address register allocas */
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];  /* predicate register allocas */

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   struct lp_build_mask_context *mask;   /* fragment kill mask (KIL/KILP) */
   struct lp_exec_mask exec_mask;        /* control-flow execution mask */

   struct tgsi_full_instruction *instructions;   /* parsed instruction buffer */
   uint max_instructions;                        /* allocated capacity of 'instructions' */
};
147
148 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
149 {
150 mask->bld = bld;
151 mask->has_mask = FALSE;
152 mask->cond_stack_size = 0;
153 mask->loop_stack_size = 0;
154 mask->call_stack_size = 0;
155
156 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
157 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
158 LLVMConstAllOnes(mask->int_vec_type);
159 }
160
/**
 * Recompute mask->exec_mask from the active partial masks
 * (cond/cont/break/ret) and refresh mask->has_mask.
 * Called after any of the partial masks changes.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   /* inside a subroutine, channels that already executed RET stay disabled */
   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}
189
/**
 * Begin an IF: push the current conditional mask and install 'val'
 * (the IF condition, already an integer mask vector) as the new one.
 * NOTE(review): 'val' is not ANDed with the previous cond_mask here —
 * presumably the caller passes an already-combined mask; verify at call site.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* outermost IF: the saved mask must still be all-ones */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = val;

   lp_exec_mask_update(mask);
}
203
/**
 * ELSE: invert the current conditional mask, constrained by the mask
 * that was in effect when the matching IF was pushed.
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      /* outermost conditional: enclosing mask is all-ones */
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   /* new mask = ~if_mask & enclosing_mask */
   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
222
223 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
224 {
225 assert(mask->cond_stack_size);
226 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
227 lp_exec_mask_update(mask);
228 }
229
/**
 * BGNLOOP: save the enclosing loop's state, start a fresh break mask
 * (kept in an alloca so it survives iterations), and open the basic
 * block that the loop back-edge will branch to.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* outermost loop: no loop state may be live yet */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* push the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask must persist across iterations, so spill it to memory */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* open the loop header block; ENDLOOP branches back here */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload the break mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
258
/**
 * BRK: clear break_mask bits for the currently active channels; those
 * channels stay disabled until the loop is exited.
 */
static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "break");

   /* break_mask &= ~exec_mask */
   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}
271
/**
 * CONT: clear cont_mask bits for the currently active channels; those
 * channels skip the rest of this iteration (mask is restored at ENDLOOP).
 */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "");

   /* cont_mask &= ~exec_mask */
   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}
284
285
/**
 * ENDLOOP: emit the back-edge branch (loop again while any channel is
 * still active) and restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* integer wide enough to hold the whole mask vector as one scalar */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. some channel is still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop back while any channel is active, otherwise fall through */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the enclosing loop's state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
331
/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 *
 * \param pred  optional per-channel predicate mask, or NULL;
 *              combined (ANDed) with the execution mask when present.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      /* masked store: read-modify-write with a per-channel select */
      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* no mask active: plain store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}
363
364 static void lp_exec_mask_call(struct lp_exec_mask *mask,
365 int func,
366 int *pc)
367 {
368 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
369 mask->call_stack[mask->call_stack_size].pc = *pc;
370 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
371 mask->call_stack_size++;
372 *pc = func;
373 }
374
/**
 * RET: from main(), terminate interpretation (*pc = -1).  Inside a
 * subroutine, clear ret_mask bits for the active channels so they skip
 * the remainder of the subroutine body.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   /* ret_mask &= ~exec_mask */
   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
394
/**
 * BGNSUB: nothing to do — subroutine entry state is set up by
 * lp_exec_mask_call(); this exists for symmetry with ENDSUB.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
398
399 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
400 {
401 assert(mask->call_stack_size);
402 mask->call_stack_size--;
403 *pc = mask->call_stack[mask->call_stack_size].pc;
404 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
405 lp_exec_mask_update(mask);
406 }
407
408
409 /**
410 * Return pointer to a temporary register channel (src or dest).
411 * \param index which temporary register
412 * \param chan which channel of the temp register.
413 * \param is_indirect if true, add 'addr' to the index
414 * \param addr indirect addressing offset (should already have been
415 * multiplied by four).
416 */
417 static LLVMValueRef
418 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
419 unsigned index,
420 unsigned chan,
421 boolean is_indirect,
422 LLVMValueRef addr)
423 {
424 assert(chan < 4);
425 if (bld->has_indirect_addressing) {
426 LLVMValueRef lindex =
427 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
428 if (is_indirect)
429 lindex = lp_build_add(&bld->base, lindex, addr);
430 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
431 }
432 else {
433 return bld->temps[index][chan];
434 }
435 }
436
437
/**
 * Gather vector.
 * Loads one scalar per vector lane: res[i] = base_ptr[indexes[i]].
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 * \param base_ptr  pointer to an array of scalars
 * \param indexes   integer vector of element indices, one per lane
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}
467
468
/**
 * Register fetch.
 * Returns the vector value of one channel of one source operand,
 * after applying the operand's swizzle and sign mode.
 * \param src_op      which source operand of the instruction
 * \param chan_index  which destination channel is being computed
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr_vec = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
      /* NOTE: deliberately shadows the outer 'swizzle' — this is the
       * swizzle of the *address* register, not of the operand itself */
      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );

      LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);

      assert(bld->has_indirect_addressing);

      /* load the (float) address register channel */
      addr_vec = LLVMBuildLoad(bld->base.builder,
                               bld->addr[reg->Indirect.Index][swizzle],
                               "load addr");

      /* for indexing we want integers */
      addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
                                 int_vec_type, "");

      /* addr_vec = addr_vec * 4 */
      addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      {
         if (reg->Register.Indirect) {
            LLVMValueRef index_vec;  /* index into the const buffer */

            /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
            index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                               reg->Register.Index * 4 + swizzle);

            /* index_vec = index_vec + addr_vec */
            index_vec = lp_build_add(&bld->base, index_vec, addr_vec);

            /* Gather values from the constant buffer */
            res = build_gather(bld, bld->consts_ptr, index_vec);
         }
         else {
            LLVMValueRef index;  /* index into the const buffer */
            LLVMValueRef scalar, scalar_ptr;

            index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

            /* load the scalar constant and replicate it to all lanes */
            scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                      &index, 1, "");
            scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

            res = lp_build_broadcast_scalar(&bld->base, scalar);
         }
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef addr = NULL;
         LLVMValueRef temp_ptr;

         if (reg->Register.Indirect) {
            /* temps only support a scalar offset: use lane 0 of addr_vec */
            LLVMValueRef zero = lp_build_const_int32(0);
            addr = LLVMBuildExtractElement(bld->base.builder,
                                           addr_vec, zero, "");
         }

         temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                 swizzle,
                                 reg->Register.Indirect,
                                 addr);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if(!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the operand's absolute-value / negate modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_TOGGLE:
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
598
599
600 /**
601 * Register fetch with derivatives.
602 */
603 static void
604 emit_fetch_deriv(
605 struct lp_build_tgsi_soa_context *bld,
606 const struct tgsi_full_instruction *inst,
607 unsigned index,
608 const unsigned chan_index,
609 LLVMValueRef *res,
610 LLVMValueRef *ddx,
611 LLVMValueRef *ddy)
612 {
613 LLVMValueRef src;
614
615 src = emit_fetch(bld, inst, index, chan_index);
616
617 if(res)
618 *res = src;
619
620 /* TODO: use interpolation coeffs for inputs */
621
622 if(ddx)
623 *ddx = lp_build_ddx(&bld->base, src);
624
625 if(ddy)
626 *ddy = lp_build_ddy(&bld->base, src);
627 }
628
629
/**
 * Predicate.
 * Fill pred[0..3] with per-channel predicate masks for this instruction,
 * or NULLs if the instruction is not predicated.  Each mask is ~0 where
 * the (swizzled, possibly negated) predicate register is non-zero.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   /* unpredicated instruction: no masks */
   if (!inst->Instruction.Predicate) {
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;   /* cache for repeated swizzles */
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
695
696
697 /**
698 * Register store.
699 */
700 static void
701 emit_store(
702 struct lp_build_tgsi_soa_context *bld,
703 const struct tgsi_full_instruction *inst,
704 unsigned index,
705 unsigned chan_index,
706 LLVMValueRef pred,
707 LLVMValueRef value)
708 {
709 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
710 LLVMValueRef addr = NULL;
711
712 switch( inst->Instruction.Saturate ) {
713 case TGSI_SAT_NONE:
714 break;
715
716 case TGSI_SAT_ZERO_ONE:
717 value = lp_build_max(&bld->base, value, bld->base.zero);
718 value = lp_build_min(&bld->base, value, bld->base.one);
719 break;
720
721 case TGSI_SAT_MINUS_PLUS_ONE:
722 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
723 value = lp_build_min(&bld->base, value, bld->base.one);
724 break;
725
726 default:
727 assert(0);
728 }
729
730 if (reg->Register.Indirect) {
731 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
732 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
733 addr = LLVMBuildLoad(bld->base.builder,
734 bld->addr[reg->Indirect.Index][swizzle],
735 "");
736 /* for indexing we want integers */
737 addr = LLVMBuildFPToSI(bld->base.builder, addr,
738 int_vec_type, "");
739 addr = LLVMBuildExtractElement(bld->base.builder,
740 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
741 "");
742 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
743 }
744
745 switch( reg->Register.File ) {
746 case TGSI_FILE_OUTPUT:
747 lp_exec_mask_store(&bld->exec_mask, pred, value,
748 bld->outputs[reg->Register.Index][chan_index]);
749 break;
750
751 case TGSI_FILE_TEMPORARY: {
752 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
753 chan_index,
754 reg->Register.Indirect,
755 addr);
756 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
757 break;
758 }
759
760 case TGSI_FILE_ADDRESS:
761 lp_exec_mask_store(&bld->exec_mask, pred, value,
762 bld->addr[reg->Indirect.Index][chan_index]);
763 break;
764
765 case TGSI_FILE_PREDICATE:
766 lp_exec_mask_store(&bld->exec_mask, pred, value,
767 bld->preds[index][chan_index]);
768 break;
769
770 default:
771 assert( 0 );
772 }
773 }
774
775
776 /**
777 * High-level instruction translators.
778 */
779
/** Texture-instruction variants (which TGSI texture opcode is emitted). */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,        /* plain TEX */
   TEX_MODIFIER_PROJECTED,       /* TXP: coords divided by src0.w */
   TEX_MODIFIER_LOD_BIAS,        /* TXB: lod bias taken from src0.w */
   TEX_MODIFIER_EXPLICIT_LOD,    /* TXL: explicit lod taken from src0.w */
   TEX_MODIFIER_EXPLICIT_DERIV   /* TXD: explicit derivatives in src1/src2 */
};
787
/**
 * Emit code for a TGSI texture-sampling instruction.
 * Fetches coordinates (plus lod/bias/derivatives as dictated by
 * 'modifier') and delegates the actual sampling to bld->sampler.
 * \param modifier  which texture opcode variant this is
 * \param texel     receives the four texel channel values
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator supplied: return undefs instead of crashing */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinate components the texture target consumes */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias and explicit lod both live in src0.w */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projective texturing: scale coords by 1/w */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives in src1/src2, sampler unit in src3 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives from neighboring pixels in the quad */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
884
885
/**
 * Kill fragment if any of the src register values are negative.
 * Builds a "keep" mask (per-channel term >= 0 ANDed over all referenced
 * channels) and folds it into the fragment mask.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel keep masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   /* disable the fragments whose keep mask is zero */
   if(mask)
      lp_build_mask_update(bld->mask, mask);
}
934
935
/**
 * Predicated fragment kill.
 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 */
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst)
{
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      /* kill exactly the channels the exec mask says are active */
      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      /* no execution mask: kill everything (mask of all zeros) */
      mask = bld->base.zero;
   }

   lp_build_mask_update(bld->mask, mask);
}
960
961 static void
962 emit_declaration(
963 struct lp_build_tgsi_soa_context *bld,
964 const struct tgsi_full_declaration *decl)
965 {
966 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
967
968 unsigned first = decl->Range.First;
969 unsigned last = decl->Range.Last;
970 unsigned idx, i;
971
972 for (idx = first; idx <= last; ++idx) {
973 switch (decl->Declaration.File) {
974 case TGSI_FILE_TEMPORARY:
975 assert(idx < LP_MAX_TGSI_TEMPS);
976 if (bld->has_indirect_addressing) {
977 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
978 last*4 + 4, 0);
979 bld->temps_array = lp_build_array_alloca(bld->base.builder,
980 vec_type, array_size, "");
981 } else {
982 for (i = 0; i < NUM_CHANNELS; i++)
983 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
984 vec_type, "");
985 }
986 break;
987
988 case TGSI_FILE_OUTPUT:
989 for (i = 0; i < NUM_CHANNELS; i++)
990 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
991 vec_type, "");
992 break;
993
994 case TGSI_FILE_ADDRESS:
995 assert(idx < LP_MAX_TGSI_ADDRS);
996 for (i = 0; i < NUM_CHANNELS; i++)
997 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
998 vec_type, "");
999 break;
1000
1001 case TGSI_FILE_PREDICATE:
1002 assert(idx < LP_MAX_TGSI_PREDS);
1003 for (i = 0; i < NUM_CHANNELS; i++)
1004 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1005 vec_type, "");
1006 break;
1007
1008 default:
1009 /* don't need to declare other vars */
1010 break;
1011 }
1012 }
1013 }
1014
1015
1016 /**
1017 * Emit LLVM for one TGSI instruction.
1018 * \param return TRUE for success, FALSE otherwise
1019 */
1020 static boolean
1021 emit_instruction(
1022 struct lp_build_tgsi_soa_context *bld,
1023 const struct tgsi_full_instruction *inst,
1024 const struct tgsi_opcode_info *info,
1025 int *pc)
1026 {
1027 unsigned chan_index;
1028 LLVMValueRef src0, src1, src2;
1029 LLVMValueRef tmp0, tmp1, tmp2;
1030 LLVMValueRef tmp3 = NULL;
1031 LLVMValueRef tmp4 = NULL;
1032 LLVMValueRef tmp5 = NULL;
1033 LLVMValueRef tmp6 = NULL;
1034 LLVMValueRef tmp7 = NULL;
1035 LLVMValueRef res;
1036 LLVMValueRef dst0[NUM_CHANNELS];
1037
1038 /*
1039 * Stores and write masks are handled in a general fashion after the long
1040 * instruction opcode switch statement.
1041 *
1042 * Although not stricitly necessary, we avoid generating instructions for
1043 * channels which won't be stored, in cases where's that easy. For some
1044 * complex instructions, like texture sampling, it is more convenient to
1045 * assume a full writemask and then let LLVM optimization passes eliminate
1046 * redundant code.
1047 */
1048
1049 (*pc)++;
1050
1051 assert(info->num_dst <= 1);
1052 if (info->num_dst) {
1053 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1054 dst0[chan_index] = bld->base.undef;
1055 }
1056 }
1057
1058 switch (inst->Instruction.Opcode) {
1059 case TGSI_OPCODE_ARL:
1060 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1061 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1062 tmp0 = lp_build_floor(&bld->base, tmp0);
1063 dst0[chan_index] = tmp0;
1064 }
1065 break;
1066
1067 case TGSI_OPCODE_MOV:
1068 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1069 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1070 }
1071 break;
1072
1073 case TGSI_OPCODE_LIT:
1074 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1075 dst0[CHAN_X] = bld->base.one;
1076 }
1077 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1078 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1079 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1080 }
1081 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1082 /* XMM[1] = SrcReg[0].yyyy */
1083 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1084 /* XMM[1] = max(XMM[1], 0) */
1085 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1086 /* XMM[2] = SrcReg[0].wwww */
1087 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1088 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1089 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1090 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1091 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1092 }
1093 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1094 dst0[CHAN_W] = bld->base.one;
1095 }
1096 break;
1097
1098 case TGSI_OPCODE_RCP:
1099 /* TGSI_OPCODE_RECIP */
1100 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1101 res = lp_build_rcp(&bld->base, src0);
1102 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1103 dst0[chan_index] = res;
1104 }
1105 break;
1106
1107 case TGSI_OPCODE_RSQ:
1108 /* TGSI_OPCODE_RECIPSQRT */
1109 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1110 src0 = lp_build_abs(&bld->base, src0);
1111 res = lp_build_rsqrt(&bld->base, src0);
1112 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1113 dst0[chan_index] = res;
1114 }
1115 break;
1116
1117 case TGSI_OPCODE_EXP:
1118 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1119 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1120 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1121 LLVMValueRef *p_exp2_int_part = NULL;
1122 LLVMValueRef *p_frac_part = NULL;
1123 LLVMValueRef *p_exp2 = NULL;
1124
1125 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1126
1127 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1128 p_exp2_int_part = &tmp0;
1129 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1130 p_frac_part = &tmp1;
1131 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1132 p_exp2 = &tmp2;
1133
1134 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1135
1136 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1137 dst0[CHAN_X] = tmp0;
1138 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1139 dst0[CHAN_Y] = tmp1;
1140 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1141 dst0[CHAN_Z] = tmp2;
1142 }
1143 /* dst.w = 1.0 */
1144 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1145 dst0[CHAN_W] = bld->base.one;
1146 }
1147 break;
1148
1149 case TGSI_OPCODE_LOG:
1150 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1151 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1152 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1153 LLVMValueRef *p_floor_log2 = NULL;
1154 LLVMValueRef *p_exp = NULL;
1155 LLVMValueRef *p_log2 = NULL;
1156
1157 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1158 src0 = lp_build_abs( &bld->base, src0 );
1159
1160 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1161 p_floor_log2 = &tmp0;
1162 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1163 p_exp = &tmp1;
1164 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1165 p_log2 = &tmp2;
1166
1167 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1168
1169 /* dst.x = floor(lg2(abs(src.x))) */
1170 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1171 dst0[CHAN_X] = tmp0;
1172 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1173 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1174 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1175 }
1176 /* dst.z = lg2(abs(src.x)) */
1177 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1178 dst0[CHAN_Z] = tmp2;
1179 }
1180 /* dst.w = 1.0 */
1181 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1182 dst0[CHAN_W] = bld->base.one;
1183 }
1184 break;
1185
1186 case TGSI_OPCODE_MUL:
1187 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1188 src0 = emit_fetch( bld, inst, 0, chan_index );
1189 src1 = emit_fetch( bld, inst, 1, chan_index );
1190 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1191 }
1192 break;
1193
1194 case TGSI_OPCODE_ADD:
1195 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1196 src0 = emit_fetch( bld, inst, 0, chan_index );
1197 src1 = emit_fetch( bld, inst, 1, chan_index );
1198 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1199 }
1200 break;
1201
1202 case TGSI_OPCODE_DP3:
1203 /* TGSI_OPCODE_DOT3 */
1204 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1205 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1206 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1207 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1208 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1209 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1210 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1211 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1212 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1213 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1214 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1215 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1216 dst0[chan_index] = tmp0;
1217 }
1218 break;
1219
1220 case TGSI_OPCODE_DP4:
1221 /* TGSI_OPCODE_DOT4 */
1222 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1223 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1224 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1225 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1226 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1227 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1228 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1229 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1230 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1231 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1232 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1233 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1234 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1235 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1236 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1237 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1238 dst0[chan_index] = tmp0;
1239 }
1240 break;
1241
1242 case TGSI_OPCODE_DST:
1243 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1244 dst0[CHAN_X] = bld->base.one;
1245 }
1246 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1247 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1248 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1249 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1250 }
1251 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1252 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1253 }
1254 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1255 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1256 }
1257 break;
1258
1259 case TGSI_OPCODE_MIN:
1260 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1261 src0 = emit_fetch( bld, inst, 0, chan_index );
1262 src1 = emit_fetch( bld, inst, 1, chan_index );
1263 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1264 }
1265 break;
1266
1267 case TGSI_OPCODE_MAX:
1268 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1269 src0 = emit_fetch( bld, inst, 0, chan_index );
1270 src1 = emit_fetch( bld, inst, 1, chan_index );
1271 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1272 }
1273 break;
1274
1275 case TGSI_OPCODE_SLT:
1276 /* TGSI_OPCODE_SETLT */
1277 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1278 src0 = emit_fetch( bld, inst, 0, chan_index );
1279 src1 = emit_fetch( bld, inst, 1, chan_index );
1280 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1281 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1282 }
1283 break;
1284
1285 case TGSI_OPCODE_SGE:
1286 /* TGSI_OPCODE_SETGE */
1287 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1288 src0 = emit_fetch( bld, inst, 0, chan_index );
1289 src1 = emit_fetch( bld, inst, 1, chan_index );
1290 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1291 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1292 }
1293 break;
1294
1295 case TGSI_OPCODE_MAD:
1296 /* TGSI_OPCODE_MADD */
1297 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1298 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1299 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1300 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1301 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1302 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1303 dst0[chan_index] = tmp0;
1304 }
1305 break;
1306
1307 case TGSI_OPCODE_SUB:
1308 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1309 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1310 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1311 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1312 }
1313 break;
1314
1315 case TGSI_OPCODE_LRP:
1316 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1317 src0 = emit_fetch( bld, inst, 0, chan_index );
1318 src1 = emit_fetch( bld, inst, 1, chan_index );
1319 src2 = emit_fetch( bld, inst, 2, chan_index );
1320 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1321 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1322 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1323 }
1324 break;
1325
1326 case TGSI_OPCODE_CND:
1327 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1328 src0 = emit_fetch( bld, inst, 0, chan_index );
1329 src1 = emit_fetch( bld, inst, 1, chan_index );
1330 src2 = emit_fetch( bld, inst, 2, chan_index );
1331 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1332 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1333 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1334 }
1335 break;
1336
1337 case TGSI_OPCODE_DP2A:
1338 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1339 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1340 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1341 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1342 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1343 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1344 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1345 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1346 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1347 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1348 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1349 }
1350 break;
1351
1352 case TGSI_OPCODE_FRC:
1353 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1354 src0 = emit_fetch( bld, inst, 0, chan_index );
1355 tmp0 = lp_build_floor(&bld->base, src0);
1356 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1357 dst0[chan_index] = tmp0;
1358 }
1359 break;
1360
1361 case TGSI_OPCODE_CLAMP:
1362 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1363 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1364 src1 = emit_fetch( bld, inst, 1, chan_index );
1365 src2 = emit_fetch( bld, inst, 2, chan_index );
1366 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1367 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1368 dst0[chan_index] = tmp0;
1369 }
1370 break;
1371
1372 case TGSI_OPCODE_FLR:
1373 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1374 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1375 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1376 }
1377 break;
1378
1379 case TGSI_OPCODE_ROUND:
1380 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1381 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1382 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1383 }
1384 break;
1385
1386 case TGSI_OPCODE_EX2: {
1387 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1388 tmp0 = lp_build_exp2( &bld->base, tmp0);
1389 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1390 dst0[chan_index] = tmp0;
1391 }
1392 break;
1393 }
1394
1395 case TGSI_OPCODE_LG2:
1396 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1397 tmp0 = lp_build_log2( &bld->base, tmp0);
1398 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1399 dst0[chan_index] = tmp0;
1400 }
1401 break;
1402
1403 case TGSI_OPCODE_POW:
1404 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1405 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1406 res = lp_build_pow( &bld->base, src0, src1 );
1407 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1408 dst0[chan_index] = res;
1409 }
1410 break;
1411
1412 case TGSI_OPCODE_XPD:
1413 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1414 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1415 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1416 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1417 }
1418 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1419 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1420 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1421 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1422 }
1423 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1424 tmp2 = tmp0;
1425 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1426 tmp5 = tmp3;
1427 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1428 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1429 dst0[CHAN_X] = tmp2;
1430 }
1431 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1432 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1433 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1434 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1435 }
1436 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1437 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1438 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1439 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1440 dst0[CHAN_Y] = tmp3;
1441 }
1442 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1443 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1444 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1445 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1446 dst0[CHAN_Z] = tmp5;
1447 }
1448 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1449 dst0[CHAN_W] = bld->base.one;
1450 }
1451 break;
1452
1453 case TGSI_OPCODE_ABS:
1454 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1455 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1456 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1457 }
1458 break;
1459
1460 case TGSI_OPCODE_RCC:
1461 /* deprecated? */
1462 assert(0);
1463 return FALSE;
1464
1465 case TGSI_OPCODE_DPH:
1466 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1467 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1468 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1469 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1470 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1471 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1472 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1473 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1474 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1475 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1476 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1477 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1478 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1479 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1480 dst0[chan_index] = tmp0;
1481 }
1482 break;
1483
1484 case TGSI_OPCODE_COS:
1485 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1486 tmp0 = lp_build_cos( &bld->base, tmp0 );
1487 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1488 dst0[chan_index] = tmp0;
1489 }
1490 break;
1491
1492 case TGSI_OPCODE_DDX:
1493 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1494 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1495 }
1496 break;
1497
1498 case TGSI_OPCODE_DDY:
1499 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1500 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1501 }
1502 break;
1503
1504 case TGSI_OPCODE_KILP:
1505 /* predicated kill */
1506 emit_kilp( bld, inst );
1507 break;
1508
1509 case TGSI_OPCODE_KIL:
1510 /* conditional kill */
1511 emit_kil( bld, inst );
1512 break;
1513
1514 case TGSI_OPCODE_PK2H:
1515 return FALSE;
1516 break;
1517
1518 case TGSI_OPCODE_PK2US:
1519 return FALSE;
1520 break;
1521
1522 case TGSI_OPCODE_PK4B:
1523 return FALSE;
1524 break;
1525
1526 case TGSI_OPCODE_PK4UB:
1527 return FALSE;
1528 break;
1529
1530 case TGSI_OPCODE_RFL:
1531 return FALSE;
1532 break;
1533
1534 case TGSI_OPCODE_SEQ:
1535 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1536 src0 = emit_fetch( bld, inst, 0, chan_index );
1537 src1 = emit_fetch( bld, inst, 1, chan_index );
1538 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1539 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1540 }
1541 break;
1542
1543 case TGSI_OPCODE_SFL:
1544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1545 dst0[chan_index] = bld->base.zero;
1546 }
1547 break;
1548
1549 case TGSI_OPCODE_SGT:
1550 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1551 src0 = emit_fetch( bld, inst, 0, chan_index );
1552 src1 = emit_fetch( bld, inst, 1, chan_index );
1553 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1554 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1555 }
1556 break;
1557
1558 case TGSI_OPCODE_SIN:
1559 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1560 tmp0 = lp_build_sin( &bld->base, tmp0 );
1561 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1562 dst0[chan_index] = tmp0;
1563 }
1564 break;
1565
1566 case TGSI_OPCODE_SLE:
1567 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1568 src0 = emit_fetch( bld, inst, 0, chan_index );
1569 src1 = emit_fetch( bld, inst, 1, chan_index );
1570 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1571 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1572 }
1573 break;
1574
1575 case TGSI_OPCODE_SNE:
1576 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1577 src0 = emit_fetch( bld, inst, 0, chan_index );
1578 src1 = emit_fetch( bld, inst, 1, chan_index );
1579 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1580 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1581 }
1582 break;
1583
1584 case TGSI_OPCODE_STR:
1585 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1586 dst0[chan_index] = bld->base.one;
1587 }
1588 break;
1589
1590 case TGSI_OPCODE_TEX:
1591 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1592 break;
1593
1594 case TGSI_OPCODE_TXD:
1595 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1596 break;
1597
1598 case TGSI_OPCODE_UP2H:
1599 /* deprecated */
1600 assert (0);
1601 return FALSE;
1602 break;
1603
1604 case TGSI_OPCODE_UP2US:
1605 /* deprecated */
1606 assert(0);
1607 return FALSE;
1608 break;
1609
1610 case TGSI_OPCODE_UP4B:
1611 /* deprecated */
1612 assert(0);
1613 return FALSE;
1614 break;
1615
1616 case TGSI_OPCODE_UP4UB:
1617 /* deprecated */
1618 assert(0);
1619 return FALSE;
1620 break;
1621
1622 case TGSI_OPCODE_X2D:
1623 /* deprecated? */
1624 assert(0);
1625 return FALSE;
1626 break;
1627
1628 case TGSI_OPCODE_ARA:
1629 /* deprecated */
1630 assert(0);
1631 return FALSE;
1632 break;
1633
1634 case TGSI_OPCODE_ARR:
1635 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1636 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1637 tmp0 = lp_build_round(&bld->base, tmp0);
1638 dst0[chan_index] = tmp0;
1639 }
1640 break;
1641
1642 case TGSI_OPCODE_BRA:
1643 /* deprecated */
1644 assert(0);
1645 return FALSE;
1646 break;
1647
1648 case TGSI_OPCODE_CAL:
1649 lp_exec_mask_call(&bld->exec_mask,
1650 inst->Label.Label,
1651 pc);
1652
1653 break;
1654
1655 case TGSI_OPCODE_RET:
1656 lp_exec_mask_ret(&bld->exec_mask, pc);
1657 break;
1658
1659 case TGSI_OPCODE_END:
1660 *pc = -1;
1661 break;
1662
1663 case TGSI_OPCODE_SSG:
1664 /* TGSI_OPCODE_SGN */
1665 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1666 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1667 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1668 }
1669 break;
1670
1671 case TGSI_OPCODE_CMP:
1672 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1673 src0 = emit_fetch( bld, inst, 0, chan_index );
1674 src1 = emit_fetch( bld, inst, 1, chan_index );
1675 src2 = emit_fetch( bld, inst, 2, chan_index );
1676 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1677 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1678 }
1679 break;
1680
1681 case TGSI_OPCODE_SCS:
1682 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1683 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1684 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1685 }
1686 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1687 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1688 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1689 }
1690 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1691 dst0[CHAN_Z] = bld->base.zero;
1692 }
1693 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1694 dst0[CHAN_W] = bld->base.one;
1695 }
1696 break;
1697
1698 case TGSI_OPCODE_TXB:
1699 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1700 break;
1701
1702 case TGSI_OPCODE_NRM:
1703 /* fall-through */
1704 case TGSI_OPCODE_NRM4:
1705 /* 3 or 4-component normalization */
1706 {
1707 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1708
1709 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1710 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1711 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1712 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1713
1714 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1715
1716 /* xmm4 = src.x */
1717 /* xmm0 = src.x * src.x */
1718 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1719 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1720 tmp4 = tmp0;
1721 }
1722 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1723
1724 /* xmm5 = src.y */
1725 /* xmm0 = xmm0 + src.y * src.y */
1726 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1727 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1728 tmp5 = tmp1;
1729 }
1730 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1731 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1732
1733 /* xmm6 = src.z */
1734 /* xmm0 = xmm0 + src.z * src.z */
1735 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1736 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1737 tmp6 = tmp1;
1738 }
1739 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1740 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1741
1742 if (dims == 4) {
1743 /* xmm7 = src.w */
1744 /* xmm0 = xmm0 + src.w * src.w */
1745 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1746 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1747 tmp7 = tmp1;
1748 }
1749 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1750 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1751 }
1752
1753 /* xmm1 = 1 / sqrt(xmm0) */
1754 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1755
1756 /* dst.x = xmm1 * src.x */
1757 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1758 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1759 }
1760
1761 /* dst.y = xmm1 * src.y */
1762 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1763 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1764 }
1765
1766 /* dst.z = xmm1 * src.z */
1767 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1768 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1769 }
1770
1771 /* dst.w = xmm1 * src.w */
1772 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1773 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1774 }
1775 }
1776
1777 /* dst.w = 1.0 */
1778 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1779 dst0[CHAN_W] = bld->base.one;
1780 }
1781 }
1782 break;
1783
1784 case TGSI_OPCODE_DIV:
1785 /* deprecated */
1786 assert( 0 );
1787 return FALSE;
1788 break;
1789
1790 case TGSI_OPCODE_DP2:
1791 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1792 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1793 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1794 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1795 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1796 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1797 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1798 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1799 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1800 }
1801 break;
1802
1803 case TGSI_OPCODE_TXL:
1804 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1805 break;
1806
1807 case TGSI_OPCODE_TXP:
1808 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1809 break;
1810
1811 case TGSI_OPCODE_BRK:
1812 lp_exec_break(&bld->exec_mask);
1813 break;
1814
1815 case TGSI_OPCODE_IF:
1816 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1817 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1818 tmp0, bld->base.zero);
1819 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1820 break;
1821
1822 case TGSI_OPCODE_BGNLOOP:
1823 lp_exec_bgnloop(&bld->exec_mask);
1824 break;
1825
1826 case TGSI_OPCODE_BGNSUB:
1827 lp_exec_mask_bgnsub(&bld->exec_mask);
1828 break;
1829
1830 case TGSI_OPCODE_ELSE:
1831 lp_exec_mask_cond_invert(&bld->exec_mask);
1832 break;
1833
1834 case TGSI_OPCODE_ENDIF:
1835 lp_exec_mask_cond_pop(&bld->exec_mask);
1836 break;
1837
1838 case TGSI_OPCODE_ENDLOOP:
1839 lp_exec_endloop(&bld->exec_mask);
1840 break;
1841
1842 case TGSI_OPCODE_ENDSUB:
1843 lp_exec_mask_endsub(&bld->exec_mask, pc);
1844 break;
1845
1846 case TGSI_OPCODE_PUSHA:
1847 /* deprecated? */
1848 assert(0);
1849 return FALSE;
1850 break;
1851
1852 case TGSI_OPCODE_POPA:
1853 /* deprecated? */
1854 assert(0);
1855 return FALSE;
1856 break;
1857
1858 case TGSI_OPCODE_CEIL:
1859 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1860 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1861 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1862 }
1863 break;
1864
1865 case TGSI_OPCODE_I2F:
1866 /* deprecated? */
1867 assert(0);
1868 return FALSE;
1869 break;
1870
1871 case TGSI_OPCODE_NOT:
1872 /* deprecated? */
1873 assert(0);
1874 return FALSE;
1875 break;
1876
1877 case TGSI_OPCODE_TRUNC:
1878 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1879 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1880 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1881 }
1882 break;
1883
1884 case TGSI_OPCODE_SHL:
1885 /* deprecated? */
1886 assert(0);
1887 return FALSE;
1888 break;
1889
1890 case TGSI_OPCODE_ISHR:
1891 /* deprecated? */
1892 assert(0);
1893 return FALSE;
1894 break;
1895
1896 case TGSI_OPCODE_AND:
1897 /* deprecated? */
1898 assert(0);
1899 return FALSE;
1900 break;
1901
1902 case TGSI_OPCODE_OR:
1903 /* deprecated? */
1904 assert(0);
1905 return FALSE;
1906 break;
1907
1908 case TGSI_OPCODE_MOD:
1909 /* deprecated? */
1910 assert(0);
1911 return FALSE;
1912 break;
1913
1914 case TGSI_OPCODE_XOR:
1915 /* deprecated? */
1916 assert(0);
1917 return FALSE;
1918 break;
1919
1920 case TGSI_OPCODE_SAD:
1921 /* deprecated? */
1922 assert(0);
1923 return FALSE;
1924 break;
1925
1926 case TGSI_OPCODE_TXF:
1927 /* deprecated? */
1928 assert(0);
1929 return FALSE;
1930 break;
1931
1932 case TGSI_OPCODE_TXQ:
1933 /* deprecated? */
1934 assert(0);
1935 return FALSE;
1936 break;
1937
1938 case TGSI_OPCODE_CONT:
1939 lp_exec_continue(&bld->exec_mask);
1940 break;
1941
1942 case TGSI_OPCODE_EMIT:
1943 return FALSE;
1944 break;
1945
1946 case TGSI_OPCODE_ENDPRIM:
1947 return FALSE;
1948 break;
1949
1950 case TGSI_OPCODE_NOP:
1951 break;
1952
1953 default:
1954 return FALSE;
1955 }
1956
1957 if(info->num_dst) {
1958 LLVMValueRef pred[NUM_CHANNELS];
1959
1960 emit_fetch_predicate( bld, inst, pred );
1961
1962 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1963 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1964 }
1965 }
1966
1967 return TRUE;
1968 }
1969
1970
1971 void
1972 lp_build_tgsi_soa(LLVMBuilderRef builder,
1973 const struct tgsi_token *tokens,
1974 struct lp_type type,
1975 struct lp_build_mask_context *mask,
1976 LLVMValueRef consts_ptr,
1977 const LLVMValueRef *pos,
1978 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1979 LLVMValueRef (*outputs)[NUM_CHANNELS],
1980 struct lp_build_sampler_soa *sampler,
1981 const struct tgsi_shader_info *info)
1982 {
1983 struct lp_build_tgsi_soa_context bld;
1984 struct tgsi_parse_context parse;
1985 uint num_immediates = 0;
1986 uint num_instructions = 0;
1987 unsigned i;
1988 int pc = 0;
1989
1990 /* Setup build context */
1991 memset(&bld, 0, sizeof bld);
1992 lp_build_context_init(&bld.base, builder, type);
1993 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1994 bld.mask = mask;
1995 bld.pos = pos;
1996 bld.inputs = inputs;
1997 bld.outputs = outputs;
1998 bld.consts_ptr = consts_ptr;
1999 bld.sampler = sampler;
2000 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
2001 info->opcode_count[TGSI_OPCODE_ARL] > 0;
2002 bld.instructions = (struct tgsi_full_instruction *)
2003 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2004 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2005
2006 if (!bld.instructions) {
2007 return;
2008 }
2009
2010 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2011
2012 tgsi_parse_init( &parse, tokens );
2013
2014 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2015 tgsi_parse_token( &parse );
2016
2017 switch( parse.FullToken.Token.Type ) {
2018 case TGSI_TOKEN_TYPE_DECLARATION:
2019 /* Inputs already interpolated */
2020 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2021 break;
2022
2023 case TGSI_TOKEN_TYPE_INSTRUCTION:
2024 {
2025 /* save expanded instruction */
2026 if (num_instructions == bld.max_instructions) {
2027 bld.instructions = REALLOC(bld.instructions,
2028 bld.max_instructions
2029 * sizeof(struct tgsi_full_instruction),
2030 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2031 * sizeof(struct tgsi_full_instruction));
2032 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2033 }
2034
2035 memcpy(bld.instructions + num_instructions,
2036 &parse.FullToken.FullInstruction,
2037 sizeof(bld.instructions[0]));
2038
2039 num_instructions++;
2040 }
2041
2042 break;
2043
2044 case TGSI_TOKEN_TYPE_IMMEDIATE:
2045 /* simply copy the immediate values into the next immediates[] slot */
2046 {
2047 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2048 assert(size <= 4);
2049 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2050 for( i = 0; i < size; ++i )
2051 bld.immediates[num_immediates][i] =
2052 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2053 for( i = size; i < 4; ++i )
2054 bld.immediates[num_immediates][i] = bld.base.undef;
2055 num_immediates++;
2056 }
2057 break;
2058
2059 case TGSI_TOKEN_TYPE_PROPERTY:
2060 break;
2061
2062 default:
2063 assert( 0 );
2064 }
2065 }
2066
2067 while (pc != -1) {
2068 struct tgsi_full_instruction *instr = bld.instructions + pc;
2069 const struct tgsi_opcode_info *opcode_info =
2070 tgsi_get_opcode_info(instr->Instruction.Opcode);
2071 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2072 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2073 opcode_info->mnemonic);
2074 }
2075
2076 if (0) {
2077 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2078 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2079 debug_printf("11111111111111111111111111111 \n");
2080 tgsi_dump(tokens, 0);
2081 lp_debug_dump_value(function);
2082 debug_printf("2222222222222222222222222222 \n");
2083 }
2084 tgsi_parse_free( &parse );
2085
2086 if (0) {
2087 LLVMModuleRef module = LLVMGetGlobalParent(
2088 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2089 LLVMDumpModule(module);
2090
2091 }
2092
2093 FREE( bld.instructions );
2094 }
2095