gallivm: Always use floating-point operators for floating-point types
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
62 #define FOR_EACH_CHANNEL( CHAN )\
63 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
64
65 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
66 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
67
68 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
69 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
70
71 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
72 FOR_EACH_CHANNEL( CHAN )\
73 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
74
75 #define CHAN_X 0
76 #define CHAN_Y 1
77 #define CHAN_Z 2
78 #define CHAN_W 3
79 #define NUM_CHANNELS 4
80
81 #define LP_MAX_INSTRUCTIONS 256
82
83
/**
 * Tracks per-channel execution state while translating TGSI control flow
 * (IF/ELSE, loops, subroutine calls) into branch-free vector masking.
 * All masks are integer vectors of type int_vec_type; ~0 per channel
 * means "channel active".
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   boolean has_mask;            /* TRUE when exec_mask may disable channels */

   LLVMTypeRef int_vec_type;    /* type of all the masks below */

   /* IF/ELSE/ENDIF nesting */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* BGNLOOP/ENDLOOP nesting */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;      /* alloca preserving break_mask across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* CAL/RET nesting */
   LLVMValueRef ret_mask;
   struct {
      int pc;                   /* instruction index to resume at after RET */
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* conjunction of the cond/cont/break/ret masks, see lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
116
/**
 * Context for TGSI -> LLVM IR translation in SoA (structure-of-arrays)
 * layout: each TGSI register channel is one LLVM vector holding that
 * channel for several fragments/vertices at once.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;   /* build context for the float vector type */

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;        /* pointer to the constant buffer */
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];   /* allocas, see emit_declaration() */

   const struct lp_build_sampler_soa *sampler;  /* may be NULL, see emit_tex() */

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;   /* fragment live mask (KIL/KILP) */
   struct lp_exec_mask exec_mask;        /* control-flow execution mask */

   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
151
152 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
153 {
154 mask->bld = bld;
155 mask->has_mask = FALSE;
156 mask->cond_stack_size = 0;
157 mask->loop_stack_size = 0;
158 mask->call_stack_size = 0;
159
160 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
161 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
162 LLVMConstAllOnes(mask->int_vec_type);
163 }
164
165 static void lp_exec_mask_update(struct lp_exec_mask *mask)
166 {
167 if (mask->loop_stack_size) {
168 /*for loops we need to update the entire mask at runtime */
169 LLVMValueRef tmp;
170 assert(mask->break_mask);
171 tmp = LLVMBuildAnd(mask->bld->builder,
172 mask->cont_mask,
173 mask->break_mask,
174 "maskcb");
175 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
176 mask->cond_mask,
177 tmp,
178 "maskfull");
179 } else
180 mask->exec_mask = mask->cond_mask;
181
182 if (mask->call_stack_size) {
183 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
184 mask->exec_mask,
185 mask->ret_mask,
186 "callmask");
187 }
188
189 mask->has_mask = (mask->cond_stack_size > 0 ||
190 mask->loop_stack_size > 0 ||
191 mask->call_stack_size > 0);
192 }
193
194 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
195 LLVMValueRef val)
196 {
197 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
198 if (mask->cond_stack_size == 0) {
199 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
200 }
201 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
202 assert(LLVMTypeOf(val) == mask->int_vec_type);
203 mask->cond_mask = val;
204
205 lp_exec_mask_update(mask);
206 }
207
208 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
209 {
210 LLVMValueRef prev_mask;
211 LLVMValueRef inv_mask;
212
213 assert(mask->cond_stack_size);
214 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
215 if (mask->cond_stack_size == 1) {
216 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
217 }
218
219 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
220
221 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
222 inv_mask,
223 prev_mask, "");
224 lp_exec_mask_update(mask);
225 }
226
227 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
228 {
229 assert(mask->cond_stack_size);
230 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
231 lp_exec_mask_update(mask);
232 }
233
/**
 * BGNLOOP: save the enclosing loop's state, then open a new loop.
 *
 * The break mask must survive across loop iterations, so it is spilled
 * to an alloca (break_var): stored before the loop header and reloaded
 * at the top of every iteration.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* outermost loop: loop state must still be in its initial condition */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* push the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* spill the break mask so it persists across the loop back-edge */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* open the loop header block and branch into it */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload the break mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
262
263 static void lp_exec_break(struct lp_exec_mask *mask)
264 {
265 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
266 mask->exec_mask,
267 "break");
268
269 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
270 mask->break_mask,
271 exec_mask, "break_full");
272
273 lp_exec_mask_update(mask);
274 }
275
276 static void lp_exec_continue(struct lp_exec_mask *mask)
277 {
278 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
279 mask->exec_mask,
280 "");
281
282 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
283 mask->cont_mask,
284 exec_mask, "");
285
286 lp_exec_mask_update(mask);
287 }
288
289
/**
 * ENDLOOP: emit the loop back-edge (taken while any channel is still
 * active) and restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* one wide integer spanning the whole exec-mask vector, for a cheap
    * "any channel still active?" test */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. keep looping while any channel lives */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* back-edge to the loop header while any channel remains active */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the enclosing loop state saved by lp_exec_bgnloop() */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
335
336 /* stores val into an address pointed to by dst.
337 * mask->exec_mask is used to figure out which bits of val
338 * should be stored into the address
339 * (0 means don't store this bit, 1 means do store).
340 */
341 static void lp_exec_mask_store(struct lp_exec_mask *mask,
342 LLVMValueRef pred,
343 LLVMValueRef val,
344 LLVMValueRef dst)
345 {
346 /* Mix the predicate and execution mask */
347 if (mask->has_mask) {
348 if (pred) {
349 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
350 } else {
351 pred = mask->exec_mask;
352 }
353 }
354
355 if (pred) {
356 LLVMValueRef real_val, dst_val;
357
358 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
359 real_val = lp_build_select(mask->bld,
360 pred,
361 val, dst_val);
362
363 LLVMBuildStore(mask->bld->builder, real_val, dst);
364 } else
365 LLVMBuildStore(mask->bld->builder, val, dst);
366 }
367
368 static void lp_exec_mask_call(struct lp_exec_mask *mask,
369 int func,
370 int *pc)
371 {
372 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
373 mask->call_stack[mask->call_stack_size].pc = *pc;
374 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
375 mask->call_stack_size++;
376 *pc = func;
377 }
378
379 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
380 {
381 LLVMValueRef exec_mask;
382
383 if (mask->call_stack_size == 0) {
384 /* returning from main() */
385 *pc = -1;
386 return;
387 }
388 exec_mask = LLVMBuildNot(mask->bld->builder,
389 mask->exec_mask,
390 "ret");
391
392 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
393 mask->ret_mask,
394 exec_mask, "ret_full");
395
396 lp_exec_mask_update(mask);
397 }
398
/**
 * BGNSUB: intentionally a no-op — subroutines are entered via
 * lp_exec_mask_call(), which saves all relevant state.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
402
403 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
404 {
405 assert(mask->call_stack_size);
406 mask->call_stack_size--;
407 *pc = mask->call_stack[mask->call_stack_size].pc;
408 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
409 lp_exec_mask_update(mask);
410 }
411
412
413 /**
414 * Return pointer to a temporary register channel (src or dest).
415 * Note that indirect addressing cannot be handled here.
416 * \param index which temporary register
417 * \param chan which channel of the temp register.
418 */
419 static LLVMValueRef
420 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
421 unsigned index,
422 unsigned chan)
423 {
424 assert(chan < 4);
425 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
426 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
427 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
428 }
429 else {
430 return bld->temps[index][chan];
431 }
432 }
433
434
435 /**
436 * Gather vector.
437 * XXX the lp_build_gather() function should be capable of doing this
438 * with a little work.
439 */
440 static LLVMValueRef
441 build_gather(struct lp_build_tgsi_soa_context *bld,
442 LLVMValueRef base_ptr,
443 LLVMValueRef indexes)
444 {
445 LLVMValueRef res = bld->base.undef;
446 unsigned i;
447
448 /*
449 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
450 */
451 for (i = 0; i < bld->base.type.length; i++) {
452 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
453 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
454 indexes, ii, "");
455 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
456 &index, 1, "");
457 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
458
459 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
460 }
461
462 return res;
463 }
464
465
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, multiply by four and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
                     const struct tgsi_src_register *indirect_reg)
{
   /* always use X component of address register */
   const int x = indirect_reg->SwizzleX;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
   /* NOTE(review): this passes SwizzleX as the *channel* argument, so it
    * looks up the swizzle at channel SwizzleX rather than at channel X.
    * If the intent is really "always use the X component", the second
    * argument would normally be 0 — confirm before relying on this. */
   uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
   LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
   LLVMValueRef addr_vec;

   /* load the per-channel (float) value of the selected ADDR channel */
   addr_vec = LLVMBuildLoad(bld->base.builder,
                            bld->addr[indirect_reg->Index][swizzle],
                            "load addr reg");

   /* for indexing we want integers */
   addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
                              int_vec_type, "");

   /* addr_vec = addr_vec * 4 (four channels per TGSI register) */
   addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);

   return addr_vec;
}
496
497
/**
 * Register fetch.
 * Fetch one channel (chan_index, after applying the operand swizzle) of
 * source operand src_op of the given instruction as an SoA vector, then
 * apply the operand's absolute-value / negate modifiers.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const struct lp_type type = bld->base.type;
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr_vec = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      assert(bld->indirect_files);
      /* per-channel offsets from the ADDR register, already scaled by 4 */
      addr_vec = get_indirect_offsets(bld, &reg->Indirect);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* index into the const buffer */

         assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec = index_vec + addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         /* constants are scalar: load one float and replicate it */
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef vec_len =
            lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec += addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* index_vec *= vector_length (temps are stored as full vectors) */
         index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);

         /* cast temps_array pointer to float* for scalar gathering */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* Apply the operand's sign modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      /* use the floating-point negate for float types */
      if (type.floating)
         res = LLVMBuildFNeg( bld->base.builder, res, "" );
      else
         res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
630
631
632 /**
633 * Register fetch with derivatives.
634 */
635 static void
636 emit_fetch_deriv(
637 struct lp_build_tgsi_soa_context *bld,
638 const struct tgsi_full_instruction *inst,
639 unsigned index,
640 const unsigned chan_index,
641 LLVMValueRef *res,
642 LLVMValueRef *ddx,
643 LLVMValueRef *ddy)
644 {
645 LLVMValueRef src;
646
647 src = emit_fetch(bld, inst, index, chan_index);
648
649 if(res)
650 *res = src;
651
652 /* TODO: use interpolation coeffs for inputs */
653
654 if(ddx)
655 *ddx = lp_build_ddx(&bld->base, src);
656
657 if(ddy)
658 *ddy = lp_build_ddy(&bld->base, src);
659 }
660
661
/**
 * Predicate.
 * Fill pred[NUM_CHANNELS] with per-channel predicate masks for this
 * instruction, or all NULLs if the instruction is not predicated.
 * The predicate register (a float vector) is turned into an integer
 * mask via a != 0 comparison, optionally negated, honoring the
 * predicate swizzle.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* unpredicated: NULL means "store wherever the exec mask allows" */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
727
728
729 /**
730 * Register store.
731 */
732 static void
733 emit_store(
734 struct lp_build_tgsi_soa_context *bld,
735 const struct tgsi_full_instruction *inst,
736 unsigned index,
737 unsigned chan_index,
738 LLVMValueRef pred,
739 LLVMValueRef value)
740 {
741 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
742 LLVMValueRef addr = NULL;
743
744 switch( inst->Instruction.Saturate ) {
745 case TGSI_SAT_NONE:
746 break;
747
748 case TGSI_SAT_ZERO_ONE:
749 value = lp_build_max(&bld->base, value, bld->base.zero);
750 value = lp_build_min(&bld->base, value, bld->base.one);
751 break;
752
753 case TGSI_SAT_MINUS_PLUS_ONE:
754 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
755 value = lp_build_min(&bld->base, value, bld->base.one);
756 break;
757
758 default:
759 assert(0);
760 }
761
762 if (reg->Register.Indirect) {
763 /* XXX use get_indirect_offsets() here eventually */
764 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
765 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
766
767 assert(bld->indirect_files);
768
769 addr = LLVMBuildLoad(bld->base.builder,
770 bld->addr[reg->Indirect.Index][swizzle],
771 "");
772 /* for indexing we want integers */
773 addr = LLVMBuildFPToSI(bld->base.builder, addr,
774 int_vec_type, "");
775 addr = LLVMBuildExtractElement(bld->base.builder,
776 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
777 "");
778 addr = LLVMBuildMul(bld->base.builder,
779 addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
780 "");
781 }
782
783 switch( reg->Register.File ) {
784 case TGSI_FILE_OUTPUT:
785 lp_exec_mask_store(&bld->exec_mask, pred, value,
786 bld->outputs[reg->Register.Index][chan_index]);
787 break;
788
789 case TGSI_FILE_TEMPORARY:
790 if (reg->Register.Indirect) {
791 /* XXX not done yet */
792 debug_printf("WARNING: LLVM scatter store of temp regs"
793 " not implemented\n");
794 }
795 else {
796 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
797 chan_index);
798 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
799 }
800 break;
801
802 case TGSI_FILE_ADDRESS:
803 lp_exec_mask_store(&bld->exec_mask, pred, value,
804 bld->addr[reg->Indirect.Index][chan_index]);
805 break;
806
807 case TGSI_FILE_PREDICATE:
808 lp_exec_mask_store(&bld->exec_mask, pred, value,
809 bld->preds[index][chan_index]);
810 break;
811
812 default:
813 assert( 0 );
814 }
815 }
816
817
818 /**
819 * High-level instruction translators.
820 */
821
/** Texture-sampling variants: how LOD/projection/derivatives are supplied. */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* divide coords by the W component */
   TEX_MODIFIER_LOD_BIAS,       /* LOD bias taken from src0.w */
   TEX_MODIFIER_EXPLICIT_LOD,   /* explicit LOD taken from src0.w */
   TEX_MODIFIER_EXPLICIT_DERIV  /* derivatives supplied in src1/src2 */
};
829
/**
 * Emit code to sample a texture.
 * \param modifier  how LOD / projection / derivatives are supplied
 * \param texel     receives the four resulting texel channels
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler code generator: return undefs rather than crashing */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* coordinate count required by the texture target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias and explicit LOD both come from src0.w */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projected sampling: pre-compute 1/w */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      /* derivatives supplied in src1/src2; sampler unit in src3 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* approximate derivatives from neighboring pixels in the quad */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
926
927
928 /**
929 * Kill fragment if any of the src register values are negative.
930 */
931 static void
932 emit_kil(
933 struct lp_build_tgsi_soa_context *bld,
934 const struct tgsi_full_instruction *inst )
935 {
936 const struct tgsi_full_src_register *reg = &inst->Src[0];
937 LLVMValueRef terms[NUM_CHANNELS];
938 LLVMValueRef mask;
939 unsigned chan_index;
940
941 memset(&terms, 0, sizeof terms);
942
943 FOR_EACH_CHANNEL( chan_index ) {
944 unsigned swizzle;
945
946 /* Unswizzle channel */
947 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
948
949 /* Check if the component has not been already tested. */
950 assert(swizzle < NUM_CHANNELS);
951 if( !terms[swizzle] )
952 /* TODO: change the comparison operator instead of setting the sign */
953 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
954 }
955
956 mask = NULL;
957 FOR_EACH_CHANNEL( chan_index ) {
958 if(terms[chan_index]) {
959 LLVMValueRef chan_mask;
960
961 /*
962 * If term < 0 then mask = 0 else mask = ~0.
963 */
964 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
965
966 if(mask)
967 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
968 else
969 mask = chan_mask;
970 }
971 }
972
973 if(mask)
974 lp_build_mask_update(bld->mask, mask);
975 }
976
977
978 /**
979 * Predicated fragment kill.
980 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
981 * The only predication is the execution mask which will apply if
982 * we're inside a loop or conditional.
983 */
984 static void
985 emit_kilp(struct lp_build_tgsi_soa_context *bld,
986 const struct tgsi_full_instruction *inst)
987 {
988 LLVMValueRef mask;
989
990 /* For those channels which are "alive", disable fragment shader
991 * execution.
992 */
993 if (bld->exec_mask.has_mask) {
994 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
995 }
996 else {
997 mask = bld->base.zero;
998 }
999
1000 lp_build_mask_update(bld->mask, mask);
1001 }
1002
/**
 * Allocate storage (allocas) for the registers covered by one TGSI
 * declaration: temporaries, outputs, address and predicate registers.
 * Inputs, constants and immediates need no storage here.
 */
static void
emit_declaration(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_declaration *decl)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
            /* Indirectly-addressed temps live in one flat array with
             * 4 vectors per register.
             * NOTE(review): this re-allocates temps_array on every loop
             * iteration and sizes it from this declaration's 'last'
             * only — appears to assume a single TEMPORARY declaration
             * starting at register 0; confirm against callers. */
            LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                                   last*4 + 4, 0);
            bld->temps_array = lp_build_array_alloca(bld->base.builder,
                                                     vec_type, array_size, "");
         } else {
            /* one alloca per register channel */
            for (i = 0; i < NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
                                                    vec_type, "");
         }
         break;

      case TGSI_FILE_OUTPUT:
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
                                                   vec_type, "");
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
                                                vec_type, "");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
                                                 vec_type, "");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
1056
1057
1058 /**
1059 * Emit LLVM for one TGSI instruction.
1060 * \param return TRUE for success, FALSE otherwise
1061 */
1062 static boolean
1063 emit_instruction(
1064 struct lp_build_tgsi_soa_context *bld,
1065 const struct tgsi_full_instruction *inst,
1066 const struct tgsi_opcode_info *info,
1067 int *pc)
1068 {
1069 unsigned chan_index;
1070 LLVMValueRef src0, src1, src2;
1071 LLVMValueRef tmp0, tmp1, tmp2;
1072 LLVMValueRef tmp3 = NULL;
1073 LLVMValueRef tmp4 = NULL;
1074 LLVMValueRef tmp5 = NULL;
1075 LLVMValueRef tmp6 = NULL;
1076 LLVMValueRef tmp7 = NULL;
1077 LLVMValueRef res;
1078 LLVMValueRef dst0[NUM_CHANNELS];
1079
1080 /*
1081 * Stores and write masks are handled in a general fashion after the long
1082 * instruction opcode switch statement.
1083 *
1084 * Although not stricitly necessary, we avoid generating instructions for
1085 * channels which won't be stored, in cases where's that easy. For some
1086 * complex instructions, like texture sampling, it is more convenient to
1087 * assume a full writemask and then let LLVM optimization passes eliminate
1088 * redundant code.
1089 */
1090
1091 (*pc)++;
1092
1093 assert(info->num_dst <= 1);
1094 if (info->num_dst) {
1095 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1096 dst0[chan_index] = bld->base.undef;
1097 }
1098 }
1099
1100 switch (inst->Instruction.Opcode) {
1101 case TGSI_OPCODE_ARL:
1102 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1103 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1104 tmp0 = lp_build_floor(&bld->base, tmp0);
1105 dst0[chan_index] = tmp0;
1106 }
1107 break;
1108
1109 case TGSI_OPCODE_MOV:
1110 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1111 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1112 }
1113 break;
1114
1115 case TGSI_OPCODE_LIT:
1116 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1117 dst0[CHAN_X] = bld->base.one;
1118 }
1119 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1120 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1121 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1122 }
1123 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1124 /* XMM[1] = SrcReg[0].yyyy */
1125 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1126 /* XMM[1] = max(XMM[1], 0) */
1127 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1128 /* XMM[2] = SrcReg[0].wwww */
1129 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1130 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1131 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1132 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1133 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1134 }
1135 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1136 dst0[CHAN_W] = bld->base.one;
1137 }
1138 break;
1139
1140 case TGSI_OPCODE_RCP:
1141 /* TGSI_OPCODE_RECIP */
1142 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1143 res = lp_build_rcp(&bld->base, src0);
1144 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1145 dst0[chan_index] = res;
1146 }
1147 break;
1148
1149 case TGSI_OPCODE_RSQ:
1150 /* TGSI_OPCODE_RECIPSQRT */
1151 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1152 src0 = lp_build_abs(&bld->base, src0);
1153 res = lp_build_rsqrt(&bld->base, src0);
1154 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1155 dst0[chan_index] = res;
1156 }
1157 break;
1158
1159 case TGSI_OPCODE_EXP:
1160 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1161 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1162 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1163 LLVMValueRef *p_exp2_int_part = NULL;
1164 LLVMValueRef *p_frac_part = NULL;
1165 LLVMValueRef *p_exp2 = NULL;
1166
1167 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1168
1169 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1170 p_exp2_int_part = &tmp0;
1171 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1172 p_frac_part = &tmp1;
1173 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1174 p_exp2 = &tmp2;
1175
1176 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1177
1178 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1179 dst0[CHAN_X] = tmp0;
1180 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1181 dst0[CHAN_Y] = tmp1;
1182 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1183 dst0[CHAN_Z] = tmp2;
1184 }
1185 /* dst.w = 1.0 */
1186 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1187 dst0[CHAN_W] = bld->base.one;
1188 }
1189 break;
1190
1191 case TGSI_OPCODE_LOG:
1192 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1193 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1194 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1195 LLVMValueRef *p_floor_log2 = NULL;
1196 LLVMValueRef *p_exp = NULL;
1197 LLVMValueRef *p_log2 = NULL;
1198
1199 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1200 src0 = lp_build_abs( &bld->base, src0 );
1201
1202 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1203 p_floor_log2 = &tmp0;
1204 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1205 p_exp = &tmp1;
1206 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1207 p_log2 = &tmp2;
1208
1209 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1210
1211 /* dst.x = floor(lg2(abs(src.x))) */
1212 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1213 dst0[CHAN_X] = tmp0;
1214 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1215 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1216 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1217 }
1218 /* dst.z = lg2(abs(src.x)) */
1219 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1220 dst0[CHAN_Z] = tmp2;
1221 }
1222 /* dst.w = 1.0 */
1223 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1224 dst0[CHAN_W] = bld->base.one;
1225 }
1226 break;
1227
1228 case TGSI_OPCODE_MUL:
1229 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1230 src0 = emit_fetch( bld, inst, 0, chan_index );
1231 src1 = emit_fetch( bld, inst, 1, chan_index );
1232 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1233 }
1234 break;
1235
1236 case TGSI_OPCODE_ADD:
1237 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1238 src0 = emit_fetch( bld, inst, 0, chan_index );
1239 src1 = emit_fetch( bld, inst, 1, chan_index );
1240 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1241 }
1242 break;
1243
1244 case TGSI_OPCODE_DP3:
1245 /* TGSI_OPCODE_DOT3 */
1246 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1247 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1248 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1249 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1250 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1251 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1252 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1253 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1254 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1255 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1256 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1257 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1258 dst0[chan_index] = tmp0;
1259 }
1260 break;
1261
1262 case TGSI_OPCODE_DP4:
1263 /* TGSI_OPCODE_DOT4 */
1264 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1265 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1266 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1267 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1268 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1269 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1270 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1271 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1272 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1273 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1274 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1275 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1276 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1277 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1278 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1279 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1280 dst0[chan_index] = tmp0;
1281 }
1282 break;
1283
1284 case TGSI_OPCODE_DST:
1285 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1286 dst0[CHAN_X] = bld->base.one;
1287 }
1288 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1289 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1290 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1291 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1292 }
1293 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1294 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1295 }
1296 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1297 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1298 }
1299 break;
1300
1301 case TGSI_OPCODE_MIN:
1302 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1303 src0 = emit_fetch( bld, inst, 0, chan_index );
1304 src1 = emit_fetch( bld, inst, 1, chan_index );
1305 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1306 }
1307 break;
1308
1309 case TGSI_OPCODE_MAX:
1310 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1311 src0 = emit_fetch( bld, inst, 0, chan_index );
1312 src1 = emit_fetch( bld, inst, 1, chan_index );
1313 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1314 }
1315 break;
1316
1317 case TGSI_OPCODE_SLT:
1318 /* TGSI_OPCODE_SETLT */
1319 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1320 src0 = emit_fetch( bld, inst, 0, chan_index );
1321 src1 = emit_fetch( bld, inst, 1, chan_index );
1322 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1323 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1324 }
1325 break;
1326
1327 case TGSI_OPCODE_SGE:
1328 /* TGSI_OPCODE_SETGE */
1329 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1330 src0 = emit_fetch( bld, inst, 0, chan_index );
1331 src1 = emit_fetch( bld, inst, 1, chan_index );
1332 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1333 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1334 }
1335 break;
1336
1337 case TGSI_OPCODE_MAD:
1338 /* TGSI_OPCODE_MADD */
1339 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1340 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1341 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1342 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1343 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1344 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1345 dst0[chan_index] = tmp0;
1346 }
1347 break;
1348
1349 case TGSI_OPCODE_SUB:
1350 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1351 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1352 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1353 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1354 }
1355 break;
1356
1357 case TGSI_OPCODE_LRP:
1358 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1359 src0 = emit_fetch( bld, inst, 0, chan_index );
1360 src1 = emit_fetch( bld, inst, 1, chan_index );
1361 src2 = emit_fetch( bld, inst, 2, chan_index );
1362 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1363 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1364 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1365 }
1366 break;
1367
1368 case TGSI_OPCODE_CND:
1369 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1370 src0 = emit_fetch( bld, inst, 0, chan_index );
1371 src1 = emit_fetch( bld, inst, 1, chan_index );
1372 src2 = emit_fetch( bld, inst, 2, chan_index );
1373 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1374 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1375 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1376 }
1377 break;
1378
1379 case TGSI_OPCODE_DP2A:
1380 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1381 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1382 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1383 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1384 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1385 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1386 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1387 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1388 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1389 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1390 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1391 }
1392 break;
1393
1394 case TGSI_OPCODE_FRC:
1395 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1396 src0 = emit_fetch( bld, inst, 0, chan_index );
1397 tmp0 = lp_build_floor(&bld->base, src0);
1398 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1399 dst0[chan_index] = tmp0;
1400 }
1401 break;
1402
1403 case TGSI_OPCODE_CLAMP:
1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1405 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1406 src1 = emit_fetch( bld, inst, 1, chan_index );
1407 src2 = emit_fetch( bld, inst, 2, chan_index );
1408 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1409 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1410 dst0[chan_index] = tmp0;
1411 }
1412 break;
1413
1414 case TGSI_OPCODE_FLR:
1415 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1416 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1417 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1418 }
1419 break;
1420
1421 case TGSI_OPCODE_ROUND:
1422 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1423 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1424 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1425 }
1426 break;
1427
1428 case TGSI_OPCODE_EX2: {
1429 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1430 tmp0 = lp_build_exp2( &bld->base, tmp0);
1431 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1432 dst0[chan_index] = tmp0;
1433 }
1434 break;
1435 }
1436
1437 case TGSI_OPCODE_LG2:
1438 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1439 tmp0 = lp_build_log2( &bld->base, tmp0);
1440 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1441 dst0[chan_index] = tmp0;
1442 }
1443 break;
1444
1445 case TGSI_OPCODE_POW:
1446 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1447 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1448 res = lp_build_pow( &bld->base, src0, src1 );
1449 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1450 dst0[chan_index] = res;
1451 }
1452 break;
1453
1454 case TGSI_OPCODE_XPD:
1455 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1456 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1457 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1458 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1459 }
1460 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1461 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1462 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1463 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1464 }
1465 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1466 tmp2 = tmp0;
1467 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1468 tmp5 = tmp3;
1469 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1470 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1471 dst0[CHAN_X] = tmp2;
1472 }
1473 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1474 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1475 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1476 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1477 }
1478 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1479 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1480 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1481 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1482 dst0[CHAN_Y] = tmp3;
1483 }
1484 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1485 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1486 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1487 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1488 dst0[CHAN_Z] = tmp5;
1489 }
1490 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1491 dst0[CHAN_W] = bld->base.one;
1492 }
1493 break;
1494
1495 case TGSI_OPCODE_ABS:
1496 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1497 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1498 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1499 }
1500 break;
1501
1502 case TGSI_OPCODE_RCC:
1503 /* deprecated? */
1504 assert(0);
1505 return FALSE;
1506
1507 case TGSI_OPCODE_DPH:
1508 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1509 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1510 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1511 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1512 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1513 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1514 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1515 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1516 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1517 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1518 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1519 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1520 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1521 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1522 dst0[chan_index] = tmp0;
1523 }
1524 break;
1525
1526 case TGSI_OPCODE_COS:
1527 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1528 tmp0 = lp_build_cos( &bld->base, tmp0 );
1529 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1530 dst0[chan_index] = tmp0;
1531 }
1532 break;
1533
1534 case TGSI_OPCODE_DDX:
1535 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1536 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1537 }
1538 break;
1539
1540 case TGSI_OPCODE_DDY:
1541 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1542 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1543 }
1544 break;
1545
1546 case TGSI_OPCODE_KILP:
1547 /* predicated kill */
1548 emit_kilp( bld, inst );
1549 break;
1550
1551 case TGSI_OPCODE_KIL:
1552 /* conditional kill */
1553 emit_kil( bld, inst );
1554 break;
1555
1556 case TGSI_OPCODE_PK2H:
1557 return FALSE;
1558 break;
1559
1560 case TGSI_OPCODE_PK2US:
1561 return FALSE;
1562 break;
1563
1564 case TGSI_OPCODE_PK4B:
1565 return FALSE;
1566 break;
1567
1568 case TGSI_OPCODE_PK4UB:
1569 return FALSE;
1570 break;
1571
1572 case TGSI_OPCODE_RFL:
1573 return FALSE;
1574 break;
1575
1576 case TGSI_OPCODE_SEQ:
1577 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1578 src0 = emit_fetch( bld, inst, 0, chan_index );
1579 src1 = emit_fetch( bld, inst, 1, chan_index );
1580 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1581 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1582 }
1583 break;
1584
1585 case TGSI_OPCODE_SFL:
1586 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1587 dst0[chan_index] = bld->base.zero;
1588 }
1589 break;
1590
1591 case TGSI_OPCODE_SGT:
1592 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1593 src0 = emit_fetch( bld, inst, 0, chan_index );
1594 src1 = emit_fetch( bld, inst, 1, chan_index );
1595 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1596 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1597 }
1598 break;
1599
1600 case TGSI_OPCODE_SIN:
1601 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1602 tmp0 = lp_build_sin( &bld->base, tmp0 );
1603 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1604 dst0[chan_index] = tmp0;
1605 }
1606 break;
1607
1608 case TGSI_OPCODE_SLE:
1609 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1610 src0 = emit_fetch( bld, inst, 0, chan_index );
1611 src1 = emit_fetch( bld, inst, 1, chan_index );
1612 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1613 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1614 }
1615 break;
1616
1617 case TGSI_OPCODE_SNE:
1618 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1619 src0 = emit_fetch( bld, inst, 0, chan_index );
1620 src1 = emit_fetch( bld, inst, 1, chan_index );
1621 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1622 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1623 }
1624 break;
1625
1626 case TGSI_OPCODE_STR:
1627 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1628 dst0[chan_index] = bld->base.one;
1629 }
1630 break;
1631
1632 case TGSI_OPCODE_TEX:
1633 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1634 break;
1635
1636 case TGSI_OPCODE_TXD:
1637 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1638 break;
1639
1640 case TGSI_OPCODE_UP2H:
1641 /* deprecated */
1642 assert (0);
1643 return FALSE;
1644 break;
1645
1646 case TGSI_OPCODE_UP2US:
1647 /* deprecated */
1648 assert(0);
1649 return FALSE;
1650 break;
1651
1652 case TGSI_OPCODE_UP4B:
1653 /* deprecated */
1654 assert(0);
1655 return FALSE;
1656 break;
1657
1658 case TGSI_OPCODE_UP4UB:
1659 /* deprecated */
1660 assert(0);
1661 return FALSE;
1662 break;
1663
1664 case TGSI_OPCODE_X2D:
1665 /* deprecated? */
1666 assert(0);
1667 return FALSE;
1668 break;
1669
1670 case TGSI_OPCODE_ARA:
1671 /* deprecated */
1672 assert(0);
1673 return FALSE;
1674 break;
1675
1676 case TGSI_OPCODE_ARR:
1677 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1678 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1679 tmp0 = lp_build_round(&bld->base, tmp0);
1680 dst0[chan_index] = tmp0;
1681 }
1682 break;
1683
1684 case TGSI_OPCODE_BRA:
1685 /* deprecated */
1686 assert(0);
1687 return FALSE;
1688 break;
1689
1690 case TGSI_OPCODE_CAL:
1691 lp_exec_mask_call(&bld->exec_mask,
1692 inst->Label.Label,
1693 pc);
1694
1695 break;
1696
1697 case TGSI_OPCODE_RET:
1698 lp_exec_mask_ret(&bld->exec_mask, pc);
1699 break;
1700
1701 case TGSI_OPCODE_END:
1702 *pc = -1;
1703 break;
1704
1705 case TGSI_OPCODE_SSG:
1706 /* TGSI_OPCODE_SGN */
1707 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1708 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1709 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1710 }
1711 break;
1712
1713 case TGSI_OPCODE_CMP:
1714 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1715 src0 = emit_fetch( bld, inst, 0, chan_index );
1716 src1 = emit_fetch( bld, inst, 1, chan_index );
1717 src2 = emit_fetch( bld, inst, 2, chan_index );
1718 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1719 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1720 }
1721 break;
1722
1723 case TGSI_OPCODE_SCS:
1724 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1725 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1726 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1727 }
1728 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1729 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1730 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1731 }
1732 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1733 dst0[CHAN_Z] = bld->base.zero;
1734 }
1735 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1736 dst0[CHAN_W] = bld->base.one;
1737 }
1738 break;
1739
1740 case TGSI_OPCODE_TXB:
1741 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1742 break;
1743
1744 case TGSI_OPCODE_NRM:
1745 /* fall-through */
1746 case TGSI_OPCODE_NRM4:
1747 /* 3 or 4-component normalization */
1748 {
1749 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1750
1751 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1752 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1753 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1754 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1755
1756 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1757
1758 /* xmm4 = src.x */
1759 /* xmm0 = src.x * src.x */
1760 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1761 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1762 tmp4 = tmp0;
1763 }
1764 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1765
1766 /* xmm5 = src.y */
1767 /* xmm0 = xmm0 + src.y * src.y */
1768 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1769 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1770 tmp5 = tmp1;
1771 }
1772 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1773 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1774
1775 /* xmm6 = src.z */
1776 /* xmm0 = xmm0 + src.z * src.z */
1777 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1778 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1779 tmp6 = tmp1;
1780 }
1781 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1782 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1783
1784 if (dims == 4) {
1785 /* xmm7 = src.w */
1786 /* xmm0 = xmm0 + src.w * src.w */
1787 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1788 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1789 tmp7 = tmp1;
1790 }
1791 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1792 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1793 }
1794
1795 /* xmm1 = 1 / sqrt(xmm0) */
1796 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1797
1798 /* dst.x = xmm1 * src.x */
1799 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1800 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1801 }
1802
1803 /* dst.y = xmm1 * src.y */
1804 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1805 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1806 }
1807
1808 /* dst.z = xmm1 * src.z */
1809 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1810 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1811 }
1812
1813 /* dst.w = xmm1 * src.w */
1814 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1815 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1816 }
1817 }
1818
1819 /* dst.w = 1.0 */
1820 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1821 dst0[CHAN_W] = bld->base.one;
1822 }
1823 }
1824 break;
1825
1826 case TGSI_OPCODE_DIV:
1827 /* deprecated */
1828 assert( 0 );
1829 return FALSE;
1830 break;
1831
1832 case TGSI_OPCODE_DP2:
1833 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1834 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1835 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1836 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1837 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1838 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1839 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1840 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1841 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1842 }
1843 break;
1844
1845 case TGSI_OPCODE_TXL:
1846 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1847 break;
1848
1849 case TGSI_OPCODE_TXP:
1850 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1851 break;
1852
1853 case TGSI_OPCODE_BRK:
1854 lp_exec_break(&bld->exec_mask);
1855 break;
1856
1857 case TGSI_OPCODE_IF:
1858 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1859 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1860 tmp0, bld->base.zero);
1861 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1862 break;
1863
1864 case TGSI_OPCODE_BGNLOOP:
1865 lp_exec_bgnloop(&bld->exec_mask);
1866 break;
1867
1868 case TGSI_OPCODE_BGNSUB:
1869 lp_exec_mask_bgnsub(&bld->exec_mask);
1870 break;
1871
1872 case TGSI_OPCODE_ELSE:
1873 lp_exec_mask_cond_invert(&bld->exec_mask);
1874 break;
1875
1876 case TGSI_OPCODE_ENDIF:
1877 lp_exec_mask_cond_pop(&bld->exec_mask);
1878 break;
1879
1880 case TGSI_OPCODE_ENDLOOP:
1881 lp_exec_endloop(&bld->exec_mask);
1882 break;
1883
1884 case TGSI_OPCODE_ENDSUB:
1885 lp_exec_mask_endsub(&bld->exec_mask, pc);
1886 break;
1887
1888 case TGSI_OPCODE_PUSHA:
1889 /* deprecated? */
1890 assert(0);
1891 return FALSE;
1892 break;
1893
1894 case TGSI_OPCODE_POPA:
1895 /* deprecated? */
1896 assert(0);
1897 return FALSE;
1898 break;
1899
1900 case TGSI_OPCODE_CEIL:
1901 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1902 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1903 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1904 }
1905 break;
1906
1907 case TGSI_OPCODE_I2F:
1908 /* deprecated? */
1909 assert(0);
1910 return FALSE;
1911 break;
1912
1913 case TGSI_OPCODE_NOT:
1914 /* deprecated? */
1915 assert(0);
1916 return FALSE;
1917 break;
1918
1919 case TGSI_OPCODE_TRUNC:
1920 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1921 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1922 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1923 }
1924 break;
1925
1926 case TGSI_OPCODE_SHL:
1927 /* deprecated? */
1928 assert(0);
1929 return FALSE;
1930 break;
1931
1932 case TGSI_OPCODE_ISHR:
1933 /* deprecated? */
1934 assert(0);
1935 return FALSE;
1936 break;
1937
1938 case TGSI_OPCODE_AND:
1939 /* deprecated? */
1940 assert(0);
1941 return FALSE;
1942 break;
1943
1944 case TGSI_OPCODE_OR:
1945 /* deprecated? */
1946 assert(0);
1947 return FALSE;
1948 break;
1949
1950 case TGSI_OPCODE_MOD:
1951 /* deprecated? */
1952 assert(0);
1953 return FALSE;
1954 break;
1955
1956 case TGSI_OPCODE_XOR:
1957 /* deprecated? */
1958 assert(0);
1959 return FALSE;
1960 break;
1961
1962 case TGSI_OPCODE_SAD:
1963 /* deprecated? */
1964 assert(0);
1965 return FALSE;
1966 break;
1967
1968 case TGSI_OPCODE_TXF:
1969 /* deprecated? */
1970 assert(0);
1971 return FALSE;
1972 break;
1973
1974 case TGSI_OPCODE_TXQ:
1975 /* deprecated? */
1976 assert(0);
1977 return FALSE;
1978 break;
1979
1980 case TGSI_OPCODE_CONT:
1981 lp_exec_continue(&bld->exec_mask);
1982 break;
1983
1984 case TGSI_OPCODE_EMIT:
1985 return FALSE;
1986 break;
1987
1988 case TGSI_OPCODE_ENDPRIM:
1989 return FALSE;
1990 break;
1991
1992 case TGSI_OPCODE_NOP:
1993 break;
1994
1995 default:
1996 return FALSE;
1997 }
1998
1999 if(info->num_dst) {
2000 LLVMValueRef pred[NUM_CHANNELS];
2001
2002 emit_fetch_predicate( bld, inst, pred );
2003
2004 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2005 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2006 }
2007 }
2008
2009 return TRUE;
2010 }
2011
2012
2013 void
2014 lp_build_tgsi_soa(LLVMBuilderRef builder,
2015 const struct tgsi_token *tokens,
2016 struct lp_type type,
2017 struct lp_build_mask_context *mask,
2018 LLVMValueRef consts_ptr,
2019 const LLVMValueRef *pos,
2020 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2021 LLVMValueRef (*outputs)[NUM_CHANNELS],
2022 struct lp_build_sampler_soa *sampler,
2023 const struct tgsi_shader_info *info)
2024 {
2025 struct lp_build_tgsi_soa_context bld;
2026 struct tgsi_parse_context parse;
2027 uint num_immediates = 0;
2028 uint num_instructions = 0;
2029 unsigned i;
2030 int pc = 0;
2031
2032 /* Setup build context */
2033 memset(&bld, 0, sizeof bld);
2034 lp_build_context_init(&bld.base, builder, type);
2035 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
2036 bld.mask = mask;
2037 bld.pos = pos;
2038 bld.inputs = inputs;
2039 bld.outputs = outputs;
2040 bld.consts_ptr = consts_ptr;
2041 bld.sampler = sampler;
2042 bld.indirect_files = info->indirect_files;
2043 bld.instructions = (struct tgsi_full_instruction *)
2044 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2045 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2046
2047 if (!bld.instructions) {
2048 return;
2049 }
2050
2051 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2052
2053 tgsi_parse_init( &parse, tokens );
2054
2055 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2056 tgsi_parse_token( &parse );
2057
2058 switch( parse.FullToken.Token.Type ) {
2059 case TGSI_TOKEN_TYPE_DECLARATION:
2060 /* Inputs already interpolated */
2061 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2062 break;
2063
2064 case TGSI_TOKEN_TYPE_INSTRUCTION:
2065 {
2066 /* save expanded instruction */
2067 if (num_instructions == bld.max_instructions) {
2068 bld.instructions = REALLOC(bld.instructions,
2069 bld.max_instructions
2070 * sizeof(struct tgsi_full_instruction),
2071 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2072 * sizeof(struct tgsi_full_instruction));
2073 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2074 }
2075
2076 memcpy(bld.instructions + num_instructions,
2077 &parse.FullToken.FullInstruction,
2078 sizeof(bld.instructions[0]));
2079
2080 num_instructions++;
2081 }
2082
2083 break;
2084
2085 case TGSI_TOKEN_TYPE_IMMEDIATE:
2086 /* simply copy the immediate values into the next immediates[] slot */
2087 {
2088 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2089 assert(size <= 4);
2090 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2091 for( i = 0; i < size; ++i )
2092 bld.immediates[num_immediates][i] =
2093 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2094 for( i = size; i < 4; ++i )
2095 bld.immediates[num_immediates][i] = bld.base.undef;
2096 num_immediates++;
2097 }
2098 break;
2099
2100 case TGSI_TOKEN_TYPE_PROPERTY:
2101 break;
2102
2103 default:
2104 assert( 0 );
2105 }
2106 }
2107
2108 while (pc != -1) {
2109 struct tgsi_full_instruction *instr = bld.instructions + pc;
2110 const struct tgsi_opcode_info *opcode_info =
2111 tgsi_get_opcode_info(instr->Instruction.Opcode);
2112 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2113 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2114 opcode_info->mnemonic);
2115 }
2116
2117 if (0) {
2118 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2119 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2120 debug_printf("11111111111111111111111111111 \n");
2121 tgsi_dump(tokens, 0);
2122 lp_debug_dump_value(function);
2123 debug_printf("2222222222222222222222222222 \n");
2124 }
2125 tgsi_parse_free( &parse );
2126
2127 if (0) {
2128 LLVMModuleRef module = LLVMGetGlobalParent(
2129 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2130 LLVMDumpModule(module);
2131
2132 }
2133
2134 FREE( bld.instructions );
2135 }
2136