gallivm: Use proper index to lookup predicate register array.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
/* Channel indices within a four-component register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Loop header: run CHAN over every channel index, 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/* Non-zero when channel CHAN is set in the write mask of INST's first
 * destination register.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* "if" header guarding a statement on IS_DST0_CHANNEL_ENABLED(). */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop header: visit only the channels enabled in INST's first destination
 * write mask.  Expands to a "for" followed by an "if", so the statement that
 * follows is the body of the "if".
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* NOTE(review): presumably the initial capacity of the instruction array
 * kept in lp_build_tgsi_soa_context -- confirm at the allocation site.
 */
#define LP_MAX_INSTRUCTIONS 256
82
83
84 struct lp_exec_mask {
85 struct lp_build_context *bld;
86
87 boolean has_mask;
88
89 LLVMTypeRef int_vec_type;
90
91 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
92 int cond_stack_size;
93 LLVMValueRef cond_mask;
94
95 LLVMBasicBlockRef loop_block;
96 LLVMValueRef cont_mask;
97 LLVMValueRef break_mask;
98 LLVMValueRef break_var;
99 struct {
100 LLVMBasicBlockRef loop_block;
101 LLVMValueRef cont_mask;
102 LLVMValueRef break_mask;
103 LLVMValueRef break_var;
104 } loop_stack[LP_MAX_TGSI_NESTING];
105 int loop_stack_size;
106
107 LLVMValueRef ret_mask;
108 struct {
109 int pc;
110 LLVMValueRef ret_mask;
111 } call_stack[LP_MAX_TGSI_NESTING];
112 int call_stack_size;
113
114 LLVMValueRef exec_mask;
115 };
116
117 struct lp_build_tgsi_soa_context
118 {
119 struct lp_build_context base;
120
121 /* Builder for integer masks and indices */
122 struct lp_build_context int_bld;
123
124 LLVMValueRef consts_ptr;
125 const LLVMValueRef *pos;
126 const LLVMValueRef (*inputs)[NUM_CHANNELS];
127 LLVMValueRef (*outputs)[NUM_CHANNELS];
128
129 const struct lp_build_sampler_soa *sampler;
130
131 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
132 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
133 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
134 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
135
136 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
137 * set in the indirect_files field.
138 * The temps[] array above is unused then.
139 */
140 LLVMValueRef temps_array;
141
142 /** bitmask indicating which register files are accessed indirectly */
143 unsigned indirect_files;
144
145 struct lp_build_mask_context *mask;
146 struct lp_exec_mask exec_mask;
147
148 struct tgsi_full_instruction *instructions;
149 uint max_instructions;
150 };
151
152 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
153 {
154 mask->bld = bld;
155 mask->has_mask = FALSE;
156 mask->cond_stack_size = 0;
157 mask->loop_stack_size = 0;
158 mask->call_stack_size = 0;
159
160 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
161 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
162 LLVMConstAllOnes(mask->int_vec_type);
163 }
164
165 static void lp_exec_mask_update(struct lp_exec_mask *mask)
166 {
167 if (mask->loop_stack_size) {
168 /*for loops we need to update the entire mask at runtime */
169 LLVMValueRef tmp;
170 assert(mask->break_mask);
171 tmp = LLVMBuildAnd(mask->bld->builder,
172 mask->cont_mask,
173 mask->break_mask,
174 "maskcb");
175 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
176 mask->cond_mask,
177 tmp,
178 "maskfull");
179 } else
180 mask->exec_mask = mask->cond_mask;
181
182 if (mask->call_stack_size) {
183 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
184 mask->exec_mask,
185 mask->ret_mask,
186 "callmask");
187 }
188
189 mask->has_mask = (mask->cond_stack_size > 0 ||
190 mask->loop_stack_size > 0 ||
191 mask->call_stack_size > 0);
192 }
193
194 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
195 LLVMValueRef val)
196 {
197 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
198 if (mask->cond_stack_size == 0) {
199 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
200 }
201 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
202 assert(LLVMTypeOf(val) == mask->int_vec_type);
203 mask->cond_mask = val;
204
205 lp_exec_mask_update(mask);
206 }
207
208 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
209 {
210 LLVMValueRef prev_mask;
211 LLVMValueRef inv_mask;
212
213 assert(mask->cond_stack_size);
214 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
215 if (mask->cond_stack_size == 1) {
216 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
217 }
218
219 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
220
221 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
222 inv_mask,
223 prev_mask, "");
224 lp_exec_mask_update(mask);
225 }
226
227 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
228 {
229 assert(mask->cond_stack_size);
230 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
231 lp_exec_mask_update(mask);
232 }
233
234 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
235 {
236 if (mask->loop_stack_size == 0) {
237 assert(mask->loop_block == NULL);
238 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
239 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
240 assert(mask->break_var == NULL);
241 }
242
243 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
244
245 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
246 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
247 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
248 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
249 ++mask->loop_stack_size;
250
251 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
252 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
253
254 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
255 LLVMBuildBr(mask->bld->builder, mask->loop_block);
256 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
257
258 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
259
260 lp_exec_mask_update(mask);
261 }
262
263 static void lp_exec_break(struct lp_exec_mask *mask)
264 {
265 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
266 mask->exec_mask,
267 "break");
268
269 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
270 mask->break_mask,
271 exec_mask, "break_full");
272
273 lp_exec_mask_update(mask);
274 }
275
276 static void lp_exec_continue(struct lp_exec_mask *mask)
277 {
278 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
279 mask->exec_mask,
280 "");
281
282 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
283 mask->cont_mask,
284 exec_mask, "");
285
286 lp_exec_mask_update(mask);
287 }
288
289
290 static void lp_exec_endloop(struct lp_exec_mask *mask)
291 {
292 LLVMBasicBlockRef endloop;
293 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
294 mask->bld->type.length);
295 LLVMValueRef i1cond;
296
297 assert(mask->break_mask);
298
299 /*
300 * Restore the cont_mask, but don't pop
301 */
302 assert(mask->loop_stack_size);
303 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
304 lp_exec_mask_update(mask);
305
306 /*
307 * Unlike the continue mask, the break_mask must be preserved across loop
308 * iterations
309 */
310 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
311
312 /* i1cond = (mask == 0) */
313 i1cond = LLVMBuildICmp(
314 mask->bld->builder,
315 LLVMIntNE,
316 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
317 LLVMConstNull(reg_type), "");
318
319 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
320
321 LLVMBuildCondBr(mask->bld->builder,
322 i1cond, mask->loop_block, endloop);
323
324 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
325
326 assert(mask->loop_stack_size);
327 --mask->loop_stack_size;
328 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
329 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
330 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
331 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
332
333 lp_exec_mask_update(mask);
334 }
335
336 /* stores val into an address pointed to by dst.
337 * mask->exec_mask is used to figure out which bits of val
338 * should be stored into the address
339 * (0 means don't store this bit, 1 means do store).
340 */
341 static void lp_exec_mask_store(struct lp_exec_mask *mask,
342 LLVMValueRef pred,
343 LLVMValueRef val,
344 LLVMValueRef dst)
345 {
346 /* Mix the predicate and execution mask */
347 if (mask->has_mask) {
348 if (pred) {
349 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
350 } else {
351 pred = mask->exec_mask;
352 }
353 }
354
355 if (pred) {
356 LLVMValueRef real_val, dst_val;
357
358 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
359 real_val = lp_build_select(mask->bld,
360 pred,
361 val, dst_val);
362
363 LLVMBuildStore(mask->bld->builder, real_val, dst);
364 } else
365 LLVMBuildStore(mask->bld->builder, val, dst);
366 }
367
368 static void lp_exec_mask_call(struct lp_exec_mask *mask,
369 int func,
370 int *pc)
371 {
372 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
373 mask->call_stack[mask->call_stack_size].pc = *pc;
374 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
375 mask->call_stack_size++;
376 *pc = func;
377 }
378
379 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
380 {
381 LLVMValueRef exec_mask;
382
383 if (mask->call_stack_size == 0) {
384 /* returning from main() */
385 *pc = -1;
386 return;
387 }
388 exec_mask = LLVMBuildNot(mask->bld->builder,
389 mask->exec_mask,
390 "ret");
391
392 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
393 mask->ret_mask,
394 exec_mask, "ret_full");
395
396 lp_exec_mask_update(mask);
397 }
398
/**
 * BGNSUB: nothing to emit; the call frame is set up by lp_exec_mask_call().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
402
403 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
404 {
405 assert(mask->call_stack_size);
406 mask->call_stack_size--;
407 *pc = mask->call_stack[mask->call_stack_size].pc;
408 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
409 lp_exec_mask_update(mask);
410 }
411
412
413 /**
414 * Return pointer to a temporary register channel (src or dest).
415 * Note that indirect addressing cannot be handled here.
416 * \param index which temporary register
417 * \param chan which channel of the temp register.
418 */
419 static LLVMValueRef
420 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
421 unsigned index,
422 unsigned chan)
423 {
424 assert(chan < 4);
425 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
426 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
427 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
428 }
429 else {
430 return bld->temps[index][chan];
431 }
432 }
433
434
435 /**
436 * Gather vector.
437 * XXX the lp_build_gather() function should be capable of doing this
438 * with a little work.
439 */
440 static LLVMValueRef
441 build_gather(struct lp_build_tgsi_soa_context *bld,
442 LLVMValueRef base_ptr,
443 LLVMValueRef indexes)
444 {
445 LLVMValueRef res = bld->base.undef;
446 unsigned i;
447
448 /*
449 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
450 */
451 for (i = 0; i < bld->base.type.length; i++) {
452 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
453 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
454 indexes, ii, "");
455 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
456 &index, 1, "");
457 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
458
459 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
460 }
461
462 return res;
463 }
464
465
466 /**
467 * Read the current value of the ADDR register, convert the floats to
468 * ints, multiply by four and return the vector of offsets.
469 * The offsets will be used to index into the constant buffer or
470 * temporary register file.
471 */
472 static LLVMValueRef
473 get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
474 const struct tgsi_src_register *indirect_reg)
475 {
476 /* always use X component of address register */
477 const int x = indirect_reg->SwizzleX;
478 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
479 uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
480 LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
481 LLVMValueRef addr_vec;
482
483 addr_vec = LLVMBuildLoad(bld->base.builder,
484 bld->addr[indirect_reg->Index][swizzle],
485 "load addr reg");
486
487 /* for indexing we want integers */
488 addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
489 int_vec_type, "");
490
491 /* addr_vec = addr_vec * 4 */
492 addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);
493
494 return addr_vec;
495 }
496
497
498 /**
499 * Register fetch.
500 */
501 static LLVMValueRef
502 emit_fetch(
503 struct lp_build_tgsi_soa_context *bld,
504 const struct tgsi_full_instruction *inst,
505 unsigned src_op,
506 const unsigned chan_index )
507 {
508 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
509 const unsigned swizzle =
510 tgsi_util_get_full_src_register_swizzle(reg, chan_index);
511 LLVMValueRef res;
512 LLVMValueRef addr_vec = NULL;
513
514 if (swizzle > 3) {
515 assert(0 && "invalid swizzle in emit_fetch()");
516 return bld->base.undef;
517 }
518
519 if (reg->Register.Indirect) {
520 assert(bld->indirect_files);
521 addr_vec = get_indirect_offsets(bld, &reg->Indirect);
522 }
523
524 switch (reg->Register.File) {
525 case TGSI_FILE_CONSTANT:
526 if (reg->Register.Indirect) {
527 LLVMValueRef index_vec; /* index into the const buffer */
528
529 assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));
530
531 /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
532 index_vec = lp_build_const_int_vec(bld->int_bld.type,
533 reg->Register.Index * 4 + swizzle);
534
535 /* index_vec = index_vec + addr_vec */
536 index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
537
538 /* Gather values from the constant buffer */
539 res = build_gather(bld, bld->consts_ptr, index_vec);
540 }
541 else {
542 LLVMValueRef index; /* index into the const buffer */
543 LLVMValueRef scalar, scalar_ptr;
544
545 index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
546
547 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
548 &index, 1, "");
549 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
550
551 res = lp_build_broadcast_scalar(&bld->base, scalar);
552 }
553 break;
554
555 case TGSI_FILE_IMMEDIATE:
556 res = bld->immediates[reg->Register.Index][swizzle];
557 assert(res);
558 break;
559
560 case TGSI_FILE_INPUT:
561 res = bld->inputs[reg->Register.Index][swizzle];
562 assert(res);
563 break;
564
565 case TGSI_FILE_TEMPORARY:
566 if (reg->Register.Indirect) {
567 LLVMValueRef vec_len =
568 lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
569 LLVMValueRef index_vec; /* index into the const buffer */
570 LLVMValueRef temps_array;
571 LLVMTypeRef float4_ptr_type;
572
573 assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
574
575 /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
576 index_vec = lp_build_const_int_vec(bld->int_bld.type,
577 reg->Register.Index * 4 + swizzle);
578
579 /* index_vec += addr_vec */
580 index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
581
582 /* index_vec *= vector_length */
583 index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);
584
585 /* cast temps_array pointer to float* */
586 float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
587 temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
588 float4_ptr_type, "");
589
590 /* Gather values from the temporary register array */
591 res = build_gather(bld, temps_array, index_vec);
592 }
593 else {
594 LLVMValueRef temp_ptr;
595 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
596 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
597 if (!res)
598 return bld->base.undef;
599 }
600 break;
601
602 default:
603 assert(0 && "invalid src register in emit_fetch()");
604 return bld->base.undef;
605 }
606
607 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
608 case TGSI_UTIL_SIGN_CLEAR:
609 res = lp_build_abs( &bld->base, res );
610 break;
611
612 case TGSI_UTIL_SIGN_SET:
613 /* TODO: Use bitwese OR for floating point */
614 res = lp_build_abs( &bld->base, res );
615 /* fall through */
616 case TGSI_UTIL_SIGN_TOGGLE:
617 res = lp_build_negate( &bld->base, res );
618 break;
619
620 case TGSI_UTIL_SIGN_KEEP:
621 break;
622 }
623
624 return res;
625 }
626
627
628 /**
629 * Register fetch with derivatives.
630 */
631 static void
632 emit_fetch_deriv(
633 struct lp_build_tgsi_soa_context *bld,
634 const struct tgsi_full_instruction *inst,
635 unsigned index,
636 const unsigned chan_index,
637 LLVMValueRef *res,
638 LLVMValueRef *ddx,
639 LLVMValueRef *ddy)
640 {
641 LLVMValueRef src;
642
643 src = emit_fetch(bld, inst, index, chan_index);
644
645 if(res)
646 *res = src;
647
648 /* TODO: use interpolation coeffs for inputs */
649
650 if(ddx)
651 *ddx = lp_build_ddx(&bld->base, src);
652
653 if(ddy)
654 *ddy = lp_build_ddy(&bld->base, src);
655 }
656
657
658 /**
659 * Predicate.
660 */
661 static void
662 emit_fetch_predicate(
663 struct lp_build_tgsi_soa_context *bld,
664 const struct tgsi_full_instruction *inst,
665 LLVMValueRef *pred)
666 {
667 unsigned index;
668 unsigned char swizzles[4];
669 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
670 LLVMValueRef value;
671 unsigned chan;
672
673 if (!inst->Instruction.Predicate) {
674 FOR_EACH_CHANNEL( chan ) {
675 pred[chan] = NULL;
676 }
677 return;
678 }
679
680 swizzles[0] = inst->Predicate.SwizzleX;
681 swizzles[1] = inst->Predicate.SwizzleY;
682 swizzles[2] = inst->Predicate.SwizzleZ;
683 swizzles[3] = inst->Predicate.SwizzleW;
684
685 index = inst->Predicate.Index;
686 assert(index < LP_MAX_TGSI_PREDS);
687
688 FOR_EACH_CHANNEL( chan ) {
689 unsigned swizzle = swizzles[chan];
690
691 /*
692 * Only fetch the predicate register channels that are actually listed
693 * in the swizzles
694 */
695 if (!unswizzled[swizzle]) {
696 value = LLVMBuildLoad(bld->base.builder,
697 bld->preds[index][swizzle], "");
698
699 /*
700 * Convert the value to an integer mask.
701 *
702 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
703 * is needlessly causing two comparisons due to storing the intermediate
704 * result as float vector instead of an integer mask vector.
705 */
706 value = lp_build_compare(bld->base.builder,
707 bld->base.type,
708 PIPE_FUNC_NOTEQUAL,
709 value,
710 bld->base.zero);
711 if (inst->Predicate.Negate) {
712 value = LLVMBuildNot(bld->base.builder, value, "");
713 }
714
715 unswizzled[swizzle] = value;
716 } else {
717 value = unswizzled[swizzle];
718 }
719
720 pred[chan] = value;
721 }
722 }
723
724
725 /**
726 * Register store.
727 */
728 static void
729 emit_store(
730 struct lp_build_tgsi_soa_context *bld,
731 const struct tgsi_full_instruction *inst,
732 unsigned index,
733 unsigned chan_index,
734 LLVMValueRef pred,
735 LLVMValueRef value)
736 {
737 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
738 LLVMValueRef addr = NULL;
739
740 switch( inst->Instruction.Saturate ) {
741 case TGSI_SAT_NONE:
742 break;
743
744 case TGSI_SAT_ZERO_ONE:
745 value = lp_build_max(&bld->base, value, bld->base.zero);
746 value = lp_build_min(&bld->base, value, bld->base.one);
747 break;
748
749 case TGSI_SAT_MINUS_PLUS_ONE:
750 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
751 value = lp_build_min(&bld->base, value, bld->base.one);
752 break;
753
754 default:
755 assert(0);
756 }
757
758 if (reg->Register.Indirect) {
759 /* XXX use get_indirect_offsets() here eventually */
760 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
761 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
762
763 assert(bld->indirect_files);
764
765 addr = LLVMBuildLoad(bld->base.builder,
766 bld->addr[reg->Indirect.Index][swizzle],
767 "");
768 /* for indexing we want integers */
769 addr = LLVMBuildFPToSI(bld->base.builder, addr,
770 int_vec_type, "");
771 addr = LLVMBuildExtractElement(bld->base.builder,
772 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
773 "");
774 addr = LLVMBuildMul(bld->base.builder,
775 addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
776 "");
777 }
778
779 switch( reg->Register.File ) {
780 case TGSI_FILE_OUTPUT:
781 lp_exec_mask_store(&bld->exec_mask, pred, value,
782 bld->outputs[reg->Register.Index][chan_index]);
783 break;
784
785 case TGSI_FILE_TEMPORARY:
786 if (reg->Register.Indirect) {
787 /* XXX not done yet */
788 debug_printf("WARNING: LLVM scatter store of temp regs"
789 " not implemented\n");
790 }
791 else {
792 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
793 chan_index);
794 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
795 }
796 break;
797
798 case TGSI_FILE_ADDRESS:
799 lp_exec_mask_store(&bld->exec_mask, pred, value,
800 bld->addr[reg->Indirect.Index][chan_index]);
801 break;
802
803 case TGSI_FILE_PREDICATE:
804 lp_exec_mask_store(&bld->exec_mask, pred, value,
805 bld->preds[reg->Register.Index][chan_index]);
806 break;
807
808 default:
809 assert( 0 );
810 }
811 }
812
813
814 /**
815 * High-level instruction translators.
816 */
817
/**
 * Variants of texture sampling handled by emit_tex().
 */
enum tex_modifier {
   /* plain sampling */
   TEX_MODIFIER_NONE = 0,
   /* coordinates are multiplied by the reciprocal of src0.w */
   TEX_MODIFIER_PROJECTED = 1,
   /* src0.w is passed to the sampler as the LOD bias */
   TEX_MODIFIER_LOD_BIAS = 2,
   /* src0.w is passed to the sampler as the explicit LOD */
   TEX_MODIFIER_EXPLICIT_LOD = 3,
   /* derivatives come from src1/src2 instead of being computed */
   TEX_MODIFIER_EXPLICIT_DERIV = 4
};
825
826 static void
827 emit_tex( struct lp_build_tgsi_soa_context *bld,
828 const struct tgsi_full_instruction *inst,
829 enum tex_modifier modifier,
830 LLVMValueRef *texel)
831 {
832 unsigned unit;
833 LLVMValueRef lod_bias, explicit_lod;
834 LLVMValueRef oow = NULL;
835 LLVMValueRef coords[3];
836 LLVMValueRef ddx[3];
837 LLVMValueRef ddy[3];
838 unsigned num_coords;
839 unsigned i;
840
841 if (!bld->sampler) {
842 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
843 for (i = 0; i < 4; i++) {
844 texel[i] = bld->base.undef;
845 }
846 return;
847 }
848
849 switch (inst->Texture.Texture) {
850 case TGSI_TEXTURE_1D:
851 num_coords = 1;
852 break;
853 case TGSI_TEXTURE_2D:
854 case TGSI_TEXTURE_RECT:
855 num_coords = 2;
856 break;
857 case TGSI_TEXTURE_SHADOW1D:
858 case TGSI_TEXTURE_SHADOW2D:
859 case TGSI_TEXTURE_SHADOWRECT:
860 case TGSI_TEXTURE_3D:
861 case TGSI_TEXTURE_CUBE:
862 num_coords = 3;
863 break;
864 default:
865 assert(0);
866 return;
867 }
868
869 if (modifier == TEX_MODIFIER_LOD_BIAS) {
870 lod_bias = emit_fetch( bld, inst, 0, 3 );
871 explicit_lod = NULL;
872 }
873 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
874 lod_bias = NULL;
875 explicit_lod = emit_fetch( bld, inst, 0, 3 );
876 }
877 else {
878 lod_bias = NULL;
879 explicit_lod = NULL;
880 }
881
882 if (modifier == TEX_MODIFIER_PROJECTED) {
883 oow = emit_fetch( bld, inst, 0, 3 );
884 oow = lp_build_rcp(&bld->base, oow);
885 }
886
887 for (i = 0; i < num_coords; i++) {
888 coords[i] = emit_fetch( bld, inst, 0, i );
889 if (modifier == TEX_MODIFIER_PROJECTED)
890 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
891 }
892 for (i = num_coords; i < 3; i++) {
893 coords[i] = bld->base.undef;
894 }
895
896 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
897 for (i = 0; i < num_coords; i++) {
898 ddx[i] = emit_fetch( bld, inst, 1, i );
899 ddy[i] = emit_fetch( bld, inst, 2, i );
900 }
901 unit = inst->Src[3].Register.Index;
902 } else {
903 for (i = 0; i < num_coords; i++) {
904 ddx[i] = lp_build_ddx( &bld->base, coords[i] );
905 ddy[i] = lp_build_ddy( &bld->base, coords[i] );
906 }
907 unit = inst->Src[1].Register.Index;
908 }
909 for (i = num_coords; i < 3; i++) {
910 ddx[i] = bld->base.undef;
911 ddy[i] = bld->base.undef;
912 }
913
914 bld->sampler->emit_fetch_texel(bld->sampler,
915 bld->base.builder,
916 bld->base.type,
917 unit, num_coords, coords,
918 ddx, ddy,
919 lod_bias, explicit_lod,
920 texel);
921 }
922
923
924 /**
925 * Kill fragment if any of the src register values are negative.
926 */
927 static void
928 emit_kil(
929 struct lp_build_tgsi_soa_context *bld,
930 const struct tgsi_full_instruction *inst )
931 {
932 const struct tgsi_full_src_register *reg = &inst->Src[0];
933 LLVMValueRef terms[NUM_CHANNELS];
934 LLVMValueRef mask;
935 unsigned chan_index;
936
937 memset(&terms, 0, sizeof terms);
938
939 FOR_EACH_CHANNEL( chan_index ) {
940 unsigned swizzle;
941
942 /* Unswizzle channel */
943 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
944
945 /* Check if the component has not been already tested. */
946 assert(swizzle < NUM_CHANNELS);
947 if( !terms[swizzle] )
948 /* TODO: change the comparison operator instead of setting the sign */
949 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
950 }
951
952 mask = NULL;
953 FOR_EACH_CHANNEL( chan_index ) {
954 if(terms[chan_index]) {
955 LLVMValueRef chan_mask;
956
957 /*
958 * If term < 0 then mask = 0 else mask = ~0.
959 */
960 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
961
962 if(mask)
963 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
964 else
965 mask = chan_mask;
966 }
967 }
968
969 if(mask)
970 lp_build_mask_update(bld->mask, mask);
971 }
972
973
974 /**
975 * Predicated fragment kill.
976 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
977 * The only predication is the execution mask which will apply if
978 * we're inside a loop or conditional.
979 */
980 static void
981 emit_kilp(struct lp_build_tgsi_soa_context *bld,
982 const struct tgsi_full_instruction *inst)
983 {
984 LLVMValueRef mask;
985
986 /* For those channels which are "alive", disable fragment shader
987 * execution.
988 */
989 if (bld->exec_mask.has_mask) {
990 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
991 }
992 else {
993 mask = bld->base.zero;
994 }
995
996 lp_build_mask_update(bld->mask, mask);
997 }
998
999 static void
1000 emit_declaration(
1001 struct lp_build_tgsi_soa_context *bld,
1002 const struct tgsi_full_declaration *decl)
1003 {
1004 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
1005
1006 unsigned first = decl->Range.First;
1007 unsigned last = decl->Range.Last;
1008 unsigned idx, i;
1009
1010 for (idx = first; idx <= last; ++idx) {
1011 switch (decl->Declaration.File) {
1012 case TGSI_FILE_TEMPORARY:
1013 assert(idx < LP_MAX_TGSI_TEMPS);
1014 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1015 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
1016 last*4 + 4, 0);
1017 bld->temps_array = lp_build_array_alloca(bld->base.builder,
1018 vec_type, array_size, "");
1019 } else {
1020 for (i = 0; i < NUM_CHANNELS; i++)
1021 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1022 vec_type, "");
1023 }
1024 break;
1025
1026 case TGSI_FILE_OUTPUT:
1027 for (i = 0; i < NUM_CHANNELS; i++)
1028 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1029 vec_type, "");
1030 break;
1031
1032 case TGSI_FILE_ADDRESS:
1033 assert(idx < LP_MAX_TGSI_ADDRS);
1034 for (i = 0; i < NUM_CHANNELS; i++)
1035 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1036 vec_type, "");
1037 break;
1038
1039 case TGSI_FILE_PREDICATE:
1040 assert(idx < LP_MAX_TGSI_PREDS);
1041 for (i = 0; i < NUM_CHANNELS; i++)
1042 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1043 vec_type, "");
1044 break;
1045
1046 default:
1047 /* don't need to declare other vars */
1048 break;
1049 }
1050 }
1051 }
1052
1053
1054 /**
1055 * Emit LLVM for one TGSI instruction.
1056 * \param return TRUE for success, FALSE otherwise
1057 */
1058 static boolean
1059 emit_instruction(
1060 struct lp_build_tgsi_soa_context *bld,
1061 const struct tgsi_full_instruction *inst,
1062 const struct tgsi_opcode_info *info,
1063 int *pc)
1064 {
1065 unsigned chan_index;
1066 LLVMValueRef src0, src1, src2;
1067 LLVMValueRef tmp0, tmp1, tmp2;
1068 LLVMValueRef tmp3 = NULL;
1069 LLVMValueRef tmp4 = NULL;
1070 LLVMValueRef tmp5 = NULL;
1071 LLVMValueRef tmp6 = NULL;
1072 LLVMValueRef tmp7 = NULL;
1073 LLVMValueRef res;
1074 LLVMValueRef dst0[NUM_CHANNELS];
1075
1076 /*
1077 * Stores and write masks are handled in a general fashion after the long
1078 * instruction opcode switch statement.
1079 *
1080 * Although not stricitly necessary, we avoid generating instructions for
1081 * channels which won't be stored, in cases where's that easy. For some
1082 * complex instructions, like texture sampling, it is more convenient to
1083 * assume a full writemask and then let LLVM optimization passes eliminate
1084 * redundant code.
1085 */
1086
1087 (*pc)++;
1088
1089 assert(info->num_dst <= 1);
1090 if (info->num_dst) {
1091 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1092 dst0[chan_index] = bld->base.undef;
1093 }
1094 }
1095
1096 switch (inst->Instruction.Opcode) {
1097 case TGSI_OPCODE_ARL:
1098 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1099 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1100 tmp0 = lp_build_floor(&bld->base, tmp0);
1101 dst0[chan_index] = tmp0;
1102 }
1103 break;
1104
1105 case TGSI_OPCODE_MOV:
1106 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1107 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1108 }
1109 break;
1110
1111 case TGSI_OPCODE_LIT:
1112 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1113 dst0[CHAN_X] = bld->base.one;
1114 }
1115 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1116 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1117 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1118 }
1119 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1120 /* XMM[1] = SrcReg[0].yyyy */
1121 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1122 /* XMM[1] = max(XMM[1], 0) */
1123 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1124 /* XMM[2] = SrcReg[0].wwww */
1125 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1126 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1127 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1128 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1129 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1130 }
1131 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1132 dst0[CHAN_W] = bld->base.one;
1133 }
1134 break;
1135
1136 case TGSI_OPCODE_RCP:
1137 /* TGSI_OPCODE_RECIP */
1138 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1139 res = lp_build_rcp(&bld->base, src0);
1140 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1141 dst0[chan_index] = res;
1142 }
1143 break;
1144
1145 case TGSI_OPCODE_RSQ:
1146 /* TGSI_OPCODE_RECIPSQRT */
1147 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1148 src0 = lp_build_abs(&bld->base, src0);
1149 res = lp_build_rsqrt(&bld->base, src0);
1150 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1151 dst0[chan_index] = res;
1152 }
1153 break;
1154
1155 case TGSI_OPCODE_EXP:
1156 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1157 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1158 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1159 LLVMValueRef *p_exp2_int_part = NULL;
1160 LLVMValueRef *p_frac_part = NULL;
1161 LLVMValueRef *p_exp2 = NULL;
1162
1163 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1164
1165 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1166 p_exp2_int_part = &tmp0;
1167 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1168 p_frac_part = &tmp1;
1169 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1170 p_exp2 = &tmp2;
1171
1172 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1173
1174 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1175 dst0[CHAN_X] = tmp0;
1176 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1177 dst0[CHAN_Y] = tmp1;
1178 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1179 dst0[CHAN_Z] = tmp2;
1180 }
1181 /* dst.w = 1.0 */
1182 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1183 dst0[CHAN_W] = bld->base.one;
1184 }
1185 break;
1186
1187 case TGSI_OPCODE_LOG:
1188 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1189 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1190 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1191 LLVMValueRef *p_floor_log2 = NULL;
1192 LLVMValueRef *p_exp = NULL;
1193 LLVMValueRef *p_log2 = NULL;
1194
1195 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1196 src0 = lp_build_abs( &bld->base, src0 );
1197
1198 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1199 p_floor_log2 = &tmp0;
1200 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1201 p_exp = &tmp1;
1202 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1203 p_log2 = &tmp2;
1204
1205 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1206
1207 /* dst.x = floor(lg2(abs(src.x))) */
1208 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1209 dst0[CHAN_X] = tmp0;
1210 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1211 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1212 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1213 }
1214 /* dst.z = lg2(abs(src.x)) */
1215 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1216 dst0[CHAN_Z] = tmp2;
1217 }
1218 /* dst.w = 1.0 */
1219 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1220 dst0[CHAN_W] = bld->base.one;
1221 }
1222 break;
1223
1224 case TGSI_OPCODE_MUL:
1225 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1226 src0 = emit_fetch( bld, inst, 0, chan_index );
1227 src1 = emit_fetch( bld, inst, 1, chan_index );
1228 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1229 }
1230 break;
1231
1232 case TGSI_OPCODE_ADD:
1233 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1234 src0 = emit_fetch( bld, inst, 0, chan_index );
1235 src1 = emit_fetch( bld, inst, 1, chan_index );
1236 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1237 }
1238 break;
1239
1240 case TGSI_OPCODE_DP3:
1241 /* TGSI_OPCODE_DOT3 */
1242 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1243 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1244 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1245 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1246 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1247 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1248 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1249 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1250 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1251 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1252 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1253 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1254 dst0[chan_index] = tmp0;
1255 }
1256 break;
1257
1258 case TGSI_OPCODE_DP4:
1259 /* TGSI_OPCODE_DOT4 */
1260 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1261 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1262 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1263 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1264 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1265 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1266 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1267 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1268 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1269 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1270 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1271 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1272 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1273 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1274 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1275 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1276 dst0[chan_index] = tmp0;
1277 }
1278 break;
1279
1280 case TGSI_OPCODE_DST:
1281 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1282 dst0[CHAN_X] = bld->base.one;
1283 }
1284 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1285 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1286 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1287 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1288 }
1289 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1290 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1291 }
1292 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1293 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1294 }
1295 break;
1296
1297 case TGSI_OPCODE_MIN:
1298 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1299 src0 = emit_fetch( bld, inst, 0, chan_index );
1300 src1 = emit_fetch( bld, inst, 1, chan_index );
1301 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1302 }
1303 break;
1304
1305 case TGSI_OPCODE_MAX:
1306 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1307 src0 = emit_fetch( bld, inst, 0, chan_index );
1308 src1 = emit_fetch( bld, inst, 1, chan_index );
1309 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1310 }
1311 break;
1312
1313 case TGSI_OPCODE_SLT:
1314 /* TGSI_OPCODE_SETLT */
1315 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1316 src0 = emit_fetch( bld, inst, 0, chan_index );
1317 src1 = emit_fetch( bld, inst, 1, chan_index );
1318 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1319 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1320 }
1321 break;
1322
1323 case TGSI_OPCODE_SGE:
1324 /* TGSI_OPCODE_SETGE */
1325 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1326 src0 = emit_fetch( bld, inst, 0, chan_index );
1327 src1 = emit_fetch( bld, inst, 1, chan_index );
1328 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1329 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1330 }
1331 break;
1332
1333 case TGSI_OPCODE_MAD:
1334 /* TGSI_OPCODE_MADD */
1335 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1336 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1337 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1338 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1339 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1340 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1341 dst0[chan_index] = tmp0;
1342 }
1343 break;
1344
1345 case TGSI_OPCODE_SUB:
1346 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1347 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1348 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1349 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1350 }
1351 break;
1352
1353 case TGSI_OPCODE_LRP:
1354 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1355 src0 = emit_fetch( bld, inst, 0, chan_index );
1356 src1 = emit_fetch( bld, inst, 1, chan_index );
1357 src2 = emit_fetch( bld, inst, 2, chan_index );
1358 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1359 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1360 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1361 }
1362 break;
1363
1364 case TGSI_OPCODE_CND:
1365 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1366 src0 = emit_fetch( bld, inst, 0, chan_index );
1367 src1 = emit_fetch( bld, inst, 1, chan_index );
1368 src2 = emit_fetch( bld, inst, 2, chan_index );
1369 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1370 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1371 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1372 }
1373 break;
1374
1375 case TGSI_OPCODE_DP2A:
1376 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1377 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1378 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1379 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1380 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1381 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1382 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1383 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1384 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1385 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1386 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1387 }
1388 break;
1389
1390 case TGSI_OPCODE_FRC:
1391 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1392 src0 = emit_fetch( bld, inst, 0, chan_index );
1393 tmp0 = lp_build_floor(&bld->base, src0);
1394 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1395 dst0[chan_index] = tmp0;
1396 }
1397 break;
1398
1399 case TGSI_OPCODE_CLAMP:
1400 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1401 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1402 src1 = emit_fetch( bld, inst, 1, chan_index );
1403 src2 = emit_fetch( bld, inst, 2, chan_index );
1404 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1405 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1406 dst0[chan_index] = tmp0;
1407 }
1408 break;
1409
1410 case TGSI_OPCODE_FLR:
1411 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1412 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1413 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1414 }
1415 break;
1416
1417 case TGSI_OPCODE_ROUND:
1418 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1419 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1420 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1421 }
1422 break;
1423
1424 case TGSI_OPCODE_EX2: {
1425 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1426 tmp0 = lp_build_exp2( &bld->base, tmp0);
1427 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1428 dst0[chan_index] = tmp0;
1429 }
1430 break;
1431 }
1432
1433 case TGSI_OPCODE_LG2:
1434 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1435 tmp0 = lp_build_log2( &bld->base, tmp0);
1436 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1437 dst0[chan_index] = tmp0;
1438 }
1439 break;
1440
1441 case TGSI_OPCODE_POW:
1442 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1443 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1444 res = lp_build_pow( &bld->base, src0, src1 );
1445 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1446 dst0[chan_index] = res;
1447 }
1448 break;
1449
1450 case TGSI_OPCODE_XPD:
1451 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1452 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1453 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1454 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1455 }
1456 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1457 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1458 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1459 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1460 }
1461 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1462 tmp2 = tmp0;
1463 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1464 tmp5 = tmp3;
1465 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1466 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1467 dst0[CHAN_X] = tmp2;
1468 }
1469 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1470 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1471 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1472 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1473 }
1474 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1475 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1476 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1477 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1478 dst0[CHAN_Y] = tmp3;
1479 }
1480 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1481 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1482 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1483 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1484 dst0[CHAN_Z] = tmp5;
1485 }
1486 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1487 dst0[CHAN_W] = bld->base.one;
1488 }
1489 break;
1490
1491 case TGSI_OPCODE_ABS:
1492 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1493 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1494 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1495 }
1496 break;
1497
1498 case TGSI_OPCODE_RCC:
1499 /* deprecated? */
1500 assert(0);
1501 return FALSE;
1502
1503 case TGSI_OPCODE_DPH:
1504 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1505 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1506 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1507 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1508 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1509 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1510 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1511 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1512 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1513 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1514 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1515 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1516 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1517 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1518 dst0[chan_index] = tmp0;
1519 }
1520 break;
1521
1522 case TGSI_OPCODE_COS:
1523 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1524 tmp0 = lp_build_cos( &bld->base, tmp0 );
1525 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1526 dst0[chan_index] = tmp0;
1527 }
1528 break;
1529
1530 case TGSI_OPCODE_DDX:
1531 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1532 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1533 }
1534 break;
1535
1536 case TGSI_OPCODE_DDY:
1537 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1538 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1539 }
1540 break;
1541
1542 case TGSI_OPCODE_KILP:
1543 /* predicated kill */
1544 emit_kilp( bld, inst );
1545 break;
1546
1547 case TGSI_OPCODE_KIL:
1548 /* conditional kill */
1549 emit_kil( bld, inst );
1550 break;
1551
1552 case TGSI_OPCODE_PK2H:
1553 return FALSE;
1554 break;
1555
1556 case TGSI_OPCODE_PK2US:
1557 return FALSE;
1558 break;
1559
1560 case TGSI_OPCODE_PK4B:
1561 return FALSE;
1562 break;
1563
1564 case TGSI_OPCODE_PK4UB:
1565 return FALSE;
1566 break;
1567
1568 case TGSI_OPCODE_RFL:
1569 return FALSE;
1570 break;
1571
1572 case TGSI_OPCODE_SEQ:
1573 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1574 src0 = emit_fetch( bld, inst, 0, chan_index );
1575 src1 = emit_fetch( bld, inst, 1, chan_index );
1576 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1577 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1578 }
1579 break;
1580
1581 case TGSI_OPCODE_SFL:
1582 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1583 dst0[chan_index] = bld->base.zero;
1584 }
1585 break;
1586
1587 case TGSI_OPCODE_SGT:
1588 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1589 src0 = emit_fetch( bld, inst, 0, chan_index );
1590 src1 = emit_fetch( bld, inst, 1, chan_index );
1591 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1592 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1593 }
1594 break;
1595
1596 case TGSI_OPCODE_SIN:
1597 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1598 tmp0 = lp_build_sin( &bld->base, tmp0 );
1599 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1600 dst0[chan_index] = tmp0;
1601 }
1602 break;
1603
1604 case TGSI_OPCODE_SLE:
1605 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1606 src0 = emit_fetch( bld, inst, 0, chan_index );
1607 src1 = emit_fetch( bld, inst, 1, chan_index );
1608 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1609 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1610 }
1611 break;
1612
1613 case TGSI_OPCODE_SNE:
1614 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1615 src0 = emit_fetch( bld, inst, 0, chan_index );
1616 src1 = emit_fetch( bld, inst, 1, chan_index );
1617 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1618 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1619 }
1620 break;
1621
1622 case TGSI_OPCODE_STR:
1623 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1624 dst0[chan_index] = bld->base.one;
1625 }
1626 break;
1627
1628 case TGSI_OPCODE_TEX:
1629 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1630 break;
1631
1632 case TGSI_OPCODE_TXD:
1633 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1634 break;
1635
1636 case TGSI_OPCODE_UP2H:
1637 /* deprecated */
1638 assert (0);
1639 return FALSE;
1640 break;
1641
1642 case TGSI_OPCODE_UP2US:
1643 /* deprecated */
1644 assert(0);
1645 return FALSE;
1646 break;
1647
1648 case TGSI_OPCODE_UP4B:
1649 /* deprecated */
1650 assert(0);
1651 return FALSE;
1652 break;
1653
1654 case TGSI_OPCODE_UP4UB:
1655 /* deprecated */
1656 assert(0);
1657 return FALSE;
1658 break;
1659
1660 case TGSI_OPCODE_X2D:
1661 /* deprecated? */
1662 assert(0);
1663 return FALSE;
1664 break;
1665
1666 case TGSI_OPCODE_ARA:
1667 /* deprecated */
1668 assert(0);
1669 return FALSE;
1670 break;
1671
1672 case TGSI_OPCODE_ARR:
1673 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1674 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1675 tmp0 = lp_build_round(&bld->base, tmp0);
1676 dst0[chan_index] = tmp0;
1677 }
1678 break;
1679
1680 case TGSI_OPCODE_BRA:
1681 /* deprecated */
1682 assert(0);
1683 return FALSE;
1684 break;
1685
1686 case TGSI_OPCODE_CAL:
1687 lp_exec_mask_call(&bld->exec_mask,
1688 inst->Label.Label,
1689 pc);
1690
1691 break;
1692
1693 case TGSI_OPCODE_RET:
1694 lp_exec_mask_ret(&bld->exec_mask, pc);
1695 break;
1696
1697 case TGSI_OPCODE_END:
1698 *pc = -1;
1699 break;
1700
1701 case TGSI_OPCODE_SSG:
1702 /* TGSI_OPCODE_SGN */
1703 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1704 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1705 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1706 }
1707 break;
1708
1709 case TGSI_OPCODE_CMP:
1710 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1711 src0 = emit_fetch( bld, inst, 0, chan_index );
1712 src1 = emit_fetch( bld, inst, 1, chan_index );
1713 src2 = emit_fetch( bld, inst, 2, chan_index );
1714 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1715 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1716 }
1717 break;
1718
1719 case TGSI_OPCODE_SCS:
1720 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1721 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1722 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1723 }
1724 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1725 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1726 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1727 }
1728 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1729 dst0[CHAN_Z] = bld->base.zero;
1730 }
1731 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1732 dst0[CHAN_W] = bld->base.one;
1733 }
1734 break;
1735
1736 case TGSI_OPCODE_TXB:
1737 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1738 break;
1739
1740 case TGSI_OPCODE_NRM:
1741 /* fall-through */
1742 case TGSI_OPCODE_NRM4:
1743 /* 3 or 4-component normalization */
1744 {
1745 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1746
1747 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1748 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1749 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1750 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1751
1752 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1753
1754 /* xmm4 = src.x */
1755 /* xmm0 = src.x * src.x */
1756 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1757 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1758 tmp4 = tmp0;
1759 }
1760 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1761
1762 /* xmm5 = src.y */
1763 /* xmm0 = xmm0 + src.y * src.y */
1764 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1765 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1766 tmp5 = tmp1;
1767 }
1768 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1769 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1770
1771 /* xmm6 = src.z */
1772 /* xmm0 = xmm0 + src.z * src.z */
1773 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1774 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1775 tmp6 = tmp1;
1776 }
1777 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1778 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1779
1780 if (dims == 4) {
1781 /* xmm7 = src.w */
1782 /* xmm0 = xmm0 + src.w * src.w */
1783 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1784 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1785 tmp7 = tmp1;
1786 }
1787 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1788 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1789 }
1790
1791 /* xmm1 = 1 / sqrt(xmm0) */
1792 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1793
1794 /* dst.x = xmm1 * src.x */
1795 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1796 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1797 }
1798
1799 /* dst.y = xmm1 * src.y */
1800 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1801 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1802 }
1803
1804 /* dst.z = xmm1 * src.z */
1805 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1806 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1807 }
1808
1809 /* dst.w = xmm1 * src.w */
1810 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1811 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1812 }
1813 }
1814
1815 /* dst.w = 1.0 */
1816 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1817 dst0[CHAN_W] = bld->base.one;
1818 }
1819 }
1820 break;
1821
1822 case TGSI_OPCODE_DIV:
1823 /* deprecated */
1824 assert( 0 );
1825 return FALSE;
1826 break;
1827
1828 case TGSI_OPCODE_DP2:
1829 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1830 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1831 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1832 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1833 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1834 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1835 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1836 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1837 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1838 }
1839 break;
1840
1841 case TGSI_OPCODE_TXL:
1842 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1843 break;
1844
1845 case TGSI_OPCODE_TXP:
1846 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1847 break;
1848
1849 case TGSI_OPCODE_BRK:
1850 lp_exec_break(&bld->exec_mask);
1851 break;
1852
1853 case TGSI_OPCODE_IF:
1854 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1855 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1856 tmp0, bld->base.zero);
1857 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1858 break;
1859
1860 case TGSI_OPCODE_BGNLOOP:
1861 lp_exec_bgnloop(&bld->exec_mask);
1862 break;
1863
1864 case TGSI_OPCODE_BGNSUB:
1865 lp_exec_mask_bgnsub(&bld->exec_mask);
1866 break;
1867
1868 case TGSI_OPCODE_ELSE:
1869 lp_exec_mask_cond_invert(&bld->exec_mask);
1870 break;
1871
1872 case TGSI_OPCODE_ENDIF:
1873 lp_exec_mask_cond_pop(&bld->exec_mask);
1874 break;
1875
1876 case TGSI_OPCODE_ENDLOOP:
1877 lp_exec_endloop(&bld->exec_mask);
1878 break;
1879
1880 case TGSI_OPCODE_ENDSUB:
1881 lp_exec_mask_endsub(&bld->exec_mask, pc);
1882 break;
1883
1884 case TGSI_OPCODE_PUSHA:
1885 /* deprecated? */
1886 assert(0);
1887 return FALSE;
1888 break;
1889
1890 case TGSI_OPCODE_POPA:
1891 /* deprecated? */
1892 assert(0);
1893 return FALSE;
1894 break;
1895
1896 case TGSI_OPCODE_CEIL:
1897 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1898 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1899 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1900 }
1901 break;
1902
1903 case TGSI_OPCODE_I2F:
1904 /* deprecated? */
1905 assert(0);
1906 return FALSE;
1907 break;
1908
1909 case TGSI_OPCODE_NOT:
1910 /* deprecated? */
1911 assert(0);
1912 return FALSE;
1913 break;
1914
1915 case TGSI_OPCODE_TRUNC:
1916 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1917 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1918 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1919 }
1920 break;
1921
1922 case TGSI_OPCODE_SHL:
1923 /* deprecated? */
1924 assert(0);
1925 return FALSE;
1926 break;
1927
1928 case TGSI_OPCODE_ISHR:
1929 /* deprecated? */
1930 assert(0);
1931 return FALSE;
1932 break;
1933
1934 case TGSI_OPCODE_AND:
1935 /* deprecated? */
1936 assert(0);
1937 return FALSE;
1938 break;
1939
1940 case TGSI_OPCODE_OR:
1941 /* deprecated? */
1942 assert(0);
1943 return FALSE;
1944 break;
1945
1946 case TGSI_OPCODE_MOD:
1947 /* deprecated? */
1948 assert(0);
1949 return FALSE;
1950 break;
1951
1952 case TGSI_OPCODE_XOR:
1953 /* deprecated? */
1954 assert(0);
1955 return FALSE;
1956 break;
1957
1958 case TGSI_OPCODE_SAD:
1959 /* deprecated? */
1960 assert(0);
1961 return FALSE;
1962 break;
1963
1964 case TGSI_OPCODE_TXF:
1965 /* deprecated? */
1966 assert(0);
1967 return FALSE;
1968 break;
1969
1970 case TGSI_OPCODE_TXQ:
1971 /* deprecated? */
1972 assert(0);
1973 return FALSE;
1974 break;
1975
1976 case TGSI_OPCODE_CONT:
1977 lp_exec_continue(&bld->exec_mask);
1978 break;
1979
1980 case TGSI_OPCODE_EMIT:
1981 return FALSE;
1982 break;
1983
1984 case TGSI_OPCODE_ENDPRIM:
1985 return FALSE;
1986 break;
1987
1988 case TGSI_OPCODE_NOP:
1989 break;
1990
1991 default:
1992 return FALSE;
1993 }
1994
1995 if(info->num_dst) {
1996 LLVMValueRef pred[NUM_CHANNELS];
1997
1998 emit_fetch_predicate( bld, inst, pred );
1999
2000 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2001 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2002 }
2003 }
2004
2005 return TRUE;
2006 }
2007
2008
2009 void
2010 lp_build_tgsi_soa(LLVMBuilderRef builder,
2011 const struct tgsi_token *tokens,
2012 struct lp_type type,
2013 struct lp_build_mask_context *mask,
2014 LLVMValueRef consts_ptr,
2015 const LLVMValueRef *pos,
2016 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2017 LLVMValueRef (*outputs)[NUM_CHANNELS],
2018 struct lp_build_sampler_soa *sampler,
2019 const struct tgsi_shader_info *info)
2020 {
2021 struct lp_build_tgsi_soa_context bld;
2022 struct tgsi_parse_context parse;
2023 uint num_immediates = 0;
2024 uint num_instructions = 0;
2025 unsigned i;
2026 int pc = 0;
2027
2028 /* Setup build context */
2029 memset(&bld, 0, sizeof bld);
2030 lp_build_context_init(&bld.base, builder, type);
2031 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
2032 bld.mask = mask;
2033 bld.pos = pos;
2034 bld.inputs = inputs;
2035 bld.outputs = outputs;
2036 bld.consts_ptr = consts_ptr;
2037 bld.sampler = sampler;
2038 bld.indirect_files = info->indirect_files;
2039 bld.instructions = (struct tgsi_full_instruction *)
2040 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2041 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2042
2043 if (!bld.instructions) {
2044 return;
2045 }
2046
2047 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2048
2049 tgsi_parse_init( &parse, tokens );
2050
2051 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2052 tgsi_parse_token( &parse );
2053
2054 switch( parse.FullToken.Token.Type ) {
2055 case TGSI_TOKEN_TYPE_DECLARATION:
2056 /* Inputs already interpolated */
2057 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2058 break;
2059
2060 case TGSI_TOKEN_TYPE_INSTRUCTION:
2061 {
2062 /* save expanded instruction */
2063 if (num_instructions == bld.max_instructions) {
2064 bld.instructions = REALLOC(bld.instructions,
2065 bld.max_instructions
2066 * sizeof(struct tgsi_full_instruction),
2067 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2068 * sizeof(struct tgsi_full_instruction));
2069 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2070 }
2071
2072 memcpy(bld.instructions + num_instructions,
2073 &parse.FullToken.FullInstruction,
2074 sizeof(bld.instructions[0]));
2075
2076 num_instructions++;
2077 }
2078
2079 break;
2080
2081 case TGSI_TOKEN_TYPE_IMMEDIATE:
2082 /* simply copy the immediate values into the next immediates[] slot */
2083 {
2084 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2085 assert(size <= 4);
2086 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2087 for( i = 0; i < size; ++i )
2088 bld.immediates[num_immediates][i] =
2089 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2090 for( i = size; i < 4; ++i )
2091 bld.immediates[num_immediates][i] = bld.base.undef;
2092 num_immediates++;
2093 }
2094 break;
2095
2096 case TGSI_TOKEN_TYPE_PROPERTY:
2097 break;
2098
2099 default:
2100 assert( 0 );
2101 }
2102 }
2103
2104 while (pc != -1) {
2105 struct tgsi_full_instruction *instr = bld.instructions + pc;
2106 const struct tgsi_opcode_info *opcode_info =
2107 tgsi_get_opcode_info(instr->Instruction.Opcode);
2108 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2109 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2110 opcode_info->mnemonic);
2111 }
2112
2113 if (0) {
2114 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2115 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2116 debug_printf("11111111111111111111111111111 \n");
2117 tgsi_dump(tokens, 0);
2118 lp_debug_dump_value(function);
2119 debug_printf("2222222222222222222222222222 \n");
2120 }
2121 tgsi_parse_free( &parse );
2122
2123 if (0) {
2124 LLVMModuleRef module = LLVMGetGlobalParent(
2125 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2126 LLVMDumpModule(module);
2127
2128 }
2129
2130 FREE( bld.instructions );
2131 }
2132