gallivm: refactor code into get_indirect_offsets() function
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
/* Iterate CHAN over all four vector components. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero if instruction INST writes component CHAN of its first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the components enabled in dst[0]'s writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* TGSI vector component indices */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial capacity of the instructions array (see bld->max_instructions);
 * presumably grown by the parsing loop when exceeded -- not visible here.
 */
#define LP_MAX_INSTRUCTIONS 256
82
83
/**
 * Execution-mask bookkeeping for TGSI control flow (IF/ELSE, loops,
 * subroutine calls).  Each mask is an integer vector whose lanes are
 * all-ones (channel active) or all-zeros (channel inactive).
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   boolean has_mask;          /* any control flow currently active? */

   LLVMTypeRef int_vec_type;  /* type of all the mask vectors below */

   /* Conditional state: stack of masks of enclosing conditionals */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;    /* mask of the innermost conditional */

   /* Loop state for the innermost loop */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;    /* channels not yet CONTinued this iteration */
   LLVMValueRef break_mask;   /* channels that have not executed BRK */
   LLVMValueRef break_var;    /* alloca carrying break_mask across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Subroutine call state */
   LLVMValueRef ret_mask;     /* channels that have not executed RET */
   struct {
      int pc;                 /* return address: instruction index */
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* AND of the masks above; recomputed by lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
116
/**
 * Context for translating one TGSI shader into LLVM IR in SoA form:
 * one vector per register channel, one lane per pixel/vertex.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;   /* build context for float vectors */

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;        /* pointer to the constant buffer */
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];   /* allocas (see emit_declaration) */

   const struct lp_build_sampler_soa *sampler;  /* texture-fetch code generator */

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];   /* allocas */
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];    /* allocas */
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];   /* allocas */

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   struct lp_build_mask_context *mask;   /* fragment-kill mask (KIL/KILP) */
   struct lp_exec_mask exec_mask;        /* control-flow execution mask */

   struct tgsi_full_instruction *instructions;  /* parsed instruction list */
   uint max_instructions;                       /* allocated size of the list */
};
147
148 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
149 {
150 mask->bld = bld;
151 mask->has_mask = FALSE;
152 mask->cond_stack_size = 0;
153 mask->loop_stack_size = 0;
154 mask->call_stack_size = 0;
155
156 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
157 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
158 LLVMConstAllOnes(mask->int_vec_type);
159 }
160
161 static void lp_exec_mask_update(struct lp_exec_mask *mask)
162 {
163 if (mask->loop_stack_size) {
164 /*for loops we need to update the entire mask at runtime */
165 LLVMValueRef tmp;
166 assert(mask->break_mask);
167 tmp = LLVMBuildAnd(mask->bld->builder,
168 mask->cont_mask,
169 mask->break_mask,
170 "maskcb");
171 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
172 mask->cond_mask,
173 tmp,
174 "maskfull");
175 } else
176 mask->exec_mask = mask->cond_mask;
177
178 if (mask->call_stack_size) {
179 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
180 mask->exec_mask,
181 mask->ret_mask,
182 "callmask");
183 }
184
185 mask->has_mask = (mask->cond_stack_size > 0 ||
186 mask->loop_stack_size > 0 ||
187 mask->call_stack_size > 0);
188 }
189
190 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
191 LLVMValueRef val)
192 {
193 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
194 if (mask->cond_stack_size == 0) {
195 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
196 }
197 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
198 assert(LLVMTypeOf(val) == mask->int_vec_type);
199 mask->cond_mask = val;
200
201 lp_exec_mask_update(mask);
202 }
203
204 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
205 {
206 LLVMValueRef prev_mask;
207 LLVMValueRef inv_mask;
208
209 assert(mask->cond_stack_size);
210 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
211 if (mask->cond_stack_size == 1) {
212 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
213 }
214
215 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
216
217 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
218 inv_mask,
219 prev_mask, "");
220 lp_exec_mask_update(mask);
221 }
222
223 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
224 {
225 assert(mask->cond_stack_size);
226 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
227 lp_exec_mask_update(mask);
228 }
229
/* BGNLOOP: push enclosing loop state and open the loop's basic block. */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* Entering the outermost loop: loop state must be pristine */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* Save the enclosing loop's state so lp_exec_endloop() can restore it */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask must survive across iterations, so carry it through
    * memory (an alloca) instead of an SSA value.
    */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* Open a fresh basic block for the loop body and branch into it */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* Reload the break mask at the top of every iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
258
259 static void lp_exec_break(struct lp_exec_mask *mask)
260 {
261 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
262 mask->exec_mask,
263 "break");
264
265 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
266 mask->break_mask,
267 exec_mask, "break_full");
268
269 lp_exec_mask_update(mask);
270 }
271
272 static void lp_exec_continue(struct lp_exec_mask *mask)
273 {
274 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
275 mask->exec_mask,
276 "");
277
278 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
279 mask->cont_mask,
280 exec_mask, "");
281
282 lp_exec_mask_update(mask);
283 }
284
285
/* ENDLOOP: close the loop body, branch back while any channel remains
 * active, then restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* scalar integer wide enough to hold the whole exec-mask vector */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (mask != 0), i.e. some channel is still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* Loop back while any channel is active, otherwise fall through */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* Pop the enclosing loop's state saved by lp_exec_bgnloop() */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
331
332 /* stores val into an address pointed to by dst.
333 * mask->exec_mask is used to figure out which bits of val
334 * should be stored into the address
335 * (0 means don't store this bit, 1 means do store).
336 */
337 static void lp_exec_mask_store(struct lp_exec_mask *mask,
338 LLVMValueRef pred,
339 LLVMValueRef val,
340 LLVMValueRef dst)
341 {
342 /* Mix the predicate and execution mask */
343 if (mask->has_mask) {
344 if (pred) {
345 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
346 } else {
347 pred = mask->exec_mask;
348 }
349 }
350
351 if (pred) {
352 LLVMValueRef real_val, dst_val;
353
354 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
355 real_val = lp_build_select(mask->bld,
356 pred,
357 val, dst_val);
358
359 LLVMBuildStore(mask->bld->builder, real_val, dst);
360 } else
361 LLVMBuildStore(mask->bld->builder, val, dst);
362 }
363
364 static void lp_exec_mask_call(struct lp_exec_mask *mask,
365 int func,
366 int *pc)
367 {
368 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
369 mask->call_stack[mask->call_stack_size].pc = *pc;
370 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
371 mask->call_stack_size++;
372 *pc = func;
373 }
374
375 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
376 {
377 LLVMValueRef exec_mask;
378
379 if (mask->call_stack_size == 0) {
380 /* returning from main() */
381 *pc = -1;
382 return;
383 }
384 exec_mask = LLVMBuildNot(mask->bld->builder,
385 mask->exec_mask,
386 "ret");
387
388 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
389 mask->ret_mask,
390 exec_mask, "ret_full");
391
392 lp_exec_mask_update(mask);
393 }
394
/* BGNSUB marker: nothing to emit here -- subroutine entry bookkeeping
 * is done by lp_exec_mask_call().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
398
399 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
400 {
401 assert(mask->call_stack_size);
402 mask->call_stack_size--;
403 *pc = mask->call_stack[mask->call_stack_size].pc;
404 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
405 lp_exec_mask_update(mask);
406 }
407
408
409 /**
410 * Return pointer to a temporary register channel (src or dest).
411 * Note that indirect addressing cannot be handled here.
412 * \param index which temporary register
413 * \param chan which channel of the temp register.
414 */
415 static LLVMValueRef
416 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
417 unsigned index,
418 unsigned chan)
419 {
420 assert(chan < 4);
421 if (bld->has_indirect_addressing) {
422 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
423 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
424 }
425 else {
426 return bld->temps[index][chan];
427 }
428 }
429
430
431 /**
432 * Gather vector.
433 * XXX the lp_build_gather() function should be capable of doing this
434 * with a little work.
435 */
436 static LLVMValueRef
437 build_gather(struct lp_build_tgsi_soa_context *bld,
438 LLVMValueRef base_ptr,
439 LLVMValueRef indexes)
440 {
441 LLVMValueRef res = bld->base.undef;
442 unsigned i;
443
444 /*
445 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
446 */
447 for (i = 0; i < bld->base.type.length; i++) {
448 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
449 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
450 indexes, ii, "");
451 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
452 &index, 1, "");
453 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
454
455 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
456 }
457
458 return res;
459 }
460
461
462 /**
463 * Read the current value of the ADDR register, convert the floats to
464 * ints, multiply by four and return the vector of offsets.
465 * The offsets will be used to index into the constant buffer or
466 * temporary register file.
467 */
468 static LLVMValueRef
469 get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
470 const struct tgsi_src_register *indirect_reg)
471 {
472 /* always use X component of address register */
473 const int x = indirect_reg->SwizzleX;
474 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
475 uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
476 LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
477 LLVMValueRef addr_vec;
478
479 addr_vec = LLVMBuildLoad(bld->base.builder,
480 bld->addr[indirect_reg->Index][swizzle],
481 "load addr reg");
482
483 /* for indexing we want integers */
484 addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
485 int_vec_type, "");
486
487 /* addr_vec = addr_vec * 4 */
488 addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
489
490 return addr_vec;
491 }
492
493
494 /**
495 * Register fetch.
496 */
497 static LLVMValueRef
498 emit_fetch(
499 struct lp_build_tgsi_soa_context *bld,
500 const struct tgsi_full_instruction *inst,
501 unsigned src_op,
502 const unsigned chan_index )
503 {
504 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
505 const unsigned swizzle =
506 tgsi_util_get_full_src_register_swizzle(reg, chan_index);
507 LLVMValueRef res;
508 LLVMValueRef addr_vec = NULL;
509
510 if (swizzle > 3) {
511 assert(0 && "invalid swizzle in emit_fetch()");
512 return bld->base.undef;
513 }
514
515 if (reg->Register.Indirect) {
516 addr_vec = get_indirect_offsets(bld, &reg->Indirect);
517 }
518
519 switch (reg->Register.File) {
520 case TGSI_FILE_CONSTANT:
521 if (reg->Register.Indirect) {
522 LLVMValueRef index_vec; /* index into the const buffer */
523
524 /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
525 index_vec = lp_build_const_int_vec(bld->int_bld.type,
526 reg->Register.Index * 4 + swizzle);
527
528 /* index_vec = index_vec + addr_vec */
529 index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
530
531 /* Gather values from the constant buffer */
532 res = build_gather(bld, bld->consts_ptr, index_vec);
533 }
534 else {
535 LLVMValueRef index; /* index into the const buffer */
536 LLVMValueRef scalar, scalar_ptr;
537
538 index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
539
540 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
541 &index, 1, "");
542 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
543
544 res = lp_build_broadcast_scalar(&bld->base, scalar);
545 }
546 break;
547
548 case TGSI_FILE_IMMEDIATE:
549 res = bld->immediates[reg->Register.Index][swizzle];
550 assert(res);
551 break;
552
553 case TGSI_FILE_INPUT:
554 res = bld->inputs[reg->Register.Index][swizzle];
555 assert(res);
556 break;
557
558 case TGSI_FILE_TEMPORARY:
559 if (reg->Register.Indirect) {
560 LLVMValueRef vec_len =
561 lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
562 LLVMValueRef index_vec; /* index into the const buffer */
563 LLVMValueRef temps_array;
564 LLVMTypeRef float4_ptr_type;
565
566 assert(bld->has_indirect_addressing);
567
568 /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
569 index_vec = lp_build_const_int_vec(bld->int_bld.type,
570 reg->Register.Index * 4 + swizzle);
571
572 /* index_vec += addr_vec */
573 index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
574
575 /* index_vec *= vector_length */
576 index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);
577
578 /* cast temps_array pointer to float* */
579 float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
580 temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
581 float4_ptr_type, "");
582
583 /* Gather values from the temporary register array */
584 res = build_gather(bld, temps_array, index_vec);
585 }
586 else {
587 LLVMValueRef temp_ptr;
588 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
589 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
590 if (!res)
591 return bld->base.undef;
592 }
593 break;
594
595 default:
596 assert(0 && "invalid src register in emit_fetch()");
597 return bld->base.undef;
598 }
599
600 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
601 case TGSI_UTIL_SIGN_CLEAR:
602 res = lp_build_abs( &bld->base, res );
603 break;
604
605 case TGSI_UTIL_SIGN_SET:
606 /* TODO: Use bitwese OR for floating point */
607 res = lp_build_abs( &bld->base, res );
608 res = LLVMBuildNeg( bld->base.builder, res, "" );
609 break;
610
611 case TGSI_UTIL_SIGN_TOGGLE:
612 res = LLVMBuildNeg( bld->base.builder, res, "" );
613 break;
614
615 case TGSI_UTIL_SIGN_KEEP:
616 break;
617 }
618
619 return res;
620 }
621
622
623 /**
624 * Register fetch with derivatives.
625 */
626 static void
627 emit_fetch_deriv(
628 struct lp_build_tgsi_soa_context *bld,
629 const struct tgsi_full_instruction *inst,
630 unsigned index,
631 const unsigned chan_index,
632 LLVMValueRef *res,
633 LLVMValueRef *ddx,
634 LLVMValueRef *ddy)
635 {
636 LLVMValueRef src;
637
638 src = emit_fetch(bld, inst, index, chan_index);
639
640 if(res)
641 *res = src;
642
643 /* TODO: use interpolation coeffs for inputs */
644
645 if(ddx)
646 *ddx = lp_build_ddx(&bld->base, src);
647
648 if(ddy)
649 *ddy = lp_build_ddy(&bld->base, src);
650 }
651
652
/**
 * Predicate.
 * Build per-channel predicate masks for the instruction: pred[chan] is
 * NULL when the instruction is not predicated, otherwise an integer
 * mask vector derived from the swizzled predicate register.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};  /* per-channel cache */
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* Not predicated: no mask for any channel */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
718
719
720 /**
721 * Register store.
722 */
723 static void
724 emit_store(
725 struct lp_build_tgsi_soa_context *bld,
726 const struct tgsi_full_instruction *inst,
727 unsigned index,
728 unsigned chan_index,
729 LLVMValueRef pred,
730 LLVMValueRef value)
731 {
732 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
733 LLVMValueRef addr = NULL;
734
735 switch( inst->Instruction.Saturate ) {
736 case TGSI_SAT_NONE:
737 break;
738
739 case TGSI_SAT_ZERO_ONE:
740 value = lp_build_max(&bld->base, value, bld->base.zero);
741 value = lp_build_min(&bld->base, value, bld->base.one);
742 break;
743
744 case TGSI_SAT_MINUS_PLUS_ONE:
745 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
746 value = lp_build_min(&bld->base, value, bld->base.one);
747 break;
748
749 default:
750 assert(0);
751 }
752
753 if (reg->Register.Indirect) {
754 /* XXX use get_indirect_offsets() here eventually */
755 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
756 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
757 addr = LLVMBuildLoad(bld->base.builder,
758 bld->addr[reg->Indirect.Index][swizzle],
759 "");
760 /* for indexing we want integers */
761 addr = LLVMBuildFPToSI(bld->base.builder, addr,
762 int_vec_type, "");
763 addr = LLVMBuildExtractElement(bld->base.builder,
764 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
765 "");
766 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
767 }
768
769 switch( reg->Register.File ) {
770 case TGSI_FILE_OUTPUT:
771 lp_exec_mask_store(&bld->exec_mask, pred, value,
772 bld->outputs[reg->Register.Index][chan_index]);
773 break;
774
775 case TGSI_FILE_TEMPORARY:
776 if (reg->Register.Indirect) {
777 /* XXX not done yet */
778 debug_printf("WARNING: LLVM scatter store of temp regs"
779 " not implemented\n");
780 }
781 else {
782 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
783 chan_index);
784 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
785 }
786 break;
787
788 case TGSI_FILE_ADDRESS:
789 lp_exec_mask_store(&bld->exec_mask, pred, value,
790 bld->addr[reg->Indirect.Index][chan_index]);
791 break;
792
793 case TGSI_FILE_PREDICATE:
794 lp_exec_mask_store(&bld->exec_mask, pred, value,
795 bld->preds[index][chan_index]);
796 break;
797
798 default:
799 assert( 0 );
800 }
801 }
802
803
804 /**
805 * High-level instruction translators.
806 */
807
/**
 * Variants of texture sampling handled by emit_tex().
 */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* coords divided by src0.w */
   TEX_MODIFIER_LOD_BIAS,       /* lod bias taken from src0.w */
   TEX_MODIFIER_EXPLICIT_LOD,   /* explicit lod taken from src0.w */
   TEX_MODIFIER_EXPLICIT_DERIV  /* explicit derivatives in src1/src2 */
};
815
/**
 * Emit a texture-sampling instruction via the bound sampler generator.
 * Writes the four result channels into texel[0..3].
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* No sampler generator bound: return undefined texels */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* How many coordinate components does this target consume? */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias / explicit lod both live in src0.w, and are exclusive */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projected: 1/w, applied to each coordinate below */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      /* explicit derivatives in src1/src2; sampler index in src3 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives from neighboring pixels; sampler in src1 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
912
913
/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];  /* fetched values, indexed by swizzle */
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel "survives" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask)
      lp_build_mask_update(bld->mask, mask);
}
962
963
964 /**
965 * Predicated fragment kill.
966 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
967 * The only predication is the execution mask which will apply if
968 * we're inside a loop or conditional.
969 */
970 static void
971 emit_kilp(struct lp_build_tgsi_soa_context *bld,
972 const struct tgsi_full_instruction *inst)
973 {
974 LLVMValueRef mask;
975
976 /* For those channels which are "alive", disable fragment shader
977 * execution.
978 */
979 if (bld->exec_mask.has_mask) {
980 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
981 }
982 else {
983 mask = bld->base.zero;
984 }
985
986 lp_build_mask_update(bld->mask, mask);
987 }
988
989 static void
990 emit_declaration(
991 struct lp_build_tgsi_soa_context *bld,
992 const struct tgsi_full_declaration *decl)
993 {
994 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
995
996 unsigned first = decl->Range.First;
997 unsigned last = decl->Range.Last;
998 unsigned idx, i;
999
1000 for (idx = first; idx <= last; ++idx) {
1001 switch (decl->Declaration.File) {
1002 case TGSI_FILE_TEMPORARY:
1003 assert(idx < LP_MAX_TGSI_TEMPS);
1004 if (bld->has_indirect_addressing) {
1005 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
1006 last*4 + 4, 0);
1007 bld->temps_array = lp_build_array_alloca(bld->base.builder,
1008 vec_type, array_size, "");
1009 } else {
1010 for (i = 0; i < NUM_CHANNELS; i++)
1011 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1012 vec_type, "");
1013 }
1014 break;
1015
1016 case TGSI_FILE_OUTPUT:
1017 for (i = 0; i < NUM_CHANNELS; i++)
1018 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1019 vec_type, "");
1020 break;
1021
1022 case TGSI_FILE_ADDRESS:
1023 assert(idx < LP_MAX_TGSI_ADDRS);
1024 for (i = 0; i < NUM_CHANNELS; i++)
1025 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1026 vec_type, "");
1027 break;
1028
1029 case TGSI_FILE_PREDICATE:
1030 assert(idx < LP_MAX_TGSI_PREDS);
1031 for (i = 0; i < NUM_CHANNELS; i++)
1032 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1033 vec_type, "");
1034 break;
1035
1036 default:
1037 /* don't need to declare other vars */
1038 break;
1039 }
1040 }
1041 }
1042
1043
1044 /**
1045 * Emit LLVM for one TGSI instruction.
1046 * \param return TRUE for success, FALSE otherwise
1047 */
1048 static boolean
1049 emit_instruction(
1050 struct lp_build_tgsi_soa_context *bld,
1051 const struct tgsi_full_instruction *inst,
1052 const struct tgsi_opcode_info *info,
1053 int *pc)
1054 {
1055 unsigned chan_index;
1056 LLVMValueRef src0, src1, src2;
1057 LLVMValueRef tmp0, tmp1, tmp2;
1058 LLVMValueRef tmp3 = NULL;
1059 LLVMValueRef tmp4 = NULL;
1060 LLVMValueRef tmp5 = NULL;
1061 LLVMValueRef tmp6 = NULL;
1062 LLVMValueRef tmp7 = NULL;
1063 LLVMValueRef res;
1064 LLVMValueRef dst0[NUM_CHANNELS];
1065
1066 /*
1067 * Stores and write masks are handled in a general fashion after the long
1068 * instruction opcode switch statement.
1069 *
1070 * Although not stricitly necessary, we avoid generating instructions for
1071 * channels which won't be stored, in cases where's that easy. For some
1072 * complex instructions, like texture sampling, it is more convenient to
1073 * assume a full writemask and then let LLVM optimization passes eliminate
1074 * redundant code.
1075 */
1076
1077 (*pc)++;
1078
1079 assert(info->num_dst <= 1);
1080 if (info->num_dst) {
1081 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1082 dst0[chan_index] = bld->base.undef;
1083 }
1084 }
1085
1086 switch (inst->Instruction.Opcode) {
1087 case TGSI_OPCODE_ARL:
1088 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1089 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1090 tmp0 = lp_build_floor(&bld->base, tmp0);
1091 dst0[chan_index] = tmp0;
1092 }
1093 break;
1094
1095 case TGSI_OPCODE_MOV:
1096 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1097 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1098 }
1099 break;
1100
1101 case TGSI_OPCODE_LIT:
1102 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1103 dst0[CHAN_X] = bld->base.one;
1104 }
1105 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1106 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1107 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1108 }
1109 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1110 /* XMM[1] = SrcReg[0].yyyy */
1111 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1112 /* XMM[1] = max(XMM[1], 0) */
1113 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1114 /* XMM[2] = SrcReg[0].wwww */
1115 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1116 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1117 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1118 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1119 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1120 }
1121 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1122 dst0[CHAN_W] = bld->base.one;
1123 }
1124 break;
1125
1126 case TGSI_OPCODE_RCP:
1127 /* TGSI_OPCODE_RECIP */
1128 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1129 res = lp_build_rcp(&bld->base, src0);
1130 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1131 dst0[chan_index] = res;
1132 }
1133 break;
1134
1135 case TGSI_OPCODE_RSQ:
1136 /* TGSI_OPCODE_RECIPSQRT */
1137 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1138 src0 = lp_build_abs(&bld->base, src0);
1139 res = lp_build_rsqrt(&bld->base, src0);
1140 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1141 dst0[chan_index] = res;
1142 }
1143 break;
1144
1145 case TGSI_OPCODE_EXP:
1146 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1147 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1148 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1149 LLVMValueRef *p_exp2_int_part = NULL;
1150 LLVMValueRef *p_frac_part = NULL;
1151 LLVMValueRef *p_exp2 = NULL;
1152
1153 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1154
1155 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1156 p_exp2_int_part = &tmp0;
1157 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1158 p_frac_part = &tmp1;
1159 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1160 p_exp2 = &tmp2;
1161
1162 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1163
1164 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1165 dst0[CHAN_X] = tmp0;
1166 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1167 dst0[CHAN_Y] = tmp1;
1168 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1169 dst0[CHAN_Z] = tmp2;
1170 }
1171 /* dst.w = 1.0 */
1172 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1173 dst0[CHAN_W] = bld->base.one;
1174 }
1175 break;
1176
1177 case TGSI_OPCODE_LOG:
1178 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1179 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1180 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1181 LLVMValueRef *p_floor_log2 = NULL;
1182 LLVMValueRef *p_exp = NULL;
1183 LLVMValueRef *p_log2 = NULL;
1184
1185 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1186 src0 = lp_build_abs( &bld->base, src0 );
1187
1188 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1189 p_floor_log2 = &tmp0;
1190 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1191 p_exp = &tmp1;
1192 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1193 p_log2 = &tmp2;
1194
1195 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1196
1197 /* dst.x = floor(lg2(abs(src.x))) */
1198 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1199 dst0[CHAN_X] = tmp0;
1200 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1201 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1202 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1203 }
1204 /* dst.z = lg2(abs(src.x)) */
1205 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1206 dst0[CHAN_Z] = tmp2;
1207 }
1208 /* dst.w = 1.0 */
1209 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1210 dst0[CHAN_W] = bld->base.one;
1211 }
1212 break;
1213
1214 case TGSI_OPCODE_MUL:
1215 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1216 src0 = emit_fetch( bld, inst, 0, chan_index );
1217 src1 = emit_fetch( bld, inst, 1, chan_index );
1218 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1219 }
1220 break;
1221
1222 case TGSI_OPCODE_ADD:
1223 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1224 src0 = emit_fetch( bld, inst, 0, chan_index );
1225 src1 = emit_fetch( bld, inst, 1, chan_index );
1226 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1227 }
1228 break;
1229
1230 case TGSI_OPCODE_DP3:
1231 /* TGSI_OPCODE_DOT3 */
1232 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1233 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1234 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1235 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1236 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1237 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1238 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1239 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1240 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1241 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1242 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1243 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1244 dst0[chan_index] = tmp0;
1245 }
1246 break;
1247
1248 case TGSI_OPCODE_DP4:
1249 /* TGSI_OPCODE_DOT4 */
1250 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1251 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1252 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1253 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1254 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1255 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1256 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1257 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1258 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1259 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1260 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1261 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1262 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1263 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1264 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1265 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1266 dst0[chan_index] = tmp0;
1267 }
1268 break;
1269
1270 case TGSI_OPCODE_DST:
1271 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1272 dst0[CHAN_X] = bld->base.one;
1273 }
1274 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1275 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1276 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1277 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1278 }
1279 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1280 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1281 }
1282 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1283 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1284 }
1285 break;
1286
1287 case TGSI_OPCODE_MIN:
1288 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1289 src0 = emit_fetch( bld, inst, 0, chan_index );
1290 src1 = emit_fetch( bld, inst, 1, chan_index );
1291 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1292 }
1293 break;
1294
1295 case TGSI_OPCODE_MAX:
1296 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1297 src0 = emit_fetch( bld, inst, 0, chan_index );
1298 src1 = emit_fetch( bld, inst, 1, chan_index );
1299 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1300 }
1301 break;
1302
1303 case TGSI_OPCODE_SLT:
1304 /* TGSI_OPCODE_SETLT */
1305 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1306 src0 = emit_fetch( bld, inst, 0, chan_index );
1307 src1 = emit_fetch( bld, inst, 1, chan_index );
1308 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1309 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1310 }
1311 break;
1312
1313 case TGSI_OPCODE_SGE:
1314 /* TGSI_OPCODE_SETGE */
1315 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1316 src0 = emit_fetch( bld, inst, 0, chan_index );
1317 src1 = emit_fetch( bld, inst, 1, chan_index );
1318 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1319 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1320 }
1321 break;
1322
1323 case TGSI_OPCODE_MAD:
1324 /* TGSI_OPCODE_MADD */
1325 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1326 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1327 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1328 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1329 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1330 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1331 dst0[chan_index] = tmp0;
1332 }
1333 break;
1334
1335 case TGSI_OPCODE_SUB:
1336 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1337 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1338 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1339 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1340 }
1341 break;
1342
1343 case TGSI_OPCODE_LRP:
1344 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1345 src0 = emit_fetch( bld, inst, 0, chan_index );
1346 src1 = emit_fetch( bld, inst, 1, chan_index );
1347 src2 = emit_fetch( bld, inst, 2, chan_index );
1348 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1349 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1350 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1351 }
1352 break;
1353
1354 case TGSI_OPCODE_CND:
1355 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1356 src0 = emit_fetch( bld, inst, 0, chan_index );
1357 src1 = emit_fetch( bld, inst, 1, chan_index );
1358 src2 = emit_fetch( bld, inst, 2, chan_index );
1359 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1360 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1361 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1362 }
1363 break;
1364
1365 case TGSI_OPCODE_DP2A:
1366 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1367 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1368 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1369 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1370 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1371 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1372 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1373 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1374 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1375 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1376 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1377 }
1378 break;
1379
1380 case TGSI_OPCODE_FRC:
1381 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1382 src0 = emit_fetch( bld, inst, 0, chan_index );
1383 tmp0 = lp_build_floor(&bld->base, src0);
1384 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1385 dst0[chan_index] = tmp0;
1386 }
1387 break;
1388
1389 case TGSI_OPCODE_CLAMP:
1390 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1391 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1392 src1 = emit_fetch( bld, inst, 1, chan_index );
1393 src2 = emit_fetch( bld, inst, 2, chan_index );
1394 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1395 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1396 dst0[chan_index] = tmp0;
1397 }
1398 break;
1399
1400 case TGSI_OPCODE_FLR:
1401 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1402 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1403 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1404 }
1405 break;
1406
1407 case TGSI_OPCODE_ROUND:
1408 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1409 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1410 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1411 }
1412 break;
1413
1414 case TGSI_OPCODE_EX2: {
1415 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1416 tmp0 = lp_build_exp2( &bld->base, tmp0);
1417 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1418 dst0[chan_index] = tmp0;
1419 }
1420 break;
1421 }
1422
1423 case TGSI_OPCODE_LG2:
1424 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1425 tmp0 = lp_build_log2( &bld->base, tmp0);
1426 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1427 dst0[chan_index] = tmp0;
1428 }
1429 break;
1430
1431 case TGSI_OPCODE_POW:
1432 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1433 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1434 res = lp_build_pow( &bld->base, src0, src1 );
1435 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1436 dst0[chan_index] = res;
1437 }
1438 break;
1439
1440 case TGSI_OPCODE_XPD:
1441 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1442 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1443 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1444 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1445 }
1446 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1447 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1448 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1449 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1450 }
1451 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1452 tmp2 = tmp0;
1453 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1454 tmp5 = tmp3;
1455 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1456 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1457 dst0[CHAN_X] = tmp2;
1458 }
1459 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1460 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1461 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1462 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1463 }
1464 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1465 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1466 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1467 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1468 dst0[CHAN_Y] = tmp3;
1469 }
1470 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1471 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1472 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1473 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1474 dst0[CHAN_Z] = tmp5;
1475 }
1476 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1477 dst0[CHAN_W] = bld->base.one;
1478 }
1479 break;
1480
1481 case TGSI_OPCODE_ABS:
1482 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1483 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1484 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1485 }
1486 break;
1487
1488 case TGSI_OPCODE_RCC:
1489 /* deprecated? */
1490 assert(0);
1491 return FALSE;
1492
1493 case TGSI_OPCODE_DPH:
1494 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1495 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1496 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1497 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1498 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1499 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1500 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1501 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1502 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1503 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1504 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1505 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1506 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1507 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1508 dst0[chan_index] = tmp0;
1509 }
1510 break;
1511
1512 case TGSI_OPCODE_COS:
1513 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1514 tmp0 = lp_build_cos( &bld->base, tmp0 );
1515 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1516 dst0[chan_index] = tmp0;
1517 }
1518 break;
1519
1520 case TGSI_OPCODE_DDX:
1521 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1522 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1523 }
1524 break;
1525
1526 case TGSI_OPCODE_DDY:
1527 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1528 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1529 }
1530 break;
1531
1532 case TGSI_OPCODE_KILP:
1533 /* predicated kill */
1534 emit_kilp( bld, inst );
1535 break;
1536
1537 case TGSI_OPCODE_KIL:
1538 /* conditional kill */
1539 emit_kil( bld, inst );
1540 break;
1541
1542 case TGSI_OPCODE_PK2H:
1543 return FALSE;
1544 break;
1545
1546 case TGSI_OPCODE_PK2US:
1547 return FALSE;
1548 break;
1549
1550 case TGSI_OPCODE_PK4B:
1551 return FALSE;
1552 break;
1553
1554 case TGSI_OPCODE_PK4UB:
1555 return FALSE;
1556 break;
1557
1558 case TGSI_OPCODE_RFL:
1559 return FALSE;
1560 break;
1561
1562 case TGSI_OPCODE_SEQ:
1563 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1564 src0 = emit_fetch( bld, inst, 0, chan_index );
1565 src1 = emit_fetch( bld, inst, 1, chan_index );
1566 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1567 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1568 }
1569 break;
1570
1571 case TGSI_OPCODE_SFL:
1572 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1573 dst0[chan_index] = bld->base.zero;
1574 }
1575 break;
1576
1577 case TGSI_OPCODE_SGT:
1578 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1579 src0 = emit_fetch( bld, inst, 0, chan_index );
1580 src1 = emit_fetch( bld, inst, 1, chan_index );
1581 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1582 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1583 }
1584 break;
1585
1586 case TGSI_OPCODE_SIN:
1587 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1588 tmp0 = lp_build_sin( &bld->base, tmp0 );
1589 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1590 dst0[chan_index] = tmp0;
1591 }
1592 break;
1593
1594 case TGSI_OPCODE_SLE:
1595 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1596 src0 = emit_fetch( bld, inst, 0, chan_index );
1597 src1 = emit_fetch( bld, inst, 1, chan_index );
1598 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1599 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1600 }
1601 break;
1602
1603 case TGSI_OPCODE_SNE:
1604 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1605 src0 = emit_fetch( bld, inst, 0, chan_index );
1606 src1 = emit_fetch( bld, inst, 1, chan_index );
1607 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1608 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1609 }
1610 break;
1611
1612 case TGSI_OPCODE_STR:
1613 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1614 dst0[chan_index] = bld->base.one;
1615 }
1616 break;
1617
1618 case TGSI_OPCODE_TEX:
1619 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1620 break;
1621
1622 case TGSI_OPCODE_TXD:
1623 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1624 break;
1625
1626 case TGSI_OPCODE_UP2H:
1627 /* deprecated */
1628 assert (0);
1629 return FALSE;
1630 break;
1631
1632 case TGSI_OPCODE_UP2US:
1633 /* deprecated */
1634 assert(0);
1635 return FALSE;
1636 break;
1637
1638 case TGSI_OPCODE_UP4B:
1639 /* deprecated */
1640 assert(0);
1641 return FALSE;
1642 break;
1643
1644 case TGSI_OPCODE_UP4UB:
1645 /* deprecated */
1646 assert(0);
1647 return FALSE;
1648 break;
1649
1650 case TGSI_OPCODE_X2D:
1651 /* deprecated? */
1652 assert(0);
1653 return FALSE;
1654 break;
1655
1656 case TGSI_OPCODE_ARA:
1657 /* deprecated */
1658 assert(0);
1659 return FALSE;
1660 break;
1661
1662 case TGSI_OPCODE_ARR:
1663 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1664 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1665 tmp0 = lp_build_round(&bld->base, tmp0);
1666 dst0[chan_index] = tmp0;
1667 }
1668 break;
1669
1670 case TGSI_OPCODE_BRA:
1671 /* deprecated */
1672 assert(0);
1673 return FALSE;
1674 break;
1675
1676 case TGSI_OPCODE_CAL:
1677 lp_exec_mask_call(&bld->exec_mask,
1678 inst->Label.Label,
1679 pc);
1680
1681 break;
1682
1683 case TGSI_OPCODE_RET:
1684 lp_exec_mask_ret(&bld->exec_mask, pc);
1685 break;
1686
1687 case TGSI_OPCODE_END:
1688 *pc = -1;
1689 break;
1690
1691 case TGSI_OPCODE_SSG:
1692 /* TGSI_OPCODE_SGN */
1693 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1694 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1695 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1696 }
1697 break;
1698
1699 case TGSI_OPCODE_CMP:
1700 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1701 src0 = emit_fetch( bld, inst, 0, chan_index );
1702 src1 = emit_fetch( bld, inst, 1, chan_index );
1703 src2 = emit_fetch( bld, inst, 2, chan_index );
1704 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1705 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1706 }
1707 break;
1708
1709 case TGSI_OPCODE_SCS:
1710 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1711 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1712 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1713 }
1714 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1715 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1716 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1717 }
1718 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1719 dst0[CHAN_Z] = bld->base.zero;
1720 }
1721 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1722 dst0[CHAN_W] = bld->base.one;
1723 }
1724 break;
1725
1726 case TGSI_OPCODE_TXB:
1727 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1728 break;
1729
1730 case TGSI_OPCODE_NRM:
1731 /* fall-through */
1732 case TGSI_OPCODE_NRM4:
1733 /* 3 or 4-component normalization */
1734 {
1735 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1736
1737 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1738 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1739 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1740 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1741
1742 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1743
1744 /* xmm4 = src.x */
1745 /* xmm0 = src.x * src.x */
1746 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1747 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1748 tmp4 = tmp0;
1749 }
1750 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1751
1752 /* xmm5 = src.y */
1753 /* xmm0 = xmm0 + src.y * src.y */
1754 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1755 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1756 tmp5 = tmp1;
1757 }
1758 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1759 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1760
1761 /* xmm6 = src.z */
1762 /* xmm0 = xmm0 + src.z * src.z */
1763 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1764 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1765 tmp6 = tmp1;
1766 }
1767 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1768 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1769
1770 if (dims == 4) {
1771 /* xmm7 = src.w */
1772 /* xmm0 = xmm0 + src.w * src.w */
1773 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1774 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1775 tmp7 = tmp1;
1776 }
1777 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1778 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1779 }
1780
1781 /* xmm1 = 1 / sqrt(xmm0) */
1782 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1783
1784 /* dst.x = xmm1 * src.x */
1785 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1786 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1787 }
1788
1789 /* dst.y = xmm1 * src.y */
1790 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1791 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1792 }
1793
1794 /* dst.z = xmm1 * src.z */
1795 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1796 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1797 }
1798
1799 /* dst.w = xmm1 * src.w */
1800 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1801 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1802 }
1803 }
1804
1805 /* dst.w = 1.0 */
1806 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1807 dst0[CHAN_W] = bld->base.one;
1808 }
1809 }
1810 break;
1811
1812 case TGSI_OPCODE_DIV:
1813 /* deprecated */
1814 assert( 0 );
1815 return FALSE;
1816 break;
1817
1818 case TGSI_OPCODE_DP2:
1819 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1820 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1821 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1822 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1823 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1824 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1825 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1826 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1827 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1828 }
1829 break;
1830
1831 case TGSI_OPCODE_TXL:
1832 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1833 break;
1834
1835 case TGSI_OPCODE_TXP:
1836 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1837 break;
1838
1839 case TGSI_OPCODE_BRK:
1840 lp_exec_break(&bld->exec_mask);
1841 break;
1842
1843 case TGSI_OPCODE_IF:
1844 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1845 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1846 tmp0, bld->base.zero);
1847 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1848 break;
1849
1850 case TGSI_OPCODE_BGNLOOP:
1851 lp_exec_bgnloop(&bld->exec_mask);
1852 break;
1853
1854 case TGSI_OPCODE_BGNSUB:
1855 lp_exec_mask_bgnsub(&bld->exec_mask);
1856 break;
1857
1858 case TGSI_OPCODE_ELSE:
1859 lp_exec_mask_cond_invert(&bld->exec_mask);
1860 break;
1861
1862 case TGSI_OPCODE_ENDIF:
1863 lp_exec_mask_cond_pop(&bld->exec_mask);
1864 break;
1865
1866 case TGSI_OPCODE_ENDLOOP:
1867 lp_exec_endloop(&bld->exec_mask);
1868 break;
1869
1870 case TGSI_OPCODE_ENDSUB:
1871 lp_exec_mask_endsub(&bld->exec_mask, pc);
1872 break;
1873
1874 case TGSI_OPCODE_PUSHA:
1875 /* deprecated? */
1876 assert(0);
1877 return FALSE;
1878 break;
1879
1880 case TGSI_OPCODE_POPA:
1881 /* deprecated? */
1882 assert(0);
1883 return FALSE;
1884 break;
1885
1886 case TGSI_OPCODE_CEIL:
1887 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1888 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1889 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1890 }
1891 break;
1892
1893 case TGSI_OPCODE_I2F:
1894 /* deprecated? */
1895 assert(0);
1896 return FALSE;
1897 break;
1898
1899 case TGSI_OPCODE_NOT:
1900 /* deprecated? */
1901 assert(0);
1902 return FALSE;
1903 break;
1904
1905 case TGSI_OPCODE_TRUNC:
1906 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1907 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1908 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1909 }
1910 break;
1911
1912 case TGSI_OPCODE_SHL:
1913 /* deprecated? */
1914 assert(0);
1915 return FALSE;
1916 break;
1917
1918 case TGSI_OPCODE_ISHR:
1919 /* deprecated? */
1920 assert(0);
1921 return FALSE;
1922 break;
1923
1924 case TGSI_OPCODE_AND:
1925 /* deprecated? */
1926 assert(0);
1927 return FALSE;
1928 break;
1929
1930 case TGSI_OPCODE_OR:
1931 /* deprecated? */
1932 assert(0);
1933 return FALSE;
1934 break;
1935
1936 case TGSI_OPCODE_MOD:
1937 /* deprecated? */
1938 assert(0);
1939 return FALSE;
1940 break;
1941
1942 case TGSI_OPCODE_XOR:
1943 /* deprecated? */
1944 assert(0);
1945 return FALSE;
1946 break;
1947
1948 case TGSI_OPCODE_SAD:
1949 /* deprecated? */
1950 assert(0);
1951 return FALSE;
1952 break;
1953
1954 case TGSI_OPCODE_TXF:
1955 /* deprecated? */
1956 assert(0);
1957 return FALSE;
1958 break;
1959
1960 case TGSI_OPCODE_TXQ:
1961 /* deprecated? */
1962 assert(0);
1963 return FALSE;
1964 break;
1965
1966 case TGSI_OPCODE_CONT:
1967 lp_exec_continue(&bld->exec_mask);
1968 break;
1969
1970 case TGSI_OPCODE_EMIT:
1971 return FALSE;
1972 break;
1973
1974 case TGSI_OPCODE_ENDPRIM:
1975 return FALSE;
1976 break;
1977
1978 case TGSI_OPCODE_NOP:
1979 break;
1980
1981 default:
1982 return FALSE;
1983 }
1984
1985 if(info->num_dst) {
1986 LLVMValueRef pred[NUM_CHANNELS];
1987
1988 emit_fetch_predicate( bld, inst, pred );
1989
1990 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1991 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1992 }
1993 }
1994
1995 return TRUE;
1996 }
1997
1998
1999 void
2000 lp_build_tgsi_soa(LLVMBuilderRef builder,
2001 const struct tgsi_token *tokens,
2002 struct lp_type type,
2003 struct lp_build_mask_context *mask,
2004 LLVMValueRef consts_ptr,
2005 const LLVMValueRef *pos,
2006 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2007 LLVMValueRef (*outputs)[NUM_CHANNELS],
2008 struct lp_build_sampler_soa *sampler,
2009 const struct tgsi_shader_info *info)
2010 {
2011 struct lp_build_tgsi_soa_context bld;
2012 struct tgsi_parse_context parse;
2013 uint num_immediates = 0;
2014 uint num_instructions = 0;
2015 unsigned i;
2016 int pc = 0;
2017
2018 /* Setup build context */
2019 memset(&bld, 0, sizeof bld);
2020 lp_build_context_init(&bld.base, builder, type);
2021 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
2022 bld.mask = mask;
2023 bld.pos = pos;
2024 bld.inputs = inputs;
2025 bld.outputs = outputs;
2026 bld.consts_ptr = consts_ptr;
2027 bld.sampler = sampler;
2028 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
2029 info->opcode_count[TGSI_OPCODE_ARL] > 0;
2030 bld.instructions = (struct tgsi_full_instruction *)
2031 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2032 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2033
2034 if (!bld.instructions) {
2035 return;
2036 }
2037
2038 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2039
2040 tgsi_parse_init( &parse, tokens );
2041
2042 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2043 tgsi_parse_token( &parse );
2044
2045 switch( parse.FullToken.Token.Type ) {
2046 case TGSI_TOKEN_TYPE_DECLARATION:
2047 /* Inputs already interpolated */
2048 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2049 break;
2050
2051 case TGSI_TOKEN_TYPE_INSTRUCTION:
2052 {
2053 /* save expanded instruction */
2054 if (num_instructions == bld.max_instructions) {
2055 bld.instructions = REALLOC(bld.instructions,
2056 bld.max_instructions
2057 * sizeof(struct tgsi_full_instruction),
2058 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2059 * sizeof(struct tgsi_full_instruction));
2060 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2061 }
2062
2063 memcpy(bld.instructions + num_instructions,
2064 &parse.FullToken.FullInstruction,
2065 sizeof(bld.instructions[0]));
2066
2067 num_instructions++;
2068 }
2069
2070 break;
2071
2072 case TGSI_TOKEN_TYPE_IMMEDIATE:
2073 /* simply copy the immediate values into the next immediates[] slot */
2074 {
2075 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2076 assert(size <= 4);
2077 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2078 for( i = 0; i < size; ++i )
2079 bld.immediates[num_immediates][i] =
2080 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2081 for( i = size; i < 4; ++i )
2082 bld.immediates[num_immediates][i] = bld.base.undef;
2083 num_immediates++;
2084 }
2085 break;
2086
2087 case TGSI_TOKEN_TYPE_PROPERTY:
2088 break;
2089
2090 default:
2091 assert( 0 );
2092 }
2093 }
2094
2095 while (pc != -1) {
2096 struct tgsi_full_instruction *instr = bld.instructions + pc;
2097 const struct tgsi_opcode_info *opcode_info =
2098 tgsi_get_opcode_info(instr->Instruction.Opcode);
2099 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2100 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2101 opcode_info->mnemonic);
2102 }
2103
2104 if (0) {
2105 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2106 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2107 debug_printf("11111111111111111111111111111 \n");
2108 tgsi_dump(tokens, 0);
2109 lp_debug_dump_value(function);
2110 debug_printf("2222222222222222222222222222 \n");
2111 }
2112 tgsi_parse_free( &parse );
2113
2114 if (0) {
2115 LLVMModuleRef module = LLVMGetGlobalParent(
2116 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2117 LLVMDumpModule(module);
2118
2119 }
2120
2121 FREE( bld.instructions );
2122 }
2123