Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
62 #define FOR_EACH_CHANNEL( CHAN )\
63 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
64
65 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
66 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
67
68 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
69 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
70
71 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
72 FOR_EACH_CHANNEL( CHAN )\
73 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
74
75 #define CHAN_X 0
76 #define CHAN_Y 1
77 #define CHAN_Z 2
78 #define CHAN_W 3
79 #define NUM_CHANNELS 4
80
81 #define LP_MAX_INSTRUCTIONS 256
82
83
/**
 * Execution-mask state used to translate TGSI control flow (IF/ELSE,
 * loops, subroutine CAL/RET) into predicated SoA code.  The combined
 * exec_mask selects which SIMD channels are currently "alive".
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any stack below is non-empty, i.e. stores must be
    * predicated by exec_mask. */
   boolean has_mask;

   /* Integer vector type matching bld->type, used for all masks. */
   LLVMTypeRef int_vec_type;

   /* IF/ELSE condition masks; cond_mask is the AND of all entries. */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Current loop state; saved/restored on the stack below for nesting. */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;   /* alloca holding break_mask across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Subroutine call state: channels disabled by RET, plus the saved
    * return pc / ret_mask per nested CAL. */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* Combination of cond/cont/break/ret masks (see lp_exec_mask_update). */
   LLVMValueRef exec_mask;
};
116
/**
 * Context carried through the TGSI → LLVM SoA translation of one shader.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;                        /* constant buffer base */
   const LLVMValueRef *pos;                        /* fragment position */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];     /* interpolated inputs */
   LLVMValueRef (*outputs)[NUM_CHANNELS];          /* output allocas */

   /* Code generator for texture sampling; may be NULL (see emit_tex). */
   const struct lp_build_sampler_soa *sampler;

   /* Per-register, per-channel storage.  temps/addr/preds hold allocas. */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;   /* fragment kill mask */
   struct lp_exec_mask exec_mask;        /* control-flow execution mask */

   /* Instruction buffer, needed for subroutine pc-based jumps. */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
151
152 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
153 {
154 mask->bld = bld;
155 mask->has_mask = FALSE;
156 mask->cond_stack_size = 0;
157 mask->loop_stack_size = 0;
158 mask->call_stack_size = 0;
159
160 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
161 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
162 LLVMConstAllOnes(mask->int_vec_type);
163 }
164
/**
 * Recompute exec_mask from the condition, loop and call masks, and
 * refresh has_mask.  Called after any of the component masks change.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      /* exec = cond & cont & break */
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   /* Inside a subroutine, channels disabled by RET are also masked off. */
   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}
193
194 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
195 LLVMValueRef val)
196 {
197 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
198 if (mask->cond_stack_size == 0) {
199 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
200 }
201 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
202 assert(LLVMTypeOf(val) == mask->int_vec_type);
203 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
204 mask->cond_mask,
205 val,
206 "");
207 lp_exec_mask_update(mask);
208 }
209
210 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
211 {
212 LLVMValueRef prev_mask;
213 LLVMValueRef inv_mask;
214
215 assert(mask->cond_stack_size);
216 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
217 if (mask->cond_stack_size == 1) {
218 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
219 }
220
221 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
222
223 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
224 inv_mask,
225 prev_mask, "");
226 lp_exec_mask_update(mask);
227 }
228
229 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
230 {
231 assert(mask->cond_stack_size);
232 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
233 lp_exec_mask_update(mask);
234 }
235
/**
 * Begin a TGSI loop: push the current loop state, create the break
 * variable (an alloca so break_mask survives across iterations), and
 * open the loop's basic block.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* outermost loop: state must be in its pristine condition */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask lives in memory so updates persist across back-edges */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload break_mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
264
265 static void lp_exec_break(struct lp_exec_mask *mask)
266 {
267 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
268 mask->exec_mask,
269 "break");
270
271 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
272 mask->break_mask,
273 exec_mask, "break_full");
274
275 lp_exec_mask_update(mask);
276 }
277
278 static void lp_exec_continue(struct lp_exec_mask *mask)
279 {
280 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
281 mask->exec_mask,
282 "");
283
284 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
285 mask->cont_mask,
286 exec_mask, "");
287
288 lp_exec_mask_update(mask);
289 }
290
291
/**
 * End a TGSI loop: store break_mask back to its alloca, emit the
 * conditional back-edge (loop again while any channel is still active),
 * then pop the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* integer wide enough to hold the whole mask vector as one scalar */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. at least one channel still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop back while any channel is active, otherwise fall through */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
337
/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * 'pred' is an optional per-channel predicate mask (may be NULL); it is
 * combined with exec_mask when one is active.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      /* masked store: load old value and blend per-channel */
      LLVMValueRef real_val, dst_val;

      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* no mask active: plain unconditional store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}
369
370 static void lp_exec_mask_call(struct lp_exec_mask *mask,
371 int func,
372 int *pc)
373 {
374 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
375 mask->call_stack[mask->call_stack_size].pc = *pc;
376 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
377 mask->call_stack_size++;
378 *pc = func;
379 }
380
381 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
382 {
383 LLVMValueRef exec_mask;
384
385 if (mask->call_stack_size == 0) {
386 /* returning from main() */
387 *pc = -1;
388 return;
389 }
390 exec_mask = LLVMBuildNot(mask->bld->builder,
391 mask->exec_mask,
392 "ret");
393
394 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
395 mask->ret_mask,
396 exec_mask, "ret_full");
397
398 lp_exec_mask_update(mask);
399 }
400
/* TGSI BGNSUB: nothing to do -- the call/return state is handled entirely
 * by lp_exec_mask_call() and lp_exec_mask_endsub(). */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
404
405 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
406 {
407 assert(mask->call_stack_size);
408 mask->call_stack_size--;
409 *pc = mask->call_stack[mask->call_stack_size].pc;
410 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
411 lp_exec_mask_update(mask);
412 }
413
414
415 /**
416 * Return pointer to a temporary register channel (src or dest).
417 * Note that indirect addressing cannot be handled here.
418 * \param index which temporary register
419 * \param chan which channel of the temp register.
420 */
421 static LLVMValueRef
422 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
423 unsigned index,
424 unsigned chan)
425 {
426 assert(chan < 4);
427 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
428 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
429 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
430 }
431 else {
432 return bld->temps[index][chan];
433 }
434 }
435
436
/**
 * Gather vector.
 * Loads one scalar per lane from base_ptr[indexes[i]] and packs the
 * results into a single vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}
466
467
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, multiply by four and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
                     const struct tgsi_src_register *indirect_reg)
{
   /* always use X component of address register */
   const int x = indirect_reg->SwizzleX;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
   uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
   /* scale factor: 4 channels per register slot */
   LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
   LLVMValueRef addr_vec;

   addr_vec = LLVMBuildLoad(bld->base.builder,
                            bld->addr[indirect_reg->Index][swizzle],
                            "load addr reg");

   /* for indexing we want integers */
   addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
                              int_vec_type, "");

   /* addr_vec = addr_vec * 4 */
   addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);

   return addr_vec;
}
498
499
/**
 * Register fetch.
 * Fetch one SoA vector for channel 'chan_index' of source operand
 * 'src_op', honoring swizzles, indirect addressing and the source
 * sign mode (abs/negate).
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr_vec = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      assert(bld->indirect_files);
      /* per-lane element offsets computed from the ADDR register */
      addr_vec = get_indirect_offsets(bld, &reg->Indirect);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* index into the const buffer */

         assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec = index_vec + addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         /* direct access: load one scalar and broadcast to all lanes */
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef vec_len =
            lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the const buffer */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec += addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* index_vec *= vector_length -- each array slot is one SoA vector */
         index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the source modifier (abs / negate / both) */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
628
629
630 /**
631 * Register fetch with derivatives.
632 */
633 static void
634 emit_fetch_deriv(
635 struct lp_build_tgsi_soa_context *bld,
636 const struct tgsi_full_instruction *inst,
637 unsigned index,
638 const unsigned chan_index,
639 LLVMValueRef *res,
640 LLVMValueRef *ddx,
641 LLVMValueRef *ddy)
642 {
643 LLVMValueRef src;
644
645 src = emit_fetch(bld, inst, index, chan_index);
646
647 if(res)
648 *res = src;
649
650 /* TODO: use interpolation coeffs for inputs */
651
652 if(ddx)
653 *ddx = lp_build_ddx(&bld->base, src);
654
655 if(ddy)
656 *ddy = lp_build_ddy(&bld->base, src);
657 }
658
659
/**
 * Predicate.
 * Build per-channel predicate masks for the instruction, or all-NULL
 * when the instruction is not predicated.  Each mask is an integer
 * vector: ~0 where the predicate register channel is non-zero.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* unpredicated: NULL means "no mask" to lp_exec_mask_store() */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
725
726
727 /**
728 * Register store.
729 */
730 static void
731 emit_store(
732 struct lp_build_tgsi_soa_context *bld,
733 const struct tgsi_full_instruction *inst,
734 unsigned index,
735 unsigned chan_index,
736 LLVMValueRef pred,
737 LLVMValueRef value)
738 {
739 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
740 LLVMValueRef addr = NULL;
741
742 switch( inst->Instruction.Saturate ) {
743 case TGSI_SAT_NONE:
744 break;
745
746 case TGSI_SAT_ZERO_ONE:
747 value = lp_build_max(&bld->base, value, bld->base.zero);
748 value = lp_build_min(&bld->base, value, bld->base.one);
749 break;
750
751 case TGSI_SAT_MINUS_PLUS_ONE:
752 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
753 value = lp_build_min(&bld->base, value, bld->base.one);
754 break;
755
756 default:
757 assert(0);
758 }
759
760 if (reg->Register.Indirect) {
761 /* XXX use get_indirect_offsets() here eventually */
762 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
763 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
764
765 assert(bld->indirect_files);
766
767 addr = LLVMBuildLoad(bld->base.builder,
768 bld->addr[reg->Indirect.Index][swizzle],
769 "");
770 /* for indexing we want integers */
771 addr = LLVMBuildFPToSI(bld->base.builder, addr,
772 int_vec_type, "");
773 addr = LLVMBuildExtractElement(bld->base.builder,
774 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
775 "");
776 addr = LLVMBuildMul(bld->base.builder,
777 addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
778 "");
779 }
780
781 switch( reg->Register.File ) {
782 case TGSI_FILE_OUTPUT:
783 lp_exec_mask_store(&bld->exec_mask, pred, value,
784 bld->outputs[reg->Register.Index][chan_index]);
785 break;
786
787 case TGSI_FILE_TEMPORARY:
788 if (reg->Register.Indirect) {
789 /* XXX not done yet */
790 debug_printf("WARNING: LLVM scatter store of temp regs"
791 " not implemented\n");
792 }
793 else {
794 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
795 chan_index);
796 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
797 }
798 break;
799
800 case TGSI_FILE_ADDRESS:
801 lp_exec_mask_store(&bld->exec_mask, pred, value,
802 bld->addr[reg->Indirect.Index][chan_index]);
803 break;
804
805 case TGSI_FILE_PREDICATE:
806 lp_exec_mask_store(&bld->exec_mask, pred, value,
807 bld->preds[reg->Register.Index][chan_index]);
808 break;
809
810 default:
811 assert( 0 );
812 }
813 }
814
815
816 /**
817 * High-level instruction translators.
818 */
819
/** How the texture instruction modifies the basic sample operation. */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* coords divided by src.w (e.g. TXP) */
   TEX_MODIFIER_LOD_BIAS,       /* lod bias in src.w (e.g. TXB) */
   TEX_MODIFIER_EXPLICIT_LOD,   /* explicit lod in src.w (e.g. TXL) */
   TEX_MODIFIER_EXPLICIT_DERIV  /* explicit derivatives in src1/src2 (TXD) */
};
827
/**
 * Emit a texture sample instruction: fetch/compute coordinates, lod
 * controls and derivatives per 'modifier', then delegate the actual
 * sampling to bld->sampler.  Writes four channels into texel[].
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator supplied: return undef rather than crash */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinates consumed by the target (shadow targets
    * include the comparison value as a coordinate) */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias and explicit lod both live in src0.w, mutually exclusive */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projective: divide coords by src0.w (multiply by reciprocal) */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives supplied in src1/src2, sampler in src3 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives from neighboring quad pixels, sampler in src1 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
924
925
/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel "keep" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask)
      lp_build_mask_update(bld->mask, mask);
}
974
975
976 /**
977 * Predicated fragment kill.
978 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
979 * The only predication is the execution mask which will apply if
980 * we're inside a loop or conditional.
981 */
982 static void
983 emit_kilp(struct lp_build_tgsi_soa_context *bld,
984 const struct tgsi_full_instruction *inst)
985 {
986 LLVMValueRef mask;
987
988 /* For those channels which are "alive", disable fragment shader
989 * execution.
990 */
991 if (bld->exec_mask.has_mask) {
992 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
993 }
994 else {
995 mask = bld->base.zero;
996 }
997
998 lp_build_mask_update(bld->mask, mask);
999 }
1000
/**
 * Process a TGSI declaration: allocate storage (allocas) for the declared
 * range of temporary / output / address / predicate registers.
 */
static void
emit_declaration(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_declaration *decl)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
            /* indirectly-addressed temps live in one flat array of
             * last*4+4 vectors (4 channels per register).
             * NOTE(review): this alloca runs once per idx in [first,last];
             * only the final assignment to temps_array survives -- looks
             * hoistable out of the loop, confirm before changing. */
            LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                                   last*4 + 4, 0);
            bld->temps_array = lp_build_array_alloca(bld->base.builder,
                                                     vec_type, array_size, "");
         } else {
            for (i = 0; i < NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
                                                    vec_type, "");
         }
         break;

      case TGSI_FILE_OUTPUT:
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
                                                   vec_type, "");
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
                                                vec_type, "");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
                                                 vec_type, "");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
1054
1055
1056 /**
1057 * Emit LLVM for one TGSI instruction.
1058 * \param return TRUE for success, FALSE otherwise
1059 */
1060 static boolean
1061 emit_instruction(
1062 struct lp_build_tgsi_soa_context *bld,
1063 const struct tgsi_full_instruction *inst,
1064 const struct tgsi_opcode_info *info,
1065 int *pc)
1066 {
1067 unsigned chan_index;
1068 LLVMValueRef src0, src1, src2;
1069 LLVMValueRef tmp0, tmp1, tmp2;
1070 LLVMValueRef tmp3 = NULL;
1071 LLVMValueRef tmp4 = NULL;
1072 LLVMValueRef tmp5 = NULL;
1073 LLVMValueRef tmp6 = NULL;
1074 LLVMValueRef tmp7 = NULL;
1075 LLVMValueRef res;
1076 LLVMValueRef dst0[NUM_CHANNELS];
1077
1078 /*
1079 * Stores and write masks are handled in a general fashion after the long
1080 * instruction opcode switch statement.
1081 *
1082 * Although not stricitly necessary, we avoid generating instructions for
1083 * channels which won't be stored, in cases where's that easy. For some
1084 * complex instructions, like texture sampling, it is more convenient to
1085 * assume a full writemask and then let LLVM optimization passes eliminate
1086 * redundant code.
1087 */
1088
1089 (*pc)++;
1090
1091 assert(info->num_dst <= 1);
1092 if (info->num_dst) {
1093 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1094 dst0[chan_index] = bld->base.undef;
1095 }
1096 }
1097
1098 switch (inst->Instruction.Opcode) {
1099 case TGSI_OPCODE_ARL:
1100 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1101 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1102 tmp0 = lp_build_floor(&bld->base, tmp0);
1103 dst0[chan_index] = tmp0;
1104 }
1105 break;
1106
1107 case TGSI_OPCODE_MOV:
1108 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1109 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1110 }
1111 break;
1112
1113 case TGSI_OPCODE_LIT:
1114 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1115 dst0[CHAN_X] = bld->base.one;
1116 }
1117 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1118 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1119 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1120 }
1121 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1122 /* XMM[1] = SrcReg[0].yyyy */
1123 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1124 /* XMM[1] = max(XMM[1], 0) */
1125 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1126 /* XMM[2] = SrcReg[0].wwww */
1127 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1128 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1129 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1130 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1131 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1132 }
1133 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1134 dst0[CHAN_W] = bld->base.one;
1135 }
1136 break;
1137
1138 case TGSI_OPCODE_RCP:
1139 /* TGSI_OPCODE_RECIP */
1140 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1141 res = lp_build_rcp(&bld->base, src0);
1142 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1143 dst0[chan_index] = res;
1144 }
1145 break;
1146
1147 case TGSI_OPCODE_RSQ:
1148 /* TGSI_OPCODE_RECIPSQRT */
1149 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1150 src0 = lp_build_abs(&bld->base, src0);
1151 res = lp_build_rsqrt(&bld->base, src0);
1152 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1153 dst0[chan_index] = res;
1154 }
1155 break;
1156
1157 case TGSI_OPCODE_EXP:
1158 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1159 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1160 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1161 LLVMValueRef *p_exp2_int_part = NULL;
1162 LLVMValueRef *p_frac_part = NULL;
1163 LLVMValueRef *p_exp2 = NULL;
1164
1165 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1166
1167 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1168 p_exp2_int_part = &tmp0;
1169 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1170 p_frac_part = &tmp1;
1171 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1172 p_exp2 = &tmp2;
1173
1174 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1175
1176 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1177 dst0[CHAN_X] = tmp0;
1178 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1179 dst0[CHAN_Y] = tmp1;
1180 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1181 dst0[CHAN_Z] = tmp2;
1182 }
1183 /* dst.w = 1.0 */
1184 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1185 dst0[CHAN_W] = bld->base.one;
1186 }
1187 break;
1188
1189 case TGSI_OPCODE_LOG:
1190 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1191 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1192 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1193 LLVMValueRef *p_floor_log2 = NULL;
1194 LLVMValueRef *p_exp = NULL;
1195 LLVMValueRef *p_log2 = NULL;
1196
1197 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1198 src0 = lp_build_abs( &bld->base, src0 );
1199
1200 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1201 p_floor_log2 = &tmp0;
1202 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1203 p_exp = &tmp1;
1204 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1205 p_log2 = &tmp2;
1206
1207 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1208
1209 /* dst.x = floor(lg2(abs(src.x))) */
1210 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1211 dst0[CHAN_X] = tmp0;
1212 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1213 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1214 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1215 }
1216 /* dst.z = lg2(abs(src.x)) */
1217 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1218 dst0[CHAN_Z] = tmp2;
1219 }
1220 /* dst.w = 1.0 */
1221 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1222 dst0[CHAN_W] = bld->base.one;
1223 }
1224 break;
1225
1226 case TGSI_OPCODE_MUL:
1227 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1228 src0 = emit_fetch( bld, inst, 0, chan_index );
1229 src1 = emit_fetch( bld, inst, 1, chan_index );
1230 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1231 }
1232 break;
1233
1234 case TGSI_OPCODE_ADD:
1235 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1236 src0 = emit_fetch( bld, inst, 0, chan_index );
1237 src1 = emit_fetch( bld, inst, 1, chan_index );
1238 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1239 }
1240 break;
1241
1242 case TGSI_OPCODE_DP3:
1243 /* TGSI_OPCODE_DOT3 */
1244 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1245 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1246 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1247 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1248 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1249 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1250 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1251 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1252 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1253 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1254 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1255 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1256 dst0[chan_index] = tmp0;
1257 }
1258 break;
1259
1260 case TGSI_OPCODE_DP4:
1261 /* TGSI_OPCODE_DOT4 */
1262 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1263 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1264 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1265 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1266 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1267 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1268 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1269 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1270 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1271 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1272 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1273 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1274 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1275 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1276 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1277 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1278 dst0[chan_index] = tmp0;
1279 }
1280 break;
1281
1282 case TGSI_OPCODE_DST:
1283 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1284 dst0[CHAN_X] = bld->base.one;
1285 }
1286 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1287 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1288 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1289 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1290 }
1291 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1292 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1293 }
1294 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1295 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1296 }
1297 break;
1298
1299 case TGSI_OPCODE_MIN:
1300 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1301 src0 = emit_fetch( bld, inst, 0, chan_index );
1302 src1 = emit_fetch( bld, inst, 1, chan_index );
1303 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1304 }
1305 break;
1306
1307 case TGSI_OPCODE_MAX:
1308 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1309 src0 = emit_fetch( bld, inst, 0, chan_index );
1310 src1 = emit_fetch( bld, inst, 1, chan_index );
1311 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1312 }
1313 break;
1314
1315 case TGSI_OPCODE_SLT:
1316 /* TGSI_OPCODE_SETLT */
1317 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1318 src0 = emit_fetch( bld, inst, 0, chan_index );
1319 src1 = emit_fetch( bld, inst, 1, chan_index );
1320 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1321 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1322 }
1323 break;
1324
1325 case TGSI_OPCODE_SGE:
1326 /* TGSI_OPCODE_SETGE */
1327 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1328 src0 = emit_fetch( bld, inst, 0, chan_index );
1329 src1 = emit_fetch( bld, inst, 1, chan_index );
1330 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1331 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1332 }
1333 break;
1334
1335 case TGSI_OPCODE_MAD:
1336 /* TGSI_OPCODE_MADD */
1337 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1338 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1339 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1340 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1341 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1342 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1343 dst0[chan_index] = tmp0;
1344 }
1345 break;
1346
1347 case TGSI_OPCODE_SUB:
1348 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1349 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1350 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1351 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1352 }
1353 break;
1354
1355 case TGSI_OPCODE_LRP:
1356 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1357 src0 = emit_fetch( bld, inst, 0, chan_index );
1358 src1 = emit_fetch( bld, inst, 1, chan_index );
1359 src2 = emit_fetch( bld, inst, 2, chan_index );
1360 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1361 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1362 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1363 }
1364 break;
1365
1366 case TGSI_OPCODE_CND:
1367 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1368 src0 = emit_fetch( bld, inst, 0, chan_index );
1369 src1 = emit_fetch( bld, inst, 1, chan_index );
1370 src2 = emit_fetch( bld, inst, 2, chan_index );
1371 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1372 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1373 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1374 }
1375 break;
1376
1377 case TGSI_OPCODE_DP2A:
1378 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1379 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1380 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1381 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1382 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1383 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1384 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1385 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1386 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1387 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1388 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1389 }
1390 break;
1391
1392 case TGSI_OPCODE_FRC:
1393 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1394 src0 = emit_fetch( bld, inst, 0, chan_index );
1395 tmp0 = lp_build_floor(&bld->base, src0);
1396 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1397 dst0[chan_index] = tmp0;
1398 }
1399 break;
1400
1401 case TGSI_OPCODE_CLAMP:
1402 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1403 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1404 src1 = emit_fetch( bld, inst, 1, chan_index );
1405 src2 = emit_fetch( bld, inst, 2, chan_index );
1406 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1407 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1408 dst0[chan_index] = tmp0;
1409 }
1410 break;
1411
1412 case TGSI_OPCODE_FLR:
1413 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1414 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1415 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1416 }
1417 break;
1418
1419 case TGSI_OPCODE_ROUND:
1420 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1421 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1422 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1423 }
1424 break;
1425
1426 case TGSI_OPCODE_EX2: {
1427 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1428 tmp0 = lp_build_exp2( &bld->base, tmp0);
1429 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1430 dst0[chan_index] = tmp0;
1431 }
1432 break;
1433 }
1434
1435 case TGSI_OPCODE_LG2:
1436 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1437 tmp0 = lp_build_log2( &bld->base, tmp0);
1438 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1439 dst0[chan_index] = tmp0;
1440 }
1441 break;
1442
1443 case TGSI_OPCODE_POW:
1444 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1445 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1446 res = lp_build_pow( &bld->base, src0, src1 );
1447 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1448 dst0[chan_index] = res;
1449 }
1450 break;
1451
1452 case TGSI_OPCODE_XPD:
1453 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1454 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1455 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1456 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1457 }
1458 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1459 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1460 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1461 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1462 }
1463 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1464 tmp2 = tmp0;
1465 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1466 tmp5 = tmp3;
1467 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1468 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1469 dst0[CHAN_X] = tmp2;
1470 }
1471 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1472 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1473 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1474 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1475 }
1476 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1477 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1478 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1479 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1480 dst0[CHAN_Y] = tmp3;
1481 }
1482 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1483 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1484 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1485 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1486 dst0[CHAN_Z] = tmp5;
1487 }
1488 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1489 dst0[CHAN_W] = bld->base.one;
1490 }
1491 break;
1492
1493 case TGSI_OPCODE_ABS:
1494 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1495 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1496 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1497 }
1498 break;
1499
1500 case TGSI_OPCODE_RCC:
1501 /* deprecated? */
1502 assert(0);
1503 return FALSE;
1504
1505 case TGSI_OPCODE_DPH:
1506 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1507 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1508 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1509 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1510 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1511 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1512 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1513 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1514 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1515 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1516 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1517 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1518 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1519 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1520 dst0[chan_index] = tmp0;
1521 }
1522 break;
1523
1524 case TGSI_OPCODE_COS:
1525 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1526 tmp0 = lp_build_cos( &bld->base, tmp0 );
1527 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1528 dst0[chan_index] = tmp0;
1529 }
1530 break;
1531
1532 case TGSI_OPCODE_DDX:
1533 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1534 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1535 }
1536 break;
1537
1538 case TGSI_OPCODE_DDY:
1539 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1540 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1541 }
1542 break;
1543
1544 case TGSI_OPCODE_KILP:
1545 /* predicated kill */
1546 emit_kilp( bld, inst );
1547 break;
1548
1549 case TGSI_OPCODE_KIL:
1550 /* conditional kill */
1551 emit_kil( bld, inst );
1552 break;
1553
1554 case TGSI_OPCODE_PK2H:
1555 return FALSE;
1556 break;
1557
1558 case TGSI_OPCODE_PK2US:
1559 return FALSE;
1560 break;
1561
1562 case TGSI_OPCODE_PK4B:
1563 return FALSE;
1564 break;
1565
1566 case TGSI_OPCODE_PK4UB:
1567 return FALSE;
1568 break;
1569
1570 case TGSI_OPCODE_RFL:
1571 return FALSE;
1572 break;
1573
1574 case TGSI_OPCODE_SEQ:
1575 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1576 src0 = emit_fetch( bld, inst, 0, chan_index );
1577 src1 = emit_fetch( bld, inst, 1, chan_index );
1578 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1579 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1580 }
1581 break;
1582
1583 case TGSI_OPCODE_SFL:
1584 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1585 dst0[chan_index] = bld->base.zero;
1586 }
1587 break;
1588
1589 case TGSI_OPCODE_SGT:
1590 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1591 src0 = emit_fetch( bld, inst, 0, chan_index );
1592 src1 = emit_fetch( bld, inst, 1, chan_index );
1593 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1594 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1595 }
1596 break;
1597
1598 case TGSI_OPCODE_SIN:
1599 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1600 tmp0 = lp_build_sin( &bld->base, tmp0 );
1601 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1602 dst0[chan_index] = tmp0;
1603 }
1604 break;
1605
1606 case TGSI_OPCODE_SLE:
1607 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1608 src0 = emit_fetch( bld, inst, 0, chan_index );
1609 src1 = emit_fetch( bld, inst, 1, chan_index );
1610 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1611 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1612 }
1613 break;
1614
1615 case TGSI_OPCODE_SNE:
1616 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1617 src0 = emit_fetch( bld, inst, 0, chan_index );
1618 src1 = emit_fetch( bld, inst, 1, chan_index );
1619 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1620 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1621 }
1622 break;
1623
1624 case TGSI_OPCODE_STR:
1625 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1626 dst0[chan_index] = bld->base.one;
1627 }
1628 break;
1629
1630 case TGSI_OPCODE_TEX:
1631 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1632 break;
1633
1634 case TGSI_OPCODE_TXD:
1635 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1636 break;
1637
1638 case TGSI_OPCODE_UP2H:
1639 /* deprecated */
1640 assert (0);
1641 return FALSE;
1642 break;
1643
1644 case TGSI_OPCODE_UP2US:
1645 /* deprecated */
1646 assert(0);
1647 return FALSE;
1648 break;
1649
1650 case TGSI_OPCODE_UP4B:
1651 /* deprecated */
1652 assert(0);
1653 return FALSE;
1654 break;
1655
1656 case TGSI_OPCODE_UP4UB:
1657 /* deprecated */
1658 assert(0);
1659 return FALSE;
1660 break;
1661
1662 case TGSI_OPCODE_X2D:
1663 /* deprecated? */
1664 assert(0);
1665 return FALSE;
1666 break;
1667
1668 case TGSI_OPCODE_ARA:
1669 /* deprecated */
1670 assert(0);
1671 return FALSE;
1672 break;
1673
1674 case TGSI_OPCODE_ARR:
1675 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1676 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1677 tmp0 = lp_build_round(&bld->base, tmp0);
1678 dst0[chan_index] = tmp0;
1679 }
1680 break;
1681
1682 case TGSI_OPCODE_BRA:
1683 /* deprecated */
1684 assert(0);
1685 return FALSE;
1686 break;
1687
1688 case TGSI_OPCODE_CAL:
1689 lp_exec_mask_call(&bld->exec_mask,
1690 inst->Label.Label,
1691 pc);
1692
1693 break;
1694
1695 case TGSI_OPCODE_RET:
1696 lp_exec_mask_ret(&bld->exec_mask, pc);
1697 break;
1698
1699 case TGSI_OPCODE_END:
1700 *pc = -1;
1701 break;
1702
1703 case TGSI_OPCODE_SSG:
1704 /* TGSI_OPCODE_SGN */
1705 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1706 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1707 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1708 }
1709 break;
1710
1711 case TGSI_OPCODE_CMP:
1712 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1713 src0 = emit_fetch( bld, inst, 0, chan_index );
1714 src1 = emit_fetch( bld, inst, 1, chan_index );
1715 src2 = emit_fetch( bld, inst, 2, chan_index );
1716 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1717 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1718 }
1719 break;
1720
1721 case TGSI_OPCODE_SCS:
1722 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1723 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1724 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1725 }
1726 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1727 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1728 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1729 }
1730 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1731 dst0[CHAN_Z] = bld->base.zero;
1732 }
1733 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1734 dst0[CHAN_W] = bld->base.one;
1735 }
1736 break;
1737
1738 case TGSI_OPCODE_TXB:
1739 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1740 break;
1741
1742 case TGSI_OPCODE_NRM:
1743 /* fall-through */
1744 case TGSI_OPCODE_NRM4:
1745 /* 3 or 4-component normalization */
1746 {
1747 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1748
1749 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1750 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1751 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1752 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1753
1754 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1755
1756 /* xmm4 = src.x */
1757 /* xmm0 = src.x * src.x */
1758 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1759 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1760 tmp4 = tmp0;
1761 }
1762 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1763
1764 /* xmm5 = src.y */
1765 /* xmm0 = xmm0 + src.y * src.y */
1766 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1767 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1768 tmp5 = tmp1;
1769 }
1770 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1771 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1772
1773 /* xmm6 = src.z */
1774 /* xmm0 = xmm0 + src.z * src.z */
1775 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1776 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1777 tmp6 = tmp1;
1778 }
1779 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1780 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1781
1782 if (dims == 4) {
1783 /* xmm7 = src.w */
1784 /* xmm0 = xmm0 + src.w * src.w */
1785 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1786 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1787 tmp7 = tmp1;
1788 }
1789 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1790 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1791 }
1792
1793 /* xmm1 = 1 / sqrt(xmm0) */
1794 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1795
1796 /* dst.x = xmm1 * src.x */
1797 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1798 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1799 }
1800
1801 /* dst.y = xmm1 * src.y */
1802 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1803 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1804 }
1805
1806 /* dst.z = xmm1 * src.z */
1807 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1808 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1809 }
1810
1811 /* dst.w = xmm1 * src.w */
1812 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1813 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1814 }
1815 }
1816
1817 /* dst.w = 1.0 */
1818 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1819 dst0[CHAN_W] = bld->base.one;
1820 }
1821 }
1822 break;
1823
1824 case TGSI_OPCODE_DIV:
1825 /* deprecated */
1826 assert( 0 );
1827 return FALSE;
1828 break;
1829
1830 case TGSI_OPCODE_DP2:
1831 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1832 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1833 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1834 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1835 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1836 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1837 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1838 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1839 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1840 }
1841 break;
1842
1843 case TGSI_OPCODE_TXL:
1844 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1845 break;
1846
1847 case TGSI_OPCODE_TXP:
1848 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1849 break;
1850
1851 case TGSI_OPCODE_BRK:
1852 lp_exec_break(&bld->exec_mask);
1853 break;
1854
1855 case TGSI_OPCODE_IF:
1856 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1857 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1858 tmp0, bld->base.zero);
1859 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1860 break;
1861
1862 case TGSI_OPCODE_BGNLOOP:
1863 lp_exec_bgnloop(&bld->exec_mask);
1864 break;
1865
1866 case TGSI_OPCODE_BGNSUB:
1867 lp_exec_mask_bgnsub(&bld->exec_mask);
1868 break;
1869
1870 case TGSI_OPCODE_ELSE:
1871 lp_exec_mask_cond_invert(&bld->exec_mask);
1872 break;
1873
1874 case TGSI_OPCODE_ENDIF:
1875 lp_exec_mask_cond_pop(&bld->exec_mask);
1876 break;
1877
1878 case TGSI_OPCODE_ENDLOOP:
1879 lp_exec_endloop(&bld->exec_mask);
1880 break;
1881
1882 case TGSI_OPCODE_ENDSUB:
1883 lp_exec_mask_endsub(&bld->exec_mask, pc);
1884 break;
1885
1886 case TGSI_OPCODE_PUSHA:
1887 /* deprecated? */
1888 assert(0);
1889 return FALSE;
1890 break;
1891
1892 case TGSI_OPCODE_POPA:
1893 /* deprecated? */
1894 assert(0);
1895 return FALSE;
1896 break;
1897
1898 case TGSI_OPCODE_CEIL:
1899 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1900 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1901 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1902 }
1903 break;
1904
1905 case TGSI_OPCODE_I2F:
1906 /* deprecated? */
1907 assert(0);
1908 return FALSE;
1909 break;
1910
1911 case TGSI_OPCODE_NOT:
1912 /* deprecated? */
1913 assert(0);
1914 return FALSE;
1915 break;
1916
1917 case TGSI_OPCODE_TRUNC:
1918 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1919 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1920 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1921 }
1922 break;
1923
1924 case TGSI_OPCODE_SHL:
1925 /* deprecated? */
1926 assert(0);
1927 return FALSE;
1928 break;
1929
1930 case TGSI_OPCODE_ISHR:
1931 /* deprecated? */
1932 assert(0);
1933 return FALSE;
1934 break;
1935
1936 case TGSI_OPCODE_AND:
1937 /* deprecated? */
1938 assert(0);
1939 return FALSE;
1940 break;
1941
1942 case TGSI_OPCODE_OR:
1943 /* deprecated? */
1944 assert(0);
1945 return FALSE;
1946 break;
1947
1948 case TGSI_OPCODE_MOD:
1949 /* deprecated? */
1950 assert(0);
1951 return FALSE;
1952 break;
1953
1954 case TGSI_OPCODE_XOR:
1955 /* deprecated? */
1956 assert(0);
1957 return FALSE;
1958 break;
1959
1960 case TGSI_OPCODE_SAD:
1961 /* deprecated? */
1962 assert(0);
1963 return FALSE;
1964 break;
1965
1966 case TGSI_OPCODE_TXF:
1967 /* deprecated? */
1968 assert(0);
1969 return FALSE;
1970 break;
1971
1972 case TGSI_OPCODE_TXQ:
1973 /* deprecated? */
1974 assert(0);
1975 return FALSE;
1976 break;
1977
1978 case TGSI_OPCODE_CONT:
1979 lp_exec_continue(&bld->exec_mask);
1980 break;
1981
1982 case TGSI_OPCODE_EMIT:
1983 return FALSE;
1984 break;
1985
1986 case TGSI_OPCODE_ENDPRIM:
1987 return FALSE;
1988 break;
1989
1990 case TGSI_OPCODE_NOP:
1991 break;
1992
1993 default:
1994 return FALSE;
1995 }
1996
1997 if(info->num_dst) {
1998 LLVMValueRef pred[NUM_CHANNELS];
1999
2000 emit_fetch_predicate( bld, inst, pred );
2001
2002 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2003 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2004 }
2005 }
2006
2007 return TRUE;
2008 }
2009
2010
/**
 * Translate a TGSI token stream into LLVM IR using SoA layout.
 *
 * Two passes over the shader:
 *   1. Parse the token stream: emit declarations/immediates immediately,
 *      and buffer all instructions into bld.instructions[].
 * 2. Walk the buffered instructions via a program counter (pc), so that
 *      subroutine calls (CAL/RET) can redirect execution; pc == -1 ends
 *      the walk (set by TGSI_OPCODE_END).
 *
 * \param builder    LLVM IR builder positioned where code should be emitted
 * \param tokens     the TGSI shader tokens
 * \param type       per-channel vector type for the SoA values
 * \param mask       execution mask context (may gate stores)
 * \param consts_ptr pointer to the constant buffer
 * \param pos        fragment position values (per channel)
 * \param inputs     interpolated shader inputs, [attrib][channel]
 * \param outputs    shader output slots to fill, [attrib][channel]
 * \param sampler    texture sampling code generator
 * \param info       shader info (used for indirect_files)
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   /* Integer build context with the matching int vector type (e.g. for
    * address-register arithmetic). */
   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.indirect_files = info->indirect_files;
   /* Instruction buffer grows by LP_MAX_INSTRUCTIONS increments below. */
   bld.instructions = (struct tgsi_full_instruction *)
                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      /* OOM: silently emit nothing for this shader. */
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   tgsi_parse_init( &parse, tokens );

   /* Pass 1: parse tokens, emitting declarations and collecting
    * instructions and immediates. */
   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               /* NOTE(review): REALLOC result is not checked here; an
                * allocation failure would crash on the memcpy below.
                * Confirm whether the project's REALLOC can fail. */
               bld.instructions = REALLOC(bld.instructions,
                                          bld.max_instructions
                                          * sizeof(struct tgsi_full_instruction),
                                          (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                          * sizeof(struct tgsi_full_instruction));
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            /* NOTE(review): bound enforced only by this debug assert; a
             * release build would overrun immediates[] on a shader with
             * too many immediates. */
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            /* Splat each scalar immediate into a vector constant. */
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
            /* Unused components left undefined. */
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
      }
   }

   /* Pass 2: emit IR for the buffered instructions; pc is advanced (and
    * possibly redirected) by emit_instruction, and -1 terminates. */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* Debug aid: dump the TGSI shader and the generated LLVM function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   /* Debug aid: dump the whole enclosing LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}
2134