42d796cb958c4126b3339e21a80443cc2658ef26
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
/* Iterate CHAN over all four vector channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Nonzero if instruction INST writes channel CHAN of its first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst[0]'s writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* TGSI vector channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial capacity of the parsed-instruction buffer (grown on demand). */
#define LP_MAX_INSTRUCTIONS 256
82
83
/**
 * Execution-mask state used to emulate TGSI control flow (IF/ELSE,
 * loops, subroutine CAL/RET) with per-channel SoA masks instead of
 * real branches for divergent channels.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any cond/loop/call mask is active, i.e. stores must be masked */
   boolean has_mask;

   /* LLVM type of every mask vector below */
   LLVMTypeRef int_vec_type;

   /* saved condition masks for nested IF/ELSE/ENDIF */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* state of the innermost loop */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;   /* channels that have not hit CONT this iteration */
   LLVMValueRef break_mask;  /* channels that have not hit BRK in this loop */
   LLVMValueRef break_var;   /* alloca carrying break_mask across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* channels that have not executed RET in the current subroutine */
   LLVMValueRef ret_mask;
   struct {
      int pc;                /* instruction index to resume at after ENDSUB */
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* combined cond & cont & break & ret mask: the effective write mask */
   LLVMValueRef exec_mask;
};
116
/**
 * Per-shader translation context for the TGSI -> LLVM IR SoA code
 * generator: register-file storage, builders, and control-flow masks.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;                        /* constant buffer base */
   const LLVMValueRef *pos;                        /* fragment position */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];     /* shader inputs (SoA) */
   LLVMValueRef (*outputs)[NUM_CHANNELS];          /* output allocas (SoA) */

   const struct lp_build_sampler_soa *sampler;     /* texture-sampling backend */

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;             /* fragment kill mask */
   struct lp_exec_mask exec_mask;                  /* control-flow mask */

   struct tgsi_full_instruction *instructions;     /* parsed instruction buffer */
   uint max_instructions;                          /* its current capacity */
};
151
152 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
153 {
154 mask->bld = bld;
155 mask->has_mask = FALSE;
156 mask->cond_stack_size = 0;
157 mask->loop_stack_size = 0;
158 mask->call_stack_size = 0;
159
160 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
161 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
162 LLVMConstAllOnes(mask->int_vec_type);
163 }
164
/**
 * Recombine the component masks into exec_mask after any of them changed.
 * exec_mask = cond_mask [& cont_mask & break_mask] [& ret_mask],
 * the bracketed terms only when inside a loop / subroutine respectively.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->call_stack_size) {
      /* inside a subroutine: also mask out channels that already RETurned */
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   /* masked stores are only needed while some control flow is active */
   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}
193
/**
 * Enter an IF: save the current condition mask on the stack and make
 * 'val' (the per-channel IF condition, as an int mask) current.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* at the outermost level the condition mask must be all-ones */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = val;

   lp_exec_mask_update(mask);
}
207
/**
 * Handle an ELSE: flip the current condition mask, keeping it limited
 * to the channels that were enabled by the enclosing scope
 * (cond_mask = ~cond_mask & previous_mask).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
226
227 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
228 {
229 assert(mask->cond_stack_size);
230 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
231 lp_exec_mask_update(mask);
232 }
233
/**
 * Handle BGNLOOP: push the enclosing loop's state, create the alloca
 * that carries break_mask across iterations, and emit the basic block
 * that the backward branch in lp_exec_endloop() will target.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask must survive the backward branch, so keep it in memory */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* start a new basic block: the loop header / backward-branch target */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
262
263 static void lp_exec_break(struct lp_exec_mask *mask)
264 {
265 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
266 mask->exec_mask,
267 "break");
268
269 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
270 mask->break_mask,
271 exec_mask, "break_full");
272
273 lp_exec_mask_update(mask);
274 }
275
276 static void lp_exec_continue(struct lp_exec_mask *mask)
277 {
278 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
279 mask->exec_mask,
280 "");
281
282 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
283 mask->cont_mask,
284 exec_mask, "");
285
286 lp_exec_mask_update(mask);
287 }
288
289
/**
 * Handle ENDLOOP: re-enable channels stopped by CONT, persist the
 * break mask, and branch back to the loop header while any channel
 * is still active; then pop the loop state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* one wide integer covering the whole mask vector, for a scalar test */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (mask != 0), i.e. some channel still wants another iteration */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop back while any channel is active, otherwise fall out */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the enclosing loop's state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
335
/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * 'pred' is an optional extra per-channel predicate mask (may be NULL);
 * when both are present they are ANDed together.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      /* masked store: load the old value and merge per channel */
      LLVMValueRef real_val, dst_val;

      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* no active mask: plain unconditional store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}
367
368 static void lp_exec_mask_call(struct lp_exec_mask *mask,
369 int func,
370 int *pc)
371 {
372 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
373 mask->call_stack[mask->call_stack_size].pc = *pc;
374 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
375 mask->call_stack_size++;
376 *pc = func;
377 }
378
/**
 * Handle RET: outside any subroutine it terminates main(); inside one
 * it disables the currently-active channels for the rest of the
 * subroutine body.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
398
/* Subroutine entry (BGNSUB): nothing to do here -- the caller's state
 * was already saved by lp_exec_mask_call().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
402
403 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
404 {
405 assert(mask->call_stack_size);
406 mask->call_stack_size--;
407 *pc = mask->call_stack[mask->call_stack_size].pc;
408 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
409 lp_exec_mask_update(mask);
410 }
411
412
413 /**
414 * Return pointer to a temporary register channel (src or dest).
415 * Note that indirect addressing cannot be handled here.
416 * \param index which temporary register
417 * \param chan which channel of the temp register.
418 */
419 static LLVMValueRef
420 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
421 unsigned index,
422 unsigned chan)
423 {
424 assert(chan < 4);
425 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
426 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
427 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
428 }
429 else {
430 return bld->temps[index][chan];
431 }
432 }
433
434
/**
 * Gather vector.
 * Build a result vector by loading base_ptr[indexes[i]] for each lane i.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}
464
465
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, multiply by four and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
                     const struct tgsi_src_register *indirect_reg)
{
   /* always use X component of address register */
   const int x = indirect_reg->SwizzleX;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
   /* NOTE(review): 'x' (the SwizzleX *value*) is passed as the channel
    * argument here; the comment above suggests CHAN_X (0) was intended.
    * This only agrees when SwizzleX == 0 -- verify against tgsi_util.
    */
   uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
   LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
   LLVMValueRef addr_vec;

   addr_vec = LLVMBuildLoad(bld->base.builder,
                            bld->addr[indirect_reg->Index][swizzle],
                            "load addr reg");

   /* for indexing we want integers */
   addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
                              int_vec_type, "");

   /* addr_vec = addr_vec * 4 */
   addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);

   return addr_vec;
}
496
497
/**
 * Register fetch.
 * Fetch one SoA channel of source operand 'src_op' of 'inst',
 * resolving swizzle, indirect addressing and the source sign mode.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr_vec = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      assert(bld->indirect_files);
      addr_vec = get_indirect_offsets(bld, &reg->Indirect);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* index into the const buffer */

         assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec = index_vec + addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         /* direct access: one scalar load, broadcast to all lanes */
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef vec_len =
            lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec += addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* index_vec *= vector_length
          * NOTE(review): this scales the already-summed index to address
          * individual floats within length-wide vectors; verify the math
          * matches temps_array's layout before relying on it.
          */
         index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the operand's sign mode (abs / negate / both) */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_TOGGLE:
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
628
629
630 /**
631 * Register fetch with derivatives.
632 */
633 static void
634 emit_fetch_deriv(
635 struct lp_build_tgsi_soa_context *bld,
636 const struct tgsi_full_instruction *inst,
637 unsigned index,
638 const unsigned chan_index,
639 LLVMValueRef *res,
640 LLVMValueRef *ddx,
641 LLVMValueRef *ddy)
642 {
643 LLVMValueRef src;
644
645 src = emit_fetch(bld, inst, index, chan_index);
646
647 if(res)
648 *res = src;
649
650 /* TODO: use interpolation coeffs for inputs */
651
652 if(ddx)
653 *ddx = lp_build_ddx(&bld->base, src);
654
655 if(ddy)
656 *ddy = lp_build_ddy(&bld->base, src);
657 }
658
659
/**
 * Predicate.
 * Fill pred[0..3] with per-channel predicate masks for 'inst', or all
 * NULLs when the instruction is not predicated. Each mask is
 * (PRED[index].channel != 0), optionally negated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* unpredicated: NULL means "store unconditionally" downstream */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
725
726
727 /**
728 * Register store.
729 */
730 static void
731 emit_store(
732 struct lp_build_tgsi_soa_context *bld,
733 const struct tgsi_full_instruction *inst,
734 unsigned index,
735 unsigned chan_index,
736 LLVMValueRef pred,
737 LLVMValueRef value)
738 {
739 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
740 LLVMValueRef addr = NULL;
741
742 switch( inst->Instruction.Saturate ) {
743 case TGSI_SAT_NONE:
744 break;
745
746 case TGSI_SAT_ZERO_ONE:
747 value = lp_build_max(&bld->base, value, bld->base.zero);
748 value = lp_build_min(&bld->base, value, bld->base.one);
749 break;
750
751 case TGSI_SAT_MINUS_PLUS_ONE:
752 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
753 value = lp_build_min(&bld->base, value, bld->base.one);
754 break;
755
756 default:
757 assert(0);
758 }
759
760 if (reg->Register.Indirect) {
761 /* XXX use get_indirect_offsets() here eventually */
762 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
763 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
764
765 assert(bld->indirect_files);
766
767 addr = LLVMBuildLoad(bld->base.builder,
768 bld->addr[reg->Indirect.Index][swizzle],
769 "");
770 /* for indexing we want integers */
771 addr = LLVMBuildFPToSI(bld->base.builder, addr,
772 int_vec_type, "");
773 addr = LLVMBuildExtractElement(bld->base.builder,
774 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
775 "");
776 addr = LLVMBuildMul(bld->base.builder,
777 addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
778 "");
779 }
780
781 switch( reg->Register.File ) {
782 case TGSI_FILE_OUTPUT:
783 lp_exec_mask_store(&bld->exec_mask, pred, value,
784 bld->outputs[reg->Register.Index][chan_index]);
785 break;
786
787 case TGSI_FILE_TEMPORARY:
788 if (reg->Register.Indirect) {
789 /* XXX not done yet */
790 debug_printf("WARNING: LLVM scatter store of temp regs"
791 " not implemented\n");
792 }
793 else {
794 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
795 chan_index);
796 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
797 }
798 break;
799
800 case TGSI_FILE_ADDRESS:
801 lp_exec_mask_store(&bld->exec_mask, pred, value,
802 bld->addr[reg->Indirect.Index][chan_index]);
803 break;
804
805 case TGSI_FILE_PREDICATE:
806 lp_exec_mask_store(&bld->exec_mask, pred, value,
807 bld->preds[index][chan_index]);
808 break;
809
810 default:
811 assert( 0 );
812 }
813 }
814
815
816 /**
817 * High-level instruction translators.
818 */
819
/** How the texture-sampling instruction modifies the basic lookup. */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* divide coords by the w component */
   TEX_MODIFIER_LOD_BIAS,       /* bias the computed level of detail */
   TEX_MODIFIER_EXPLICIT_LOD,   /* lod given explicitly in the w component */
   TEX_MODIFIER_EXPLICIT_DERIV  /* derivatives given in src[1]/src[2] */
};
827
/**
 * Emit code for a texture-sampling instruction: fetch coordinates
 * (and lod/bias/derivatives per 'modifier'), then delegate to the
 * sampler generator.  Writes the four result channels to texel[0..3].
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator supplied: return undef rather than crash */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinate components needed by the texture target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias / explicit lod both live in src[0].w */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projected texturing: scale coords by 1/w */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives in src[1]/src[2], sampler unit in src[3] */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives from neighboring quad pixels; unit in src[1] */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
924
925
/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel "survives" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   /* fold the surviving channels into the fragment live mask */
   if(mask)
      lp_build_mask_update(bld->mask, mask);
}
974
975
976 /**
977 * Predicated fragment kill.
978 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
979 * The only predication is the execution mask which will apply if
980 * we're inside a loop or conditional.
981 */
982 static void
983 emit_kilp(struct lp_build_tgsi_soa_context *bld,
984 const struct tgsi_full_instruction *inst)
985 {
986 LLVMValueRef mask;
987
988 /* For those channels which are "alive", disable fragment shader
989 * execution.
990 */
991 if (bld->exec_mask.has_mask) {
992 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
993 }
994 else {
995 mask = bld->base.zero;
996 }
997
998 lp_build_mask_update(bld->mask, mask);
999 }
1000
1001 static void
1002 emit_declaration(
1003 struct lp_build_tgsi_soa_context *bld,
1004 const struct tgsi_full_declaration *decl)
1005 {
1006 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
1007
1008 unsigned first = decl->Range.First;
1009 unsigned last = decl->Range.Last;
1010 unsigned idx, i;
1011
1012 for (idx = first; idx <= last; ++idx) {
1013 switch (decl->Declaration.File) {
1014 case TGSI_FILE_TEMPORARY:
1015 assert(idx < LP_MAX_TGSI_TEMPS);
1016 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1017 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
1018 last*4 + 4, 0);
1019 bld->temps_array = lp_build_array_alloca(bld->base.builder,
1020 vec_type, array_size, "");
1021 } else {
1022 for (i = 0; i < NUM_CHANNELS; i++)
1023 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1024 vec_type, "");
1025 }
1026 break;
1027
1028 case TGSI_FILE_OUTPUT:
1029 for (i = 0; i < NUM_CHANNELS; i++)
1030 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1031 vec_type, "");
1032 break;
1033
1034 case TGSI_FILE_ADDRESS:
1035 assert(idx < LP_MAX_TGSI_ADDRS);
1036 for (i = 0; i < NUM_CHANNELS; i++)
1037 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1038 vec_type, "");
1039 break;
1040
1041 case TGSI_FILE_PREDICATE:
1042 assert(idx < LP_MAX_TGSI_PREDS);
1043 for (i = 0; i < NUM_CHANNELS; i++)
1044 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1045 vec_type, "");
1046 break;
1047
1048 default:
1049 /* don't need to declare other vars */
1050 break;
1051 }
1052 }
1053 }
1054
1055
1056 /**
1057 * Emit LLVM for one TGSI instruction.
1058 * \param return TRUE for success, FALSE otherwise
1059 */
1060 static boolean
1061 emit_instruction(
1062 struct lp_build_tgsi_soa_context *bld,
1063 const struct tgsi_full_instruction *inst,
1064 const struct tgsi_opcode_info *info,
1065 int *pc)
1066 {
1067 unsigned chan_index;
1068 LLVMValueRef src0, src1, src2;
1069 LLVMValueRef tmp0, tmp1, tmp2;
1070 LLVMValueRef tmp3 = NULL;
1071 LLVMValueRef tmp4 = NULL;
1072 LLVMValueRef tmp5 = NULL;
1073 LLVMValueRef tmp6 = NULL;
1074 LLVMValueRef tmp7 = NULL;
1075 LLVMValueRef res;
1076 LLVMValueRef dst0[NUM_CHANNELS];
1077
1078 /*
1079 * Stores and write masks are handled in a general fashion after the long
1080 * instruction opcode switch statement.
1081 *
1082 * Although not stricitly necessary, we avoid generating instructions for
1083 * channels which won't be stored, in cases where's that easy. For some
1084 * complex instructions, like texture sampling, it is more convenient to
1085 * assume a full writemask and then let LLVM optimization passes eliminate
1086 * redundant code.
1087 */
1088
1089 (*pc)++;
1090
1091 assert(info->num_dst <= 1);
1092 if (info->num_dst) {
1093 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1094 dst0[chan_index] = bld->base.undef;
1095 }
1096 }
1097
1098 switch (inst->Instruction.Opcode) {
1099 case TGSI_OPCODE_ARL:
1100 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1101 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1102 tmp0 = lp_build_floor(&bld->base, tmp0);
1103 dst0[chan_index] = tmp0;
1104 }
1105 break;
1106
1107 case TGSI_OPCODE_MOV:
1108 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1109 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1110 }
1111 break;
1112
1113 case TGSI_OPCODE_LIT:
1114 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1115 dst0[CHAN_X] = bld->base.one;
1116 }
1117 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1118 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1119 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1120 }
1121 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1122 /* XMM[1] = SrcReg[0].yyyy */
1123 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1124 /* XMM[1] = max(XMM[1], 0) */
1125 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1126 /* XMM[2] = SrcReg[0].wwww */
1127 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1128 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1129 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1130 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1131 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1132 }
1133 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1134 dst0[CHAN_W] = bld->base.one;
1135 }
1136 break;
1137
1138 case TGSI_OPCODE_RCP:
1139 /* TGSI_OPCODE_RECIP */
1140 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1141 res = lp_build_rcp(&bld->base, src0);
1142 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1143 dst0[chan_index] = res;
1144 }
1145 break;
1146
1147 case TGSI_OPCODE_RSQ:
1148 /* TGSI_OPCODE_RECIPSQRT */
1149 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1150 src0 = lp_build_abs(&bld->base, src0);
1151 res = lp_build_rsqrt(&bld->base, src0);
1152 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1153 dst0[chan_index] = res;
1154 }
1155 break;
1156
1157 case TGSI_OPCODE_EXP:
1158 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1159 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1160 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1161 LLVMValueRef *p_exp2_int_part = NULL;
1162 LLVMValueRef *p_frac_part = NULL;
1163 LLVMValueRef *p_exp2 = NULL;
1164
1165 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1166
1167 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1168 p_exp2_int_part = &tmp0;
1169 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1170 p_frac_part = &tmp1;
1171 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1172 p_exp2 = &tmp2;
1173
1174 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1175
1176 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1177 dst0[CHAN_X] = tmp0;
1178 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1179 dst0[CHAN_Y] = tmp1;
1180 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1181 dst0[CHAN_Z] = tmp2;
1182 }
1183 /* dst.w = 1.0 */
1184 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1185 dst0[CHAN_W] = bld->base.one;
1186 }
1187 break;
1188
1189 case TGSI_OPCODE_LOG:
1190 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1191 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1192 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1193 LLVMValueRef *p_floor_log2 = NULL;
1194 LLVMValueRef *p_exp = NULL;
1195 LLVMValueRef *p_log2 = NULL;
1196
1197 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1198 src0 = lp_build_abs( &bld->base, src0 );
1199
1200 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1201 p_floor_log2 = &tmp0;
1202 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1203 p_exp = &tmp1;
1204 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1205 p_log2 = &tmp2;
1206
1207 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1208
1209 /* dst.x = floor(lg2(abs(src.x))) */
1210 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1211 dst0[CHAN_X] = tmp0;
1212 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1213 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1214 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1215 }
1216 /* dst.z = lg2(abs(src.x)) */
1217 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1218 dst0[CHAN_Z] = tmp2;
1219 }
1220 /* dst.w = 1.0 */
1221 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1222 dst0[CHAN_W] = bld->base.one;
1223 }
1224 break;
1225
1226 case TGSI_OPCODE_MUL:
1227 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1228 src0 = emit_fetch( bld, inst, 0, chan_index );
1229 src1 = emit_fetch( bld, inst, 1, chan_index );
1230 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1231 }
1232 break;
1233
1234 case TGSI_OPCODE_ADD:
1235 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1236 src0 = emit_fetch( bld, inst, 0, chan_index );
1237 src1 = emit_fetch( bld, inst, 1, chan_index );
1238 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1239 }
1240 break;
1241
1242 case TGSI_OPCODE_DP3:
1243 /* TGSI_OPCODE_DOT3 */
1244 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1245 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1246 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1247 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1248 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1249 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1250 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1251 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1252 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1253 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1254 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1255 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1256 dst0[chan_index] = tmp0;
1257 }
1258 break;
1259
1260 case TGSI_OPCODE_DP4:
1261 /* TGSI_OPCODE_DOT4 */
1262 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1263 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1264 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1265 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1266 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1267 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1268 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1269 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1270 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1271 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1272 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1273 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1274 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1275 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1276 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1277 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1278 dst0[chan_index] = tmp0;
1279 }
1280 break;
1281
1282 case TGSI_OPCODE_DST:
1283 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1284 dst0[CHAN_X] = bld->base.one;
1285 }
1286 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1287 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1288 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1289 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1290 }
1291 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1292 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1293 }
1294 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1295 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1296 }
1297 break;
1298
1299 case TGSI_OPCODE_MIN:
1300 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1301 src0 = emit_fetch( bld, inst, 0, chan_index );
1302 src1 = emit_fetch( bld, inst, 1, chan_index );
1303 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1304 }
1305 break;
1306
1307 case TGSI_OPCODE_MAX:
1308 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1309 src0 = emit_fetch( bld, inst, 0, chan_index );
1310 src1 = emit_fetch( bld, inst, 1, chan_index );
1311 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1312 }
1313 break;
1314
1315 case TGSI_OPCODE_SLT:
1316 /* TGSI_OPCODE_SETLT */
1317 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1318 src0 = emit_fetch( bld, inst, 0, chan_index );
1319 src1 = emit_fetch( bld, inst, 1, chan_index );
1320 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1321 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1322 }
1323 break;
1324
1325 case TGSI_OPCODE_SGE:
1326 /* TGSI_OPCODE_SETGE */
1327 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1328 src0 = emit_fetch( bld, inst, 0, chan_index );
1329 src1 = emit_fetch( bld, inst, 1, chan_index );
1330 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1331 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1332 }
1333 break;
1334
1335 case TGSI_OPCODE_MAD:
1336 /* TGSI_OPCODE_MADD */
1337 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1338 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1339 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1340 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1341 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1342 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1343 dst0[chan_index] = tmp0;
1344 }
1345 break;
1346
1347 case TGSI_OPCODE_SUB:
1348 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1349 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1350 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1351 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1352 }
1353 break;
1354
1355 case TGSI_OPCODE_LRP:
1356 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1357 src0 = emit_fetch( bld, inst, 0, chan_index );
1358 src1 = emit_fetch( bld, inst, 1, chan_index );
1359 src2 = emit_fetch( bld, inst, 2, chan_index );
1360 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1361 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1362 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1363 }
1364 break;
1365
1366 case TGSI_OPCODE_CND:
1367 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1368 src0 = emit_fetch( bld, inst, 0, chan_index );
1369 src1 = emit_fetch( bld, inst, 1, chan_index );
1370 src2 = emit_fetch( bld, inst, 2, chan_index );
1371 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1372 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1373 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1374 }
1375 break;
1376
1377 case TGSI_OPCODE_DP2A:
1378 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1379 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1380 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1381 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1382 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1383 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1384 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1385 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1386 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1387 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1388 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1389 }
1390 break;
1391
1392 case TGSI_OPCODE_FRC:
1393 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1394 src0 = emit_fetch( bld, inst, 0, chan_index );
1395 tmp0 = lp_build_floor(&bld->base, src0);
1396 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1397 dst0[chan_index] = tmp0;
1398 }
1399 break;
1400
1401 case TGSI_OPCODE_CLAMP:
1402 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1403 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1404 src1 = emit_fetch( bld, inst, 1, chan_index );
1405 src2 = emit_fetch( bld, inst, 2, chan_index );
1406 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1407 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1408 dst0[chan_index] = tmp0;
1409 }
1410 break;
1411
1412 case TGSI_OPCODE_FLR:
1413 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1414 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1415 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1416 }
1417 break;
1418
1419 case TGSI_OPCODE_ROUND:
1420 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1421 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1422 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1423 }
1424 break;
1425
1426 case TGSI_OPCODE_EX2: {
1427 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1428 tmp0 = lp_build_exp2( &bld->base, tmp0);
1429 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1430 dst0[chan_index] = tmp0;
1431 }
1432 break;
1433 }
1434
1435 case TGSI_OPCODE_LG2:
1436 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1437 tmp0 = lp_build_log2( &bld->base, tmp0);
1438 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1439 dst0[chan_index] = tmp0;
1440 }
1441 break;
1442
1443 case TGSI_OPCODE_POW:
1444 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1445 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1446 res = lp_build_pow( &bld->base, src0, src1 );
1447 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1448 dst0[chan_index] = res;
1449 }
1450 break;
1451
1452 case TGSI_OPCODE_XPD:
1453 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1454 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1455 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1456 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1457 }
1458 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1459 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1460 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1461 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1462 }
1463 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1464 tmp2 = tmp0;
1465 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1466 tmp5 = tmp3;
1467 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1468 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1469 dst0[CHAN_X] = tmp2;
1470 }
1471 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1472 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1473 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1474 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1475 }
1476 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1477 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1478 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1479 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1480 dst0[CHAN_Y] = tmp3;
1481 }
1482 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1483 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1484 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1485 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1486 dst0[CHAN_Z] = tmp5;
1487 }
1488 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1489 dst0[CHAN_W] = bld->base.one;
1490 }
1491 break;
1492
1493 case TGSI_OPCODE_ABS:
1494 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1495 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1496 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1497 }
1498 break;
1499
1500 case TGSI_OPCODE_RCC:
1501 /* deprecated? */
1502 assert(0);
1503 return FALSE;
1504
1505 case TGSI_OPCODE_DPH:
1506 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1507 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1508 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1509 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1510 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1511 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1512 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1513 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1514 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1515 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1516 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1517 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1518 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1519 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1520 dst0[chan_index] = tmp0;
1521 }
1522 break;
1523
1524 case TGSI_OPCODE_COS:
1525 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1526 tmp0 = lp_build_cos( &bld->base, tmp0 );
1527 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1528 dst0[chan_index] = tmp0;
1529 }
1530 break;
1531
1532 case TGSI_OPCODE_DDX:
1533 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1534 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1535 }
1536 break;
1537
1538 case TGSI_OPCODE_DDY:
1539 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1540 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1541 }
1542 break;
1543
1544 case TGSI_OPCODE_KILP:
1545 /* predicated kill */
1546 emit_kilp( bld, inst );
1547 break;
1548
1549 case TGSI_OPCODE_KIL:
1550 /* conditional kill */
1551 emit_kil( bld, inst );
1552 break;
1553
1554 case TGSI_OPCODE_PK2H:
1555 return FALSE;
1556 break;
1557
1558 case TGSI_OPCODE_PK2US:
1559 return FALSE;
1560 break;
1561
1562 case TGSI_OPCODE_PK4B:
1563 return FALSE;
1564 break;
1565
1566 case TGSI_OPCODE_PK4UB:
1567 return FALSE;
1568 break;
1569
1570 case TGSI_OPCODE_RFL:
1571 return FALSE;
1572 break;
1573
1574 case TGSI_OPCODE_SEQ:
1575 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1576 src0 = emit_fetch( bld, inst, 0, chan_index );
1577 src1 = emit_fetch( bld, inst, 1, chan_index );
1578 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1579 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1580 }
1581 break;
1582
1583 case TGSI_OPCODE_SFL:
1584 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1585 dst0[chan_index] = bld->base.zero;
1586 }
1587 break;
1588
1589 case TGSI_OPCODE_SGT:
1590 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1591 src0 = emit_fetch( bld, inst, 0, chan_index );
1592 src1 = emit_fetch( bld, inst, 1, chan_index );
1593 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1594 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1595 }
1596 break;
1597
1598 case TGSI_OPCODE_SIN:
1599 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1600 tmp0 = lp_build_sin( &bld->base, tmp0 );
1601 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1602 dst0[chan_index] = tmp0;
1603 }
1604 break;
1605
1606 case TGSI_OPCODE_SLE:
1607 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1608 src0 = emit_fetch( bld, inst, 0, chan_index );
1609 src1 = emit_fetch( bld, inst, 1, chan_index );
1610 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1611 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1612 }
1613 break;
1614
1615 case TGSI_OPCODE_SNE:
1616 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1617 src0 = emit_fetch( bld, inst, 0, chan_index );
1618 src1 = emit_fetch( bld, inst, 1, chan_index );
1619 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1620 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1621 }
1622 break;
1623
1624 case TGSI_OPCODE_STR:
1625 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1626 dst0[chan_index] = bld->base.one;
1627 }
1628 break;
1629
1630 case TGSI_OPCODE_TEX:
1631 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1632 break;
1633
1634 case TGSI_OPCODE_TXD:
1635 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1636 break;
1637
1638 case TGSI_OPCODE_UP2H:
1639 /* deprecated */
1640 assert (0);
1641 return FALSE;
1642 break;
1643
1644 case TGSI_OPCODE_UP2US:
1645 /* deprecated */
1646 assert(0);
1647 return FALSE;
1648 break;
1649
1650 case TGSI_OPCODE_UP4B:
1651 /* deprecated */
1652 assert(0);
1653 return FALSE;
1654 break;
1655
1656 case TGSI_OPCODE_UP4UB:
1657 /* deprecated */
1658 assert(0);
1659 return FALSE;
1660 break;
1661
1662 case TGSI_OPCODE_X2D:
1663 /* deprecated? */
1664 assert(0);
1665 return FALSE;
1666 break;
1667
1668 case TGSI_OPCODE_ARA:
1669 /* deprecated */
1670 assert(0);
1671 return FALSE;
1672 break;
1673
1674 case TGSI_OPCODE_ARR:
1675 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1676 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1677 tmp0 = lp_build_round(&bld->base, tmp0);
1678 dst0[chan_index] = tmp0;
1679 }
1680 break;
1681
1682 case TGSI_OPCODE_BRA:
1683 /* deprecated */
1684 assert(0);
1685 return FALSE;
1686 break;
1687
1688 case TGSI_OPCODE_CAL:
1689 lp_exec_mask_call(&bld->exec_mask,
1690 inst->Label.Label,
1691 pc);
1692
1693 break;
1694
1695 case TGSI_OPCODE_RET:
1696 lp_exec_mask_ret(&bld->exec_mask, pc);
1697 break;
1698
1699 case TGSI_OPCODE_END:
1700 *pc = -1;
1701 break;
1702
1703 case TGSI_OPCODE_SSG:
1704 /* TGSI_OPCODE_SGN */
1705 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1706 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1707 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1708 }
1709 break;
1710
1711 case TGSI_OPCODE_CMP:
1712 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1713 src0 = emit_fetch( bld, inst, 0, chan_index );
1714 src1 = emit_fetch( bld, inst, 1, chan_index );
1715 src2 = emit_fetch( bld, inst, 2, chan_index );
1716 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1717 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1718 }
1719 break;
1720
1721 case TGSI_OPCODE_SCS:
1722 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1723 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1724 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1725 }
1726 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1727 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1728 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1729 }
1730 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1731 dst0[CHAN_Z] = bld->base.zero;
1732 }
1733 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1734 dst0[CHAN_W] = bld->base.one;
1735 }
1736 break;
1737
1738 case TGSI_OPCODE_TXB:
1739 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1740 break;
1741
1742 case TGSI_OPCODE_NRM:
1743 /* fall-through */
1744 case TGSI_OPCODE_NRM4:
1745 /* 3 or 4-component normalization */
1746 {
1747 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1748
1749 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1750 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1751 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1752 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1753
1754 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1755
1756 /* xmm4 = src.x */
1757 /* xmm0 = src.x * src.x */
1758 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1759 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1760 tmp4 = tmp0;
1761 }
1762 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1763
1764 /* xmm5 = src.y */
1765 /* xmm0 = xmm0 + src.y * src.y */
1766 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1767 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1768 tmp5 = tmp1;
1769 }
1770 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1771 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1772
1773 /* xmm6 = src.z */
1774 /* xmm0 = xmm0 + src.z * src.z */
1775 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1776 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1777 tmp6 = tmp1;
1778 }
1779 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1780 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1781
1782 if (dims == 4) {
1783 /* xmm7 = src.w */
1784 /* xmm0 = xmm0 + src.w * src.w */
1785 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1786 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1787 tmp7 = tmp1;
1788 }
1789 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1790 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1791 }
1792
1793 /* xmm1 = 1 / sqrt(xmm0) */
1794 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1795
1796 /* dst.x = xmm1 * src.x */
1797 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1798 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1799 }
1800
1801 /* dst.y = xmm1 * src.y */
1802 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1803 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1804 }
1805
1806 /* dst.z = xmm1 * src.z */
1807 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1808 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1809 }
1810
1811 /* dst.w = xmm1 * src.w */
1812 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1813 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1814 }
1815 }
1816
1817 /* dst.w = 1.0 */
1818 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1819 dst0[CHAN_W] = bld->base.one;
1820 }
1821 }
1822 break;
1823
1824 case TGSI_OPCODE_DIV:
1825 /* deprecated */
1826 assert( 0 );
1827 return FALSE;
1828 break;
1829
1830 case TGSI_OPCODE_DP2:
1831 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1832 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1833 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1834 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1835 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1836 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1837 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1838 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1839 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1840 }
1841 break;
1842
1843 case TGSI_OPCODE_TXL:
1844 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1845 break;
1846
1847 case TGSI_OPCODE_TXP:
1848 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1849 break;
1850
1851 case TGSI_OPCODE_BRK:
1852 lp_exec_break(&bld->exec_mask);
1853 break;
1854
1855 case TGSI_OPCODE_IF:
1856 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1857 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1858 tmp0, bld->base.zero);
1859 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1860 break;
1861
1862 case TGSI_OPCODE_BGNLOOP:
1863 lp_exec_bgnloop(&bld->exec_mask);
1864 break;
1865
1866 case TGSI_OPCODE_BGNSUB:
1867 lp_exec_mask_bgnsub(&bld->exec_mask);
1868 break;
1869
1870 case TGSI_OPCODE_ELSE:
1871 lp_exec_mask_cond_invert(&bld->exec_mask);
1872 break;
1873
1874 case TGSI_OPCODE_ENDIF:
1875 lp_exec_mask_cond_pop(&bld->exec_mask);
1876 break;
1877
1878 case TGSI_OPCODE_ENDLOOP:
1879 lp_exec_endloop(&bld->exec_mask);
1880 break;
1881
1882 case TGSI_OPCODE_ENDSUB:
1883 lp_exec_mask_endsub(&bld->exec_mask, pc);
1884 break;
1885
1886 case TGSI_OPCODE_PUSHA:
1887 /* deprecated? */
1888 assert(0);
1889 return FALSE;
1890 break;
1891
1892 case TGSI_OPCODE_POPA:
1893 /* deprecated? */
1894 assert(0);
1895 return FALSE;
1896 break;
1897
1898 case TGSI_OPCODE_CEIL:
1899 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1900 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1901 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1902 }
1903 break;
1904
1905 case TGSI_OPCODE_I2F:
1906 /* deprecated? */
1907 assert(0);
1908 return FALSE;
1909 break;
1910
1911 case TGSI_OPCODE_NOT:
1912 /* deprecated? */
1913 assert(0);
1914 return FALSE;
1915 break;
1916
1917 case TGSI_OPCODE_TRUNC:
1918 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1919 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1920 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1921 }
1922 break;
1923
1924 case TGSI_OPCODE_SHL:
1925 /* deprecated? */
1926 assert(0);
1927 return FALSE;
1928 break;
1929
1930 case TGSI_OPCODE_ISHR:
1931 /* deprecated? */
1932 assert(0);
1933 return FALSE;
1934 break;
1935
1936 case TGSI_OPCODE_AND:
1937 /* deprecated? */
1938 assert(0);
1939 return FALSE;
1940 break;
1941
1942 case TGSI_OPCODE_OR:
1943 /* deprecated? */
1944 assert(0);
1945 return FALSE;
1946 break;
1947
1948 case TGSI_OPCODE_MOD:
1949 /* deprecated? */
1950 assert(0);
1951 return FALSE;
1952 break;
1953
1954 case TGSI_OPCODE_XOR:
1955 /* deprecated? */
1956 assert(0);
1957 return FALSE;
1958 break;
1959
1960 case TGSI_OPCODE_SAD:
1961 /* deprecated? */
1962 assert(0);
1963 return FALSE;
1964 break;
1965
1966 case TGSI_OPCODE_TXF:
1967 /* deprecated? */
1968 assert(0);
1969 return FALSE;
1970 break;
1971
1972 case TGSI_OPCODE_TXQ:
1973 /* deprecated? */
1974 assert(0);
1975 return FALSE;
1976 break;
1977
1978 case TGSI_OPCODE_CONT:
1979 lp_exec_continue(&bld->exec_mask);
1980 break;
1981
1982 case TGSI_OPCODE_EMIT:
1983 return FALSE;
1984 break;
1985
1986 case TGSI_OPCODE_ENDPRIM:
1987 return FALSE;
1988 break;
1989
1990 case TGSI_OPCODE_NOP:
1991 break;
1992
1993 default:
1994 return FALSE;
1995 }
1996
1997 if(info->num_dst) {
1998 LLVMValueRef pred[NUM_CHANNELS];
1999
2000 emit_fetch_predicate( bld, inst, pred );
2001
2002 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2003 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2004 }
2005 }
2006
2007 return TRUE;
2008 }
2009
2010
2011 void
2012 lp_build_tgsi_soa(LLVMBuilderRef builder,
2013 const struct tgsi_token *tokens,
2014 struct lp_type type,
2015 struct lp_build_mask_context *mask,
2016 LLVMValueRef consts_ptr,
2017 const LLVMValueRef *pos,
2018 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2019 LLVMValueRef (*outputs)[NUM_CHANNELS],
2020 struct lp_build_sampler_soa *sampler,
2021 const struct tgsi_shader_info *info)
2022 {
2023 struct lp_build_tgsi_soa_context bld;
2024 struct tgsi_parse_context parse;
2025 uint num_immediates = 0;
2026 uint num_instructions = 0;
2027 unsigned i;
2028 int pc = 0;
2029
2030 /* Setup build context */
2031 memset(&bld, 0, sizeof bld);
2032 lp_build_context_init(&bld.base, builder, type);
2033 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
2034 bld.mask = mask;
2035 bld.pos = pos;
2036 bld.inputs = inputs;
2037 bld.outputs = outputs;
2038 bld.consts_ptr = consts_ptr;
2039 bld.sampler = sampler;
2040 bld.indirect_files = info->indirect_files;
2041 bld.instructions = (struct tgsi_full_instruction *)
2042 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2043 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2044
2045 if (!bld.instructions) {
2046 return;
2047 }
2048
2049 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2050
2051 tgsi_parse_init( &parse, tokens );
2052
2053 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2054 tgsi_parse_token( &parse );
2055
2056 switch( parse.FullToken.Token.Type ) {
2057 case TGSI_TOKEN_TYPE_DECLARATION:
2058 /* Inputs already interpolated */
2059 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2060 break;
2061
2062 case TGSI_TOKEN_TYPE_INSTRUCTION:
2063 {
2064 /* save expanded instruction */
2065 if (num_instructions == bld.max_instructions) {
2066 bld.instructions = REALLOC(bld.instructions,
2067 bld.max_instructions
2068 * sizeof(struct tgsi_full_instruction),
2069 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2070 * sizeof(struct tgsi_full_instruction));
2071 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2072 }
2073
2074 memcpy(bld.instructions + num_instructions,
2075 &parse.FullToken.FullInstruction,
2076 sizeof(bld.instructions[0]));
2077
2078 num_instructions++;
2079 }
2080
2081 break;
2082
2083 case TGSI_TOKEN_TYPE_IMMEDIATE:
2084 /* simply copy the immediate values into the next immediates[] slot */
2085 {
2086 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2087 assert(size <= 4);
2088 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2089 for( i = 0; i < size; ++i )
2090 bld.immediates[num_immediates][i] =
2091 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2092 for( i = size; i < 4; ++i )
2093 bld.immediates[num_immediates][i] = bld.base.undef;
2094 num_immediates++;
2095 }
2096 break;
2097
2098 case TGSI_TOKEN_TYPE_PROPERTY:
2099 break;
2100
2101 default:
2102 assert( 0 );
2103 }
2104 }
2105
2106 while (pc != -1) {
2107 struct tgsi_full_instruction *instr = bld.instructions + pc;
2108 const struct tgsi_opcode_info *opcode_info =
2109 tgsi_get_opcode_info(instr->Instruction.Opcode);
2110 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2111 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2112 opcode_info->mnemonic);
2113 }
2114
2115 if (0) {
2116 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2117 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2118 debug_printf("11111111111111111111111111111 \n");
2119 tgsi_dump(tokens, 0);
2120 lp_debug_dump_value(function);
2121 debug_printf("2222222222222222222222222222 \n");
2122 }
2123 tgsi_parse_free( &parse );
2124
2125 if (0) {
2126 LLVMModuleRef module = LLVMGetGlobalParent(
2127 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2128 LLVMDumpModule(module);
2129
2130 }
2131
2132 FREE( bld.instructions );
2133 }
2134