gallivm: Move the texture modifiers to the header.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
/** Iterate CHAN over the four vector channels (X, Y, Z, W). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/** Nonzero if channel CHAN is enabled in the first destination's writemask. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/** Execute the following statement only for an enabled dst0 channel. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Iterate CHAN over only the channels enabled in dst0's writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices within a 4-component register */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Size of the 'instructions' buffer (see max_instructions below) */
#define LP_MAX_INSTRUCTIONS 256
83
/**
 * Runtime execution-mask state used to emulate TGSI control flow
 * (if/else, loops, subroutine calls) with per-channel masks in SoA form.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any control-flow stack is non-empty, i.e. stores must be
    * predicated by exec_mask; see lp_exec_mask_update(). */
   boolean has_mask;

   /* LLVM type of the integer mask vectors below */
   LLVMTypeRef int_vec_type;

   /* Conditional (IF/ELSE/ENDIF) masks, one saved per nesting level */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Current loop state */
   LLVMBasicBlockRef loop_block;  /* block to branch back to at ENDLOOP */
   LLVMValueRef cont_mask;        /* channels that have not executed CONT */
   LLVMValueRef break_mask;       /* channels that have not executed BRK */
   LLVMValueRef break_var;        /* alloca carrying break_mask across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Subroutine call state: channels that have not executed RET, plus the
    * saved return pc/ret_mask per call nesting level */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* Combined mask (cond & cont & break [& ret]), kept up to date by
    * lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
116
/**
 * Per-shader translation context for TGSI -> LLVM IR (SoA layout).
 */
struct lp_build_tgsi_soa_context
{
   /* Builder for float SoA vectors (the shader's main data type) */
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   /* Pointer to the constant buffer contents */
   LLVMValueRef consts_ptr;
   /* Fragment position (if supplied by the caller) */
   const LLVMValueRef *pos;
   /* Interpolated input values, [register][channel] */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* Output value pointers (allocas), [register][channel] */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Callback object used to generate texture-sampling code */
   const struct lp_build_sampler_soa *sampler;

   /* Immediate constants, broadcast to SoA vectors */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   /* Temporary register allocas, [register][channel] (see temps_array) */
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   /* Address (ADDR) register allocas */
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   /* Predicate register allocas */
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   /* Fragment "alive" mask, updated by KIL/KILP */
   struct lp_build_mask_context *mask;
   /* Control-flow execution mask state */
   struct lp_exec_mask exec_mask;

   /* Decoded instruction buffer (for subroutine pc-based execution) */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
151
152 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
153 {
154 mask->bld = bld;
155 mask->has_mask = FALSE;
156 mask->cond_stack_size = 0;
157 mask->loop_stack_size = 0;
158 mask->call_stack_size = 0;
159
160 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
161 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
162 LLVMConstAllOnes(mask->int_vec_type);
163 }
164
165 static void lp_exec_mask_update(struct lp_exec_mask *mask)
166 {
167 if (mask->loop_stack_size) {
168 /*for loops we need to update the entire mask at runtime */
169 LLVMValueRef tmp;
170 assert(mask->break_mask);
171 tmp = LLVMBuildAnd(mask->bld->builder,
172 mask->cont_mask,
173 mask->break_mask,
174 "maskcb");
175 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
176 mask->cond_mask,
177 tmp,
178 "maskfull");
179 } else
180 mask->exec_mask = mask->cond_mask;
181
182 if (mask->call_stack_size) {
183 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
184 mask->exec_mask,
185 mask->ret_mask,
186 "callmask");
187 }
188
189 mask->has_mask = (mask->cond_stack_size > 0 ||
190 mask->loop_stack_size > 0 ||
191 mask->call_stack_size > 0);
192 }
193
194 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
195 LLVMValueRef val)
196 {
197 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
198 if (mask->cond_stack_size == 0) {
199 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
200 }
201 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
202 assert(LLVMTypeOf(val) == mask->int_vec_type);
203 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
204 mask->cond_mask,
205 val,
206 "");
207 lp_exec_mask_update(mask);
208 }
209
210 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
211 {
212 LLVMValueRef prev_mask;
213 LLVMValueRef inv_mask;
214
215 assert(mask->cond_stack_size);
216 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
217 if (mask->cond_stack_size == 1) {
218 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
219 }
220
221 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
222
223 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
224 inv_mask,
225 prev_mask, "");
226 lp_exec_mask_update(mask);
227 }
228
229 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
230 {
231 assert(mask->cond_stack_size);
232 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
233 lp_exec_mask_update(mask);
234 }
235
/**
 * BGNLOOP: push the enclosing loop state, allocate a variable to carry
 * break_mask across iterations, and start a new basic block that ENDLOOP
 * will branch back to.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask must survive across iterations, so it lives in memory */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* open the loop-body block; ENDLOOP branches back here */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload break_mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
264
265 static void lp_exec_break(struct lp_exec_mask *mask)
266 {
267 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
268 mask->exec_mask,
269 "break");
270
271 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
272 mask->break_mask,
273 exec_mask, "break_full");
274
275 lp_exec_mask_update(mask);
276 }
277
278 static void lp_exec_continue(struct lp_exec_mask *mask)
279 {
280 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
281 mask->exec_mask,
282 "");
283
284 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
285 mask->cont_mask,
286 exec_mask, "");
287
288 lp_exec_mask_update(mask);
289 }
290
291
/**
 * ENDLOOP: emit the back-edge branch (taken while any channel is still
 * active), then pop and restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* integer wide enough to view the whole mask vector as one scalar */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. at least one channel still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop again while any channel is active, otherwise fall out */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the enclosing loop's state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
337
338 /* stores val into an address pointed to by dst.
339 * mask->exec_mask is used to figure out which bits of val
340 * should be stored into the address
341 * (0 means don't store this bit, 1 means do store).
342 */
343 static void lp_exec_mask_store(struct lp_exec_mask *mask,
344 LLVMValueRef pred,
345 LLVMValueRef val,
346 LLVMValueRef dst)
347 {
348 /* Mix the predicate and execution mask */
349 if (mask->has_mask) {
350 if (pred) {
351 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
352 } else {
353 pred = mask->exec_mask;
354 }
355 }
356
357 if (pred) {
358 LLVMValueRef real_val, dst_val;
359
360 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
361 real_val = lp_build_select(mask->bld,
362 pred,
363 val, dst_val);
364
365 LLVMBuildStore(mask->bld->builder, real_val, dst);
366 } else
367 LLVMBuildStore(mask->bld->builder, val, dst);
368 }
369
370 static void lp_exec_mask_call(struct lp_exec_mask *mask,
371 int func,
372 int *pc)
373 {
374 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
375 mask->call_stack[mask->call_stack_size].pc = *pc;
376 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
377 mask->call_stack_size++;
378 *pc = func;
379 }
380
381 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
382 {
383 LLVMValueRef exec_mask;
384
385 if (mask->call_stack_size == 0) {
386 /* returning from main() */
387 *pc = -1;
388 return;
389 }
390 exec_mask = LLVMBuildNot(mask->bld->builder,
391 mask->exec_mask,
392 "ret");
393
394 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
395 mask->ret_mask,
396 exec_mask, "ret_full");
397
398 lp_exec_mask_update(mask);
399 }
400
/* BGNSUB: nothing to do -- the masks/pc are saved at the CAL site. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
404
405 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
406 {
407 assert(mask->call_stack_size);
408 mask->call_stack_size--;
409 *pc = mask->call_stack[mask->call_stack_size].pc;
410 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
411 lp_exec_mask_update(mask);
412 }
413
414
415 /**
416 * Return pointer to a temporary register channel (src or dest).
417 * Note that indirect addressing cannot be handled here.
418 * \param index which temporary register
419 * \param chan which channel of the temp register.
420 */
421 static LLVMValueRef
422 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
423 unsigned index,
424 unsigned chan)
425 {
426 assert(chan < 4);
427 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
428 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
429 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
430 }
431 else {
432 return bld->temps[index][chan];
433 }
434 }
435
436
437 /**
438 * Gather vector.
439 * XXX the lp_build_gather() function should be capable of doing this
440 * with a little work.
441 */
442 static LLVMValueRef
443 build_gather(struct lp_build_tgsi_soa_context *bld,
444 LLVMValueRef base_ptr,
445 LLVMValueRef indexes)
446 {
447 LLVMValueRef res = bld->base.undef;
448 unsigned i;
449
450 /*
451 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
452 */
453 for (i = 0; i < bld->base.type.length; i++) {
454 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
455 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
456 indexes, ii, "");
457 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
458 &index, 1, "");
459 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
460
461 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
462 }
463
464 return res;
465 }
466
467
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, multiply by four and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
                     const struct tgsi_src_register *indirect_reg)
{
   /* always use X component of address register */
   const int x = indirect_reg->SwizzleX;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
   /* NOTE(review): SwizzleX is passed as the *channel* argument here, so
    * the swizzle is effectively looked up twice; passing chan 0 would
    * yield SwizzleX directly (equivalent only when SwizzleX == 0) --
    * confirm intended behavior before changing. */
   uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
   LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
   LLVMValueRef addr_vec;

   addr_vec = LLVMBuildLoad(bld->base.builder,
                            bld->addr[indirect_reg->Index][swizzle],
                            "load addr reg");

   /* for indexing we want integers */
   addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
                              int_vec_type, "");

   /* addr_vec = addr_vec * 4 (four channels per register) */
   addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);

   return addr_vec;
}
498
499
/**
 * Register fetch.
 *
 * Returns the SoA vector for one channel of the given source operand,
 * applying the operand's swizzle, absolute/negate modifiers and (for
 * the CONSTANT/TEMPORARY files) indirect addressing.
 *
 * \param src_op      which source operand of the instruction
 * \param chan_index  which destination channel is being computed
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr_vec = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      assert(bld->indirect_files);
      /* per-channel element offsets (already scaled by 4) from ADDR reg */
      addr_vec = get_indirect_offsets(bld, &reg->Indirect);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* index into the const buffer */

         assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec = index_vec + addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         /* load one scalar, then replicate it to all lanes */
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef vec_len =
            lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec += addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* index_vec *= vector_length (temps_array holds SoA vectors,
          * but we gather through a scalar float pointer) */
         index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* Apply the operand's sign modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      /* |x| */
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* -|x|: take abs, then fall through to negate */
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
627
628
629 /**
630 * Register fetch with derivatives.
631 */
632 static void
633 emit_fetch_deriv(
634 struct lp_build_tgsi_soa_context *bld,
635 const struct tgsi_full_instruction *inst,
636 unsigned index,
637 const unsigned chan_index,
638 LLVMValueRef *res,
639 LLVMValueRef *ddx,
640 LLVMValueRef *ddy)
641 {
642 LLVMValueRef src;
643
644 src = emit_fetch(bld, inst, index, chan_index);
645
646 if(res)
647 *res = src;
648
649 /* TODO: use interpolation coeffs for inputs */
650
651 if(ddx)
652 *ddx = lp_build_ddx(&bld->base, src);
653
654 if(ddy)
655 *ddy = lp_build_ddy(&bld->base, src);
656 }
657
658
/**
 * Fetch the instruction's predicate register as four per-channel integer
 * masks (NULL entries when the instruction is not predicated).
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* not predicated: no masks */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
724
725
726 /**
727 * Register store.
728 */
729 static void
730 emit_store(
731 struct lp_build_tgsi_soa_context *bld,
732 const struct tgsi_full_instruction *inst,
733 unsigned index,
734 unsigned chan_index,
735 LLVMValueRef pred,
736 LLVMValueRef value)
737 {
738 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
739 LLVMValueRef addr = NULL;
740
741 switch( inst->Instruction.Saturate ) {
742 case TGSI_SAT_NONE:
743 break;
744
745 case TGSI_SAT_ZERO_ONE:
746 value = lp_build_max(&bld->base, value, bld->base.zero);
747 value = lp_build_min(&bld->base, value, bld->base.one);
748 break;
749
750 case TGSI_SAT_MINUS_PLUS_ONE:
751 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
752 value = lp_build_min(&bld->base, value, bld->base.one);
753 break;
754
755 default:
756 assert(0);
757 }
758
759 if (reg->Register.Indirect) {
760 /* XXX use get_indirect_offsets() here eventually */
761 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
762 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
763
764 assert(bld->indirect_files);
765
766 addr = LLVMBuildLoad(bld->base.builder,
767 bld->addr[reg->Indirect.Index][swizzle],
768 "");
769 /* for indexing we want integers */
770 addr = LLVMBuildFPToSI(bld->base.builder, addr,
771 int_vec_type, "");
772 addr = LLVMBuildExtractElement(bld->base.builder,
773 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
774 "");
775 addr = LLVMBuildMul(bld->base.builder,
776 addr, LLVMConstInt(LLVMInt32Type(), 4, 0),
777 "");
778 }
779
780 switch( reg->Register.File ) {
781 case TGSI_FILE_OUTPUT:
782 lp_exec_mask_store(&bld->exec_mask, pred, value,
783 bld->outputs[reg->Register.Index][chan_index]);
784 break;
785
786 case TGSI_FILE_TEMPORARY:
787 if (reg->Register.Indirect) {
788 /* XXX not done yet */
789 debug_printf("WARNING: LLVM scatter store of temp regs"
790 " not implemented\n");
791 }
792 else {
793 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
794 chan_index);
795 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
796 }
797 break;
798
799 case TGSI_FILE_ADDRESS:
800 lp_exec_mask_store(&bld->exec_mask, pred, value,
801 bld->addr[reg->Indirect.Index][chan_index]);
802 break;
803
804 case TGSI_FILE_PREDICATE:
805 lp_exec_mask_store(&bld->exec_mask, pred, value,
806 bld->preds[reg->Register.Index][chan_index]);
807 break;
808
809 default:
810 assert( 0 );
811 }
812 }
813
814
815 /**
816 * High-level instruction translators.
817 */
818
/**
 * Emit code to sample a texture for the TEX/TXB/TXL/TXP/TXD family.
 *
 * Fetches the coordinates (and, per 'modifier', lod bias / explicit lod /
 * projection divisor / explicit derivatives) from the instruction's source
 * operands, then delegates to the sampler callback.
 *
 * \param modifier  which texture modifier variant is being emitted
 * \param texel     receives the four resulting texel channels
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator: return undefined texels rather than crash */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* how many coordinate components does this target consume? */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias (TXB) and explicit lod (TXL) both come from src0.w */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* TXP: divide coords by src0.w (multiply by its reciprocal) */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives are in src1/src2, sampler unit in src3 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* otherwise compute derivatives from the coords themselves;
       * sampler unit is in src1 */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
915
916
917 /**
918 * Kill fragment if any of the src register values are negative.
919 */
920 static void
921 emit_kil(
922 struct lp_build_tgsi_soa_context *bld,
923 const struct tgsi_full_instruction *inst )
924 {
925 const struct tgsi_full_src_register *reg = &inst->Src[0];
926 LLVMValueRef terms[NUM_CHANNELS];
927 LLVMValueRef mask;
928 unsigned chan_index;
929
930 memset(&terms, 0, sizeof terms);
931
932 FOR_EACH_CHANNEL( chan_index ) {
933 unsigned swizzle;
934
935 /* Unswizzle channel */
936 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
937
938 /* Check if the component has not been already tested. */
939 assert(swizzle < NUM_CHANNELS);
940 if( !terms[swizzle] )
941 /* TODO: change the comparison operator instead of setting the sign */
942 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
943 }
944
945 mask = NULL;
946 FOR_EACH_CHANNEL( chan_index ) {
947 if(terms[chan_index]) {
948 LLVMValueRef chan_mask;
949
950 /*
951 * If term < 0 then mask = 0 else mask = ~0.
952 */
953 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
954
955 if(mask)
956 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
957 else
958 mask = chan_mask;
959 }
960 }
961
962 if(mask)
963 lp_build_mask_update(bld->mask, mask);
964 }
965
966
967 /**
968 * Predicated fragment kill.
969 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
970 * The only predication is the execution mask which will apply if
971 * we're inside a loop or conditional.
972 */
973 static void
974 emit_kilp(struct lp_build_tgsi_soa_context *bld,
975 const struct tgsi_full_instruction *inst)
976 {
977 LLVMValueRef mask;
978
979 /* For those channels which are "alive", disable fragment shader
980 * execution.
981 */
982 if (bld->exec_mask.has_mask) {
983 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
984 }
985 else {
986 mask = bld->base.zero;
987 }
988
989 lp_build_mask_update(bld->mask, mask);
990 }
991
/**
 * Allocate storage (allocas) for a TGSI register declaration, covering
 * each index in the declaration's range.
 */
static void
emit_declaration(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_declaration *decl)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
            /* one flat array covering all temps, 4 channels per register.
             * NOTE(review): this alloca is re-issued for every idx in the
             * range, overwriting temps_array each time -- looks like it
             * only needs to happen once per declaration; confirm. */
            LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                                   last*4 + 4, 0);
            bld->temps_array = lp_build_array_alloca(bld->base.builder,
                                                     vec_type, array_size, "");
         } else {
            /* one alloca per register channel */
            for (i = 0; i < NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
                                                    vec_type, "");
         }
         break;

      case TGSI_FILE_OUTPUT:
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
                                                   vec_type, "");
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
                                                vec_type, "");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
                                                 vec_type, "");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
1045
1046
1047 /**
1048 * Emit LLVM for one TGSI instruction.
1049 * \param return TRUE for success, FALSE otherwise
1050 */
1051 static boolean
1052 emit_instruction(
1053 struct lp_build_tgsi_soa_context *bld,
1054 const struct tgsi_full_instruction *inst,
1055 const struct tgsi_opcode_info *info,
1056 int *pc)
1057 {
1058 unsigned chan_index;
1059 LLVMValueRef src0, src1, src2;
1060 LLVMValueRef tmp0, tmp1, tmp2;
1061 LLVMValueRef tmp3 = NULL;
1062 LLVMValueRef tmp4 = NULL;
1063 LLVMValueRef tmp5 = NULL;
1064 LLVMValueRef tmp6 = NULL;
1065 LLVMValueRef tmp7 = NULL;
1066 LLVMValueRef res;
1067 LLVMValueRef dst0[NUM_CHANNELS];
1068
1069 /*
1070 * Stores and write masks are handled in a general fashion after the long
1071 * instruction opcode switch statement.
1072 *
1073 * Although not stricitly necessary, we avoid generating instructions for
1074 * channels which won't be stored, in cases where's that easy. For some
1075 * complex instructions, like texture sampling, it is more convenient to
1076 * assume a full writemask and then let LLVM optimization passes eliminate
1077 * redundant code.
1078 */
1079
1080 (*pc)++;
1081
1082 assert(info->num_dst <= 1);
1083 if (info->num_dst) {
1084 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1085 dst0[chan_index] = bld->base.undef;
1086 }
1087 }
1088
1089 switch (inst->Instruction.Opcode) {
1090 case TGSI_OPCODE_ARL:
1091 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1092 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1093 tmp0 = lp_build_floor(&bld->base, tmp0);
1094 dst0[chan_index] = tmp0;
1095 }
1096 break;
1097
1098 case TGSI_OPCODE_MOV:
1099 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1100 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1101 }
1102 break;
1103
1104 case TGSI_OPCODE_LIT:
1105 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1106 dst0[CHAN_X] = bld->base.one;
1107 }
1108 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1109 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1110 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1111 }
1112 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1113 /* XMM[1] = SrcReg[0].yyyy */
1114 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1115 /* XMM[1] = max(XMM[1], 0) */
1116 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1117 /* XMM[2] = SrcReg[0].wwww */
1118 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1119 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1120 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1121 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1122 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1123 }
1124 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1125 dst0[CHAN_W] = bld->base.one;
1126 }
1127 break;
1128
1129 case TGSI_OPCODE_RCP:
1130 /* TGSI_OPCODE_RECIP */
1131 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1132 res = lp_build_rcp(&bld->base, src0);
1133 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1134 dst0[chan_index] = res;
1135 }
1136 break;
1137
1138 case TGSI_OPCODE_RSQ:
1139 /* TGSI_OPCODE_RECIPSQRT */
1140 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1141 src0 = lp_build_abs(&bld->base, src0);
1142 res = lp_build_rsqrt(&bld->base, src0);
1143 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1144 dst0[chan_index] = res;
1145 }
1146 break;
1147
1148 case TGSI_OPCODE_EXP:
1149 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1150 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1151 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1152 LLVMValueRef *p_exp2_int_part = NULL;
1153 LLVMValueRef *p_frac_part = NULL;
1154 LLVMValueRef *p_exp2 = NULL;
1155
1156 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1157
1158 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1159 p_exp2_int_part = &tmp0;
1160 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1161 p_frac_part = &tmp1;
1162 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1163 p_exp2 = &tmp2;
1164
1165 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1166
1167 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1168 dst0[CHAN_X] = tmp0;
1169 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1170 dst0[CHAN_Y] = tmp1;
1171 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1172 dst0[CHAN_Z] = tmp2;
1173 }
1174 /* dst.w = 1.0 */
1175 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1176 dst0[CHAN_W] = bld->base.one;
1177 }
1178 break;
1179
1180 case TGSI_OPCODE_LOG:
1181 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1182 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1183 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1184 LLVMValueRef *p_floor_log2 = NULL;
1185 LLVMValueRef *p_exp = NULL;
1186 LLVMValueRef *p_log2 = NULL;
1187
1188 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1189 src0 = lp_build_abs( &bld->base, src0 );
1190
1191 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1192 p_floor_log2 = &tmp0;
1193 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1194 p_exp = &tmp1;
1195 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1196 p_log2 = &tmp2;
1197
1198 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1199
1200 /* dst.x = floor(lg2(abs(src.x))) */
1201 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1202 dst0[CHAN_X] = tmp0;
1203 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1204 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1205 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1206 }
1207 /* dst.z = lg2(abs(src.x)) */
1208 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1209 dst0[CHAN_Z] = tmp2;
1210 }
1211 /* dst.w = 1.0 */
1212 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1213 dst0[CHAN_W] = bld->base.one;
1214 }
1215 break;
1216
1217 case TGSI_OPCODE_MUL:
1218 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1219 src0 = emit_fetch( bld, inst, 0, chan_index );
1220 src1 = emit_fetch( bld, inst, 1, chan_index );
1221 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1222 }
1223 break;
1224
1225 case TGSI_OPCODE_ADD:
1226 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1227 src0 = emit_fetch( bld, inst, 0, chan_index );
1228 src1 = emit_fetch( bld, inst, 1, chan_index );
1229 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1230 }
1231 break;
1232
1233 case TGSI_OPCODE_DP3:
1234 /* TGSI_OPCODE_DOT3 */
1235 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1236 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1237 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1238 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1239 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1240 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1241 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1242 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1243 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1244 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1245 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1246 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1247 dst0[chan_index] = tmp0;
1248 }
1249 break;
1250
1251 case TGSI_OPCODE_DP4:
1252 /* TGSI_OPCODE_DOT4 */
1253 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1254 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1255 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1256 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1257 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1258 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1259 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1260 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1261 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1262 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1263 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1264 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1265 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1266 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1267 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1268 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1269 dst0[chan_index] = tmp0;
1270 }
1271 break;
1272
1273 case TGSI_OPCODE_DST:
1274 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1275 dst0[CHAN_X] = bld->base.one;
1276 }
1277 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1278 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1279 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1280 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1281 }
1282 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1283 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1284 }
1285 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1286 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1287 }
1288 break;
1289
1290 case TGSI_OPCODE_MIN:
1291 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1292 src0 = emit_fetch( bld, inst, 0, chan_index );
1293 src1 = emit_fetch( bld, inst, 1, chan_index );
1294 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1295 }
1296 break;
1297
1298 case TGSI_OPCODE_MAX:
1299 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1300 src0 = emit_fetch( bld, inst, 0, chan_index );
1301 src1 = emit_fetch( bld, inst, 1, chan_index );
1302 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1303 }
1304 break;
1305
1306 case TGSI_OPCODE_SLT:
1307 /* TGSI_OPCODE_SETLT */
1308 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1309 src0 = emit_fetch( bld, inst, 0, chan_index );
1310 src1 = emit_fetch( bld, inst, 1, chan_index );
1311 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1312 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1313 }
1314 break;
1315
1316 case TGSI_OPCODE_SGE:
1317 /* TGSI_OPCODE_SETGE */
1318 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1319 src0 = emit_fetch( bld, inst, 0, chan_index );
1320 src1 = emit_fetch( bld, inst, 1, chan_index );
1321 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1322 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1323 }
1324 break;
1325
1326 case TGSI_OPCODE_MAD:
1327 /* TGSI_OPCODE_MADD */
1328 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1329 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1330 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1331 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1332 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1333 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1334 dst0[chan_index] = tmp0;
1335 }
1336 break;
1337
1338 case TGSI_OPCODE_SUB:
1339 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1340 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1341 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1342 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1343 }
1344 break;
1345
1346 case TGSI_OPCODE_LRP:
1347 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1348 src0 = emit_fetch( bld, inst, 0, chan_index );
1349 src1 = emit_fetch( bld, inst, 1, chan_index );
1350 src2 = emit_fetch( bld, inst, 2, chan_index );
1351 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1352 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1353 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1354 }
1355 break;
1356
1357 case TGSI_OPCODE_CND:
1358 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1359 src0 = emit_fetch( bld, inst, 0, chan_index );
1360 src1 = emit_fetch( bld, inst, 1, chan_index );
1361 src2 = emit_fetch( bld, inst, 2, chan_index );
1362 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1363 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1364 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1365 }
1366 break;
1367
1368 case TGSI_OPCODE_DP2A:
1369 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1370 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1371 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1372 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1373 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1374 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1375 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1376 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1377 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1378 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1379 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1380 }
1381 break;
1382
1383 case TGSI_OPCODE_FRC:
1384 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1385 src0 = emit_fetch( bld, inst, 0, chan_index );
1386 tmp0 = lp_build_floor(&bld->base, src0);
1387 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1388 dst0[chan_index] = tmp0;
1389 }
1390 break;
1391
1392 case TGSI_OPCODE_CLAMP:
1393 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1394 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1395 src1 = emit_fetch( bld, inst, 1, chan_index );
1396 src2 = emit_fetch( bld, inst, 2, chan_index );
1397 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1398 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1399 dst0[chan_index] = tmp0;
1400 }
1401 break;
1402
1403 case TGSI_OPCODE_FLR:
1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1405 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1406 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1407 }
1408 break;
1409
1410 case TGSI_OPCODE_ROUND:
1411 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1412 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1413 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1414 }
1415 break;
1416
1417 case TGSI_OPCODE_EX2: {
1418 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1419 tmp0 = lp_build_exp2( &bld->base, tmp0);
1420 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1421 dst0[chan_index] = tmp0;
1422 }
1423 break;
1424 }
1425
1426 case TGSI_OPCODE_LG2:
1427 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1428 tmp0 = lp_build_log2( &bld->base, tmp0);
1429 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1430 dst0[chan_index] = tmp0;
1431 }
1432 break;
1433
1434 case TGSI_OPCODE_POW:
1435 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1436 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1437 res = lp_build_pow( &bld->base, src0, src1 );
1438 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1439 dst0[chan_index] = res;
1440 }
1441 break;
1442
1443 case TGSI_OPCODE_XPD:
1444 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1445 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1446 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1447 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1448 }
1449 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1450 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1451 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1452 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1453 }
1454 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1455 tmp2 = tmp0;
1456 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1457 tmp5 = tmp3;
1458 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1459 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1460 dst0[CHAN_X] = tmp2;
1461 }
1462 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1463 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1464 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1465 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1466 }
1467 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1468 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1469 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1470 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1471 dst0[CHAN_Y] = tmp3;
1472 }
1473 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1474 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1475 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1476 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1477 dst0[CHAN_Z] = tmp5;
1478 }
1479 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1480 dst0[CHAN_W] = bld->base.one;
1481 }
1482 break;
1483
1484 case TGSI_OPCODE_ABS:
1485 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1486 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1487 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1488 }
1489 break;
1490
1491 case TGSI_OPCODE_RCC:
1492 /* deprecated? */
1493 assert(0);
1494 return FALSE;
1495
1496 case TGSI_OPCODE_DPH:
1497 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1498 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1499 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1500 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1501 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1502 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1503 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1504 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1505 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1506 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1507 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1508 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1509 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1510 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1511 dst0[chan_index] = tmp0;
1512 }
1513 break;
1514
1515 case TGSI_OPCODE_COS:
1516 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1517 tmp0 = lp_build_cos( &bld->base, tmp0 );
1518 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1519 dst0[chan_index] = tmp0;
1520 }
1521 break;
1522
1523 case TGSI_OPCODE_DDX:
1524 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1525 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1526 }
1527 break;
1528
1529 case TGSI_OPCODE_DDY:
1530 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1531 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1532 }
1533 break;
1534
1535 case TGSI_OPCODE_KILP:
1536 /* predicated kill */
1537 emit_kilp( bld, inst );
1538 break;
1539
1540 case TGSI_OPCODE_KIL:
1541 /* conditional kill */
1542 emit_kil( bld, inst );
1543 break;
1544
1545 case TGSI_OPCODE_PK2H:
1546 return FALSE;
1547 break;
1548
1549 case TGSI_OPCODE_PK2US:
1550 return FALSE;
1551 break;
1552
1553 case TGSI_OPCODE_PK4B:
1554 return FALSE;
1555 break;
1556
1557 case TGSI_OPCODE_PK4UB:
1558 return FALSE;
1559 break;
1560
1561 case TGSI_OPCODE_RFL:
1562 return FALSE;
1563 break;
1564
1565 case TGSI_OPCODE_SEQ:
1566 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1567 src0 = emit_fetch( bld, inst, 0, chan_index );
1568 src1 = emit_fetch( bld, inst, 1, chan_index );
1569 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1570 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1571 }
1572 break;
1573
1574 case TGSI_OPCODE_SFL:
1575 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1576 dst0[chan_index] = bld->base.zero;
1577 }
1578 break;
1579
1580 case TGSI_OPCODE_SGT:
1581 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1582 src0 = emit_fetch( bld, inst, 0, chan_index );
1583 src1 = emit_fetch( bld, inst, 1, chan_index );
1584 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1585 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1586 }
1587 break;
1588
1589 case TGSI_OPCODE_SIN:
1590 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1591 tmp0 = lp_build_sin( &bld->base, tmp0 );
1592 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1593 dst0[chan_index] = tmp0;
1594 }
1595 break;
1596
1597 case TGSI_OPCODE_SLE:
1598 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1599 src0 = emit_fetch( bld, inst, 0, chan_index );
1600 src1 = emit_fetch( bld, inst, 1, chan_index );
1601 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1602 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1603 }
1604 break;
1605
1606 case TGSI_OPCODE_SNE:
1607 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1608 src0 = emit_fetch( bld, inst, 0, chan_index );
1609 src1 = emit_fetch( bld, inst, 1, chan_index );
1610 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1611 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1612 }
1613 break;
1614
1615 case TGSI_OPCODE_STR:
1616 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1617 dst0[chan_index] = bld->base.one;
1618 }
1619 break;
1620
1621 case TGSI_OPCODE_TEX:
1622 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1623 break;
1624
1625 case TGSI_OPCODE_TXD:
1626 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1627 break;
1628
1629 case TGSI_OPCODE_UP2H:
1630 /* deprecated */
1631 assert (0);
1632 return FALSE;
1633 break;
1634
1635 case TGSI_OPCODE_UP2US:
1636 /* deprecated */
1637 assert(0);
1638 return FALSE;
1639 break;
1640
1641 case TGSI_OPCODE_UP4B:
1642 /* deprecated */
1643 assert(0);
1644 return FALSE;
1645 break;
1646
1647 case TGSI_OPCODE_UP4UB:
1648 /* deprecated */
1649 assert(0);
1650 return FALSE;
1651 break;
1652
1653 case TGSI_OPCODE_X2D:
1654 /* deprecated? */
1655 assert(0);
1656 return FALSE;
1657 break;
1658
1659 case TGSI_OPCODE_ARA:
1660 /* deprecated */
1661 assert(0);
1662 return FALSE;
1663 break;
1664
1665 case TGSI_OPCODE_ARR:
1666 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1667 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1668 tmp0 = lp_build_round(&bld->base, tmp0);
1669 dst0[chan_index] = tmp0;
1670 }
1671 break;
1672
1673 case TGSI_OPCODE_BRA:
1674 /* deprecated */
1675 assert(0);
1676 return FALSE;
1677 break;
1678
1679 case TGSI_OPCODE_CAL:
1680 lp_exec_mask_call(&bld->exec_mask,
1681 inst->Label.Label,
1682 pc);
1683
1684 break;
1685
1686 case TGSI_OPCODE_RET:
1687 lp_exec_mask_ret(&bld->exec_mask, pc);
1688 break;
1689
1690 case TGSI_OPCODE_END:
1691 *pc = -1;
1692 break;
1693
1694 case TGSI_OPCODE_SSG:
1695 /* TGSI_OPCODE_SGN */
1696 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1697 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1698 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1699 }
1700 break;
1701
1702 case TGSI_OPCODE_CMP:
1703 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1704 src0 = emit_fetch( bld, inst, 0, chan_index );
1705 src1 = emit_fetch( bld, inst, 1, chan_index );
1706 src2 = emit_fetch( bld, inst, 2, chan_index );
1707 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1708 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1709 }
1710 break;
1711
1712 case TGSI_OPCODE_SCS:
1713 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1714 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1715 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1716 }
1717 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1718 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1719 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1720 }
1721 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1722 dst0[CHAN_Z] = bld->base.zero;
1723 }
1724 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1725 dst0[CHAN_W] = bld->base.one;
1726 }
1727 break;
1728
1729 case TGSI_OPCODE_TXB:
1730 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
1731 break;
1732
1733 case TGSI_OPCODE_NRM:
1734 /* fall-through */
1735 case TGSI_OPCODE_NRM4:
1736 /* 3 or 4-component normalization */
1737 {
1738 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1739
1740 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1741 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1742 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1743 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1744
1745 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1746
1747 /* xmm4 = src.x */
1748 /* xmm0 = src.x * src.x */
1749 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1750 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1751 tmp4 = tmp0;
1752 }
1753 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1754
1755 /* xmm5 = src.y */
1756 /* xmm0 = xmm0 + src.y * src.y */
1757 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1758 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1759 tmp5 = tmp1;
1760 }
1761 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1762 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1763
1764 /* xmm6 = src.z */
1765 /* xmm0 = xmm0 + src.z * src.z */
1766 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1767 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1768 tmp6 = tmp1;
1769 }
1770 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1771 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1772
1773 if (dims == 4) {
1774 /* xmm7 = src.w */
1775 /* xmm0 = xmm0 + src.w * src.w */
1776 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1777 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1778 tmp7 = tmp1;
1779 }
1780 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1781 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1782 }
1783
1784 /* xmm1 = 1 / sqrt(xmm0) */
1785 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1786
1787 /* dst.x = xmm1 * src.x */
1788 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1789 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1790 }
1791
1792 /* dst.y = xmm1 * src.y */
1793 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1794 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1795 }
1796
1797 /* dst.z = xmm1 * src.z */
1798 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1799 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1800 }
1801
1802 /* dst.w = xmm1 * src.w */
1803 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1804 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1805 }
1806 }
1807
1808 /* dst.w = 1.0 */
1809 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1810 dst0[CHAN_W] = bld->base.one;
1811 }
1812 }
1813 break;
1814
1815 case TGSI_OPCODE_DIV:
1816 /* deprecated */
1817 assert( 0 );
1818 return FALSE;
1819 break;
1820
1821 case TGSI_OPCODE_DP2:
1822 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1823 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1824 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1825 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1826 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1827 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1828 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1829 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1830 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1831 }
1832 break;
1833
1834 case TGSI_OPCODE_TXL:
1835 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1836 break;
1837
1838 case TGSI_OPCODE_TXP:
1839 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
1840 break;
1841
1842 case TGSI_OPCODE_BRK:
1843 lp_exec_break(&bld->exec_mask);
1844 break;
1845
1846 case TGSI_OPCODE_IF:
1847 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1848 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1849 tmp0, bld->base.zero);
1850 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1851 break;
1852
1853 case TGSI_OPCODE_BGNLOOP:
1854 lp_exec_bgnloop(&bld->exec_mask);
1855 break;
1856
1857 case TGSI_OPCODE_BGNSUB:
1858 lp_exec_mask_bgnsub(&bld->exec_mask);
1859 break;
1860
1861 case TGSI_OPCODE_ELSE:
1862 lp_exec_mask_cond_invert(&bld->exec_mask);
1863 break;
1864
1865 case TGSI_OPCODE_ENDIF:
1866 lp_exec_mask_cond_pop(&bld->exec_mask);
1867 break;
1868
1869 case TGSI_OPCODE_ENDLOOP:
1870 lp_exec_endloop(&bld->exec_mask);
1871 break;
1872
1873 case TGSI_OPCODE_ENDSUB:
1874 lp_exec_mask_endsub(&bld->exec_mask, pc);
1875 break;
1876
1877 case TGSI_OPCODE_PUSHA:
1878 /* deprecated? */
1879 assert(0);
1880 return FALSE;
1881 break;
1882
1883 case TGSI_OPCODE_POPA:
1884 /* deprecated? */
1885 assert(0);
1886 return FALSE;
1887 break;
1888
1889 case TGSI_OPCODE_CEIL:
1890 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1891 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1892 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1893 }
1894 break;
1895
1896 case TGSI_OPCODE_I2F:
1897 /* deprecated? */
1898 assert(0);
1899 return FALSE;
1900 break;
1901
1902 case TGSI_OPCODE_NOT:
1903 /* deprecated? */
1904 assert(0);
1905 return FALSE;
1906 break;
1907
1908 case TGSI_OPCODE_TRUNC:
1909 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1910 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1911 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1912 }
1913 break;
1914
1915 case TGSI_OPCODE_SHL:
1916 /* deprecated? */
1917 assert(0);
1918 return FALSE;
1919 break;
1920
1921 case TGSI_OPCODE_ISHR:
1922 /* deprecated? */
1923 assert(0);
1924 return FALSE;
1925 break;
1926
1927 case TGSI_OPCODE_AND:
1928 /* deprecated? */
1929 assert(0);
1930 return FALSE;
1931 break;
1932
1933 case TGSI_OPCODE_OR:
1934 /* deprecated? */
1935 assert(0);
1936 return FALSE;
1937 break;
1938
1939 case TGSI_OPCODE_MOD:
1940 /* deprecated? */
1941 assert(0);
1942 return FALSE;
1943 break;
1944
1945 case TGSI_OPCODE_XOR:
1946 /* deprecated? */
1947 assert(0);
1948 return FALSE;
1949 break;
1950
1951 case TGSI_OPCODE_SAD:
1952 /* deprecated? */
1953 assert(0);
1954 return FALSE;
1955 break;
1956
1957 case TGSI_OPCODE_TXF:
1958 /* deprecated? */
1959 assert(0);
1960 return FALSE;
1961 break;
1962
1963 case TGSI_OPCODE_TXQ:
1964 /* deprecated? */
1965 assert(0);
1966 return FALSE;
1967 break;
1968
1969 case TGSI_OPCODE_CONT:
1970 lp_exec_continue(&bld->exec_mask);
1971 break;
1972
1973 case TGSI_OPCODE_EMIT:
1974 return FALSE;
1975 break;
1976
1977 case TGSI_OPCODE_ENDPRIM:
1978 return FALSE;
1979 break;
1980
1981 case TGSI_OPCODE_NOP:
1982 break;
1983
1984 default:
1985 return FALSE;
1986 }
1987
1988 if(info->num_dst) {
1989 LLVMValueRef pred[NUM_CHANNELS];
1990
1991 emit_fetch_predicate( bld, inst, pred );
1992
1993 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1994 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1995 }
1996 }
1997
1998 return TRUE;
1999 }
2000
2001
/**
 * Translate a complete TGSI shader into LLVM IR, SoA style.
 *
 * Parses the token stream once, collecting declarations and immediates and
 * buffering all instructions, then interprets the buffered instructions via
 * emit_instruction() following the program counter (which CAL/RET/END may
 * redirect).  Results are written through the caller-provided outputs array;
 * there is no return value and no error reporting channel.
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.indirect_files = info->indirect_files;
   /* Instruction buffer grows in LP_MAX_INSTRUCTIONS chunks (see REALLOC
    * below); instructions must be buffered so flow control can jump. */
   bld.instructions = (struct tgsi_full_instruction *)
                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      /* NOTE(review): silent failure -- caller gets no indication that no
       * code was generated.  API is void, so nothing better is available. */
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   tgsi_parse_init( &parse, tokens );

   /* First pass: walk the token stream, emitting declarations/immediates
    * immediately and buffering instructions for the second pass. */
   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               if (!instructions) {
                  /* NOTE(review): on OOM this instruction is silently
                   * dropped, producing a truncated shader -- TODO confirm
                   * whether a better failure mode is possible here. */
                  break;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            /* Each immediate component becomes a splatted constant vector;
             * unspecified trailing components are left undefined. */
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         /* Properties are ignored by this translator. */
         break;

      default:
         assert( 0 );
      }
   }

   /* Second pass: interpret the buffered instructions.  pc is advanced by
    * emit_instruction(); TGSI_OPCODE_END sets it to -1 to terminate. */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* Dead debug code, enabled by hand when diagnosing translation issues. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   /* Dead debug code: dump the whole LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}
2130