gallivm: add const qualifiers, fix comment string
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61 #include "lp_bld_printf.h"
62
63
/* Iterate CHAN over the four vector components (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero if channel CHAN is enabled in the instruction's dst[0] writemask. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels written by dst[0]. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* TGSI register channel indices */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial allocation size for the instructions array (see
 * max_instructions in lp_build_tgsi_soa_context).
 */
#define LP_MAX_INSTRUCTIONS 256
84
85
/**
 * Runtime execution-mask state for TGSI control flow (IF/ELSE, loops,
 * subroutines).  exec_mask is the AND of the individual condition,
 * loop and return masks and selects which SoA channels are active.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any condition/loop/call mask is currently in effect */
   boolean has_mask;

   /* vector-of-ints type matching bld->type; all masks have this type */
   LLVMTypeRef int_vec_type;

   /* conditional (IF/ELSE/ENDIF) mask stack */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* current loop state; saved to / restored from loop_stack on nesting */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   /* alloca that preserves break_mask across loop iterations */
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* subroutine call state: current return mask plus saved pc/ret_mask
    * for each nested CAL
    */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* combined mask: cond & cont & break (& ret inside a subroutine) */
   LLVMValueRef exec_mask;
};
118
/**
 * Context for SoA TGSI -> LLVM IR translation: the lp_build contexts,
 * pointers to shader input/output/constant storage, per-register
 * value/alloca tables and the control-flow mask state.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   /* pointer to the flat constant buffer */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   /* per-register, per-channel values (immediates) and allocas
    * (temps/addr/preds)
    */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* instruction store filled during parsing; max_instructions is the
    * currently allocated capacity
    */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
157
158 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
159 {
160 mask->bld = bld;
161 mask->has_mask = FALSE;
162 mask->cond_stack_size = 0;
163 mask->loop_stack_size = 0;
164 mask->call_stack_size = 0;
165
166 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
167 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
168 LLVMConstAllOnes(mask->int_vec_type);
169 }
170
171 static void lp_exec_mask_update(struct lp_exec_mask *mask)
172 {
173 if (mask->loop_stack_size) {
174 /*for loops we need to update the entire mask at runtime */
175 LLVMValueRef tmp;
176 assert(mask->break_mask);
177 tmp = LLVMBuildAnd(mask->bld->builder,
178 mask->cont_mask,
179 mask->break_mask,
180 "maskcb");
181 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
182 mask->cond_mask,
183 tmp,
184 "maskfull");
185 } else
186 mask->exec_mask = mask->cond_mask;
187
188 if (mask->call_stack_size) {
189 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
190 mask->exec_mask,
191 mask->ret_mask,
192 "callmask");
193 }
194
195 mask->has_mask = (mask->cond_stack_size > 0 ||
196 mask->loop_stack_size > 0 ||
197 mask->call_stack_size > 0);
198 }
199
200 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
201 LLVMValueRef val)
202 {
203 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
204 if (mask->cond_stack_size == 0) {
205 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
206 }
207 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
208 assert(LLVMTypeOf(val) == mask->int_vec_type);
209 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
210 mask->cond_mask,
211 val,
212 "");
213 lp_exec_mask_update(mask);
214 }
215
216 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
217 {
218 LLVMValueRef prev_mask;
219 LLVMValueRef inv_mask;
220
221 assert(mask->cond_stack_size);
222 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
223 if (mask->cond_stack_size == 1) {
224 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
225 }
226
227 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
228
229 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
230 inv_mask,
231 prev_mask, "");
232 lp_exec_mask_update(mask);
233 }
234
235 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
236 {
237 assert(mask->cond_stack_size);
238 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
239 lp_exec_mask_update(mask);
240 }
241
/**
 * BGNLOOP: save the enclosing loop's state, create the loop-header basic
 * block and branch into it.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* outermost loop: all loop state must still be at its defaults */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* push the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask lives in memory so it survives across loop iterations */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* start a new basic block for the loop body and fall into it */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload the (possibly updated) break mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
270
271 static void lp_exec_break(struct lp_exec_mask *mask)
272 {
273 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
274 mask->exec_mask,
275 "break");
276
277 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
278 mask->break_mask,
279 exec_mask, "break_full");
280
281 lp_exec_mask_update(mask);
282 }
283
284 static void lp_exec_continue(struct lp_exec_mask *mask)
285 {
286 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
287 mask->exec_mask,
288 "");
289
290 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
291 mask->cont_mask,
292 exec_mask, "");
293
294 lp_exec_mask_update(mask);
295 }
296
297
/**
 * ENDLOOP: branch back to the loop header while any channel is still
 * active, then restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* scalar integer wide enough to hold the whole exec_mask vector */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (mask != 0) -- loop again while any channel is active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the saved loop state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
343
344 /* stores val into an address pointed to by dst.
345 * mask->exec_mask is used to figure out which bits of val
346 * should be stored into the address
347 * (0 means don't store this bit, 1 means do store).
348 */
349 static void lp_exec_mask_store(struct lp_exec_mask *mask,
350 LLVMValueRef pred,
351 LLVMValueRef val,
352 LLVMValueRef dst)
353 {
354 /* Mix the predicate and execution mask */
355 if (mask->has_mask) {
356 if (pred) {
357 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
358 } else {
359 pred = mask->exec_mask;
360 }
361 }
362
363 if (pred) {
364 LLVMValueRef real_val, dst_val;
365
366 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
367 real_val = lp_build_select(mask->bld,
368 pred,
369 val, dst_val);
370
371 LLVMBuildStore(mask->bld->builder, real_val, dst);
372 } else
373 LLVMBuildStore(mask->bld->builder, val, dst);
374 }
375
376 static void lp_exec_mask_call(struct lp_exec_mask *mask,
377 int func,
378 int *pc)
379 {
380 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
381 mask->call_stack[mask->call_stack_size].pc = *pc;
382 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
383 mask->call_stack_size++;
384 *pc = func;
385 }
386
387 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
388 {
389 LLVMValueRef exec_mask;
390
391 if (mask->call_stack_size == 0) {
392 /* returning from main() */
393 *pc = -1;
394 return;
395 }
396 exec_mask = LLVMBuildNot(mask->bld->builder,
397 mask->exec_mask,
398 "ret");
399
400 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
401 mask->ret_mask,
402 exec_mask, "ret_full");
403
404 lp_exec_mask_update(mask);
405 }
406
/* BGNSUB: no mask changes needed; the subroutine entry point is only a
 * marker in the instruction stream.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
410
411 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
412 {
413 assert(mask->call_stack_size);
414 mask->call_stack_size--;
415 *pc = mask->call_stack[mask->call_stack_size].pc;
416 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
417 lp_exec_mask_update(mask);
418 }
419
420
421 /**
422 * Return pointer to a temporary register channel (src or dest).
423 * Note that indirect addressing cannot be handled here.
424 * \param index which temporary register
425 * \param chan which channel of the temp register.
426 */
427 static LLVMValueRef
428 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
429 unsigned index,
430 unsigned chan)
431 {
432 assert(chan < 4);
433 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
434 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
435 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
436 }
437 else {
438 return bld->temps[index][chan];
439 }
440 }
441
442
443 /**
444 * Gather vector.
445 * XXX the lp_build_gather() function should be capable of doing this
446 * with a little work.
447 */
448 static LLVMValueRef
449 build_gather(struct lp_build_tgsi_soa_context *bld,
450 LLVMValueRef base_ptr,
451 LLVMValueRef indexes)
452 {
453 LLVMValueRef res = bld->base.undef;
454 unsigned i;
455
456 /*
457 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
458 */
459 for (i = 0; i < bld->base.type.length; i++) {
460 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
461 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
462 indexes, ii, "");
463 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
464 &index, 1, "gather_ptr");
465 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
466
467 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
468 }
469
470 return res;
471 }
472
473
/**
 * Scatter/store vector.
 * Store each element of 'values' to base_ptr[indexes[i]], skipping
 * elements whose combined predicate/execution-mask lane is 0.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   LLVMBuilderRef builder = bld->base.builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* debugging aid, normally disabled */
      if (0)
         lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* predicated lane: blend the new value over the old contents */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
524
525
526 /**
527 * Read the current value of the ADDR register, convert the floats to
528 * ints, add the base index and return the vector of offsets.
529 * The offsets will be used to index into the constant buffer or
530 * temporary register file.
531 */
532 static LLVMValueRef
533 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
534 unsigned reg_file, unsigned reg_index,
535 const struct tgsi_src_register *indirect_reg)
536 {
537 struct lp_build_context *uint_bld = &bld->uint_bld;
538 /* always use X component of address register */
539 unsigned swizzle = indirect_reg->SwizzleX;
540 LLVMValueRef base;
541 LLVMValueRef rel;
542 LLVMValueRef max_index;
543 LLVMValueRef index;
544
545 assert(bld->indirect_files & (1 << reg_file));
546
547 base = lp_build_const_int_vec(uint_bld->type, reg_index);
548
549 assert(swizzle < 4);
550 rel = LLVMBuildLoad(bld->base.builder,
551 bld->addr[indirect_reg->Index][swizzle],
552 "load addr reg");
553
554 /* for indexing we want integers */
555 rel = LLVMBuildFPToSI(bld->base.builder,
556 rel,
557 uint_bld->vec_type, "");
558
559 index = lp_build_add(uint_bld, base, rel);
560
561 max_index = lp_build_const_int_vec(uint_bld->type,
562 bld->info->file_max[reg_file]);
563
564 assert(!uint_bld->type.sign);
565 index = lp_build_min(uint_bld, index, max_index);
566
567 return index;
568 }
569
570
/**
 * Register fetch.
 * Fetch one SoA channel of source operand 'src_op' of 'inst', applying
 * the operand's swizzle and sign mode.  Handles direct and indirect
 * (ADDR-relative) addressing for the CONSTANT and TEMPORARY files.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      /* per-channel vector of clamped offsets into the register file */
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         /* direct access: load one scalar and replicate across lanes */
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the const buffer */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the operand's absolute-value / negate modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
695
696
697 /**
698 * Register fetch with derivatives.
699 */
700 static void
701 emit_fetch_deriv(
702 struct lp_build_tgsi_soa_context *bld,
703 const struct tgsi_full_instruction *inst,
704 unsigned index,
705 const unsigned chan_index,
706 LLVMValueRef *res,
707 LLVMValueRef *ddx,
708 LLVMValueRef *ddy)
709 {
710 LLVMValueRef src;
711
712 src = emit_fetch(bld, inst, index, chan_index);
713
714 if(res)
715 *res = src;
716
717 /* TODO: use interpolation coeffs for inputs */
718
719 if(ddx)
720 *ddx = lp_build_ddx(&bld->base, src);
721
722 if(ddy)
723 *ddy = lp_build_ddy(&bld->base, src);
724 }
725
726
/**
 * Predicate.
 * Build the per-channel predicate masks for a predicated instruction:
 * pred[chan] is an integer mask (~0 where the swizzled predicate
 * register channel is non-zero, 0 elsewhere), or NULL per channel when
 * the instruction is not predicated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* unpredicated: no mask for any channel */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         /* reuse the mask computed for an earlier channel */
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
792
793
794 /**
795 * Register store.
796 */
797 static void
798 emit_store(
799 struct lp_build_tgsi_soa_context *bld,
800 const struct tgsi_full_instruction *inst,
801 unsigned index,
802 unsigned chan_index,
803 LLVMValueRef pred,
804 LLVMValueRef value)
805 {
806 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
807 struct lp_build_context *uint_bld = &bld->uint_bld;
808 LLVMValueRef indirect_index = NULL;
809
810 switch( inst->Instruction.Saturate ) {
811 case TGSI_SAT_NONE:
812 break;
813
814 case TGSI_SAT_ZERO_ONE:
815 value = lp_build_max(&bld->base, value, bld->base.zero);
816 value = lp_build_min(&bld->base, value, bld->base.one);
817 break;
818
819 case TGSI_SAT_MINUS_PLUS_ONE:
820 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
821 value = lp_build_min(&bld->base, value, bld->base.one);
822 break;
823
824 default:
825 assert(0);
826 }
827
828 if (reg->Register.Indirect) {
829 indirect_index = get_indirect_index(bld,
830 reg->Register.File,
831 reg->Register.Index,
832 &reg->Indirect);
833 } else {
834 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
835 }
836
837 switch( reg->Register.File ) {
838 case TGSI_FILE_OUTPUT:
839 lp_exec_mask_store(&bld->exec_mask, pred, value,
840 bld->outputs[reg->Register.Index][chan_index]);
841 break;
842
843 case TGSI_FILE_TEMPORARY:
844 if (reg->Register.Indirect) {
845 LLVMBuilderRef builder = bld->base.builder;
846 LLVMValueRef chan_vec =
847 lp_build_const_int_vec(uint_bld->type, chan_index);
848 LLVMValueRef length_vec =
849 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
850 LLVMValueRef index_vec; /* indexes into the temp registers */
851 LLVMValueRef temps_array;
852 LLVMValueRef pixel_offsets;
853 LLVMTypeRef float_ptr_type;
854 int i;
855
856 /* build pixel offset vector: {0, 1, 2, 3, ...} */
857 pixel_offsets = uint_bld->undef;
858 for (i = 0; i < bld->base.type.length; i++) {
859 LLVMValueRef ii = lp_build_const_int32(i);
860 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
861 ii, ii, "");
862 }
863
864 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
865 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
866 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
867 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
868 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
869
870 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
871 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
872 float_ptr_type, "");
873
874 /* Scatter store values into temp registers */
875 emit_mask_scatter(bld, temps_array, index_vec, value,
876 &bld->exec_mask, pred);
877 }
878 else {
879 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
880 chan_index);
881 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
882 }
883 break;
884
885 case TGSI_FILE_ADDRESS:
886 lp_exec_mask_store(&bld->exec_mask, pred, value,
887 bld->addr[reg->Indirect.Index][chan_index]);
888 break;
889
890 case TGSI_FILE_PREDICATE:
891 lp_exec_mask_store(&bld->exec_mask, pred, value,
892 bld->preds[reg->Register.Index][chan_index]);
893 break;
894
895 default:
896 assert( 0 );
897 }
898 }
899
900
901 /**
902 * High-level instruction translators.
903 */
904
/**
 * Emit a texture-sampling instruction (TEX/TXB/TXL/TXP/TXD).
 * Fetches coordinates (projected for TXP), LOD bias or explicit LOD,
 * and derivatives (explicit for TXD, quad-based otherwise), then calls
 * into the sampler generator to produce the four texel channels.
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator: return undefs rather than crashing */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinates needed by the texture target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias (TXB) and explicit LOD (TXL) both come from src0.w */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* TXP: divide coordinates by src0.w */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: explicit derivatives in src1/src2; only the first element
       * of each vector is used
       */
      LLVMTypeRef i32t = LLVMInt32Type();
      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
      for (i = 0; i < num_coords; i++) {
         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives computed from the 2x2 quad */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = LLVMGetUndef(bld->base.elem_type);
      ddy[i] = LLVMGetUndef(bld->base.elem_type);
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
1005
1006 static boolean
1007 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1008 int pc)
1009 {
1010 int i;
1011
1012 for (i = 0; i < 5; i++) {
1013 unsigned opcode;
1014
1015 if (pc + i >= bld->info->num_instructions)
1016 return TRUE;
1017
1018 opcode = bld->instructions[pc + i].Instruction.Opcode;
1019
1020 if (opcode == TGSI_OPCODE_END)
1021 return TRUE;
1022
1023 if (opcode == TGSI_OPCODE_TEX ||
1024 opcode == TGSI_OPCODE_TXP ||
1025 opcode == TGSI_OPCODE_TXD ||
1026 opcode == TGSI_OPCODE_TXB ||
1027 opcode == TGSI_OPCODE_TXL ||
1028 opcode == TGSI_OPCODE_TXF ||
1029 opcode == TGSI_OPCODE_TXQ ||
1030 opcode == TGSI_OPCODE_CAL ||
1031 opcode == TGSI_OPCODE_CALLNZ ||
1032 opcode == TGSI_OPCODE_IF ||
1033 opcode == TGSI_OPCODE_IFC ||
1034 opcode == TGSI_OPCODE_BGNLOOP ||
1035 opcode == TGSI_OPCODE_SWITCH)
1036 return FALSE;
1037 }
1038
1039 return TRUE;
1040 }
1041
1042
1043
/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel "survives" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask) {
      lp_build_mask_update(bld->mask, mask);

      /* skip the early-out branch when the shader is about to end anyway */
      if (!near_end_of_shader(bld, pc))
         lp_build_mask_check(bld->mask);
   }
}
1097
1098
1099 /**
1100 * Predicated fragment kill.
1101 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1102 * The only predication is the execution mask which will apply if
1103 * we're inside a loop or conditional.
1104 */
1105 static void
1106 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1107 const struct tgsi_full_instruction *inst,
1108 int pc)
1109 {
1110 LLVMValueRef mask;
1111
1112 /* For those channels which are "alive", disable fragment shader
1113 * execution.
1114 */
1115 if (bld->exec_mask.has_mask) {
1116 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1117 }
1118 else {
1119 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1120 mask = zero;
1121 }
1122
1123 lp_build_mask_update(bld->mask, mask);
1124
1125 if (!near_end_of_shader(bld, pc))
1126 lp_build_mask_check(bld->mask);
1127 }
1128
1129
1130 /**
1131 * Emit code which will dump the value of all the temporary registers
1132 * to stdout.
1133 */
1134 static void
1135 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1136 {
1137 LLVMBuilderRef builder = bld->base.builder;
1138 LLVMValueRef temp_ptr;
1139 LLVMValueRef i0 = lp_build_const_int32(0);
1140 LLVMValueRef i1 = lp_build_const_int32(1);
1141 LLVMValueRef i2 = lp_build_const_int32(2);
1142 LLVMValueRef i3 = lp_build_const_int32(3);
1143 int index;
1144 int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1145
1146 for (index = 0; index < n; index++) {
1147 LLVMValueRef idx = lp_build_const_int32(index);
1148 LLVMValueRef v[4][4], res;
1149 int chan;
1150
1151 lp_build_printf(builder, "TEMP[%d]:\n", idx);
1152
1153 for (chan = 0; chan < 4; chan++) {
1154 temp_ptr = get_temp_ptr(bld, index, chan);
1155 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
1156 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1157 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1158 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1159 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1160 }
1161
1162 lp_build_printf(builder, " X: %f %f %f %f\n",
1163 v[0][0], v[0][1], v[0][2], v[0][3]);
1164 lp_build_printf(builder, " Y: %f %f %f %f\n",
1165 v[1][0], v[1][1], v[1][2], v[1][3]);
1166 lp_build_printf(builder, " Z: %f %f %f %f\n",
1167 v[2][0], v[2][1], v[2][2], v[2][3]);
1168 lp_build_printf(builder, " W: %f %f %f %f\n",
1169 v[3][0], v[3][1], v[3][2], v[3][3]);
1170 }
1171 }
1172
1173
1174
1175 static void
1176 emit_declaration(
1177 struct lp_build_tgsi_soa_context *bld,
1178 const struct tgsi_full_declaration *decl)
1179 {
1180 LLVMTypeRef vec_type = bld->base.vec_type;
1181 const unsigned first = decl->Range.First;
1182 const unsigned last = decl->Range.Last;
1183 unsigned idx, i;
1184
1185 for (idx = first; idx <= last; ++idx) {
1186 assert(last <= bld->info->file_max[decl->Declaration.File]);
1187 switch (decl->Declaration.File) {
1188 case TGSI_FILE_TEMPORARY:
1189 assert(idx < LP_MAX_TGSI_TEMPS);
1190 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1191 /* ignore 'first' - we want to index into a 0-based array */
1192 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
1193 last*4 + 4, 0);
1194 bld->temps_array = lp_build_array_alloca(bld->base.builder,
1195 vec_type, array_size,
1196 "temporary");
1197 idx = last;
1198 } else {
1199 for (i = 0; i < NUM_CHANNELS; i++)
1200 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1201 vec_type, "temp");
1202 }
1203 break;
1204
1205 case TGSI_FILE_OUTPUT:
1206 for (i = 0; i < NUM_CHANNELS; i++)
1207 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1208 vec_type, "output");
1209 break;
1210
1211 case TGSI_FILE_ADDRESS:
1212 assert(idx < LP_MAX_TGSI_ADDRS);
1213 for (i = 0; i < NUM_CHANNELS; i++)
1214 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1215 vec_type, "addr");
1216 break;
1217
1218 case TGSI_FILE_PREDICATE:
1219 assert(idx < LP_MAX_TGSI_PREDS);
1220 for (i = 0; i < NUM_CHANNELS; i++)
1221 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1222 vec_type, "predicate");
1223 break;
1224
1225 default:
1226 /* don't need to declare other vars */
1227 break;
1228 }
1229 }
1230 }
1231
1232
1233 /**
1234 * Emit LLVM for one TGSI instruction.
1235 * \param return TRUE for success, FALSE otherwise
1236 */
1237 static boolean
1238 emit_instruction(
1239 struct lp_build_tgsi_soa_context *bld,
1240 const struct tgsi_full_instruction *inst,
1241 const struct tgsi_opcode_info *info,
1242 int *pc)
1243 {
1244 unsigned chan_index;
1245 LLVMValueRef src0, src1, src2;
1246 LLVMValueRef tmp0, tmp1, tmp2;
1247 LLVMValueRef tmp3 = NULL;
1248 LLVMValueRef tmp4 = NULL;
1249 LLVMValueRef tmp5 = NULL;
1250 LLVMValueRef tmp6 = NULL;
1251 LLVMValueRef tmp7 = NULL;
1252 LLVMValueRef res;
1253 LLVMValueRef dst0[NUM_CHANNELS];
1254
1255 /*
1256 * Stores and write masks are handled in a general fashion after the long
1257 * instruction opcode switch statement.
1258 *
1259 * Although not stricitly necessary, we avoid generating instructions for
1260 * channels which won't be stored, in cases where's that easy. For some
1261 * complex instructions, like texture sampling, it is more convenient to
1262 * assume a full writemask and then let LLVM optimization passes eliminate
1263 * redundant code.
1264 */
1265
1266 (*pc)++;
1267
1268 assert(info->num_dst <= 1);
1269 if (info->num_dst) {
1270 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1271 dst0[chan_index] = bld->base.undef;
1272 }
1273 }
1274
1275 switch (inst->Instruction.Opcode) {
1276 case TGSI_OPCODE_ARL:
1277 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1278 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1279 tmp0 = lp_build_floor(&bld->base, tmp0);
1280 dst0[chan_index] = tmp0;
1281 }
1282 break;
1283
1284 case TGSI_OPCODE_MOV:
1285 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1286 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1287 }
1288 break;
1289
1290 case TGSI_OPCODE_LIT:
1291 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1292 dst0[CHAN_X] = bld->base.one;
1293 }
1294 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1295 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1296 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1297 }
1298 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1299 /* XMM[1] = SrcReg[0].yyyy */
1300 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1301 /* XMM[1] = max(XMM[1], 0) */
1302 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1303 /* XMM[2] = SrcReg[0].wwww */
1304 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1305 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1306 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1307 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1308 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1309 }
1310 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1311 dst0[CHAN_W] = bld->base.one;
1312 }
1313 break;
1314
1315 case TGSI_OPCODE_RCP:
1316 /* TGSI_OPCODE_RECIP */
1317 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1318 res = lp_build_rcp(&bld->base, src0);
1319 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1320 dst0[chan_index] = res;
1321 }
1322 break;
1323
1324 case TGSI_OPCODE_RSQ:
1325 /* TGSI_OPCODE_RECIPSQRT */
1326 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1327 src0 = lp_build_abs(&bld->base, src0);
1328 res = lp_build_rsqrt(&bld->base, src0);
1329 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1330 dst0[chan_index] = res;
1331 }
1332 break;
1333
1334 case TGSI_OPCODE_EXP:
1335 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1336 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1337 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1338 LLVMValueRef *p_exp2_int_part = NULL;
1339 LLVMValueRef *p_frac_part = NULL;
1340 LLVMValueRef *p_exp2 = NULL;
1341
1342 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1343
1344 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1345 p_exp2_int_part = &tmp0;
1346 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1347 p_frac_part = &tmp1;
1348 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1349 p_exp2 = &tmp2;
1350
1351 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1352
1353 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1354 dst0[CHAN_X] = tmp0;
1355 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1356 dst0[CHAN_Y] = tmp1;
1357 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1358 dst0[CHAN_Z] = tmp2;
1359 }
1360 /* dst.w = 1.0 */
1361 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1362 dst0[CHAN_W] = bld->base.one;
1363 }
1364 break;
1365
1366 case TGSI_OPCODE_LOG:
1367 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1368 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1369 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1370 LLVMValueRef *p_floor_log2 = NULL;
1371 LLVMValueRef *p_exp = NULL;
1372 LLVMValueRef *p_log2 = NULL;
1373
1374 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1375 src0 = lp_build_abs( &bld->base, src0 );
1376
1377 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1378 p_floor_log2 = &tmp0;
1379 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1380 p_exp = &tmp1;
1381 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1382 p_log2 = &tmp2;
1383
1384 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1385
1386 /* dst.x = floor(lg2(abs(src.x))) */
1387 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1388 dst0[CHAN_X] = tmp0;
1389 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1390 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1391 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1392 }
1393 /* dst.z = lg2(abs(src.x)) */
1394 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1395 dst0[CHAN_Z] = tmp2;
1396 }
1397 /* dst.w = 1.0 */
1398 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1399 dst0[CHAN_W] = bld->base.one;
1400 }
1401 break;
1402
1403 case TGSI_OPCODE_MUL:
1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1405 src0 = emit_fetch( bld, inst, 0, chan_index );
1406 src1 = emit_fetch( bld, inst, 1, chan_index );
1407 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1408 }
1409 break;
1410
1411 case TGSI_OPCODE_ADD:
1412 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1413 src0 = emit_fetch( bld, inst, 0, chan_index );
1414 src1 = emit_fetch( bld, inst, 1, chan_index );
1415 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1416 }
1417 break;
1418
1419 case TGSI_OPCODE_DP3:
1420 /* TGSI_OPCODE_DOT3 */
1421 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1422 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1423 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1424 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1425 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1426 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1427 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1428 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1429 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1430 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1431 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1432 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1433 dst0[chan_index] = tmp0;
1434 }
1435 break;
1436
1437 case TGSI_OPCODE_DP4:
1438 /* TGSI_OPCODE_DOT4 */
1439 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1440 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1441 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1442 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1443 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1444 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1445 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1446 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1447 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1448 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1449 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1450 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1451 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1452 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1453 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1454 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1455 dst0[chan_index] = tmp0;
1456 }
1457 break;
1458
1459 case TGSI_OPCODE_DST:
1460 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1461 dst0[CHAN_X] = bld->base.one;
1462 }
1463 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1464 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1465 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1466 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1467 }
1468 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1469 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1470 }
1471 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1472 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1473 }
1474 break;
1475
1476 case TGSI_OPCODE_MIN:
1477 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1478 src0 = emit_fetch( bld, inst, 0, chan_index );
1479 src1 = emit_fetch( bld, inst, 1, chan_index );
1480 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1481 }
1482 break;
1483
1484 case TGSI_OPCODE_MAX:
1485 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1486 src0 = emit_fetch( bld, inst, 0, chan_index );
1487 src1 = emit_fetch( bld, inst, 1, chan_index );
1488 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1489 }
1490 break;
1491
1492 case TGSI_OPCODE_SLT:
1493 /* TGSI_OPCODE_SETLT */
1494 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1495 src0 = emit_fetch( bld, inst, 0, chan_index );
1496 src1 = emit_fetch( bld, inst, 1, chan_index );
1497 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1498 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1499 }
1500 break;
1501
1502 case TGSI_OPCODE_SGE:
1503 /* TGSI_OPCODE_SETGE */
1504 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1505 src0 = emit_fetch( bld, inst, 0, chan_index );
1506 src1 = emit_fetch( bld, inst, 1, chan_index );
1507 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1508 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1509 }
1510 break;
1511
1512 case TGSI_OPCODE_MAD:
1513 /* TGSI_OPCODE_MADD */
1514 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1515 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1516 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1517 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1518 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1519 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1520 dst0[chan_index] = tmp0;
1521 }
1522 break;
1523
1524 case TGSI_OPCODE_SUB:
1525 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1526 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1527 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1528 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1529 }
1530 break;
1531
1532 case TGSI_OPCODE_LRP:
1533 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1534 src0 = emit_fetch( bld, inst, 0, chan_index );
1535 src1 = emit_fetch( bld, inst, 1, chan_index );
1536 src2 = emit_fetch( bld, inst, 2, chan_index );
1537 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1538 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1539 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1540 }
1541 break;
1542
1543 case TGSI_OPCODE_CND:
1544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1545 src0 = emit_fetch( bld, inst, 0, chan_index );
1546 src1 = emit_fetch( bld, inst, 1, chan_index );
1547 src2 = emit_fetch( bld, inst, 2, chan_index );
1548 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1549 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1550 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1551 }
1552 break;
1553
1554 case TGSI_OPCODE_DP2A:
1555 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1556 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1557 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1558 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1559 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1560 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1561 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1562 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1563 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1564 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1565 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1566 }
1567 break;
1568
1569 case TGSI_OPCODE_FRC:
1570 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1571 src0 = emit_fetch( bld, inst, 0, chan_index );
1572 tmp0 = lp_build_floor(&bld->base, src0);
1573 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1574 dst0[chan_index] = tmp0;
1575 }
1576 break;
1577
1578 case TGSI_OPCODE_CLAMP:
1579 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1580 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1581 src1 = emit_fetch( bld, inst, 1, chan_index );
1582 src2 = emit_fetch( bld, inst, 2, chan_index );
1583 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1584 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1585 dst0[chan_index] = tmp0;
1586 }
1587 break;
1588
1589 case TGSI_OPCODE_FLR:
1590 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1591 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1592 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1593 }
1594 break;
1595
1596 case TGSI_OPCODE_ROUND:
1597 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1598 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1599 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1600 }
1601 break;
1602
1603 case TGSI_OPCODE_EX2: {
1604 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1605 tmp0 = lp_build_exp2( &bld->base, tmp0);
1606 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1607 dst0[chan_index] = tmp0;
1608 }
1609 break;
1610 }
1611
1612 case TGSI_OPCODE_LG2:
1613 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1614 tmp0 = lp_build_log2( &bld->base, tmp0);
1615 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1616 dst0[chan_index] = tmp0;
1617 }
1618 break;
1619
1620 case TGSI_OPCODE_POW:
1621 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1622 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1623 res = lp_build_pow( &bld->base, src0, src1 );
1624 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1625 dst0[chan_index] = res;
1626 }
1627 break;
1628
1629 case TGSI_OPCODE_XPD:
1630 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1631 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1632 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1633 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1634 }
1635 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1636 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1637 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1638 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1639 }
1640 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1641 tmp2 = tmp0;
1642 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1643 tmp5 = tmp3;
1644 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1645 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1646 dst0[CHAN_X] = tmp2;
1647 }
1648 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1649 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1650 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1651 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1652 }
1653 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1654 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1655 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1656 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1657 dst0[CHAN_Y] = tmp3;
1658 }
1659 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1660 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1661 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1662 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1663 dst0[CHAN_Z] = tmp5;
1664 }
1665 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1666 dst0[CHAN_W] = bld->base.one;
1667 }
1668 break;
1669
1670 case TGSI_OPCODE_ABS:
1671 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1672 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1673 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1674 }
1675 break;
1676
1677 case TGSI_OPCODE_RCC:
1678 /* deprecated? */
1679 assert(0);
1680 return FALSE;
1681
1682 case TGSI_OPCODE_DPH:
1683 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1684 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1685 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1686 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1687 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1688 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1689 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1690 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1691 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1692 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1693 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1694 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1695 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1696 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1697 dst0[chan_index] = tmp0;
1698 }
1699 break;
1700
1701 case TGSI_OPCODE_COS:
1702 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1703 tmp0 = lp_build_cos( &bld->base, tmp0 );
1704 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1705 dst0[chan_index] = tmp0;
1706 }
1707 break;
1708
1709 case TGSI_OPCODE_DDX:
1710 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1711 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1712 }
1713 break;
1714
1715 case TGSI_OPCODE_DDY:
1716 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1717 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1718 }
1719 break;
1720
1721 case TGSI_OPCODE_KILP:
1722 /* predicated kill */
1723 emit_kilp( bld, inst, (*pc)-1 );
1724 break;
1725
1726 case TGSI_OPCODE_KIL:
1727 /* conditional kill */
1728 emit_kil( bld, inst, (*pc)-1 );
1729 break;
1730
1731 case TGSI_OPCODE_PK2H:
1732 return FALSE;
1733 break;
1734
1735 case TGSI_OPCODE_PK2US:
1736 return FALSE;
1737 break;
1738
1739 case TGSI_OPCODE_PK4B:
1740 return FALSE;
1741 break;
1742
1743 case TGSI_OPCODE_PK4UB:
1744 return FALSE;
1745 break;
1746
1747 case TGSI_OPCODE_RFL:
1748 return FALSE;
1749 break;
1750
1751 case TGSI_OPCODE_SEQ:
1752 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1753 src0 = emit_fetch( bld, inst, 0, chan_index );
1754 src1 = emit_fetch( bld, inst, 1, chan_index );
1755 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1756 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1757 }
1758 break;
1759
1760 case TGSI_OPCODE_SFL:
1761 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1762 dst0[chan_index] = bld->base.zero;
1763 }
1764 break;
1765
1766 case TGSI_OPCODE_SGT:
1767 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1768 src0 = emit_fetch( bld, inst, 0, chan_index );
1769 src1 = emit_fetch( bld, inst, 1, chan_index );
1770 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1771 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1772 }
1773 break;
1774
1775 case TGSI_OPCODE_SIN:
1776 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1777 tmp0 = lp_build_sin( &bld->base, tmp0 );
1778 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1779 dst0[chan_index] = tmp0;
1780 }
1781 break;
1782
1783 case TGSI_OPCODE_SLE:
1784 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1785 src0 = emit_fetch( bld, inst, 0, chan_index );
1786 src1 = emit_fetch( bld, inst, 1, chan_index );
1787 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1788 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1789 }
1790 break;
1791
1792 case TGSI_OPCODE_SNE:
1793 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1794 src0 = emit_fetch( bld, inst, 0, chan_index );
1795 src1 = emit_fetch( bld, inst, 1, chan_index );
1796 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1797 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1798 }
1799 break;
1800
1801 case TGSI_OPCODE_STR:
1802 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1803 dst0[chan_index] = bld->base.one;
1804 }
1805 break;
1806
1807 case TGSI_OPCODE_TEX:
1808 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1809 break;
1810
1811 case TGSI_OPCODE_TXD:
1812 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1813 break;
1814
1815 case TGSI_OPCODE_UP2H:
1816 /* deprecated */
1817 assert (0);
1818 return FALSE;
1819 break;
1820
1821 case TGSI_OPCODE_UP2US:
1822 /* deprecated */
1823 assert(0);
1824 return FALSE;
1825 break;
1826
1827 case TGSI_OPCODE_UP4B:
1828 /* deprecated */
1829 assert(0);
1830 return FALSE;
1831 break;
1832
1833 case TGSI_OPCODE_UP4UB:
1834 /* deprecated */
1835 assert(0);
1836 return FALSE;
1837 break;
1838
1839 case TGSI_OPCODE_X2D:
1840 /* deprecated? */
1841 assert(0);
1842 return FALSE;
1843 break;
1844
1845 case TGSI_OPCODE_ARA:
1846 /* deprecated */
1847 assert(0);
1848 return FALSE;
1849 break;
1850
1851 case TGSI_OPCODE_ARR:
1852 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1853 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1854 tmp0 = lp_build_round(&bld->base, tmp0);
1855 dst0[chan_index] = tmp0;
1856 }
1857 break;
1858
1859 case TGSI_OPCODE_BRA:
1860 /* deprecated */
1861 assert(0);
1862 return FALSE;
1863 break;
1864
1865 case TGSI_OPCODE_CAL:
1866 lp_exec_mask_call(&bld->exec_mask,
1867 inst->Label.Label,
1868 pc);
1869
1870 break;
1871
1872 case TGSI_OPCODE_RET:
1873 lp_exec_mask_ret(&bld->exec_mask, pc);
1874 break;
1875
1876 case TGSI_OPCODE_END:
1877 if (0) {
1878 /* for debugging */
1879 emit_dump_temps(bld);
1880 }
1881 *pc = -1;
1882 break;
1883
1884 case TGSI_OPCODE_SSG:
1885 /* TGSI_OPCODE_SGN */
1886 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1887 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1888 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1889 }
1890 break;
1891
1892 case TGSI_OPCODE_CMP:
1893 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1894 src0 = emit_fetch( bld, inst, 0, chan_index );
1895 src1 = emit_fetch( bld, inst, 1, chan_index );
1896 src2 = emit_fetch( bld, inst, 2, chan_index );
1897 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1898 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1899 }
1900 break;
1901
1902 case TGSI_OPCODE_SCS:
1903 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1904 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1905 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1906 }
1907 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1908 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1909 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1910 }
1911 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1912 dst0[CHAN_Z] = bld->base.zero;
1913 }
1914 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1915 dst0[CHAN_W] = bld->base.one;
1916 }
1917 break;
1918
1919 case TGSI_OPCODE_TXB:
1920 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
1921 break;
1922
1923 case TGSI_OPCODE_NRM:
1924 /* fall-through */
1925 case TGSI_OPCODE_NRM4:
1926 /* 3 or 4-component normalization */
1927 {
1928 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1929
1930 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1931 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1932 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1933 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1934
1935 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1936
1937 /* xmm4 = src.x */
1938 /* xmm0 = src.x * src.x */
1939 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1940 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1941 tmp4 = tmp0;
1942 }
1943 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1944
1945 /* xmm5 = src.y */
1946 /* xmm0 = xmm0 + src.y * src.y */
1947 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1948 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1949 tmp5 = tmp1;
1950 }
1951 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1952 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1953
1954 /* xmm6 = src.z */
1955 /* xmm0 = xmm0 + src.z * src.z */
1956 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1957 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1958 tmp6 = tmp1;
1959 }
1960 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1961 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1962
1963 if (dims == 4) {
1964 /* xmm7 = src.w */
1965 /* xmm0 = xmm0 + src.w * src.w */
1966 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1967 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1968 tmp7 = tmp1;
1969 }
1970 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1971 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1972 }
1973
1974 /* xmm1 = 1 / sqrt(xmm0) */
1975 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1976
1977 /* dst.x = xmm1 * src.x */
1978 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1979 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1980 }
1981
1982 /* dst.y = xmm1 * src.y */
1983 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1984 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1985 }
1986
1987 /* dst.z = xmm1 * src.z */
1988 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1989 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1990 }
1991
1992 /* dst.w = xmm1 * src.w */
1993 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1994 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1995 }
1996 }
1997
1998 /* dst.w = 1.0 */
1999 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2000 dst0[CHAN_W] = bld->base.one;
2001 }
2002 }
2003 break;
2004
2005 case TGSI_OPCODE_DIV:
2006 /* deprecated */
2007 assert( 0 );
2008 return FALSE;
2009 break;
2010
2011 case TGSI_OPCODE_DP2:
2012 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
2013 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
2014 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2015 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
2016 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
2017 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2018 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2019 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2020 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2021 }
2022 break;
2023
2024 case TGSI_OPCODE_TXL:
2025 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2026 break;
2027
2028 case TGSI_OPCODE_TXP:
2029 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2030 break;
2031
2032 case TGSI_OPCODE_BRK:
2033 lp_exec_break(&bld->exec_mask);
2034 break;
2035
2036 case TGSI_OPCODE_IF:
2037 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2038 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2039 tmp0, bld->base.zero);
2040 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2041 break;
2042
2043 case TGSI_OPCODE_BGNLOOP:
2044 lp_exec_bgnloop(&bld->exec_mask);
2045 break;
2046
2047 case TGSI_OPCODE_BGNSUB:
2048 lp_exec_mask_bgnsub(&bld->exec_mask);
2049 break;
2050
2051 case TGSI_OPCODE_ELSE:
2052 lp_exec_mask_cond_invert(&bld->exec_mask);
2053 break;
2054
2055 case TGSI_OPCODE_ENDIF:
2056 lp_exec_mask_cond_pop(&bld->exec_mask);
2057 break;
2058
2059 case TGSI_OPCODE_ENDLOOP:
2060 lp_exec_endloop(&bld->exec_mask);
2061 break;
2062
2063 case TGSI_OPCODE_ENDSUB:
2064 lp_exec_mask_endsub(&bld->exec_mask, pc);
2065 break;
2066
2067 case TGSI_OPCODE_PUSHA:
2068 /* deprecated? */
2069 assert(0);
2070 return FALSE;
2071 break;
2072
2073 case TGSI_OPCODE_POPA:
2074 /* deprecated? */
2075 assert(0);
2076 return FALSE;
2077 break;
2078
2079 case TGSI_OPCODE_CEIL:
2080 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2081 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2082 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2083 }
2084 break;
2085
2086 case TGSI_OPCODE_I2F:
2087 /* deprecated? */
2088 assert(0);
2089 return FALSE;
2090 break;
2091
2092 case TGSI_OPCODE_NOT:
2093 /* deprecated? */
2094 assert(0);
2095 return FALSE;
2096 break;
2097
2098 case TGSI_OPCODE_TRUNC:
2099 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2100 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2101 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2102 }
2103 break;
2104
2105 case TGSI_OPCODE_SHL:
2106 /* deprecated? */
2107 assert(0);
2108 return FALSE;
2109 break;
2110
2111 case TGSI_OPCODE_ISHR:
2112 /* deprecated? */
2113 assert(0);
2114 return FALSE;
2115 break;
2116
2117 case TGSI_OPCODE_AND:
2118 /* deprecated? */
2119 assert(0);
2120 return FALSE;
2121 break;
2122
2123 case TGSI_OPCODE_OR:
2124 /* deprecated? */
2125 assert(0);
2126 return FALSE;
2127 break;
2128
2129 case TGSI_OPCODE_MOD:
2130 /* deprecated? */
2131 assert(0);
2132 return FALSE;
2133 break;
2134
2135 case TGSI_OPCODE_XOR:
2136 /* deprecated? */
2137 assert(0);
2138 return FALSE;
2139 break;
2140
2141 case TGSI_OPCODE_SAD:
2142 /* deprecated? */
2143 assert(0);
2144 return FALSE;
2145 break;
2146
2147 case TGSI_OPCODE_TXF:
2148 /* deprecated? */
2149 assert(0);
2150 return FALSE;
2151 break;
2152
2153 case TGSI_OPCODE_TXQ:
2154 /* deprecated? */
2155 assert(0);
2156 return FALSE;
2157 break;
2158
2159 case TGSI_OPCODE_CONT:
2160 lp_exec_continue(&bld->exec_mask);
2161 break;
2162
2163 case TGSI_OPCODE_EMIT:
2164 return FALSE;
2165 break;
2166
2167 case TGSI_OPCODE_ENDPRIM:
2168 return FALSE;
2169 break;
2170
2171 case TGSI_OPCODE_NOP:
2172 break;
2173
2174 default:
2175 return FALSE;
2176 }
2177
2178 if(info->num_dst) {
2179 LLVMValueRef pred[NUM_CHANNELS];
2180
2181 emit_fetch_predicate( bld, inst, pred );
2182
2183 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2184 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2185 }
2186 }
2187
2188 return TRUE;
2189 }
2190
2191
2192 void
2193 lp_build_tgsi_soa(LLVMBuilderRef builder,
2194 const struct tgsi_token *tokens,
2195 struct lp_type type,
2196 struct lp_build_mask_context *mask,
2197 LLVMValueRef consts_ptr,
2198 const LLVMValueRef *pos,
2199 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2200 LLVMValueRef (*outputs)[NUM_CHANNELS],
2201 struct lp_build_sampler_soa *sampler,
2202 const struct tgsi_shader_info *info)
2203 {
2204 struct lp_build_tgsi_soa_context bld;
2205 struct tgsi_parse_context parse;
2206 uint num_immediates = 0;
2207 uint num_instructions = 0;
2208 unsigned i;
2209 int pc = 0;
2210
2211 struct lp_type res_type;
2212
2213 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2214 memset(&res_type, 0, sizeof res_type);
2215 res_type.width = type.width;
2216 res_type.length = type.length;
2217 res_type.sign = 1;
2218
2219 /* Setup build context */
2220 memset(&bld, 0, sizeof bld);
2221 lp_build_context_init(&bld.base, builder, type);
2222 lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
2223 lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
2224 bld.mask = mask;
2225 bld.pos = pos;
2226 bld.inputs = inputs;
2227 bld.outputs = outputs;
2228 bld.consts_ptr = consts_ptr;
2229 bld.sampler = sampler;
2230 bld.info = info;
2231 bld.indirect_files = info->indirect_files;
2232 bld.instructions = (struct tgsi_full_instruction *)
2233 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2234 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2235
2236 if (!bld.instructions) {
2237 return;
2238 }
2239
2240 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2241
2242 tgsi_parse_init( &parse, tokens );
2243
2244 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2245 tgsi_parse_token( &parse );
2246
2247 switch( parse.FullToken.Token.Type ) {
2248 case TGSI_TOKEN_TYPE_DECLARATION:
2249 /* Inputs already interpolated */
2250 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2251 break;
2252
2253 case TGSI_TOKEN_TYPE_INSTRUCTION:
2254 {
2255 /* save expanded instruction */
2256 if (num_instructions == bld.max_instructions) {
2257 struct tgsi_full_instruction *instructions;
2258 instructions = REALLOC(bld.instructions,
2259 bld.max_instructions
2260 * sizeof(struct tgsi_full_instruction),
2261 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2262 * sizeof(struct tgsi_full_instruction));
2263 if (!instructions) {
2264 break;
2265 }
2266 bld.instructions = instructions;
2267 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2268 }
2269
2270 memcpy(bld.instructions + num_instructions,
2271 &parse.FullToken.FullInstruction,
2272 sizeof(bld.instructions[0]));
2273
2274 num_instructions++;
2275 }
2276
2277 break;
2278
2279 case TGSI_TOKEN_TYPE_IMMEDIATE:
2280 /* simply copy the immediate values into the next immediates[] slot */
2281 {
2282 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2283 assert(size <= 4);
2284 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2285 for( i = 0; i < size; ++i )
2286 bld.immediates[num_immediates][i] =
2287 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2288 for( i = size; i < 4; ++i )
2289 bld.immediates[num_immediates][i] = bld.base.undef;
2290 num_immediates++;
2291 }
2292 break;
2293
2294 case TGSI_TOKEN_TYPE_PROPERTY:
2295 break;
2296
2297 default:
2298 assert( 0 );
2299 }
2300 }
2301
2302 while (pc != -1) {
2303 struct tgsi_full_instruction *instr = bld.instructions + pc;
2304 const struct tgsi_opcode_info *opcode_info =
2305 tgsi_get_opcode_info(instr->Instruction.Opcode);
2306 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2307 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2308 opcode_info->mnemonic);
2309 }
2310
2311 if (0) {
2312 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2313 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2314 debug_printf("11111111111111111111111111111 \n");
2315 tgsi_dump(tokens, 0);
2316 lp_debug_dump_value(function);
2317 debug_printf("2222222222222222222222222222 \n");
2318 }
2319 tgsi_parse_free( &parse );
2320
2321 if (0) {
2322 LLVMModuleRef module = LLVMGetGlobalParent(
2323 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2324 LLVMDumpModule(module);
2325
2326 }
2327
2328 FREE( bld.instructions );
2329 }
2330