9d13007c7a098d2cd4db8d51e6500de94544514d
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61
62
63 #define LP_MAX_INSTRUCTIONS 256
64
65
66 struct lp_build_tgsi_aos_context
67 {
68 struct lp_build_context base;
69
70 /* Builder for integer masks and indices */
71 struct lp_build_context int_bld;
72
73 /*
74 * AoS swizzle used:
75 * - swizzles[0] = red index
76 * - swizzles[1] = green index
77 * - swizzles[2] = blue index
78 * - swizzles[3] = alpha index
79 */
80 unsigned char swizzles[4];
81 unsigned char inv_swizzles[4];
82
83 LLVMValueRef consts_ptr;
84 const LLVMValueRef *inputs;
85 LLVMValueRef *outputs;
86
87 struct lp_build_sampler_aos *sampler;
88
89 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
90 LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
91 LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
92 LLVMValueRef preds[LP_MAX_TGSI_PREDS];
93
94 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
95 * set in the indirect_files field.
96 * The temps[] array above is unused then.
97 */
98 LLVMValueRef temps_array;
99
100 /** bitmask indicating which register files are accessed indirectly */
101 unsigned indirect_files;
102
103 struct tgsi_full_instruction *instructions;
104 uint max_instructions;
105 };
106
107
108 /**
109 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
110 * ordering.
111 */
112 static LLVMValueRef
113 swizzle_aos(struct lp_build_tgsi_aos_context *bld,
114 LLVMValueRef a,
115 unsigned swizzle_x,
116 unsigned swizzle_y,
117 unsigned swizzle_z,
118 unsigned swizzle_w)
119 {
120 unsigned char swizzles[4];
121
122 assert(swizzle_x < 4);
123 assert(swizzle_y < 4);
124 assert(swizzle_z < 4);
125 assert(swizzle_w < 4);
126
127 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
128 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
129 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
130 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
131
132 return lp_build_swizzle_aos(&bld->base, a, swizzles);
133 }
134
135
136 static LLVMValueRef
137 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
138 LLVMValueRef a,
139 unsigned chan)
140 {
141 chan = bld->swizzles[chan];
142 return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
143 }
144
145
146 /**
147 * Register fetch.
148 */
149 static LLVMValueRef
150 emit_fetch(
151 struct lp_build_tgsi_aos_context *bld,
152 const struct tgsi_full_instruction *inst,
153 unsigned src_op)
154 {
155 struct lp_type type = bld->base.type;
156 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
157 LLVMValueRef res;
158 unsigned chan;
159
160 assert(!reg->Register.Indirect);
161
162 /*
163 * Fetch the from the register file.
164 */
165
166 switch (reg->Register.File) {
167 case TGSI_FILE_CONSTANT:
168 /*
169 * Get the constants components
170 */
171
172 res = bld->base.undef;
173 for (chan = 0; chan < 4; ++chan) {
174 LLVMValueRef index;
175 LLVMValueRef scalar_ptr;
176 LLVMValueRef scalar;
177 LLVMValueRef swizzle;
178
179 index = LLVMConstInt(LLVMInt32Type(),
180 reg->Register.Index*4 + chan,
181 0);
182
183 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
184 &index, 1, "");
185
186 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
187
188 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
189
190 /*
191 * NOTE: constants array is always assumed to be RGBA
192 */
193
194 swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0);
195
196 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
197 }
198
199 /*
200 * Broadcast the first quaternion to all others.
201 *
202 * XXX: could be factored into a reusable function.
203 */
204
205 if (type.length > 4) {
206 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
207 unsigned i;
208
209 for (chan = 0; chan < 4; ++chan) {
210 shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
211 }
212
213 for (i = 4; i < type.length; ++i) {
214 shuffles[i] = shuffles[i % 4];
215 }
216
217 res = LLVMBuildShuffleVector(bld->base.builder,
218 res, bld->base.undef,
219 LLVMConstVector(shuffles, type.length),
220 "");
221 }
222 break;
223
224 case TGSI_FILE_IMMEDIATE:
225 res = bld->immediates[reg->Register.Index];
226 assert(res);
227 break;
228
229 case TGSI_FILE_INPUT:
230 res = bld->inputs[reg->Register.Index];
231 assert(res);
232 break;
233
234 case TGSI_FILE_TEMPORARY:
235 {
236 LLVMValueRef temp_ptr;
237 temp_ptr = bld->temps[reg->Register.Index];
238 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
239 if (!res)
240 return bld->base.undef;
241 }
242 break;
243
244 default:
245 assert(0 && "invalid src register in emit_fetch()");
246 return bld->base.undef;
247 }
248
249 /*
250 * Apply sign modifier.
251 */
252
253 if (reg->Register.Absolute) {
254 res = lp_build_abs(&bld->base, res);
255 }
256
257 if(reg->Register.Negate) {
258 res = lp_build_negate(&bld->base, res);
259 }
260
261 /*
262 * Swizzle the argument
263 */
264
265 res = swizzle_aos(bld, res,
266 reg->Register.SwizzleX,
267 reg->Register.SwizzleY,
268 reg->Register.SwizzleZ,
269 reg->Register.SwizzleW);
270
271 return res;
272 }
273
274
275 /**
276 * Register store.
277 */
278 static void
279 emit_store(
280 struct lp_build_tgsi_aos_context *bld,
281 const struct tgsi_full_instruction *inst,
282 unsigned index,
283 LLVMValueRef value)
284 {
285 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
286 LLVMValueRef mask = NULL;
287 LLVMValueRef ptr;
288
289 /*
290 * Saturate the value
291 */
292
293 switch (inst->Instruction.Saturate) {
294 case TGSI_SAT_NONE:
295 break;
296
297 case TGSI_SAT_ZERO_ONE:
298 value = lp_build_max(&bld->base, value, bld->base.zero);
299 value = lp_build_min(&bld->base, value, bld->base.one);
300 break;
301
302 case TGSI_SAT_MINUS_PLUS_ONE:
303 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
304 value = lp_build_min(&bld->base, value, bld->base.one);
305 break;
306
307 default:
308 assert(0);
309 }
310
311 /*
312 * Translate the register file
313 */
314
315 assert(!reg->Register.Indirect);
316
317 switch (reg->Register.File) {
318 case TGSI_FILE_OUTPUT:
319 ptr = bld->outputs[reg->Register.Index];
320 break;
321
322 case TGSI_FILE_TEMPORARY:
323 ptr = bld->temps[reg->Register.Index];
324 break;
325
326 case TGSI_FILE_ADDRESS:
327 ptr = bld->addr[reg->Indirect.Index];
328 break;
329
330 case TGSI_FILE_PREDICATE:
331 ptr = bld->preds[reg->Register.Index];
332 break;
333
334 default:
335 assert(0);
336 return;
337 }
338
339 /*
340 * Predicate
341 */
342
343 if (inst->Instruction.Predicate) {
344 LLVMValueRef pred;
345
346 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
347
348 pred = LLVMBuildLoad(bld->base.builder,
349 bld->preds[inst->Predicate.Index], "");
350
351 /*
352 * Convert the value to an integer mask.
353 */
354 pred = lp_build_compare(bld->base.builder,
355 bld->base.type,
356 PIPE_FUNC_NOTEQUAL,
357 pred,
358 bld->base.zero);
359
360 if (inst->Predicate.Negate) {
361 pred = LLVMBuildNot(bld->base.builder, pred, "");
362 }
363
364 pred = swizzle_aos(bld, pred,
365 inst->Predicate.SwizzleX,
366 inst->Predicate.SwizzleY,
367 inst->Predicate.SwizzleZ,
368 inst->Predicate.SwizzleW);
369
370 if (mask) {
371 mask = LLVMBuildAnd(bld->base.builder, mask, pred, "");
372 } else {
373 mask = pred;
374 }
375 }
376
377 /*
378 * Writemask
379 */
380
381 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
382 LLVMValueRef writemask;
383
384 writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask);
385
386 if (mask) {
387 mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
388 } else {
389 mask = writemask;
390 }
391 }
392
393 if (mask) {
394 LLVMValueRef orig_value;
395
396 orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
397 value = lp_build_select(&bld->base,
398 mask, value, orig_value);
399 }
400
401 LLVMBuildStore(bld->base.builder, value, ptr);
402 }
403
404
405 /**
406 * High-level instruction translators.
407 */
408
409 static LLVMValueRef
410 emit_tex(struct lp_build_tgsi_aos_context *bld,
411 const struct tgsi_full_instruction *inst,
412 enum lp_build_tex_modifier modifier)
413 {
414 unsigned target;
415 unsigned unit;
416 LLVMValueRef coords;
417 LLVMValueRef ddx;
418 LLVMValueRef ddy;
419
420 if (!bld->sampler) {
421 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
422 return bld->base.undef;
423 }
424
425 target = inst->Texture.Texture;
426
427 coords = emit_fetch( bld, inst, 0 );
428
429 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
430 ddx = emit_fetch( bld, inst, 1 );
431 ddy = emit_fetch( bld, inst, 2 );
432 unit = inst->Src[3].Register.Index;
433 } else {
434 #if 0
435 ddx = lp_build_ddx( &bld->base, coords );
436 ddy = lp_build_ddy( &bld->base, coords );
437 #else
438 /* TODO */
439 ddx = bld->base.one;
440 ddy = bld->base.one;
441 #endif
442 unit = inst->Src[1].Register.Index;
443 }
444
445 return bld->sampler->emit_fetch_texel(bld->sampler,
446 &bld->base,
447 target, unit,
448 coords, ddx, ddy,
449 modifier);
450 }
451
452
453 static void
454 emit_declaration(
455 struct lp_build_tgsi_aos_context *bld,
456 const struct tgsi_full_declaration *decl)
457 {
458 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
459
460 unsigned first = decl->Range.First;
461 unsigned last = decl->Range.Last;
462 unsigned idx;
463
464 for (idx = first; idx <= last; ++idx) {
465 switch (decl->Declaration.File) {
466 case TGSI_FILE_TEMPORARY:
467 assert(idx < LP_MAX_TGSI_TEMPS);
468 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
469 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
470 last + 1, 0);
471 bld->temps_array = lp_build_array_alloca(bld->base.builder,
472 vec_type, array_size, "");
473 } else {
474 bld->temps[idx] = lp_build_alloca(bld->base.builder,
475 vec_type, "");
476 }
477 break;
478
479 case TGSI_FILE_OUTPUT:
480 bld->outputs[idx] = lp_build_alloca(bld->base.builder,
481 vec_type, "");
482 break;
483
484 case TGSI_FILE_ADDRESS:
485 assert(idx < LP_MAX_TGSI_ADDRS);
486 bld->addr[idx] = lp_build_alloca(bld->base.builder,
487 vec_type, "");
488 break;
489
490 case TGSI_FILE_PREDICATE:
491 assert(idx < LP_MAX_TGSI_PREDS);
492 bld->preds[idx] = lp_build_alloca(bld->base.builder,
493 vec_type, "");
494 break;
495
496 default:
497 /* don't need to declare other vars */
498 break;
499 }
500 }
501 }
502
503
504 /**
505 * Emit LLVM for one TGSI instruction.
506 * \param return TRUE for success, FALSE otherwise
507 */
508 static boolean
509 emit_instruction(
510 struct lp_build_tgsi_aos_context *bld,
511 const struct tgsi_full_instruction *inst,
512 const struct tgsi_opcode_info *info,
513 int *pc)
514 {
515 LLVMValueRef src0, src1, src2;
516 LLVMValueRef tmp0, tmp1;
517 LLVMValueRef dst0;
518
519 /*
520 * Stores and write masks are handled in a general fashion after the long
521 * instruction opcode switch statement.
522 *
523 * Although not stricitly necessary, we avoid generating instructions for
524 * channels which won't be stored, in cases where's that easy. For some
525 * complex instructions, like texture sampling, it is more convenient to
526 * assume a full writemask and then let LLVM optimization passes eliminate
527 * redundant code.
528 */
529
530 (*pc)++;
531
532 assert(info->num_dst <= 1);
533 if (info->num_dst) {
534 dst0 = bld->base.undef;
535 }
536
537 switch (inst->Instruction.Opcode) {
538 case TGSI_OPCODE_ARL:
539 src0 = emit_fetch(bld, inst, 0);
540 dst0 = lp_build_floor(&bld->base, src0);
541 break;
542
543 case TGSI_OPCODE_MOV:
544 dst0 = emit_fetch(bld, inst, 0);
545 break;
546
547 case TGSI_OPCODE_LIT:
548 return FALSE;
549
550 case TGSI_OPCODE_RCP:
551 /* TGSI_OPCODE_RECIP */
552 src0 = emit_fetch(bld, inst, 0);
553 dst0 = lp_build_rcp(&bld->base, src0);
554 break;
555
556 case TGSI_OPCODE_RSQ:
557 /* TGSI_OPCODE_RECIPSQRT */
558 src0 = emit_fetch(bld, inst, 0);
559 tmp0 = lp_build_abs(&bld->base, src0);
560 dst0 = lp_build_rsqrt(&bld->base, tmp0);
561 break;
562
563 case TGSI_OPCODE_EXP:
564 return FALSE;
565
566 case TGSI_OPCODE_LOG:
567 return FALSE;
568
569 case TGSI_OPCODE_MUL:
570 src0 = emit_fetch(bld, inst, 0);
571 src1 = emit_fetch(bld, inst, 1);
572 dst0 = lp_build_mul(&bld->base, src0, src1);
573 break;
574
575 case TGSI_OPCODE_ADD:
576 src0 = emit_fetch(bld, inst, 0);
577 src1 = emit_fetch(bld, inst, 1);
578 dst0 = lp_build_add(&bld->base, src0, src1);
579 break;
580
581 case TGSI_OPCODE_DP3:
582 /* TGSI_OPCODE_DOT3 */
583 return FALSE;
584
585 case TGSI_OPCODE_DP4:
586 /* TGSI_OPCODE_DOT4 */
587 return FALSE;
588
589 case TGSI_OPCODE_DST:
590 return FALSE;
591
592 case TGSI_OPCODE_MIN:
593 src0 = emit_fetch(bld, inst, 0);
594 src1 = emit_fetch(bld, inst, 1);
595 dst0 = lp_build_max(&bld->base, src0, src1);
596 break;
597
598 case TGSI_OPCODE_MAX:
599 src0 = emit_fetch(bld, inst, 0);
600 src1 = emit_fetch(bld, inst, 1);
601 dst0 = lp_build_max(&bld->base, src0, src1);
602 break;
603
604 case TGSI_OPCODE_SLT:
605 /* TGSI_OPCODE_SETLT */
606 src0 = emit_fetch(bld, inst, 0);
607 src1 = emit_fetch(bld, inst, 1);
608 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
609 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
610 break;
611
612 case TGSI_OPCODE_SGE:
613 /* TGSI_OPCODE_SETGE */
614 src0 = emit_fetch(bld, inst, 0);
615 src1 = emit_fetch(bld, inst, 1);
616 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
617 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
618 break;
619
620 case TGSI_OPCODE_MAD:
621 /* TGSI_OPCODE_MADD */
622 src0 = emit_fetch(bld, inst, 0);
623 src1 = emit_fetch(bld, inst, 1);
624 src2 = emit_fetch(bld, inst, 2);
625 tmp0 = lp_build_mul(&bld->base, src0, src1);
626 dst0 = lp_build_add(&bld->base, tmp0, src2);
627 break;
628
629 case TGSI_OPCODE_SUB:
630 src0 = emit_fetch(bld, inst, 0);
631 src1 = emit_fetch(bld, inst, 1);
632 dst0 = lp_build_sub(&bld->base, src0, src1);
633 break;
634
635 case TGSI_OPCODE_LRP:
636 src0 = emit_fetch(bld, inst, 0);
637 src1 = emit_fetch(bld, inst, 1);
638 src2 = emit_fetch(bld, inst, 2);
639 tmp0 = lp_build_sub(&bld->base, src1, src2);
640 tmp0 = lp_build_mul(&bld->base, src0, tmp0);
641 dst0 = lp_build_add(&bld->base, tmp0, src2);
642 break;
643
644 case TGSI_OPCODE_CND:
645 src0 = emit_fetch(bld, inst, 0);
646 src1 = emit_fetch(bld, inst, 1);
647 src2 = emit_fetch(bld, inst, 2);
648 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
649 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
650 dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
651 break;
652
653 case TGSI_OPCODE_DP2A:
654 return FALSE;
655
656 case TGSI_OPCODE_FRC:
657 src0 = emit_fetch(bld, inst, 0);
658 tmp0 = lp_build_floor(&bld->base, src0);
659 dst0 = lp_build_sub(&bld->base, src0, tmp0);
660 break;
661
662 case TGSI_OPCODE_CLAMP:
663 src0 = emit_fetch(bld, inst, 0);
664 src1 = emit_fetch(bld, inst, 1);
665 src2 = emit_fetch(bld, inst, 2);
666 tmp0 = lp_build_max(&bld->base, src0, src1);
667 dst0 = lp_build_min(&bld->base, tmp0, src2);
668 break;
669
670 case TGSI_OPCODE_FLR:
671 src0 = emit_fetch(bld, inst, 0);
672 dst0 = lp_build_floor(&bld->base, src0);
673 break;
674
675 case TGSI_OPCODE_ROUND:
676 src0 = emit_fetch(bld, inst, 0);
677 dst0 = lp_build_round(&bld->base, src0);
678 break;
679
680 case TGSI_OPCODE_EX2:
681 src0 = emit_fetch(bld, inst, 0);
682 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
683 dst0 = lp_build_exp2(&bld->base, tmp0);
684 break;
685
686 case TGSI_OPCODE_LG2:
687 src0 = emit_fetch(bld, inst, 0);
688 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
689 dst0 = lp_build_log2(&bld->base, tmp0);
690 break;
691
692 case TGSI_OPCODE_POW:
693 src0 = emit_fetch(bld, inst, 0);
694 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
695 src1 = emit_fetch(bld, inst, 1);
696 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
697 dst0 = lp_build_pow(&bld->base, src0, src1);
698 break;
699
700 case TGSI_OPCODE_XPD:
701 return FALSE;
702
703 case TGSI_OPCODE_ABS:
704 src0 = emit_fetch(bld, inst, 0);
705 dst0 = lp_build_abs(&bld->base, src0);
706 break;
707
708 case TGSI_OPCODE_RCC:
709 /* deprecated? */
710 assert(0);
711 return FALSE;
712
713 case TGSI_OPCODE_DPH:
714 return FALSE;
715
716 case TGSI_OPCODE_COS:
717 src0 = emit_fetch(bld, inst, 0);
718 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
719 dst0 = lp_build_cos(&bld->base, tmp0);
720 break;
721
722 case TGSI_OPCODE_DDX:
723 return FALSE;
724
725 case TGSI_OPCODE_DDY:
726 return FALSE;
727
728 case TGSI_OPCODE_KILP:
729 /* predicated kill */
730 return FALSE;
731
732 case TGSI_OPCODE_KIL:
733 /* conditional kill */
734 return FALSE;
735
736 case TGSI_OPCODE_PK2H:
737 return FALSE;
738 break;
739
740 case TGSI_OPCODE_PK2US:
741 return FALSE;
742 break;
743
744 case TGSI_OPCODE_PK4B:
745 return FALSE;
746 break;
747
748 case TGSI_OPCODE_PK4UB:
749 return FALSE;
750
751 case TGSI_OPCODE_RFL:
752 return FALSE;
753
754 case TGSI_OPCODE_SEQ:
755 src0 = emit_fetch(bld, inst, 0);
756 src1 = emit_fetch(bld, inst, 1);
757 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
758 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
759 break;
760
761 case TGSI_OPCODE_SFL:
762 dst0 = bld->base.zero;
763 break;
764
765 case TGSI_OPCODE_SGT:
766 src0 = emit_fetch(bld, inst, 0);
767 src1 = emit_fetch(bld, inst, 1);
768 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
769 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
770 break;
771
772 case TGSI_OPCODE_SIN:
773 src0 = emit_fetch(bld, inst, 0);
774 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
775 dst0 = lp_build_sin(&bld->base, tmp0);
776 break;
777
778 case TGSI_OPCODE_SLE:
779 src0 = emit_fetch(bld, inst, 0);
780 src1 = emit_fetch(bld, inst, 1);
781 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
782 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
783 break;
784
785 case TGSI_OPCODE_SNE:
786 src0 = emit_fetch(bld, inst, 0);
787 src1 = emit_fetch(bld, inst, 1);
788 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
789 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
790 break;
791
792 case TGSI_OPCODE_STR:
793 dst0 = bld->base.one;
794 break;
795
796 case TGSI_OPCODE_TEX:
797 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
798 break;
799
800 case TGSI_OPCODE_TXD:
801 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
802 break;
803
804 case TGSI_OPCODE_UP2H:
805 /* deprecated */
806 assert (0);
807 return FALSE;
808 break;
809
810 case TGSI_OPCODE_UP2US:
811 /* deprecated */
812 assert(0);
813 return FALSE;
814 break;
815
816 case TGSI_OPCODE_UP4B:
817 /* deprecated */
818 assert(0);
819 return FALSE;
820 break;
821
822 case TGSI_OPCODE_UP4UB:
823 /* deprecated */
824 assert(0);
825 return FALSE;
826 break;
827
828 case TGSI_OPCODE_X2D:
829 /* deprecated? */
830 assert(0);
831 return FALSE;
832 break;
833
834 case TGSI_OPCODE_ARA:
835 /* deprecated */
836 assert(0);
837 return FALSE;
838 break;
839
840 case TGSI_OPCODE_ARR:
841 src0 = emit_fetch(bld, inst, 0);
842 dst0 = lp_build_round(&bld->base, src0);
843 break;
844
845 case TGSI_OPCODE_BRA:
846 /* deprecated */
847 assert(0);
848 return FALSE;
849 break;
850
851 case TGSI_OPCODE_CAL:
852 return FALSE;
853
854 case TGSI_OPCODE_RET:
855 return FALSE;
856
857 case TGSI_OPCODE_END:
858 *pc = -1;
859 break;
860
861 case TGSI_OPCODE_SSG:
862 /* TGSI_OPCODE_SGN */
863 tmp0 = emit_fetch(bld, inst, 0);
864 dst0 = lp_build_sgn(&bld->base, tmp0);
865 break;
866
867 case TGSI_OPCODE_CMP:
868 src0 = emit_fetch(bld, inst, 0);
869 src1 = emit_fetch(bld, inst, 1);
870 src2 = emit_fetch(bld, inst, 2);
871 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
872 dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
873 break;
874
875 case TGSI_OPCODE_SCS:
876 return FALSE;
877
878 case TGSI_OPCODE_TXB:
879 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
880 break;
881
882 case TGSI_OPCODE_NRM:
883 /* fall-through */
884 case TGSI_OPCODE_NRM4:
885 return FALSE;
886
887 case TGSI_OPCODE_DIV:
888 /* deprecated */
889 assert(0);
890 return FALSE;
891 break;
892
893 case TGSI_OPCODE_DP2:
894 return FALSE;
895
896 case TGSI_OPCODE_TXL:
897 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
898 break;
899
900 case TGSI_OPCODE_TXP:
901 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
902 break;
903
904 case TGSI_OPCODE_BRK:
905 return FALSE;
906
907 case TGSI_OPCODE_IF:
908 return FALSE;
909
910 case TGSI_OPCODE_BGNLOOP:
911 return FALSE;
912
913 case TGSI_OPCODE_BGNSUB:
914 return FALSE;
915
916 case TGSI_OPCODE_ELSE:
917 return FALSE;
918
919 case TGSI_OPCODE_ENDIF:
920 return FALSE;
921
922 case TGSI_OPCODE_ENDLOOP:
923 return FALSE;
924
925 case TGSI_OPCODE_ENDSUB:
926 return FALSE;
927
928 case TGSI_OPCODE_PUSHA:
929 /* deprecated? */
930 assert(0);
931 return FALSE;
932 break;
933
934 case TGSI_OPCODE_POPA:
935 /* deprecated? */
936 assert(0);
937 return FALSE;
938 break;
939
940 case TGSI_OPCODE_CEIL:
941 src0 = emit_fetch(bld, inst, 0);
942 dst0 = lp_build_ceil(&bld->base, src0);
943 break;
944
945 case TGSI_OPCODE_I2F:
946 /* deprecated? */
947 assert(0);
948 return FALSE;
949 break;
950
951 case TGSI_OPCODE_NOT:
952 /* deprecated? */
953 assert(0);
954 return FALSE;
955 break;
956
957 case TGSI_OPCODE_TRUNC:
958 src0 = emit_fetch(bld, inst, 0);
959 dst0 = lp_build_trunc(&bld->base, src0);
960 break;
961
962 case TGSI_OPCODE_SHL:
963 /* deprecated? */
964 assert(0);
965 return FALSE;
966 break;
967
968 case TGSI_OPCODE_ISHR:
969 /* deprecated? */
970 assert(0);
971 return FALSE;
972 break;
973
974 case TGSI_OPCODE_AND:
975 /* deprecated? */
976 assert(0);
977 return FALSE;
978 break;
979
980 case TGSI_OPCODE_OR:
981 /* deprecated? */
982 assert(0);
983 return FALSE;
984 break;
985
986 case TGSI_OPCODE_MOD:
987 /* deprecated? */
988 assert(0);
989 return FALSE;
990 break;
991
992 case TGSI_OPCODE_XOR:
993 /* deprecated? */
994 assert(0);
995 return FALSE;
996 break;
997
998 case TGSI_OPCODE_SAD:
999 /* deprecated? */
1000 assert(0);
1001 return FALSE;
1002 break;
1003
1004 case TGSI_OPCODE_TXF:
1005 /* deprecated? */
1006 assert(0);
1007 return FALSE;
1008 break;
1009
1010 case TGSI_OPCODE_TXQ:
1011 /* deprecated? */
1012 assert(0);
1013 return FALSE;
1014 break;
1015
1016 case TGSI_OPCODE_CONT:
1017 return FALSE;
1018
1019 case TGSI_OPCODE_EMIT:
1020 return FALSE;
1021 break;
1022
1023 case TGSI_OPCODE_ENDPRIM:
1024 return FALSE;
1025 break;
1026
1027 case TGSI_OPCODE_NOP:
1028 break;
1029
1030 default:
1031 return FALSE;
1032 }
1033
1034 if (info->num_dst) {
1035 emit_store(bld, inst, 0, dst0);
1036 }
1037
1038 return TRUE;
1039 }
1040
1041
1042 void
1043 lp_build_tgsi_aos(LLVMBuilderRef builder,
1044 const struct tgsi_token *tokens,
1045 struct lp_type type,
1046 const unsigned char swizzles[4],
1047 LLVMValueRef consts_ptr,
1048 const LLVMValueRef *inputs,
1049 LLVMValueRef *outputs,
1050 struct lp_build_sampler_aos *sampler,
1051 const struct tgsi_shader_info *info)
1052 {
1053 struct lp_build_tgsi_aos_context bld;
1054 struct tgsi_parse_context parse;
1055 uint num_immediates = 0;
1056 uint num_instructions = 0;
1057 unsigned chan;
1058 int pc = 0;
1059
1060 /* Setup build context */
1061 memset(&bld, 0, sizeof bld);
1062 lp_build_context_init(&bld.base, builder, type);
1063 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1064
1065 for (chan = 0; chan < 4; ++chan) {
1066 bld.swizzles[chan] = swizzles[chan];
1067 bld.inv_swizzles[swizzles[chan]] = chan;
1068 }
1069
1070 bld.inputs = inputs;
1071 bld.outputs = outputs;
1072 bld.consts_ptr = consts_ptr;
1073 bld.sampler = sampler;
1074 bld.indirect_files = info->indirect_files;
1075 bld.instructions = (struct tgsi_full_instruction *)
1076 MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
1077 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1078
1079 if (!bld.instructions) {
1080 return;
1081 }
1082
1083 tgsi_parse_init(&parse, tokens);
1084
1085 while (!tgsi_parse_end_of_tokens(&parse)) {
1086 tgsi_parse_token(&parse);
1087
1088 switch(parse.FullToken.Token.Type) {
1089 case TGSI_TOKEN_TYPE_DECLARATION:
1090 /* Inputs already interpolated */
1091 emit_declaration(&bld, &parse.FullToken.FullDeclaration);
1092 break;
1093
1094 case TGSI_TOKEN_TYPE_INSTRUCTION:
1095 {
1096 /* save expanded instruction */
1097 if (num_instructions == bld.max_instructions) {
1098 struct tgsi_full_instruction *instructions;
1099 instructions = REALLOC(bld.instructions,
1100 bld.max_instructions
1101 * sizeof(struct tgsi_full_instruction),
1102 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1103 * sizeof(struct tgsi_full_instruction));
1104 if (!instructions) {
1105 break;
1106 }
1107 bld.instructions = instructions;
1108 bld.max_instructions += LP_MAX_INSTRUCTIONS;
1109 }
1110
1111 memcpy(bld.instructions + num_instructions,
1112 &parse.FullToken.FullInstruction,
1113 sizeof(bld.instructions[0]));
1114
1115 num_instructions++;
1116 }
1117
1118 break;
1119
1120 case TGSI_TOKEN_TYPE_IMMEDIATE:
1121 /* simply copy the immediate values into the next immediates[] slot */
1122 {
1123 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1124 float imm[4];
1125 assert(size <= 4);
1126 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1127 for (chan = 0; chan < 4; ++chan) {
1128 imm[chan] = 0.0f;
1129 }
1130 for (chan = 0; chan < size; ++chan) {
1131 unsigned swizzle = bld.swizzles[chan];
1132 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1133 }
1134 bld.immediates[num_immediates] =
1135 lp_build_const_aos(type,
1136 imm[0], imm[1], imm[2], imm[3],
1137 NULL);
1138 num_immediates++;
1139 }
1140 break;
1141
1142 case TGSI_TOKEN_TYPE_PROPERTY:
1143 break;
1144
1145 default:
1146 assert(0);
1147 }
1148 }
1149
1150 while (pc != -1) {
1151 struct tgsi_full_instruction *instr = bld.instructions + pc;
1152 const struct tgsi_opcode_info *opcode_info =
1153 tgsi_get_opcode_info(instr->Instruction.Opcode);
1154 if (!emit_instruction(&bld, instr, opcode_info, &pc))
1155 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1156 opcode_info->mnemonic);
1157 }
1158
1159 if (0) {
1160 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1161 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1162 debug_printf("11111111111111111111111111111 \n");
1163 tgsi_dump(tokens, 0);
1164 lp_debug_dump_value(function);
1165 debug_printf("2222222222222222222222222222 \n");
1166 }
1167 tgsi_parse_free(&parse);
1168
1169 if (0) {
1170 LLVMModuleRef module = LLVMGetGlobalParent(
1171 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
1172 LLVMDumpModule(module);
1173 }
1174
1175 FREE(bld.instructions);
1176 }
1177