Merge branch 'gallium-polygon-stipple'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
62 #define LP_MAX_INSTRUCTIONS 256
63
64
65 struct lp_build_tgsi_aos_context
66 {
67 struct lp_build_context base;
68
69 /* Builder for integer masks and indices */
70 struct lp_build_context int_bld;
71
72 /*
73 * AoS swizzle used:
74 * - swizzles[0] = red index
75 * - swizzles[1] = green index
76 * - swizzles[2] = blue index
77 * - swizzles[3] = alpha index
78 */
79 unsigned char swizzles[4];
80 unsigned char inv_swizzles[4];
81
82 LLVMValueRef consts_ptr;
83 const LLVMValueRef *inputs;
84 LLVMValueRef *outputs;
85
86 struct lp_build_sampler_aos *sampler;
87
88 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
89 LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
90 LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
91 LLVMValueRef preds[LP_MAX_TGSI_PREDS];
92
93 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
94 * set in the indirect_files field.
95 * The temps[] array above is unused then.
96 */
97 LLVMValueRef temps_array;
98
99 /** bitmask indicating which register files are accessed indirectly */
100 unsigned indirect_files;
101
102 struct tgsi_full_instruction *instructions;
103 uint max_instructions;
104 };
105
106
107 /**
108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
109 * ordering.
110 */
111 static LLVMValueRef
112 swizzle_aos(struct lp_build_tgsi_aos_context *bld,
113 LLVMValueRef a,
114 unsigned swizzle_x,
115 unsigned swizzle_y,
116 unsigned swizzle_z,
117 unsigned swizzle_w)
118 {
119 unsigned char swizzles[4];
120
121 assert(swizzle_x < 4);
122 assert(swizzle_y < 4);
123 assert(swizzle_z < 4);
124 assert(swizzle_w < 4);
125
126 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
127 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
128 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
129 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
130
131 return lp_build_swizzle_aos(&bld->base, a, swizzles);
132 }
133
134
135 static LLVMValueRef
136 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
137 LLVMValueRef a,
138 unsigned chan)
139 {
140 chan = bld->swizzles[chan];
141 return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
142 }
143
144
145 /**
146 * Register fetch.
147 */
148 static LLVMValueRef
149 emit_fetch(
150 struct lp_build_tgsi_aos_context *bld,
151 const struct tgsi_full_instruction *inst,
152 unsigned src_op)
153 {
154 LLVMBuilderRef builder = bld->base.gallivm->builder;
155 struct lp_type type = bld->base.type;
156 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
157 LLVMValueRef res;
158 unsigned chan;
159
160 assert(!reg->Register.Indirect);
161
162 /*
163 * Fetch the from the register file.
164 */
165
166 switch (reg->Register.File) {
167 case TGSI_FILE_CONSTANT:
168 /*
169 * Get the constants components
170 */
171
172 res = bld->base.undef;
173 for (chan = 0; chan < 4; ++chan) {
174 LLVMValueRef index;
175 LLVMValueRef scalar_ptr;
176 LLVMValueRef scalar;
177 LLVMValueRef swizzle;
178
179 index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan);
180
181 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
182 &index, 1, "");
183
184 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
185
186 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
187
188 /*
189 * NOTE: constants array is always assumed to be RGBA
190 */
191
192 swizzle = lp_build_const_int32(bld->base.gallivm, chan);
193
194 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
195 }
196
197 /*
198 * Broadcast the first quaternion to all others.
199 *
200 * XXX: could be factored into a reusable function.
201 */
202
203 if (type.length > 4) {
204 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
205 unsigned i;
206
207 for (chan = 0; chan < 4; ++chan) {
208 shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan);
209 }
210
211 for (i = 4; i < type.length; ++i) {
212 shuffles[i] = shuffles[i % 4];
213 }
214
215 res = LLVMBuildShuffleVector(builder,
216 res, bld->base.undef,
217 LLVMConstVector(shuffles, type.length),
218 "");
219 }
220 break;
221
222 case TGSI_FILE_IMMEDIATE:
223 res = bld->immediates[reg->Register.Index];
224 assert(res);
225 break;
226
227 case TGSI_FILE_INPUT:
228 res = bld->inputs[reg->Register.Index];
229 assert(res);
230 break;
231
232 case TGSI_FILE_TEMPORARY:
233 {
234 LLVMValueRef temp_ptr;
235 temp_ptr = bld->temps[reg->Register.Index];
236 res = LLVMBuildLoad(builder, temp_ptr, "");
237 if (!res)
238 return bld->base.undef;
239 }
240 break;
241
242 default:
243 assert(0 && "invalid src register in emit_fetch()");
244 return bld->base.undef;
245 }
246
247 /*
248 * Apply sign modifier.
249 */
250
251 if (reg->Register.Absolute) {
252 res = lp_build_abs(&bld->base, res);
253 }
254
255 if(reg->Register.Negate) {
256 res = lp_build_negate(&bld->base, res);
257 }
258
259 /*
260 * Swizzle the argument
261 */
262
263 res = swizzle_aos(bld, res,
264 reg->Register.SwizzleX,
265 reg->Register.SwizzleY,
266 reg->Register.SwizzleZ,
267 reg->Register.SwizzleW);
268
269 return res;
270 }
271
272
273 /**
274 * Register store.
275 */
276 static void
277 emit_store(
278 struct lp_build_tgsi_aos_context *bld,
279 const struct tgsi_full_instruction *inst,
280 unsigned index,
281 LLVMValueRef value)
282 {
283 LLVMBuilderRef builder = bld->base.gallivm->builder;
284 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
285 LLVMValueRef mask = NULL;
286 LLVMValueRef ptr;
287
288 /*
289 * Saturate the value
290 */
291
292 switch (inst->Instruction.Saturate) {
293 case TGSI_SAT_NONE:
294 break;
295
296 case TGSI_SAT_ZERO_ONE:
297 value = lp_build_max(&bld->base, value, bld->base.zero);
298 value = lp_build_min(&bld->base, value, bld->base.one);
299 break;
300
301 case TGSI_SAT_MINUS_PLUS_ONE:
302 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
303 value = lp_build_min(&bld->base, value, bld->base.one);
304 break;
305
306 default:
307 assert(0);
308 }
309
310 /*
311 * Translate the register file
312 */
313
314 assert(!reg->Register.Indirect);
315
316 switch (reg->Register.File) {
317 case TGSI_FILE_OUTPUT:
318 ptr = bld->outputs[reg->Register.Index];
319 break;
320
321 case TGSI_FILE_TEMPORARY:
322 ptr = bld->temps[reg->Register.Index];
323 break;
324
325 case TGSI_FILE_ADDRESS:
326 ptr = bld->addr[reg->Indirect.Index];
327 break;
328
329 case TGSI_FILE_PREDICATE:
330 ptr = bld->preds[reg->Register.Index];
331 break;
332
333 default:
334 assert(0);
335 return;
336 }
337
338 /*
339 * Predicate
340 */
341
342 if (inst->Instruction.Predicate) {
343 LLVMValueRef pred;
344
345 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
346
347 pred = LLVMBuildLoad(builder,
348 bld->preds[inst->Predicate.Index], "");
349
350 /*
351 * Convert the value to an integer mask.
352 */
353 pred = lp_build_compare(bld->base.gallivm,
354 bld->base.type,
355 PIPE_FUNC_NOTEQUAL,
356 pred,
357 bld->base.zero);
358
359 if (inst->Predicate.Negate) {
360 pred = LLVMBuildNot(builder, pred, "");
361 }
362
363 pred = swizzle_aos(bld, pred,
364 inst->Predicate.SwizzleX,
365 inst->Predicate.SwizzleY,
366 inst->Predicate.SwizzleZ,
367 inst->Predicate.SwizzleW);
368
369 if (mask) {
370 mask = LLVMBuildAnd(builder, mask, pred, "");
371 } else {
372 mask = pred;
373 }
374 }
375
376 /*
377 * Writemask
378 */
379
380 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
381 LLVMValueRef writemask;
382
383 writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type,
384 reg->Register.WriteMask);
385
386 if (mask) {
387 mask = LLVMBuildAnd(builder, mask, writemask, "");
388 } else {
389 mask = writemask;
390 }
391 }
392
393 if (mask) {
394 LLVMValueRef orig_value;
395
396 orig_value = LLVMBuildLoad(builder, ptr, "");
397 value = lp_build_select(&bld->base,
398 mask, value, orig_value);
399 }
400
401 LLVMBuildStore(builder, value, ptr);
402 }
403
404
405 /**
406 * High-level instruction translators.
407 */
408
409 static LLVMValueRef
410 emit_tex(struct lp_build_tgsi_aos_context *bld,
411 const struct tgsi_full_instruction *inst,
412 enum lp_build_tex_modifier modifier)
413 {
414 unsigned target;
415 unsigned unit;
416 LLVMValueRef coords;
417 LLVMValueRef ddx;
418 LLVMValueRef ddy;
419
420 if (!bld->sampler) {
421 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
422 return bld->base.undef;
423 }
424
425 target = inst->Texture.Texture;
426
427 coords = emit_fetch( bld, inst, 0 );
428
429 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
430 ddx = emit_fetch( bld, inst, 1 );
431 ddy = emit_fetch( bld, inst, 2 );
432 unit = inst->Src[3].Register.Index;
433 } else {
434 #if 0
435 ddx = lp_build_ddx( &bld->base, coords );
436 ddy = lp_build_ddy( &bld->base, coords );
437 #else
438 /* TODO */
439 ddx = bld->base.one;
440 ddy = bld->base.one;
441 #endif
442 unit = inst->Src[1].Register.Index;
443 }
444
445 return bld->sampler->emit_fetch_texel(bld->sampler,
446 &bld->base,
447 target, unit,
448 coords, ddx, ddy,
449 modifier);
450 }
451
452
453 static void
454 emit_declaration(
455 struct lp_build_tgsi_aos_context *bld,
456 const struct tgsi_full_declaration *decl)
457 {
458 struct gallivm_state *gallivm = bld->base.gallivm;
459 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type);
460
461 unsigned first = decl->Range.First;
462 unsigned last = decl->Range.Last;
463 unsigned idx;
464
465 for (idx = first; idx <= last; ++idx) {
466 switch (decl->Declaration.File) {
467 case TGSI_FILE_TEMPORARY:
468 assert(idx < LP_MAX_TGSI_TEMPS);
469 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
470 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
471 bld->temps_array = lp_build_array_alloca(bld->base.gallivm,
472 vec_type, array_size, "");
473 } else {
474 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
475 }
476 break;
477
478 case TGSI_FILE_OUTPUT:
479 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
480 break;
481
482 case TGSI_FILE_ADDRESS:
483 assert(idx < LP_MAX_TGSI_ADDRS);
484 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
485 break;
486
487 case TGSI_FILE_PREDICATE:
488 assert(idx < LP_MAX_TGSI_PREDS);
489 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
490 break;
491
492 default:
493 /* don't need to declare other vars */
494 break;
495 }
496 }
497 }
498
499
500 /**
501 * Emit LLVM for one TGSI instruction.
502 * \param return TRUE for success, FALSE otherwise
503 */
504 static boolean
505 emit_instruction(
506 struct lp_build_tgsi_aos_context *bld,
507 const struct tgsi_full_instruction *inst,
508 const struct tgsi_opcode_info *info,
509 int *pc)
510 {
511 LLVMValueRef src0, src1, src2;
512 LLVMValueRef tmp0, tmp1;
513 LLVMValueRef dst0 = NULL;
514
515 /*
516 * Stores and write masks are handled in a general fashion after the long
517 * instruction opcode switch statement.
518 *
519 * Although not stricitly necessary, we avoid generating instructions for
520 * channels which won't be stored, in cases where's that easy. For some
521 * complex instructions, like texture sampling, it is more convenient to
522 * assume a full writemask and then let LLVM optimization passes eliminate
523 * redundant code.
524 */
525
526 (*pc)++;
527
528 assert(info->num_dst <= 1);
529 if (info->num_dst) {
530 dst0 = bld->base.undef;
531 }
532
533 switch (inst->Instruction.Opcode) {
534 case TGSI_OPCODE_ARL:
535 src0 = emit_fetch(bld, inst, 0);
536 dst0 = lp_build_floor(&bld->base, src0);
537 break;
538
539 case TGSI_OPCODE_MOV:
540 dst0 = emit_fetch(bld, inst, 0);
541 break;
542
543 case TGSI_OPCODE_LIT:
544 return FALSE;
545
546 case TGSI_OPCODE_RCP:
547 /* TGSI_OPCODE_RECIP */
548 src0 = emit_fetch(bld, inst, 0);
549 dst0 = lp_build_rcp(&bld->base, src0);
550 break;
551
552 case TGSI_OPCODE_RSQ:
553 /* TGSI_OPCODE_RECIPSQRT */
554 src0 = emit_fetch(bld, inst, 0);
555 tmp0 = lp_build_abs(&bld->base, src0);
556 dst0 = lp_build_rsqrt(&bld->base, tmp0);
557 break;
558
559 case TGSI_OPCODE_EXP:
560 return FALSE;
561
562 case TGSI_OPCODE_LOG:
563 return FALSE;
564
565 case TGSI_OPCODE_MUL:
566 src0 = emit_fetch(bld, inst, 0);
567 src1 = emit_fetch(bld, inst, 1);
568 dst0 = lp_build_mul(&bld->base, src0, src1);
569 break;
570
571 case TGSI_OPCODE_ADD:
572 src0 = emit_fetch(bld, inst, 0);
573 src1 = emit_fetch(bld, inst, 1);
574 dst0 = lp_build_add(&bld->base, src0, src1);
575 break;
576
577 case TGSI_OPCODE_DP3:
578 /* TGSI_OPCODE_DOT3 */
579 return FALSE;
580
581 case TGSI_OPCODE_DP4:
582 /* TGSI_OPCODE_DOT4 */
583 return FALSE;
584
585 case TGSI_OPCODE_DST:
586 return FALSE;
587
588 case TGSI_OPCODE_MIN:
589 src0 = emit_fetch(bld, inst, 0);
590 src1 = emit_fetch(bld, inst, 1);
591 dst0 = lp_build_max(&bld->base, src0, src1);
592 break;
593
594 case TGSI_OPCODE_MAX:
595 src0 = emit_fetch(bld, inst, 0);
596 src1 = emit_fetch(bld, inst, 1);
597 dst0 = lp_build_max(&bld->base, src0, src1);
598 break;
599
600 case TGSI_OPCODE_SLT:
601 /* TGSI_OPCODE_SETLT */
602 src0 = emit_fetch(bld, inst, 0);
603 src1 = emit_fetch(bld, inst, 1);
604 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
605 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
606 break;
607
608 case TGSI_OPCODE_SGE:
609 /* TGSI_OPCODE_SETGE */
610 src0 = emit_fetch(bld, inst, 0);
611 src1 = emit_fetch(bld, inst, 1);
612 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
613 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
614 break;
615
616 case TGSI_OPCODE_MAD:
617 /* TGSI_OPCODE_MADD */
618 src0 = emit_fetch(bld, inst, 0);
619 src1 = emit_fetch(bld, inst, 1);
620 src2 = emit_fetch(bld, inst, 2);
621 tmp0 = lp_build_mul(&bld->base, src0, src1);
622 dst0 = lp_build_add(&bld->base, tmp0, src2);
623 break;
624
625 case TGSI_OPCODE_SUB:
626 src0 = emit_fetch(bld, inst, 0);
627 src1 = emit_fetch(bld, inst, 1);
628 dst0 = lp_build_sub(&bld->base, src0, src1);
629 break;
630
631 case TGSI_OPCODE_LRP:
632 src0 = emit_fetch(bld, inst, 0);
633 src1 = emit_fetch(bld, inst, 1);
634 src2 = emit_fetch(bld, inst, 2);
635 tmp0 = lp_build_sub(&bld->base, src1, src2);
636 tmp0 = lp_build_mul(&bld->base, src0, tmp0);
637 dst0 = lp_build_add(&bld->base, tmp0, src2);
638 break;
639
640 case TGSI_OPCODE_CND:
641 src0 = emit_fetch(bld, inst, 0);
642 src1 = emit_fetch(bld, inst, 1);
643 src2 = emit_fetch(bld, inst, 2);
644 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
645 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
646 dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
647 break;
648
649 case TGSI_OPCODE_DP2A:
650 return FALSE;
651
652 case TGSI_OPCODE_FRC:
653 src0 = emit_fetch(bld, inst, 0);
654 tmp0 = lp_build_floor(&bld->base, src0);
655 dst0 = lp_build_sub(&bld->base, src0, tmp0);
656 break;
657
658 case TGSI_OPCODE_CLAMP:
659 src0 = emit_fetch(bld, inst, 0);
660 src1 = emit_fetch(bld, inst, 1);
661 src2 = emit_fetch(bld, inst, 2);
662 tmp0 = lp_build_max(&bld->base, src0, src1);
663 dst0 = lp_build_min(&bld->base, tmp0, src2);
664 break;
665
666 case TGSI_OPCODE_FLR:
667 src0 = emit_fetch(bld, inst, 0);
668 dst0 = lp_build_floor(&bld->base, src0);
669 break;
670
671 case TGSI_OPCODE_ROUND:
672 src0 = emit_fetch(bld, inst, 0);
673 dst0 = lp_build_round(&bld->base, src0);
674 break;
675
676 case TGSI_OPCODE_EX2:
677 src0 = emit_fetch(bld, inst, 0);
678 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
679 dst0 = lp_build_exp2(&bld->base, tmp0);
680 break;
681
682 case TGSI_OPCODE_LG2:
683 src0 = emit_fetch(bld, inst, 0);
684 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
685 dst0 = lp_build_log2(&bld->base, tmp0);
686 break;
687
688 case TGSI_OPCODE_POW:
689 src0 = emit_fetch(bld, inst, 0);
690 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
691 src1 = emit_fetch(bld, inst, 1);
692 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
693 dst0 = lp_build_pow(&bld->base, src0, src1);
694 break;
695
696 case TGSI_OPCODE_XPD:
697 return FALSE;
698
699 case TGSI_OPCODE_ABS:
700 src0 = emit_fetch(bld, inst, 0);
701 dst0 = lp_build_abs(&bld->base, src0);
702 break;
703
704 case TGSI_OPCODE_RCC:
705 /* deprecated? */
706 assert(0);
707 return FALSE;
708
709 case TGSI_OPCODE_DPH:
710 return FALSE;
711
712 case TGSI_OPCODE_COS:
713 src0 = emit_fetch(bld, inst, 0);
714 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
715 dst0 = lp_build_cos(&bld->base, tmp0);
716 break;
717
718 case TGSI_OPCODE_DDX:
719 return FALSE;
720
721 case TGSI_OPCODE_DDY:
722 return FALSE;
723
724 case TGSI_OPCODE_KILP:
725 /* predicated kill */
726 return FALSE;
727
728 case TGSI_OPCODE_KIL:
729 /* conditional kill */
730 return FALSE;
731
732 case TGSI_OPCODE_PK2H:
733 return FALSE;
734 break;
735
736 case TGSI_OPCODE_PK2US:
737 return FALSE;
738 break;
739
740 case TGSI_OPCODE_PK4B:
741 return FALSE;
742 break;
743
744 case TGSI_OPCODE_PK4UB:
745 return FALSE;
746
747 case TGSI_OPCODE_RFL:
748 return FALSE;
749
750 case TGSI_OPCODE_SEQ:
751 src0 = emit_fetch(bld, inst, 0);
752 src1 = emit_fetch(bld, inst, 1);
753 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
754 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
755 break;
756
757 case TGSI_OPCODE_SFL:
758 dst0 = bld->base.zero;
759 break;
760
761 case TGSI_OPCODE_SGT:
762 src0 = emit_fetch(bld, inst, 0);
763 src1 = emit_fetch(bld, inst, 1);
764 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
765 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
766 break;
767
768 case TGSI_OPCODE_SIN:
769 src0 = emit_fetch(bld, inst, 0);
770 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
771 dst0 = lp_build_sin(&bld->base, tmp0);
772 break;
773
774 case TGSI_OPCODE_SLE:
775 src0 = emit_fetch(bld, inst, 0);
776 src1 = emit_fetch(bld, inst, 1);
777 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
778 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
779 break;
780
781 case TGSI_OPCODE_SNE:
782 src0 = emit_fetch(bld, inst, 0);
783 src1 = emit_fetch(bld, inst, 1);
784 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
785 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
786 break;
787
788 case TGSI_OPCODE_STR:
789 dst0 = bld->base.one;
790 break;
791
792 case TGSI_OPCODE_TEX:
793 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
794 break;
795
796 case TGSI_OPCODE_TXD:
797 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
798 break;
799
800 case TGSI_OPCODE_UP2H:
801 /* deprecated */
802 assert (0);
803 return FALSE;
804 break;
805
806 case TGSI_OPCODE_UP2US:
807 /* deprecated */
808 assert(0);
809 return FALSE;
810 break;
811
812 case TGSI_OPCODE_UP4B:
813 /* deprecated */
814 assert(0);
815 return FALSE;
816 break;
817
818 case TGSI_OPCODE_UP4UB:
819 /* deprecated */
820 assert(0);
821 return FALSE;
822 break;
823
824 case TGSI_OPCODE_X2D:
825 /* deprecated? */
826 assert(0);
827 return FALSE;
828 break;
829
830 case TGSI_OPCODE_ARA:
831 /* deprecated */
832 assert(0);
833 return FALSE;
834 break;
835
836 case TGSI_OPCODE_ARR:
837 src0 = emit_fetch(bld, inst, 0);
838 dst0 = lp_build_round(&bld->base, src0);
839 break;
840
841 case TGSI_OPCODE_BRA:
842 /* deprecated */
843 assert(0);
844 return FALSE;
845 break;
846
847 case TGSI_OPCODE_CAL:
848 return FALSE;
849
850 case TGSI_OPCODE_RET:
851 return FALSE;
852
853 case TGSI_OPCODE_END:
854 *pc = -1;
855 break;
856
857 case TGSI_OPCODE_SSG:
858 /* TGSI_OPCODE_SGN */
859 tmp0 = emit_fetch(bld, inst, 0);
860 dst0 = lp_build_sgn(&bld->base, tmp0);
861 break;
862
863 case TGSI_OPCODE_CMP:
864 src0 = emit_fetch(bld, inst, 0);
865 src1 = emit_fetch(bld, inst, 1);
866 src2 = emit_fetch(bld, inst, 2);
867 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
868 dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
869 break;
870
871 case TGSI_OPCODE_SCS:
872 return FALSE;
873
874 case TGSI_OPCODE_TXB:
875 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
876 break;
877
878 case TGSI_OPCODE_NRM:
879 /* fall-through */
880 case TGSI_OPCODE_NRM4:
881 return FALSE;
882
883 case TGSI_OPCODE_DIV:
884 /* deprecated */
885 assert(0);
886 return FALSE;
887 break;
888
889 case TGSI_OPCODE_DP2:
890 return FALSE;
891
892 case TGSI_OPCODE_TXL:
893 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
894 break;
895
896 case TGSI_OPCODE_TXP:
897 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
898 break;
899
900 case TGSI_OPCODE_BRK:
901 return FALSE;
902
903 case TGSI_OPCODE_IF:
904 return FALSE;
905
906 case TGSI_OPCODE_BGNLOOP:
907 return FALSE;
908
909 case TGSI_OPCODE_BGNSUB:
910 return FALSE;
911
912 case TGSI_OPCODE_ELSE:
913 return FALSE;
914
915 case TGSI_OPCODE_ENDIF:
916 return FALSE;
917
918 case TGSI_OPCODE_ENDLOOP:
919 return FALSE;
920
921 case TGSI_OPCODE_ENDSUB:
922 return FALSE;
923
924 case TGSI_OPCODE_PUSHA:
925 /* deprecated? */
926 assert(0);
927 return FALSE;
928 break;
929
930 case TGSI_OPCODE_POPA:
931 /* deprecated? */
932 assert(0);
933 return FALSE;
934 break;
935
936 case TGSI_OPCODE_CEIL:
937 src0 = emit_fetch(bld, inst, 0);
938 dst0 = lp_build_ceil(&bld->base, src0);
939 break;
940
941 case TGSI_OPCODE_I2F:
942 /* deprecated? */
943 assert(0);
944 return FALSE;
945 break;
946
947 case TGSI_OPCODE_NOT:
948 /* deprecated? */
949 assert(0);
950 return FALSE;
951 break;
952
953 case TGSI_OPCODE_TRUNC:
954 src0 = emit_fetch(bld, inst, 0);
955 dst0 = lp_build_trunc(&bld->base, src0);
956 break;
957
958 case TGSI_OPCODE_SHL:
959 /* deprecated? */
960 assert(0);
961 return FALSE;
962 break;
963
964 case TGSI_OPCODE_ISHR:
965 /* deprecated? */
966 assert(0);
967 return FALSE;
968 break;
969
970 case TGSI_OPCODE_AND:
971 /* deprecated? */
972 assert(0);
973 return FALSE;
974 break;
975
976 case TGSI_OPCODE_OR:
977 /* deprecated? */
978 assert(0);
979 return FALSE;
980 break;
981
982 case TGSI_OPCODE_MOD:
983 /* deprecated? */
984 assert(0);
985 return FALSE;
986 break;
987
988 case TGSI_OPCODE_XOR:
989 /* deprecated? */
990 assert(0);
991 return FALSE;
992 break;
993
994 case TGSI_OPCODE_SAD:
995 /* deprecated? */
996 assert(0);
997 return FALSE;
998 break;
999
1000 case TGSI_OPCODE_TXF:
1001 /* deprecated? */
1002 assert(0);
1003 return FALSE;
1004 break;
1005
1006 case TGSI_OPCODE_TXQ:
1007 /* deprecated? */
1008 assert(0);
1009 return FALSE;
1010 break;
1011
1012 case TGSI_OPCODE_CONT:
1013 return FALSE;
1014
1015 case TGSI_OPCODE_EMIT:
1016 return FALSE;
1017 break;
1018
1019 case TGSI_OPCODE_ENDPRIM:
1020 return FALSE;
1021 break;
1022
1023 case TGSI_OPCODE_NOP:
1024 break;
1025
1026 default:
1027 return FALSE;
1028 }
1029
1030 if (info->num_dst) {
1031 emit_store(bld, inst, 0, dst0);
1032 }
1033
1034 return TRUE;
1035 }
1036
1037
1038 void
1039 lp_build_tgsi_aos(struct gallivm_state *gallivm,
1040 const struct tgsi_token *tokens,
1041 struct lp_type type,
1042 const unsigned char swizzles[4],
1043 LLVMValueRef consts_ptr,
1044 const LLVMValueRef *inputs,
1045 LLVMValueRef *outputs,
1046 struct lp_build_sampler_aos *sampler,
1047 const struct tgsi_shader_info *info)
1048 {
1049 struct lp_build_tgsi_aos_context bld;
1050 struct tgsi_parse_context parse;
1051 uint num_immediates = 0;
1052 uint num_instructions = 0;
1053 unsigned chan;
1054 int pc = 0;
1055
1056 /* Setup build context */
1057 memset(&bld, 0, sizeof bld);
1058 lp_build_context_init(&bld.base, gallivm, type);
1059 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1060
1061 for (chan = 0; chan < 4; ++chan) {
1062 bld.swizzles[chan] = swizzles[chan];
1063 bld.inv_swizzles[swizzles[chan]] = chan;
1064 }
1065
1066 bld.inputs = inputs;
1067 bld.outputs = outputs;
1068 bld.consts_ptr = consts_ptr;
1069 bld.sampler = sampler;
1070 bld.indirect_files = info->indirect_files;
1071 bld.instructions = (struct tgsi_full_instruction *)
1072 MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
1073 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1074
1075 if (!bld.instructions) {
1076 return;
1077 }
1078
1079 tgsi_parse_init(&parse, tokens);
1080
1081 while (!tgsi_parse_end_of_tokens(&parse)) {
1082 tgsi_parse_token(&parse);
1083
1084 switch(parse.FullToken.Token.Type) {
1085 case TGSI_TOKEN_TYPE_DECLARATION:
1086 /* Inputs already interpolated */
1087 emit_declaration(&bld, &parse.FullToken.FullDeclaration);
1088 break;
1089
1090 case TGSI_TOKEN_TYPE_INSTRUCTION:
1091 {
1092 /* save expanded instruction */
1093 if (num_instructions == bld.max_instructions) {
1094 struct tgsi_full_instruction *instructions;
1095 instructions = REALLOC(bld.instructions,
1096 bld.max_instructions
1097 * sizeof(struct tgsi_full_instruction),
1098 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1099 * sizeof(struct tgsi_full_instruction));
1100 if (!instructions) {
1101 break;
1102 }
1103 bld.instructions = instructions;
1104 bld.max_instructions += LP_MAX_INSTRUCTIONS;
1105 }
1106
1107 memcpy(bld.instructions + num_instructions,
1108 &parse.FullToken.FullInstruction,
1109 sizeof(bld.instructions[0]));
1110
1111 num_instructions++;
1112 }
1113
1114 break;
1115
1116 case TGSI_TOKEN_TYPE_IMMEDIATE:
1117 /* simply copy the immediate values into the next immediates[] slot */
1118 {
1119 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1120 float imm[4];
1121 assert(size <= 4);
1122 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1123 for (chan = 0; chan < 4; ++chan) {
1124 imm[chan] = 0.0f;
1125 }
1126 for (chan = 0; chan < size; ++chan) {
1127 unsigned swizzle = bld.swizzles[chan];
1128 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1129 }
1130 bld.immediates[num_immediates] =
1131 lp_build_const_aos(gallivm, type,
1132 imm[0], imm[1], imm[2], imm[3],
1133 NULL);
1134 num_immediates++;
1135 }
1136 break;
1137
1138 case TGSI_TOKEN_TYPE_PROPERTY:
1139 break;
1140
1141 default:
1142 assert(0);
1143 }
1144 }
1145
1146 while (pc != -1) {
1147 struct tgsi_full_instruction *instr = bld.instructions + pc;
1148 const struct tgsi_opcode_info *opcode_info =
1149 tgsi_get_opcode_info(instr->Instruction.Opcode);
1150 if (!emit_instruction(&bld, instr, opcode_info, &pc))
1151 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1152 opcode_info->mnemonic);
1153 }
1154
1155 if (0) {
1156 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1157 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1158 debug_printf("11111111111111111111111111111 \n");
1159 tgsi_dump(tokens, 0);
1160 lp_debug_dump_value(function);
1161 debug_printf("2222222222222222222222222222 \n");
1162 }
1163 tgsi_parse_free(&parse);
1164
1165 if (0) {
1166 LLVMModuleRef module = LLVMGetGlobalParent(
1167 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1168 LLVMDumpModule(module);
1169 }
1170
1171 FREE(bld.instructions);
1172 }
1173