gallivm: Cleanup the TGSI <-> sampler interface.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61
62
/*
 * Number of tgsi_full_instruction slots initially allocated for the saved
 * instruction list, and also the amount by which that list grows each time
 * it fills up.
 */
#define LP_MAX_INSTRUCTIONS 256
64
65
66 struct lp_build_tgsi_aos_context
67 {
68 struct lp_build_context base;
69
70 /* Builder for integer masks and indices */
71 struct lp_build_context int_bld;
72
73 LLVMValueRef consts_ptr;
74 const LLVMValueRef *inputs;
75 LLVMValueRef *outputs;
76
77 struct lp_build_sampler_aos *sampler;
78
79 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
80 LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
81 LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
82 LLVMValueRef preds[LP_MAX_TGSI_PREDS];
83
84 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
85 * set in the indirect_files field.
86 * The temps[] array above is unused then.
87 */
88 LLVMValueRef temps_array;
89
90 /** bitmask indicating which register files are accessed indirectly */
91 unsigned indirect_files;
92
93 struct tgsi_full_instruction *instructions;
94 uint max_instructions;
95 };
96
97
98 /**
99 * Register fetch.
100 */
101 static LLVMValueRef
102 emit_fetch(
103 struct lp_build_tgsi_aos_context *bld,
104 const struct tgsi_full_instruction *inst,
105 unsigned src_op)
106 {
107 struct lp_type type = bld->base.type;
108 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
109 unsigned char swizzles[4];
110 LLVMValueRef res;
111 unsigned chan;
112
113 assert(!reg->Register.Indirect);
114
115 /*
116 * Fetch the from the register file.
117 */
118
119 switch (reg->Register.File) {
120 case TGSI_FILE_CONSTANT:
121 /*
122 * Get the constants components
123 */
124
125 res = bld->base.undef;
126 for (chan = 0; chan < 4; ++chan) {
127 LLVMValueRef index;
128 LLVMValueRef scalar_ptr;
129 LLVMValueRef scalar;
130
131 index = LLVMConstInt(LLVMInt32Type(),
132 reg->Register.Index*4 + chan, 0);
133
134 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
135 &index, 1, "");
136
137 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
138
139 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
140
141 index = LLVMConstInt(LLVMInt32Type(), chan, 0);
142
143 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, index, "");
144 }
145
146 /*
147 * Broadcast the first quaternion to all others.
148 *
149 * XXX: could be factored into a reusable function.
150 */
151
152 if (type.length > 4) {
153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154 unsigned i;
155
156 for (chan = 0; chan < 4; ++chan) {
157 shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
158 }
159
160 for (i = 4; i < type.length; ++i) {
161 shuffles[i] = shuffles[i % 4];
162 }
163
164 res = LLVMBuildShuffleVector(bld->base.builder,
165 res, bld->base.undef,
166 LLVMConstVector(shuffles, type.length),
167 "");
168 }
169 break;
170
171 case TGSI_FILE_IMMEDIATE:
172 res = bld->immediates[reg->Register.Index];
173 assert(res);
174 break;
175
176 case TGSI_FILE_INPUT:
177 res = bld->inputs[reg->Register.Index];
178 assert(res);
179 break;
180
181 case TGSI_FILE_TEMPORARY:
182 {
183 LLVMValueRef temp_ptr;
184 temp_ptr = bld->temps[reg->Register.Index];
185 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
186 if (!res)
187 return bld->base.undef;
188 }
189 break;
190
191 default:
192 assert(0 && "invalid src register in emit_fetch()");
193 return bld->base.undef;
194 }
195
196 /*
197 * Apply sign modifier.
198 */
199
200 if (reg->Register.Absolute) {
201 res = lp_build_abs(&bld->base, res);
202 }
203
204 if(reg->Register.Negate) {
205 res = lp_build_negate(&bld->base, res);
206 }
207
208 /*
209 * Swizzle the argument
210 */
211
212 for (chan = 0; chan < 4; ++chan) {
213 const unsigned swizzle =
214 tgsi_util_get_full_src_register_swizzle(reg, chan);
215 if (swizzle > 3) {
216 assert(0 && "invalid swizzle in emit_fetch()");
217 return bld->base.undef;
218 }
219 swizzles[chan] = swizzle;
220 }
221
222 res = lp_build_swizzle_aos(&bld->base, res, swizzles);
223
224 return res;
225 }
226
227
228 /**
229 * Register store.
230 */
231 static void
232 emit_store(
233 struct lp_build_tgsi_aos_context *bld,
234 const struct tgsi_full_instruction *inst,
235 unsigned index,
236 LLVMValueRef value)
237 {
238 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
239 LLVMValueRef mask = NULL;
240 LLVMValueRef ptr;
241
242 /*
243 * Saturate the value
244 */
245
246 switch (inst->Instruction.Saturate) {
247 case TGSI_SAT_NONE:
248 break;
249
250 case TGSI_SAT_ZERO_ONE:
251 value = lp_build_max(&bld->base, value, bld->base.zero);
252 value = lp_build_min(&bld->base, value, bld->base.one);
253 break;
254
255 case TGSI_SAT_MINUS_PLUS_ONE:
256 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
257 value = lp_build_min(&bld->base, value, bld->base.one);
258 break;
259
260 default:
261 assert(0);
262 }
263
264 /*
265 * Translate the register file
266 */
267
268 assert(!reg->Register.Indirect);
269
270 switch (reg->Register.File) {
271 case TGSI_FILE_OUTPUT:
272 ptr = bld->outputs[reg->Register.Index];
273 break;
274
275 case TGSI_FILE_TEMPORARY:
276 ptr = bld->temps[reg->Register.Index];
277 break;
278
279 case TGSI_FILE_ADDRESS:
280 ptr = bld->addr[reg->Indirect.Index];
281 break;
282
283 case TGSI_FILE_PREDICATE:
284 ptr = bld->preds[reg->Register.Index];
285 break;
286
287 default:
288 assert(0);
289 return;
290 }
291
292 /*
293 * Predicate
294 */
295
296 if (inst->Instruction.Predicate) {
297 unsigned char swizzles[4];
298 LLVMValueRef pred;
299
300 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
301
302 pred = LLVMBuildLoad(bld->base.builder,
303 bld->preds[inst->Predicate.Index], "");
304
305 /*
306 * Convert the value to an integer mask.
307 */
308 pred = lp_build_compare(bld->base.builder,
309 bld->base.type,
310 PIPE_FUNC_NOTEQUAL,
311 pred,
312 bld->base.zero);
313
314 if (inst->Predicate.Negate) {
315 pred = LLVMBuildNot(bld->base.builder, pred, "");
316 }
317
318 swizzles[0] = inst->Predicate.SwizzleX;
319 swizzles[1] = inst->Predicate.SwizzleY;
320 swizzles[2] = inst->Predicate.SwizzleZ;
321 swizzles[3] = inst->Predicate.SwizzleW;
322
323 pred = lp_build_swizzle_aos(&bld->base, pred, swizzles);
324
325 if (mask) {
326 mask = LLVMBuildAnd(bld->base.builder, mask, pred, "");
327 } else {
328 mask = pred;
329 }
330 }
331
332 /*
333 * Writemask
334 */
335
336 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
337 LLVMValueRef writemask;
338
339 writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask);
340
341 if (mask) {
342 mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
343 } else {
344 mask = writemask;
345 }
346 }
347
348 if (mask) {
349 LLVMValueRef orig_value;
350
351 orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
352 value = lp_build_select(&bld->base,
353 mask, value, orig_value);
354 }
355
356 LLVMBuildStore(bld->base.builder, value, ptr);
357 }
358
359
360 /**
361 * High-level instruction translators.
362 */
363
364 static LLVMValueRef
365 emit_tex(struct lp_build_tgsi_aos_context *bld,
366 const struct tgsi_full_instruction *inst,
367 enum lp_build_tex_modifier modifier)
368 {
369 unsigned target;
370 unsigned unit;
371 LLVMValueRef coords;
372 LLVMValueRef ddx;
373 LLVMValueRef ddy;
374
375 if (!bld->sampler) {
376 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
377 return bld->base.undef;
378 }
379
380 target = inst->Texture.Texture;
381
382 coords = emit_fetch( bld, inst, 0 );
383
384 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
385 ddx = emit_fetch( bld, inst, 1 );
386 ddy = emit_fetch( bld, inst, 2 );
387 unit = inst->Src[3].Register.Index;
388 } else {
389 #if 0
390 ddx = lp_build_ddx( &bld->base, coords );
391 ddy = lp_build_ddy( &bld->base, coords );
392 #else
393 /* TODO */
394 ddx = bld->base.one;
395 ddy = bld->base.one;
396 #endif
397 unit = inst->Src[1].Register.Index;
398 }
399
400 return bld->sampler->emit_fetch_texel(bld->sampler,
401 &bld->base,
402 target, unit,
403 coords, ddx, ddy,
404 modifier);
405 }
406
407
408 static void
409 emit_declaration(
410 struct lp_build_tgsi_aos_context *bld,
411 const struct tgsi_full_declaration *decl)
412 {
413 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
414
415 unsigned first = decl->Range.First;
416 unsigned last = decl->Range.Last;
417 unsigned idx;
418
419 for (idx = first; idx <= last; ++idx) {
420 switch (decl->Declaration.File) {
421 case TGSI_FILE_TEMPORARY:
422 assert(idx < LP_MAX_TGSI_TEMPS);
423 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
424 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
425 last + 1, 0);
426 bld->temps_array = lp_build_array_alloca(bld->base.builder,
427 vec_type, array_size, "");
428 } else {
429 bld->temps[idx] = lp_build_alloca(bld->base.builder,
430 vec_type, "");
431 }
432 break;
433
434 case TGSI_FILE_OUTPUT:
435 bld->outputs[idx] = lp_build_alloca(bld->base.builder,
436 vec_type, "");
437 break;
438
439 case TGSI_FILE_ADDRESS:
440 assert(idx < LP_MAX_TGSI_ADDRS);
441 bld->addr[idx] = lp_build_alloca(bld->base.builder,
442 vec_type, "");
443 break;
444
445 case TGSI_FILE_PREDICATE:
446 assert(idx < LP_MAX_TGSI_PREDS);
447 bld->preds[idx] = lp_build_alloca(bld->base.builder,
448 vec_type, "");
449 break;
450
451 default:
452 /* don't need to declare other vars */
453 break;
454 }
455 }
456 }
457
458
459 /**
460 * Emit LLVM for one TGSI instruction.
461 * \param return TRUE for success, FALSE otherwise
462 */
463 static boolean
464 emit_instruction(
465 struct lp_build_tgsi_aos_context *bld,
466 const struct tgsi_full_instruction *inst,
467 const struct tgsi_opcode_info *info,
468 int *pc)
469 {
470 LLVMValueRef src0, src1, src2;
471 LLVMValueRef tmp0, tmp1;
472 LLVMValueRef dst0;
473
474 /*
475 * Stores and write masks are handled in a general fashion after the long
476 * instruction opcode switch statement.
477 *
478 * Although not stricitly necessary, we avoid generating instructions for
479 * channels which won't be stored, in cases where's that easy. For some
480 * complex instructions, like texture sampling, it is more convenient to
481 * assume a full writemask and then let LLVM optimization passes eliminate
482 * redundant code.
483 */
484
485 (*pc)++;
486
487 assert(info->num_dst <= 1);
488 if (info->num_dst) {
489 dst0 = bld->base.undef;
490 }
491
492 switch (inst->Instruction.Opcode) {
493 case TGSI_OPCODE_ARL:
494 src0 = emit_fetch(bld, inst, 0);
495 dst0 = lp_build_floor(&bld->base, src0);
496 break;
497
498 case TGSI_OPCODE_MOV:
499 dst0 = emit_fetch(bld, inst, 0);
500 break;
501
502 case TGSI_OPCODE_LIT:
503 return FALSE;
504
505 case TGSI_OPCODE_RCP:
506 /* TGSI_OPCODE_RECIP */
507 src0 = emit_fetch(bld, inst, 0);
508 dst0 = lp_build_rcp(&bld->base, src0);
509 break;
510
511 case TGSI_OPCODE_RSQ:
512 /* TGSI_OPCODE_RECIPSQRT */
513 src0 = emit_fetch(bld, inst, 0);
514 tmp0 = lp_build_abs(&bld->base, src0);
515 dst0 = lp_build_rsqrt(&bld->base, tmp0);
516 break;
517
518 case TGSI_OPCODE_EXP:
519 return FALSE;
520
521 case TGSI_OPCODE_LOG:
522 return FALSE;
523
524 case TGSI_OPCODE_MUL:
525 src0 = emit_fetch(bld, inst, 0);
526 src1 = emit_fetch(bld, inst, 1);
527 dst0 = lp_build_mul(&bld->base, src0, src1);
528 break;
529
530 case TGSI_OPCODE_ADD:
531 src0 = emit_fetch(bld, inst, 0);
532 src1 = emit_fetch(bld, inst, 1);
533 dst0 = lp_build_add(&bld->base, src0, src1);
534 break;
535
536 case TGSI_OPCODE_DP3:
537 /* TGSI_OPCODE_DOT3 */
538 return FALSE;
539
540 case TGSI_OPCODE_DP4:
541 /* TGSI_OPCODE_DOT4 */
542 return FALSE;
543
544 case TGSI_OPCODE_DST:
545 return FALSE;
546
547 case TGSI_OPCODE_MIN:
548 src0 = emit_fetch(bld, inst, 0);
549 src1 = emit_fetch(bld, inst, 1);
550 dst0 = lp_build_max(&bld->base, src0, src1);
551 break;
552
553 case TGSI_OPCODE_MAX:
554 src0 = emit_fetch(bld, inst, 0);
555 src1 = emit_fetch(bld, inst, 1);
556 dst0 = lp_build_max(&bld->base, src0, src1);
557 break;
558
559 case TGSI_OPCODE_SLT:
560 /* TGSI_OPCODE_SETLT */
561 src0 = emit_fetch(bld, inst, 0);
562 src1 = emit_fetch(bld, inst, 1);
563 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
564 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
565 break;
566
567 case TGSI_OPCODE_SGE:
568 /* TGSI_OPCODE_SETGE */
569 src0 = emit_fetch(bld, inst, 0);
570 src1 = emit_fetch(bld, inst, 1);
571 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
572 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
573 break;
574
575 case TGSI_OPCODE_MAD:
576 /* TGSI_OPCODE_MADD */
577 src0 = emit_fetch(bld, inst, 0);
578 src1 = emit_fetch(bld, inst, 1);
579 src2 = emit_fetch(bld, inst, 2);
580 tmp0 = lp_build_mul(&bld->base, src0, src1);
581 dst0 = lp_build_add(&bld->base, tmp0, src2);
582 break;
583
584 case TGSI_OPCODE_SUB:
585 src0 = emit_fetch(bld, inst, 0);
586 src1 = emit_fetch(bld, inst, 1);
587 dst0 = lp_build_sub(&bld->base, src0, src1);
588 break;
589
590 case TGSI_OPCODE_LRP:
591 src0 = emit_fetch(bld, inst, 0);
592 src1 = emit_fetch(bld, inst, 1);
593 src2 = emit_fetch(bld, inst, 2);
594 tmp0 = lp_build_sub(&bld->base, src1, src2);
595 tmp0 = lp_build_mul(&bld->base, src0, tmp0);
596 dst0 = lp_build_add(&bld->base, tmp0, src2);
597 break;
598
599 case TGSI_OPCODE_CND:
600 src0 = emit_fetch(bld, inst, 0);
601 src1 = emit_fetch(bld, inst, 1);
602 src2 = emit_fetch(bld, inst, 2);
603 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
604 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
605 dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
606 break;
607
608 case TGSI_OPCODE_DP2A:
609 return FALSE;
610
611 case TGSI_OPCODE_FRC:
612 src0 = emit_fetch(bld, inst, 0);
613 tmp0 = lp_build_floor(&bld->base, src0);
614 dst0 = lp_build_sub(&bld->base, src0, tmp0);
615 break;
616
617 case TGSI_OPCODE_CLAMP:
618 src0 = emit_fetch(bld, inst, 0);
619 src1 = emit_fetch(bld, inst, 1);
620 src2 = emit_fetch(bld, inst, 2);
621 tmp0 = lp_build_max(&bld->base, src0, src1);
622 dst0 = lp_build_min(&bld->base, tmp0, src2);
623 break;
624
625 case TGSI_OPCODE_FLR:
626 src0 = emit_fetch(bld, inst, 0);
627 dst0 = lp_build_floor(&bld->base, src0);
628 break;
629
630 case TGSI_OPCODE_ROUND:
631 src0 = emit_fetch(bld, inst, 0);
632 dst0 = lp_build_round(&bld->base, src0);
633 break;
634
635 case TGSI_OPCODE_EX2:
636 src0 = emit_fetch(bld, inst, 0);
637 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
638 dst0 = lp_build_exp2(&bld->base, tmp0);
639 break;
640
641 case TGSI_OPCODE_LG2:
642 src0 = emit_fetch(bld, inst, 0);
643 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
644 dst0 = lp_build_log2(&bld->base, tmp0);
645 break;
646
647 case TGSI_OPCODE_POW:
648 src0 = emit_fetch(bld, inst, 0);
649 src0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
650 src1 = emit_fetch(bld, inst, 1);
651 src1 = lp_build_swizzle_scalar_aos(&bld->base, src1, TGSI_SWIZZLE_X);
652 dst0 = lp_build_pow(&bld->base, src0, src1);
653 break;
654
655 case TGSI_OPCODE_XPD:
656 return FALSE;
657
658 case TGSI_OPCODE_ABS:
659 src0 = emit_fetch(bld, inst, 0);
660 dst0 = lp_build_abs(&bld->base, src0);
661 break;
662
663 case TGSI_OPCODE_RCC:
664 /* deprecated? */
665 assert(0);
666 return FALSE;
667
668 case TGSI_OPCODE_DPH:
669 return FALSE;
670
671 case TGSI_OPCODE_COS:
672 src0 = emit_fetch(bld, inst, 0);
673 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
674 dst0 = lp_build_cos(&bld->base, tmp0);
675 break;
676
677 case TGSI_OPCODE_DDX:
678 return FALSE;
679
680 case TGSI_OPCODE_DDY:
681 return FALSE;
682
683 case TGSI_OPCODE_KILP:
684 /* predicated kill */
685 return FALSE;
686
687 case TGSI_OPCODE_KIL:
688 /* conditional kill */
689 return FALSE;
690
691 case TGSI_OPCODE_PK2H:
692 return FALSE;
693 break;
694
695 case TGSI_OPCODE_PK2US:
696 return FALSE;
697 break;
698
699 case TGSI_OPCODE_PK4B:
700 return FALSE;
701 break;
702
703 case TGSI_OPCODE_PK4UB:
704 return FALSE;
705
706 case TGSI_OPCODE_RFL:
707 return FALSE;
708
709 case TGSI_OPCODE_SEQ:
710 src0 = emit_fetch(bld, inst, 0);
711 src1 = emit_fetch(bld, inst, 1);
712 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
713 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
714 break;
715
716 case TGSI_OPCODE_SFL:
717 dst0 = bld->base.zero;
718 break;
719
720 case TGSI_OPCODE_SGT:
721 src0 = emit_fetch(bld, inst, 0);
722 src1 = emit_fetch(bld, inst, 1);
723 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
724 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
725 break;
726
727 case TGSI_OPCODE_SIN:
728 src0 = emit_fetch(bld, inst, 0);
729 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
730 dst0 = lp_build_sin(&bld->base, tmp0);
731 break;
732
733 case TGSI_OPCODE_SLE:
734 src0 = emit_fetch(bld, inst, 0);
735 src1 = emit_fetch(bld, inst, 1);
736 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
737 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
738 break;
739
740 case TGSI_OPCODE_SNE:
741 src0 = emit_fetch(bld, inst, 0);
742 src1 = emit_fetch(bld, inst, 1);
743 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
744 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
745 break;
746
747 case TGSI_OPCODE_STR:
748 dst0 = bld->base.one;
749 break;
750
751 case TGSI_OPCODE_TEX:
752 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
753 break;
754
755 case TGSI_OPCODE_TXD:
756 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
757 break;
758
759 case TGSI_OPCODE_UP2H:
760 /* deprecated */
761 assert (0);
762 return FALSE;
763 break;
764
765 case TGSI_OPCODE_UP2US:
766 /* deprecated */
767 assert(0);
768 return FALSE;
769 break;
770
771 case TGSI_OPCODE_UP4B:
772 /* deprecated */
773 assert(0);
774 return FALSE;
775 break;
776
777 case TGSI_OPCODE_UP4UB:
778 /* deprecated */
779 assert(0);
780 return FALSE;
781 break;
782
783 case TGSI_OPCODE_X2D:
784 /* deprecated? */
785 assert(0);
786 return FALSE;
787 break;
788
789 case TGSI_OPCODE_ARA:
790 /* deprecated */
791 assert(0);
792 return FALSE;
793 break;
794
795 case TGSI_OPCODE_ARR:
796 src0 = emit_fetch(bld, inst, 0);
797 dst0 = lp_build_round(&bld->base, src0);
798 break;
799
800 case TGSI_OPCODE_BRA:
801 /* deprecated */
802 assert(0);
803 return FALSE;
804 break;
805
806 case TGSI_OPCODE_CAL:
807 return FALSE;
808
809 case TGSI_OPCODE_RET:
810 return FALSE;
811
812 case TGSI_OPCODE_END:
813 *pc = -1;
814 break;
815
816 case TGSI_OPCODE_SSG:
817 /* TGSI_OPCODE_SGN */
818 tmp0 = emit_fetch(bld, inst, 0);
819 dst0 = lp_build_sgn(&bld->base, tmp0);
820 break;
821
822 case TGSI_OPCODE_CMP:
823 src0 = emit_fetch(bld, inst, 0);
824 src1 = emit_fetch(bld, inst, 1);
825 src2 = emit_fetch(bld, inst, 2);
826 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
827 dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
828 break;
829
830 case TGSI_OPCODE_SCS:
831 return FALSE;
832
833 case TGSI_OPCODE_TXB:
834 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
835 break;
836
837 case TGSI_OPCODE_NRM:
838 /* fall-through */
839 case TGSI_OPCODE_NRM4:
840 return FALSE;
841
842 case TGSI_OPCODE_DIV:
843 /* deprecated */
844 assert(0);
845 return FALSE;
846 break;
847
848 case TGSI_OPCODE_DP2:
849 return FALSE;
850
851 case TGSI_OPCODE_TXL:
852 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
853 break;
854
855 case TGSI_OPCODE_TXP:
856 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
857 break;
858
859 case TGSI_OPCODE_BRK:
860 return FALSE;
861
862 case TGSI_OPCODE_IF:
863 return FALSE;
864
865 case TGSI_OPCODE_BGNLOOP:
866 return FALSE;
867
868 case TGSI_OPCODE_BGNSUB:
869 return FALSE;
870
871 case TGSI_OPCODE_ELSE:
872 return FALSE;
873
874 case TGSI_OPCODE_ENDIF:
875 return FALSE;
876
877 case TGSI_OPCODE_ENDLOOP:
878 return FALSE;
879
880 case TGSI_OPCODE_ENDSUB:
881 return FALSE;
882
883 case TGSI_OPCODE_PUSHA:
884 /* deprecated? */
885 assert(0);
886 return FALSE;
887 break;
888
889 case TGSI_OPCODE_POPA:
890 /* deprecated? */
891 assert(0);
892 return FALSE;
893 break;
894
895 case TGSI_OPCODE_CEIL:
896 src0 = emit_fetch(bld, inst, 0);
897 dst0 = lp_build_ceil(&bld->base, src0);
898 break;
899
900 case TGSI_OPCODE_I2F:
901 /* deprecated? */
902 assert(0);
903 return FALSE;
904 break;
905
906 case TGSI_OPCODE_NOT:
907 /* deprecated? */
908 assert(0);
909 return FALSE;
910 break;
911
912 case TGSI_OPCODE_TRUNC:
913 src0 = emit_fetch(bld, inst, 0);
914 dst0 = lp_build_trunc(&bld->base, src0);
915 break;
916
917 case TGSI_OPCODE_SHL:
918 /* deprecated? */
919 assert(0);
920 return FALSE;
921 break;
922
923 case TGSI_OPCODE_ISHR:
924 /* deprecated? */
925 assert(0);
926 return FALSE;
927 break;
928
929 case TGSI_OPCODE_AND:
930 /* deprecated? */
931 assert(0);
932 return FALSE;
933 break;
934
935 case TGSI_OPCODE_OR:
936 /* deprecated? */
937 assert(0);
938 return FALSE;
939 break;
940
941 case TGSI_OPCODE_MOD:
942 /* deprecated? */
943 assert(0);
944 return FALSE;
945 break;
946
947 case TGSI_OPCODE_XOR:
948 /* deprecated? */
949 assert(0);
950 return FALSE;
951 break;
952
953 case TGSI_OPCODE_SAD:
954 /* deprecated? */
955 assert(0);
956 return FALSE;
957 break;
958
959 case TGSI_OPCODE_TXF:
960 /* deprecated? */
961 assert(0);
962 return FALSE;
963 break;
964
965 case TGSI_OPCODE_TXQ:
966 /* deprecated? */
967 assert(0);
968 return FALSE;
969 break;
970
971 case TGSI_OPCODE_CONT:
972 return FALSE;
973
974 case TGSI_OPCODE_EMIT:
975 return FALSE;
976 break;
977
978 case TGSI_OPCODE_ENDPRIM:
979 return FALSE;
980 break;
981
982 case TGSI_OPCODE_NOP:
983 break;
984
985 default:
986 return FALSE;
987 }
988
989 if (info->num_dst) {
990 emit_store(bld, inst, 0, dst0);
991 }
992
993 return TRUE;
994 }
995
996
997 void
998 lp_build_tgsi_aos(LLVMBuilderRef builder,
999 const struct tgsi_token *tokens,
1000 struct lp_type type,
1001 LLVMValueRef consts_ptr,
1002 const LLVMValueRef *inputs,
1003 LLVMValueRef *outputs,
1004 struct lp_build_sampler_aos *sampler,
1005 const struct tgsi_shader_info *info)
1006 {
1007 struct lp_build_tgsi_aos_context bld;
1008 struct tgsi_parse_context parse;
1009 uint num_immediates = 0;
1010 uint num_instructions = 0;
1011 unsigned chan;
1012 int pc = 0;
1013
1014 /* Setup build context */
1015 memset(&bld, 0, sizeof bld);
1016 lp_build_context_init(&bld.base, builder, type);
1017 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1018 bld.inputs = inputs;
1019 bld.outputs = outputs;
1020 bld.consts_ptr = consts_ptr;
1021 bld.sampler = sampler;
1022 bld.indirect_files = info->indirect_files;
1023 bld.instructions = (struct tgsi_full_instruction *)
1024 MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
1025 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1026
1027 if (!bld.instructions) {
1028 return;
1029 }
1030
1031 tgsi_parse_init(&parse, tokens);
1032
1033 while (!tgsi_parse_end_of_tokens(&parse)) {
1034 tgsi_parse_token(&parse);
1035
1036 switch(parse.FullToken.Token.Type) {
1037 case TGSI_TOKEN_TYPE_DECLARATION:
1038 /* Inputs already interpolated */
1039 emit_declaration(&bld, &parse.FullToken.FullDeclaration);
1040 break;
1041
1042 case TGSI_TOKEN_TYPE_INSTRUCTION:
1043 {
1044 /* save expanded instruction */
1045 if (num_instructions == bld.max_instructions) {
1046 struct tgsi_full_instruction *instructions;
1047 instructions = REALLOC(bld.instructions,
1048 bld.max_instructions
1049 * sizeof(struct tgsi_full_instruction),
1050 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1051 * sizeof(struct tgsi_full_instruction));
1052 if (!instructions) {
1053 break;
1054 }
1055 bld.instructions = instructions;
1056 bld.max_instructions += LP_MAX_INSTRUCTIONS;
1057 }
1058
1059 memcpy(bld.instructions + num_instructions,
1060 &parse.FullToken.FullInstruction,
1061 sizeof(bld.instructions[0]));
1062
1063 num_instructions++;
1064 }
1065
1066 break;
1067
1068 case TGSI_TOKEN_TYPE_IMMEDIATE:
1069 /* simply copy the immediate values into the next immediates[] slot */
1070 {
1071 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1072 float rgba[4];
1073 assert(size <= 4);
1074 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1075 for (chan = 0; chan < size; ++chan) {
1076 rgba[chan] = parse.FullToken.FullImmediate.u[chan].Float;
1077 }
1078 for (chan = size; chan < 4; ++chan) {
1079 rgba[chan] = 0.0f;
1080 }
1081 bld.immediates[num_immediates] =
1082 lp_build_const_aos(type,
1083 rgba[0], rgba[1], rgba[2], rgba[3],
1084 NULL);
1085 num_immediates++;
1086 }
1087 break;
1088
1089 case TGSI_TOKEN_TYPE_PROPERTY:
1090 break;
1091
1092 default:
1093 assert(0);
1094 }
1095 }
1096
1097 while (pc != -1) {
1098 struct tgsi_full_instruction *instr = bld.instructions + pc;
1099 const struct tgsi_opcode_info *opcode_info =
1100 tgsi_get_opcode_info(instr->Instruction.Opcode);
1101 if (!emit_instruction(&bld, instr, opcode_info, &pc))
1102 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1103 opcode_info->mnemonic);
1104 }
1105
1106 if (0) {
1107 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1108 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1109 debug_printf("11111111111111111111111111111 \n");
1110 tgsi_dump(tokens, 0);
1111 lp_debug_dump_value(function);
1112 debug_printf("2222222222222222222222222222 \n");
1113 }
1114 tgsi_parse_free(&parse);
1115
1116 if (0) {
1117 LLVMModuleRef module = LLVMGetGlobalParent(
1118 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
1119 LLVMDumpModule(module);
1120 }
1121
1122 FREE(bld.instructions);
1123 }
1124