gallivm/llvmpipe: squash merge of the llvm-context branch
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
62 #define LP_MAX_INSTRUCTIONS 256
63
64
65 struct lp_build_tgsi_aos_context
66 {
67 struct lp_build_context base;
68
69 /* Builder for integer masks and indices */
70 struct lp_build_context int_bld;
71
72 /*
73 * AoS swizzle used:
74 * - swizzles[0] = red index
75 * - swizzles[1] = green index
76 * - swizzles[2] = blue index
77 * - swizzles[3] = alpha index
78 */
79 unsigned char swizzles[4];
80 unsigned char inv_swizzles[4];
81
82 LLVMValueRef consts_ptr;
83 const LLVMValueRef *inputs;
84 LLVMValueRef *outputs;
85
86 struct lp_build_sampler_aos *sampler;
87
88 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
89 LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
90 LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
91 LLVMValueRef preds[LP_MAX_TGSI_PREDS];
92
93 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
94 * set in the indirect_files field.
95 * The temps[] array above is unused then.
96 */
97 LLVMValueRef temps_array;
98
99 /** bitmask indicating which register files are accessed indirectly */
100 unsigned indirect_files;
101
102 struct tgsi_full_instruction *instructions;
103 uint max_instructions;
104 };
105
106
107 /**
108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
109 * ordering.
110 */
111 static LLVMValueRef
112 swizzle_aos(struct lp_build_tgsi_aos_context *bld,
113 LLVMValueRef a,
114 unsigned swizzle_x,
115 unsigned swizzle_y,
116 unsigned swizzle_z,
117 unsigned swizzle_w)
118 {
119 unsigned char swizzles[4];
120
121 assert(swizzle_x < 4);
122 assert(swizzle_y < 4);
123 assert(swizzle_z < 4);
124 assert(swizzle_w < 4);
125
126 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
127 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
128 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
129 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
130
131 return lp_build_swizzle_aos(&bld->base, a, swizzles);
132 }
133
134
135 static LLVMValueRef
136 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
137 LLVMValueRef a,
138 unsigned chan)
139 {
140 chan = bld->swizzles[chan];
141 return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
142 }
143
144
145 /**
146 * Register fetch.
147 */
148 static LLVMValueRef
149 emit_fetch(
150 struct lp_build_tgsi_aos_context *bld,
151 const struct tgsi_full_instruction *inst,
152 unsigned src_op)
153 {
154 struct lp_type type = bld->base.type;
155 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
156 LLVMValueRef res;
157 unsigned chan;
158
159 assert(!reg->Register.Indirect);
160
161 /*
162 * Fetch the from the register file.
163 */
164
165 switch (reg->Register.File) {
166 case TGSI_FILE_CONSTANT:
167 /*
168 * Get the constants components
169 */
170
171 res = bld->base.undef;
172 for (chan = 0; chan < 4; ++chan) {
173 LLVMValueRef index;
174 LLVMValueRef scalar_ptr;
175 LLVMValueRef scalar;
176 LLVMValueRef swizzle;
177
178 index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan);
179
180 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
181 &index, 1, "");
182
183 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
184
185 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
186
187 /*
188 * NOTE: constants array is always assumed to be RGBA
189 */
190
191 swizzle = lp_build_const_int32(bld->base.gallivm, chan);
192
193 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
194 }
195
196 /*
197 * Broadcast the first quaternion to all others.
198 *
199 * XXX: could be factored into a reusable function.
200 */
201
202 if (type.length > 4) {
203 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
204 unsigned i;
205
206 for (chan = 0; chan < 4; ++chan) {
207 shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan);
208 }
209
210 for (i = 4; i < type.length; ++i) {
211 shuffles[i] = shuffles[i % 4];
212 }
213
214 res = LLVMBuildShuffleVector(bld->base.builder,
215 res, bld->base.undef,
216 LLVMConstVector(shuffles, type.length),
217 "");
218 }
219 break;
220
221 case TGSI_FILE_IMMEDIATE:
222 res = bld->immediates[reg->Register.Index];
223 assert(res);
224 break;
225
226 case TGSI_FILE_INPUT:
227 res = bld->inputs[reg->Register.Index];
228 assert(res);
229 break;
230
231 case TGSI_FILE_TEMPORARY:
232 {
233 LLVMValueRef temp_ptr;
234 temp_ptr = bld->temps[reg->Register.Index];
235 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
236 if (!res)
237 return bld->base.undef;
238 }
239 break;
240
241 default:
242 assert(0 && "invalid src register in emit_fetch()");
243 return bld->base.undef;
244 }
245
246 /*
247 * Apply sign modifier.
248 */
249
250 if (reg->Register.Absolute) {
251 res = lp_build_abs(&bld->base, res);
252 }
253
254 if(reg->Register.Negate) {
255 res = lp_build_negate(&bld->base, res);
256 }
257
258 /*
259 * Swizzle the argument
260 */
261
262 res = swizzle_aos(bld, res,
263 reg->Register.SwizzleX,
264 reg->Register.SwizzleY,
265 reg->Register.SwizzleZ,
266 reg->Register.SwizzleW);
267
268 return res;
269 }
270
271
272 /**
273 * Register store.
274 */
275 static void
276 emit_store(
277 struct lp_build_tgsi_aos_context *bld,
278 const struct tgsi_full_instruction *inst,
279 unsigned index,
280 LLVMValueRef value)
281 {
282 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
283 LLVMValueRef mask = NULL;
284 LLVMValueRef ptr;
285
286 /*
287 * Saturate the value
288 */
289
290 switch (inst->Instruction.Saturate) {
291 case TGSI_SAT_NONE:
292 break;
293
294 case TGSI_SAT_ZERO_ONE:
295 value = lp_build_max(&bld->base, value, bld->base.zero);
296 value = lp_build_min(&bld->base, value, bld->base.one);
297 break;
298
299 case TGSI_SAT_MINUS_PLUS_ONE:
300 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
301 value = lp_build_min(&bld->base, value, bld->base.one);
302 break;
303
304 default:
305 assert(0);
306 }
307
308 /*
309 * Translate the register file
310 */
311
312 assert(!reg->Register.Indirect);
313
314 switch (reg->Register.File) {
315 case TGSI_FILE_OUTPUT:
316 ptr = bld->outputs[reg->Register.Index];
317 break;
318
319 case TGSI_FILE_TEMPORARY:
320 ptr = bld->temps[reg->Register.Index];
321 break;
322
323 case TGSI_FILE_ADDRESS:
324 ptr = bld->addr[reg->Indirect.Index];
325 break;
326
327 case TGSI_FILE_PREDICATE:
328 ptr = bld->preds[reg->Register.Index];
329 break;
330
331 default:
332 assert(0);
333 return;
334 }
335
336 /*
337 * Predicate
338 */
339
340 if (inst->Instruction.Predicate) {
341 LLVMValueRef pred;
342
343 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
344
345 pred = LLVMBuildLoad(bld->base.builder,
346 bld->preds[inst->Predicate.Index], "");
347
348 /*
349 * Convert the value to an integer mask.
350 */
351 pred = lp_build_compare(bld->base.gallivm,
352 bld->base.type,
353 PIPE_FUNC_NOTEQUAL,
354 pred,
355 bld->base.zero);
356
357 if (inst->Predicate.Negate) {
358 pred = LLVMBuildNot(bld->base.builder, pred, "");
359 }
360
361 pred = swizzle_aos(bld, pred,
362 inst->Predicate.SwizzleX,
363 inst->Predicate.SwizzleY,
364 inst->Predicate.SwizzleZ,
365 inst->Predicate.SwizzleW);
366
367 if (mask) {
368 mask = LLVMBuildAnd(bld->base.builder, mask, pred, "");
369 } else {
370 mask = pred;
371 }
372 }
373
374 /*
375 * Writemask
376 */
377
378 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
379 LLVMValueRef writemask;
380
381 writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type,
382 reg->Register.WriteMask);
383
384 if (mask) {
385 mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
386 } else {
387 mask = writemask;
388 }
389 }
390
391 if (mask) {
392 LLVMValueRef orig_value;
393
394 orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
395 value = lp_build_select(&bld->base,
396 mask, value, orig_value);
397 }
398
399 LLVMBuildStore(bld->base.builder, value, ptr);
400 }
401
402
403 /**
404 * High-level instruction translators.
405 */
406
407 static LLVMValueRef
408 emit_tex(struct lp_build_tgsi_aos_context *bld,
409 const struct tgsi_full_instruction *inst,
410 enum lp_build_tex_modifier modifier)
411 {
412 unsigned target;
413 unsigned unit;
414 LLVMValueRef coords;
415 LLVMValueRef ddx;
416 LLVMValueRef ddy;
417
418 if (!bld->sampler) {
419 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
420 return bld->base.undef;
421 }
422
423 target = inst->Texture.Texture;
424
425 coords = emit_fetch( bld, inst, 0 );
426
427 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
428 ddx = emit_fetch( bld, inst, 1 );
429 ddy = emit_fetch( bld, inst, 2 );
430 unit = inst->Src[3].Register.Index;
431 } else {
432 #if 0
433 ddx = lp_build_ddx( &bld->base, coords );
434 ddy = lp_build_ddy( &bld->base, coords );
435 #else
436 /* TODO */
437 ddx = bld->base.one;
438 ddy = bld->base.one;
439 #endif
440 unit = inst->Src[1].Register.Index;
441 }
442
443 return bld->sampler->emit_fetch_texel(bld->sampler,
444 &bld->base,
445 target, unit,
446 coords, ddx, ddy,
447 modifier);
448 }
449
450
451 static void
452 emit_declaration(
453 struct lp_build_tgsi_aos_context *bld,
454 const struct tgsi_full_declaration *decl)
455 {
456 struct gallivm_state *gallivm = bld->base.gallivm;
457 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type);
458
459 unsigned first = decl->Range.First;
460 unsigned last = decl->Range.Last;
461 unsigned idx;
462
463 for (idx = first; idx <= last; ++idx) {
464 switch (decl->Declaration.File) {
465 case TGSI_FILE_TEMPORARY:
466 assert(idx < LP_MAX_TGSI_TEMPS);
467 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
468 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
469 bld->temps_array = lp_build_array_alloca(bld->base.gallivm,
470 vec_type, array_size, "");
471 } else {
472 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
473 }
474 break;
475
476 case TGSI_FILE_OUTPUT:
477 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
478 break;
479
480 case TGSI_FILE_ADDRESS:
481 assert(idx < LP_MAX_TGSI_ADDRS);
482 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
483 break;
484
485 case TGSI_FILE_PREDICATE:
486 assert(idx < LP_MAX_TGSI_PREDS);
487 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
488 break;
489
490 default:
491 /* don't need to declare other vars */
492 break;
493 }
494 }
495 }
496
497
498 /**
499 * Emit LLVM for one TGSI instruction.
500 * \param return TRUE for success, FALSE otherwise
501 */
502 static boolean
503 emit_instruction(
504 struct lp_build_tgsi_aos_context *bld,
505 const struct tgsi_full_instruction *inst,
506 const struct tgsi_opcode_info *info,
507 int *pc)
508 {
509 LLVMValueRef src0, src1, src2;
510 LLVMValueRef tmp0, tmp1;
511 LLVMValueRef dst0 = NULL;
512
513 /*
514 * Stores and write masks are handled in a general fashion after the long
515 * instruction opcode switch statement.
516 *
517 * Although not stricitly necessary, we avoid generating instructions for
518 * channels which won't be stored, in cases where's that easy. For some
519 * complex instructions, like texture sampling, it is more convenient to
520 * assume a full writemask and then let LLVM optimization passes eliminate
521 * redundant code.
522 */
523
524 (*pc)++;
525
526 assert(info->num_dst <= 1);
527 if (info->num_dst) {
528 dst0 = bld->base.undef;
529 }
530
531 switch (inst->Instruction.Opcode) {
532 case TGSI_OPCODE_ARL:
533 src0 = emit_fetch(bld, inst, 0);
534 dst0 = lp_build_floor(&bld->base, src0);
535 break;
536
537 case TGSI_OPCODE_MOV:
538 dst0 = emit_fetch(bld, inst, 0);
539 break;
540
541 case TGSI_OPCODE_LIT:
542 return FALSE;
543
544 case TGSI_OPCODE_RCP:
545 /* TGSI_OPCODE_RECIP */
546 src0 = emit_fetch(bld, inst, 0);
547 dst0 = lp_build_rcp(&bld->base, src0);
548 break;
549
550 case TGSI_OPCODE_RSQ:
551 /* TGSI_OPCODE_RECIPSQRT */
552 src0 = emit_fetch(bld, inst, 0);
553 tmp0 = lp_build_abs(&bld->base, src0);
554 dst0 = lp_build_rsqrt(&bld->base, tmp0);
555 break;
556
557 case TGSI_OPCODE_EXP:
558 return FALSE;
559
560 case TGSI_OPCODE_LOG:
561 return FALSE;
562
563 case TGSI_OPCODE_MUL:
564 src0 = emit_fetch(bld, inst, 0);
565 src1 = emit_fetch(bld, inst, 1);
566 dst0 = lp_build_mul(&bld->base, src0, src1);
567 break;
568
569 case TGSI_OPCODE_ADD:
570 src0 = emit_fetch(bld, inst, 0);
571 src1 = emit_fetch(bld, inst, 1);
572 dst0 = lp_build_add(&bld->base, src0, src1);
573 break;
574
575 case TGSI_OPCODE_DP3:
576 /* TGSI_OPCODE_DOT3 */
577 return FALSE;
578
579 case TGSI_OPCODE_DP4:
580 /* TGSI_OPCODE_DOT4 */
581 return FALSE;
582
583 case TGSI_OPCODE_DST:
584 return FALSE;
585
586 case TGSI_OPCODE_MIN:
587 src0 = emit_fetch(bld, inst, 0);
588 src1 = emit_fetch(bld, inst, 1);
589 dst0 = lp_build_max(&bld->base, src0, src1);
590 break;
591
592 case TGSI_OPCODE_MAX:
593 src0 = emit_fetch(bld, inst, 0);
594 src1 = emit_fetch(bld, inst, 1);
595 dst0 = lp_build_max(&bld->base, src0, src1);
596 break;
597
598 case TGSI_OPCODE_SLT:
599 /* TGSI_OPCODE_SETLT */
600 src0 = emit_fetch(bld, inst, 0);
601 src1 = emit_fetch(bld, inst, 1);
602 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
603 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
604 break;
605
606 case TGSI_OPCODE_SGE:
607 /* TGSI_OPCODE_SETGE */
608 src0 = emit_fetch(bld, inst, 0);
609 src1 = emit_fetch(bld, inst, 1);
610 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
611 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
612 break;
613
614 case TGSI_OPCODE_MAD:
615 /* TGSI_OPCODE_MADD */
616 src0 = emit_fetch(bld, inst, 0);
617 src1 = emit_fetch(bld, inst, 1);
618 src2 = emit_fetch(bld, inst, 2);
619 tmp0 = lp_build_mul(&bld->base, src0, src1);
620 dst0 = lp_build_add(&bld->base, tmp0, src2);
621 break;
622
623 case TGSI_OPCODE_SUB:
624 src0 = emit_fetch(bld, inst, 0);
625 src1 = emit_fetch(bld, inst, 1);
626 dst0 = lp_build_sub(&bld->base, src0, src1);
627 break;
628
629 case TGSI_OPCODE_LRP:
630 src0 = emit_fetch(bld, inst, 0);
631 src1 = emit_fetch(bld, inst, 1);
632 src2 = emit_fetch(bld, inst, 2);
633 tmp0 = lp_build_sub(&bld->base, src1, src2);
634 tmp0 = lp_build_mul(&bld->base, src0, tmp0);
635 dst0 = lp_build_add(&bld->base, tmp0, src2);
636 break;
637
638 case TGSI_OPCODE_CND:
639 src0 = emit_fetch(bld, inst, 0);
640 src1 = emit_fetch(bld, inst, 1);
641 src2 = emit_fetch(bld, inst, 2);
642 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
643 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
644 dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
645 break;
646
647 case TGSI_OPCODE_DP2A:
648 return FALSE;
649
650 case TGSI_OPCODE_FRC:
651 src0 = emit_fetch(bld, inst, 0);
652 tmp0 = lp_build_floor(&bld->base, src0);
653 dst0 = lp_build_sub(&bld->base, src0, tmp0);
654 break;
655
656 case TGSI_OPCODE_CLAMP:
657 src0 = emit_fetch(bld, inst, 0);
658 src1 = emit_fetch(bld, inst, 1);
659 src2 = emit_fetch(bld, inst, 2);
660 tmp0 = lp_build_max(&bld->base, src0, src1);
661 dst0 = lp_build_min(&bld->base, tmp0, src2);
662 break;
663
664 case TGSI_OPCODE_FLR:
665 src0 = emit_fetch(bld, inst, 0);
666 dst0 = lp_build_floor(&bld->base, src0);
667 break;
668
669 case TGSI_OPCODE_ROUND:
670 src0 = emit_fetch(bld, inst, 0);
671 dst0 = lp_build_round(&bld->base, src0);
672 break;
673
674 case TGSI_OPCODE_EX2:
675 src0 = emit_fetch(bld, inst, 0);
676 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
677 dst0 = lp_build_exp2(&bld->base, tmp0);
678 break;
679
680 case TGSI_OPCODE_LG2:
681 src0 = emit_fetch(bld, inst, 0);
682 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
683 dst0 = lp_build_log2(&bld->base, tmp0);
684 break;
685
686 case TGSI_OPCODE_POW:
687 src0 = emit_fetch(bld, inst, 0);
688 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
689 src1 = emit_fetch(bld, inst, 1);
690 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
691 dst0 = lp_build_pow(&bld->base, src0, src1);
692 break;
693
694 case TGSI_OPCODE_XPD:
695 return FALSE;
696
697 case TGSI_OPCODE_ABS:
698 src0 = emit_fetch(bld, inst, 0);
699 dst0 = lp_build_abs(&bld->base, src0);
700 break;
701
702 case TGSI_OPCODE_RCC:
703 /* deprecated? */
704 assert(0);
705 return FALSE;
706
707 case TGSI_OPCODE_DPH:
708 return FALSE;
709
710 case TGSI_OPCODE_COS:
711 src0 = emit_fetch(bld, inst, 0);
712 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
713 dst0 = lp_build_cos(&bld->base, tmp0);
714 break;
715
716 case TGSI_OPCODE_DDX:
717 return FALSE;
718
719 case TGSI_OPCODE_DDY:
720 return FALSE;
721
722 case TGSI_OPCODE_KILP:
723 /* predicated kill */
724 return FALSE;
725
726 case TGSI_OPCODE_KIL:
727 /* conditional kill */
728 return FALSE;
729
730 case TGSI_OPCODE_PK2H:
731 return FALSE;
732 break;
733
734 case TGSI_OPCODE_PK2US:
735 return FALSE;
736 break;
737
738 case TGSI_OPCODE_PK4B:
739 return FALSE;
740 break;
741
742 case TGSI_OPCODE_PK4UB:
743 return FALSE;
744
745 case TGSI_OPCODE_RFL:
746 return FALSE;
747
748 case TGSI_OPCODE_SEQ:
749 src0 = emit_fetch(bld, inst, 0);
750 src1 = emit_fetch(bld, inst, 1);
751 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
752 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
753 break;
754
755 case TGSI_OPCODE_SFL:
756 dst0 = bld->base.zero;
757 break;
758
759 case TGSI_OPCODE_SGT:
760 src0 = emit_fetch(bld, inst, 0);
761 src1 = emit_fetch(bld, inst, 1);
762 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
763 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
764 break;
765
766 case TGSI_OPCODE_SIN:
767 src0 = emit_fetch(bld, inst, 0);
768 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
769 dst0 = lp_build_sin(&bld->base, tmp0);
770 break;
771
772 case TGSI_OPCODE_SLE:
773 src0 = emit_fetch(bld, inst, 0);
774 src1 = emit_fetch(bld, inst, 1);
775 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
776 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
777 break;
778
779 case TGSI_OPCODE_SNE:
780 src0 = emit_fetch(bld, inst, 0);
781 src1 = emit_fetch(bld, inst, 1);
782 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
783 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
784 break;
785
786 case TGSI_OPCODE_STR:
787 dst0 = bld->base.one;
788 break;
789
790 case TGSI_OPCODE_TEX:
791 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
792 break;
793
794 case TGSI_OPCODE_TXD:
795 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
796 break;
797
798 case TGSI_OPCODE_UP2H:
799 /* deprecated */
800 assert (0);
801 return FALSE;
802 break;
803
804 case TGSI_OPCODE_UP2US:
805 /* deprecated */
806 assert(0);
807 return FALSE;
808 break;
809
810 case TGSI_OPCODE_UP4B:
811 /* deprecated */
812 assert(0);
813 return FALSE;
814 break;
815
816 case TGSI_OPCODE_UP4UB:
817 /* deprecated */
818 assert(0);
819 return FALSE;
820 break;
821
822 case TGSI_OPCODE_X2D:
823 /* deprecated? */
824 assert(0);
825 return FALSE;
826 break;
827
828 case TGSI_OPCODE_ARA:
829 /* deprecated */
830 assert(0);
831 return FALSE;
832 break;
833
834 case TGSI_OPCODE_ARR:
835 src0 = emit_fetch(bld, inst, 0);
836 dst0 = lp_build_round(&bld->base, src0);
837 break;
838
839 case TGSI_OPCODE_BRA:
840 /* deprecated */
841 assert(0);
842 return FALSE;
843 break;
844
845 case TGSI_OPCODE_CAL:
846 return FALSE;
847
848 case TGSI_OPCODE_RET:
849 return FALSE;
850
851 case TGSI_OPCODE_END:
852 *pc = -1;
853 break;
854
855 case TGSI_OPCODE_SSG:
856 /* TGSI_OPCODE_SGN */
857 tmp0 = emit_fetch(bld, inst, 0);
858 dst0 = lp_build_sgn(&bld->base, tmp0);
859 break;
860
861 case TGSI_OPCODE_CMP:
862 src0 = emit_fetch(bld, inst, 0);
863 src1 = emit_fetch(bld, inst, 1);
864 src2 = emit_fetch(bld, inst, 2);
865 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
866 dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
867 break;
868
869 case TGSI_OPCODE_SCS:
870 return FALSE;
871
872 case TGSI_OPCODE_TXB:
873 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
874 break;
875
876 case TGSI_OPCODE_NRM:
877 /* fall-through */
878 case TGSI_OPCODE_NRM4:
879 return FALSE;
880
881 case TGSI_OPCODE_DIV:
882 /* deprecated */
883 assert(0);
884 return FALSE;
885 break;
886
887 case TGSI_OPCODE_DP2:
888 return FALSE;
889
890 case TGSI_OPCODE_TXL:
891 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
892 break;
893
894 case TGSI_OPCODE_TXP:
895 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
896 break;
897
898 case TGSI_OPCODE_BRK:
899 return FALSE;
900
901 case TGSI_OPCODE_IF:
902 return FALSE;
903
904 case TGSI_OPCODE_BGNLOOP:
905 return FALSE;
906
907 case TGSI_OPCODE_BGNSUB:
908 return FALSE;
909
910 case TGSI_OPCODE_ELSE:
911 return FALSE;
912
913 case TGSI_OPCODE_ENDIF:
914 return FALSE;
915
916 case TGSI_OPCODE_ENDLOOP:
917 return FALSE;
918
919 case TGSI_OPCODE_ENDSUB:
920 return FALSE;
921
922 case TGSI_OPCODE_PUSHA:
923 /* deprecated? */
924 assert(0);
925 return FALSE;
926 break;
927
928 case TGSI_OPCODE_POPA:
929 /* deprecated? */
930 assert(0);
931 return FALSE;
932 break;
933
934 case TGSI_OPCODE_CEIL:
935 src0 = emit_fetch(bld, inst, 0);
936 dst0 = lp_build_ceil(&bld->base, src0);
937 break;
938
939 case TGSI_OPCODE_I2F:
940 /* deprecated? */
941 assert(0);
942 return FALSE;
943 break;
944
945 case TGSI_OPCODE_NOT:
946 /* deprecated? */
947 assert(0);
948 return FALSE;
949 break;
950
951 case TGSI_OPCODE_TRUNC:
952 src0 = emit_fetch(bld, inst, 0);
953 dst0 = lp_build_trunc(&bld->base, src0);
954 break;
955
956 case TGSI_OPCODE_SHL:
957 /* deprecated? */
958 assert(0);
959 return FALSE;
960 break;
961
962 case TGSI_OPCODE_ISHR:
963 /* deprecated? */
964 assert(0);
965 return FALSE;
966 break;
967
968 case TGSI_OPCODE_AND:
969 /* deprecated? */
970 assert(0);
971 return FALSE;
972 break;
973
974 case TGSI_OPCODE_OR:
975 /* deprecated? */
976 assert(0);
977 return FALSE;
978 break;
979
980 case TGSI_OPCODE_MOD:
981 /* deprecated? */
982 assert(0);
983 return FALSE;
984 break;
985
986 case TGSI_OPCODE_XOR:
987 /* deprecated? */
988 assert(0);
989 return FALSE;
990 break;
991
992 case TGSI_OPCODE_SAD:
993 /* deprecated? */
994 assert(0);
995 return FALSE;
996 break;
997
998 case TGSI_OPCODE_TXF:
999 /* deprecated? */
1000 assert(0);
1001 return FALSE;
1002 break;
1003
1004 case TGSI_OPCODE_TXQ:
1005 /* deprecated? */
1006 assert(0);
1007 return FALSE;
1008 break;
1009
1010 case TGSI_OPCODE_CONT:
1011 return FALSE;
1012
1013 case TGSI_OPCODE_EMIT:
1014 return FALSE;
1015 break;
1016
1017 case TGSI_OPCODE_ENDPRIM:
1018 return FALSE;
1019 break;
1020
1021 case TGSI_OPCODE_NOP:
1022 break;
1023
1024 default:
1025 return FALSE;
1026 }
1027
1028 if (info->num_dst) {
1029 emit_store(bld, inst, 0, dst0);
1030 }
1031
1032 return TRUE;
1033 }
1034
1035
1036 void
1037 lp_build_tgsi_aos(struct gallivm_state *gallivm,
1038 const struct tgsi_token *tokens,
1039 struct lp_type type,
1040 const unsigned char swizzles[4],
1041 LLVMValueRef consts_ptr,
1042 const LLVMValueRef *inputs,
1043 LLVMValueRef *outputs,
1044 struct lp_build_sampler_aos *sampler,
1045 const struct tgsi_shader_info *info)
1046 {
1047 struct lp_build_tgsi_aos_context bld;
1048 struct tgsi_parse_context parse;
1049 uint num_immediates = 0;
1050 uint num_instructions = 0;
1051 unsigned chan;
1052 int pc = 0;
1053
1054 /* Setup build context */
1055 memset(&bld, 0, sizeof bld);
1056 lp_build_context_init(&bld.base, gallivm, type);
1057 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1058
1059 for (chan = 0; chan < 4; ++chan) {
1060 bld.swizzles[chan] = swizzles[chan];
1061 bld.inv_swizzles[swizzles[chan]] = chan;
1062 }
1063
1064 bld.inputs = inputs;
1065 bld.outputs = outputs;
1066 bld.consts_ptr = consts_ptr;
1067 bld.sampler = sampler;
1068 bld.indirect_files = info->indirect_files;
1069 bld.instructions = (struct tgsi_full_instruction *)
1070 MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
1071 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1072
1073 if (!bld.instructions) {
1074 return;
1075 }
1076
1077 tgsi_parse_init(&parse, tokens);
1078
1079 while (!tgsi_parse_end_of_tokens(&parse)) {
1080 tgsi_parse_token(&parse);
1081
1082 switch(parse.FullToken.Token.Type) {
1083 case TGSI_TOKEN_TYPE_DECLARATION:
1084 /* Inputs already interpolated */
1085 emit_declaration(&bld, &parse.FullToken.FullDeclaration);
1086 break;
1087
1088 case TGSI_TOKEN_TYPE_INSTRUCTION:
1089 {
1090 /* save expanded instruction */
1091 if (num_instructions == bld.max_instructions) {
1092 struct tgsi_full_instruction *instructions;
1093 instructions = REALLOC(bld.instructions,
1094 bld.max_instructions
1095 * sizeof(struct tgsi_full_instruction),
1096 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1097 * sizeof(struct tgsi_full_instruction));
1098 if (!instructions) {
1099 break;
1100 }
1101 bld.instructions = instructions;
1102 bld.max_instructions += LP_MAX_INSTRUCTIONS;
1103 }
1104
1105 memcpy(bld.instructions + num_instructions,
1106 &parse.FullToken.FullInstruction,
1107 sizeof(bld.instructions[0]));
1108
1109 num_instructions++;
1110 }
1111
1112 break;
1113
1114 case TGSI_TOKEN_TYPE_IMMEDIATE:
1115 /* simply copy the immediate values into the next immediates[] slot */
1116 {
1117 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1118 float imm[4];
1119 assert(size <= 4);
1120 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1121 for (chan = 0; chan < 4; ++chan) {
1122 imm[chan] = 0.0f;
1123 }
1124 for (chan = 0; chan < size; ++chan) {
1125 unsigned swizzle = bld.swizzles[chan];
1126 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1127 }
1128 bld.immediates[num_immediates] =
1129 lp_build_const_aos(gallivm, type,
1130 imm[0], imm[1], imm[2], imm[3],
1131 NULL);
1132 num_immediates++;
1133 }
1134 break;
1135
1136 case TGSI_TOKEN_TYPE_PROPERTY:
1137 break;
1138
1139 default:
1140 assert(0);
1141 }
1142 }
1143
1144 while (pc != -1) {
1145 struct tgsi_full_instruction *instr = bld.instructions + pc;
1146 const struct tgsi_opcode_info *opcode_info =
1147 tgsi_get_opcode_info(instr->Instruction.Opcode);
1148 if (!emit_instruction(&bld, instr, opcode_info, &pc))
1149 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1150 opcode_info->mnemonic);
1151 }
1152
1153 if (0) {
1154 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1155 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1156 debug_printf("11111111111111111111111111111 \n");
1157 tgsi_dump(tokens, 0);
1158 lp_debug_dump_value(function);
1159 debug_printf("2222222222222222222222222222 \n");
1160 }
1161 tgsi_parse_free(&parse);
1162
1163 if (0) {
1164 LLVMModuleRef module = LLVMGetGlobalParent(
1165 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
1166 LLVMDumpModule(module);
1167 }
1168
1169 FREE(bld.instructions);
1170 }
1171