gallivm: Allocate TEMP/OUT arrays only once.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
62 #define LP_MAX_INSTRUCTIONS 256
63
64
65 struct lp_build_tgsi_aos_context
66 {
67 struct lp_build_context base;
68
69 /* Builder for integer masks and indices */
70 struct lp_build_context int_bld;
71
72 /*
73 * AoS swizzle used:
74 * - swizzles[0] = red index
75 * - swizzles[1] = green index
76 * - swizzles[2] = blue index
77 * - swizzles[3] = alpha index
78 */
79 unsigned char swizzles[4];
80 unsigned char inv_swizzles[4];
81
82 LLVMValueRef consts_ptr;
83 const LLVMValueRef *inputs;
84 LLVMValueRef *outputs;
85
86 struct lp_build_sampler_aos *sampler;
87
88 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
89 LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
90 LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
91 LLVMValueRef preds[LP_MAX_TGSI_PREDS];
92
93 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
94 * set in the indirect_files field.
95 * The temps[] array above is unused then.
96 */
97 LLVMValueRef temps_array;
98
99 /** bitmask indicating which register files are accessed indirectly */
100 unsigned indirect_files;
101
102 struct tgsi_full_instruction *instructions;
103 uint max_instructions;
104 };
105
106
107 /**
108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
109 * ordering.
110 */
111 static LLVMValueRef
112 swizzle_aos(struct lp_build_tgsi_aos_context *bld,
113 LLVMValueRef a,
114 unsigned swizzle_x,
115 unsigned swizzle_y,
116 unsigned swizzle_z,
117 unsigned swizzle_w)
118 {
119 unsigned char swizzles[4];
120
121 assert(swizzle_x < 4);
122 assert(swizzle_y < 4);
123 assert(swizzle_z < 4);
124 assert(swizzle_w < 4);
125
126 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
127 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
128 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
129 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
130
131 return lp_build_swizzle_aos(&bld->base, a, swizzles);
132 }
133
134
135 static LLVMValueRef
136 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
137 LLVMValueRef a,
138 unsigned chan)
139 {
140 chan = bld->swizzles[chan];
141 return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
142 }
143
144
145 /**
146 * Register fetch.
147 */
148 static LLVMValueRef
149 emit_fetch(
150 struct lp_build_tgsi_aos_context *bld,
151 const struct tgsi_full_instruction *inst,
152 unsigned src_op)
153 {
154 struct lp_type type = bld->base.type;
155 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
156 LLVMValueRef res;
157 unsigned chan;
158
159 assert(!reg->Register.Indirect);
160
161 /*
162 * Fetch the from the register file.
163 */
164
165 switch (reg->Register.File) {
166 case TGSI_FILE_CONSTANT:
167 /*
168 * Get the constants components
169 */
170
171 res = bld->base.undef;
172 for (chan = 0; chan < 4; ++chan) {
173 LLVMValueRef index;
174 LLVMValueRef scalar_ptr;
175 LLVMValueRef scalar;
176 LLVMValueRef swizzle;
177
178 index = LLVMConstInt(LLVMInt32Type(),
179 reg->Register.Index*4 + chan,
180 0);
181
182 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
183 &index, 1, "");
184
185 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
186
187 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
188
189 /*
190 * NOTE: constants array is always assumed to be RGBA
191 */
192
193 swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0);
194
195 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
196 }
197
198 /*
199 * Broadcast the first quaternion to all others.
200 *
201 * XXX: could be factored into a reusable function.
202 */
203
204 if (type.length > 4) {
205 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
206 unsigned i;
207
208 for (chan = 0; chan < 4; ++chan) {
209 shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
210 }
211
212 for (i = 4; i < type.length; ++i) {
213 shuffles[i] = shuffles[i % 4];
214 }
215
216 res = LLVMBuildShuffleVector(bld->base.builder,
217 res, bld->base.undef,
218 LLVMConstVector(shuffles, type.length),
219 "");
220 }
221 break;
222
223 case TGSI_FILE_IMMEDIATE:
224 res = bld->immediates[reg->Register.Index];
225 assert(res);
226 break;
227
228 case TGSI_FILE_INPUT:
229 res = bld->inputs[reg->Register.Index];
230 assert(res);
231 break;
232
233 case TGSI_FILE_TEMPORARY:
234 {
235 LLVMValueRef temp_ptr;
236 temp_ptr = bld->temps[reg->Register.Index];
237 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
238 if (!res)
239 return bld->base.undef;
240 }
241 break;
242
243 default:
244 assert(0 && "invalid src register in emit_fetch()");
245 return bld->base.undef;
246 }
247
248 /*
249 * Apply sign modifier.
250 */
251
252 if (reg->Register.Absolute) {
253 res = lp_build_abs(&bld->base, res);
254 }
255
256 if(reg->Register.Negate) {
257 res = lp_build_negate(&bld->base, res);
258 }
259
260 /*
261 * Swizzle the argument
262 */
263
264 res = swizzle_aos(bld, res,
265 reg->Register.SwizzleX,
266 reg->Register.SwizzleY,
267 reg->Register.SwizzleZ,
268 reg->Register.SwizzleW);
269
270 return res;
271 }
272
273
274 /**
275 * Register store.
276 */
277 static void
278 emit_store(
279 struct lp_build_tgsi_aos_context *bld,
280 const struct tgsi_full_instruction *inst,
281 unsigned index,
282 LLVMValueRef value)
283 {
284 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
285 LLVMValueRef mask = NULL;
286 LLVMValueRef ptr;
287
288 /*
289 * Saturate the value
290 */
291
292 switch (inst->Instruction.Saturate) {
293 case TGSI_SAT_NONE:
294 break;
295
296 case TGSI_SAT_ZERO_ONE:
297 value = lp_build_max(&bld->base, value, bld->base.zero);
298 value = lp_build_min(&bld->base, value, bld->base.one);
299 break;
300
301 case TGSI_SAT_MINUS_PLUS_ONE:
302 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
303 value = lp_build_min(&bld->base, value, bld->base.one);
304 break;
305
306 default:
307 assert(0);
308 }
309
310 /*
311 * Translate the register file
312 */
313
314 assert(!reg->Register.Indirect);
315
316 switch (reg->Register.File) {
317 case TGSI_FILE_OUTPUT:
318 ptr = bld->outputs[reg->Register.Index];
319 break;
320
321 case TGSI_FILE_TEMPORARY:
322 ptr = bld->temps[reg->Register.Index];
323 break;
324
325 case TGSI_FILE_ADDRESS:
326 ptr = bld->addr[reg->Indirect.Index];
327 break;
328
329 case TGSI_FILE_PREDICATE:
330 ptr = bld->preds[reg->Register.Index];
331 break;
332
333 default:
334 assert(0);
335 return;
336 }
337
338 /*
339 * Predicate
340 */
341
342 if (inst->Instruction.Predicate) {
343 LLVMValueRef pred;
344
345 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
346
347 pred = LLVMBuildLoad(bld->base.builder,
348 bld->preds[inst->Predicate.Index], "");
349
350 /*
351 * Convert the value to an integer mask.
352 */
353 pred = lp_build_compare(bld->base.builder,
354 bld->base.type,
355 PIPE_FUNC_NOTEQUAL,
356 pred,
357 bld->base.zero);
358
359 if (inst->Predicate.Negate) {
360 pred = LLVMBuildNot(bld->base.builder, pred, "");
361 }
362
363 pred = swizzle_aos(bld, pred,
364 inst->Predicate.SwizzleX,
365 inst->Predicate.SwizzleY,
366 inst->Predicate.SwizzleZ,
367 inst->Predicate.SwizzleW);
368
369 if (mask) {
370 mask = LLVMBuildAnd(bld->base.builder, mask, pred, "");
371 } else {
372 mask = pred;
373 }
374 }
375
376 /*
377 * Writemask
378 */
379
380 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
381 LLVMValueRef writemask;
382
383 writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask);
384
385 if (mask) {
386 mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
387 } else {
388 mask = writemask;
389 }
390 }
391
392 if (mask) {
393 LLVMValueRef orig_value;
394
395 orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
396 value = lp_build_select(&bld->base,
397 mask, value, orig_value);
398 }
399
400 LLVMBuildStore(bld->base.builder, value, ptr);
401 }
402
403
404 /**
405 * High-level instruction translators.
406 */
407
408 static LLVMValueRef
409 emit_tex(struct lp_build_tgsi_aos_context *bld,
410 const struct tgsi_full_instruction *inst,
411 enum lp_build_tex_modifier modifier)
412 {
413 unsigned target;
414 unsigned unit;
415 LLVMValueRef coords;
416 LLVMValueRef ddx;
417 LLVMValueRef ddy;
418
419 if (!bld->sampler) {
420 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
421 return bld->base.undef;
422 }
423
424 target = inst->Texture.Texture;
425
426 coords = emit_fetch( bld, inst, 0 );
427
428 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
429 ddx = emit_fetch( bld, inst, 1 );
430 ddy = emit_fetch( bld, inst, 2 );
431 unit = inst->Src[3].Register.Index;
432 } else {
433 #if 0
434 ddx = lp_build_ddx( &bld->base, coords );
435 ddy = lp_build_ddy( &bld->base, coords );
436 #else
437 /* TODO */
438 ddx = bld->base.one;
439 ddy = bld->base.one;
440 #endif
441 unit = inst->Src[1].Register.Index;
442 }
443
444 return bld->sampler->emit_fetch_texel(bld->sampler,
445 &bld->base,
446 target, unit,
447 coords, ddx, ddy,
448 modifier);
449 }
450
451
452 static void
453 emit_declaration(
454 struct lp_build_tgsi_aos_context *bld,
455 const struct tgsi_full_declaration *decl)
456 {
457 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
458
459 unsigned first = decl->Range.First;
460 unsigned last = decl->Range.Last;
461 unsigned idx;
462
463 for (idx = first; idx <= last; ++idx) {
464 switch (decl->Declaration.File) {
465 case TGSI_FILE_TEMPORARY:
466 assert(idx < LP_MAX_TGSI_TEMPS);
467 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
468 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
469 last + 1, 0);
470 bld->temps_array = lp_build_array_alloca(bld->base.builder,
471 vec_type, array_size, "");
472 } else {
473 bld->temps[idx] = lp_build_alloca(bld->base.builder,
474 vec_type, "");
475 }
476 break;
477
478 case TGSI_FILE_OUTPUT:
479 bld->outputs[idx] = lp_build_alloca(bld->base.builder,
480 vec_type, "");
481 break;
482
483 case TGSI_FILE_ADDRESS:
484 assert(idx < LP_MAX_TGSI_ADDRS);
485 bld->addr[idx] = lp_build_alloca(bld->base.builder,
486 vec_type, "");
487 break;
488
489 case TGSI_FILE_PREDICATE:
490 assert(idx < LP_MAX_TGSI_PREDS);
491 bld->preds[idx] = lp_build_alloca(bld->base.builder,
492 vec_type, "");
493 break;
494
495 default:
496 /* don't need to declare other vars */
497 break;
498 }
499 }
500 }
501
502
503 /**
504 * Emit LLVM for one TGSI instruction.
505 * \param return TRUE for success, FALSE otherwise
506 */
507 static boolean
508 emit_instruction(
509 struct lp_build_tgsi_aos_context *bld,
510 const struct tgsi_full_instruction *inst,
511 const struct tgsi_opcode_info *info,
512 int *pc)
513 {
514 LLVMValueRef src0, src1, src2;
515 LLVMValueRef tmp0, tmp1;
516 LLVMValueRef dst0 = NULL;
517
518 /*
519 * Stores and write masks are handled in a general fashion after the long
520 * instruction opcode switch statement.
521 *
522 * Although not stricitly necessary, we avoid generating instructions for
523 * channels which won't be stored, in cases where's that easy. For some
524 * complex instructions, like texture sampling, it is more convenient to
525 * assume a full writemask and then let LLVM optimization passes eliminate
526 * redundant code.
527 */
528
529 (*pc)++;
530
531 assert(info->num_dst <= 1);
532 if (info->num_dst) {
533 dst0 = bld->base.undef;
534 }
535
536 switch (inst->Instruction.Opcode) {
537 case TGSI_OPCODE_ARL:
538 src0 = emit_fetch(bld, inst, 0);
539 dst0 = lp_build_floor(&bld->base, src0);
540 break;
541
542 case TGSI_OPCODE_MOV:
543 dst0 = emit_fetch(bld, inst, 0);
544 break;
545
546 case TGSI_OPCODE_LIT:
547 return FALSE;
548
549 case TGSI_OPCODE_RCP:
550 /* TGSI_OPCODE_RECIP */
551 src0 = emit_fetch(bld, inst, 0);
552 dst0 = lp_build_rcp(&bld->base, src0);
553 break;
554
555 case TGSI_OPCODE_RSQ:
556 /* TGSI_OPCODE_RECIPSQRT */
557 src0 = emit_fetch(bld, inst, 0);
558 tmp0 = lp_build_abs(&bld->base, src0);
559 dst0 = lp_build_rsqrt(&bld->base, tmp0);
560 break;
561
562 case TGSI_OPCODE_EXP:
563 return FALSE;
564
565 case TGSI_OPCODE_LOG:
566 return FALSE;
567
568 case TGSI_OPCODE_MUL:
569 src0 = emit_fetch(bld, inst, 0);
570 src1 = emit_fetch(bld, inst, 1);
571 dst0 = lp_build_mul(&bld->base, src0, src1);
572 break;
573
574 case TGSI_OPCODE_ADD:
575 src0 = emit_fetch(bld, inst, 0);
576 src1 = emit_fetch(bld, inst, 1);
577 dst0 = lp_build_add(&bld->base, src0, src1);
578 break;
579
580 case TGSI_OPCODE_DP3:
581 /* TGSI_OPCODE_DOT3 */
582 return FALSE;
583
584 case TGSI_OPCODE_DP4:
585 /* TGSI_OPCODE_DOT4 */
586 return FALSE;
587
588 case TGSI_OPCODE_DST:
589 return FALSE;
590
591 case TGSI_OPCODE_MIN:
592 src0 = emit_fetch(bld, inst, 0);
593 src1 = emit_fetch(bld, inst, 1);
594 dst0 = lp_build_max(&bld->base, src0, src1);
595 break;
596
597 case TGSI_OPCODE_MAX:
598 src0 = emit_fetch(bld, inst, 0);
599 src1 = emit_fetch(bld, inst, 1);
600 dst0 = lp_build_max(&bld->base, src0, src1);
601 break;
602
603 case TGSI_OPCODE_SLT:
604 /* TGSI_OPCODE_SETLT */
605 src0 = emit_fetch(bld, inst, 0);
606 src1 = emit_fetch(bld, inst, 1);
607 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
608 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
609 break;
610
611 case TGSI_OPCODE_SGE:
612 /* TGSI_OPCODE_SETGE */
613 src0 = emit_fetch(bld, inst, 0);
614 src1 = emit_fetch(bld, inst, 1);
615 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
616 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
617 break;
618
619 case TGSI_OPCODE_MAD:
620 /* TGSI_OPCODE_MADD */
621 src0 = emit_fetch(bld, inst, 0);
622 src1 = emit_fetch(bld, inst, 1);
623 src2 = emit_fetch(bld, inst, 2);
624 tmp0 = lp_build_mul(&bld->base, src0, src1);
625 dst0 = lp_build_add(&bld->base, tmp0, src2);
626 break;
627
628 case TGSI_OPCODE_SUB:
629 src0 = emit_fetch(bld, inst, 0);
630 src1 = emit_fetch(bld, inst, 1);
631 dst0 = lp_build_sub(&bld->base, src0, src1);
632 break;
633
634 case TGSI_OPCODE_LRP:
635 src0 = emit_fetch(bld, inst, 0);
636 src1 = emit_fetch(bld, inst, 1);
637 src2 = emit_fetch(bld, inst, 2);
638 tmp0 = lp_build_sub(&bld->base, src1, src2);
639 tmp0 = lp_build_mul(&bld->base, src0, tmp0);
640 dst0 = lp_build_add(&bld->base, tmp0, src2);
641 break;
642
643 case TGSI_OPCODE_CND:
644 src0 = emit_fetch(bld, inst, 0);
645 src1 = emit_fetch(bld, inst, 1);
646 src2 = emit_fetch(bld, inst, 2);
647 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
648 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
649 dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
650 break;
651
652 case TGSI_OPCODE_DP2A:
653 return FALSE;
654
655 case TGSI_OPCODE_FRC:
656 src0 = emit_fetch(bld, inst, 0);
657 tmp0 = lp_build_floor(&bld->base, src0);
658 dst0 = lp_build_sub(&bld->base, src0, tmp0);
659 break;
660
661 case TGSI_OPCODE_CLAMP:
662 src0 = emit_fetch(bld, inst, 0);
663 src1 = emit_fetch(bld, inst, 1);
664 src2 = emit_fetch(bld, inst, 2);
665 tmp0 = lp_build_max(&bld->base, src0, src1);
666 dst0 = lp_build_min(&bld->base, tmp0, src2);
667 break;
668
669 case TGSI_OPCODE_FLR:
670 src0 = emit_fetch(bld, inst, 0);
671 dst0 = lp_build_floor(&bld->base, src0);
672 break;
673
674 case TGSI_OPCODE_ROUND:
675 src0 = emit_fetch(bld, inst, 0);
676 dst0 = lp_build_round(&bld->base, src0);
677 break;
678
679 case TGSI_OPCODE_EX2:
680 src0 = emit_fetch(bld, inst, 0);
681 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
682 dst0 = lp_build_exp2(&bld->base, tmp0);
683 break;
684
685 case TGSI_OPCODE_LG2:
686 src0 = emit_fetch(bld, inst, 0);
687 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
688 dst0 = lp_build_log2(&bld->base, tmp0);
689 break;
690
691 case TGSI_OPCODE_POW:
692 src0 = emit_fetch(bld, inst, 0);
693 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
694 src1 = emit_fetch(bld, inst, 1);
695 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
696 dst0 = lp_build_pow(&bld->base, src0, src1);
697 break;
698
699 case TGSI_OPCODE_XPD:
700 return FALSE;
701
702 case TGSI_OPCODE_ABS:
703 src0 = emit_fetch(bld, inst, 0);
704 dst0 = lp_build_abs(&bld->base, src0);
705 break;
706
707 case TGSI_OPCODE_RCC:
708 /* deprecated? */
709 assert(0);
710 return FALSE;
711
712 case TGSI_OPCODE_DPH:
713 return FALSE;
714
715 case TGSI_OPCODE_COS:
716 src0 = emit_fetch(bld, inst, 0);
717 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
718 dst0 = lp_build_cos(&bld->base, tmp0);
719 break;
720
721 case TGSI_OPCODE_DDX:
722 return FALSE;
723
724 case TGSI_OPCODE_DDY:
725 return FALSE;
726
727 case TGSI_OPCODE_KILP:
728 /* predicated kill */
729 return FALSE;
730
731 case TGSI_OPCODE_KIL:
732 /* conditional kill */
733 return FALSE;
734
735 case TGSI_OPCODE_PK2H:
736 return FALSE;
737 break;
738
739 case TGSI_OPCODE_PK2US:
740 return FALSE;
741 break;
742
743 case TGSI_OPCODE_PK4B:
744 return FALSE;
745 break;
746
747 case TGSI_OPCODE_PK4UB:
748 return FALSE;
749
750 case TGSI_OPCODE_RFL:
751 return FALSE;
752
753 case TGSI_OPCODE_SEQ:
754 src0 = emit_fetch(bld, inst, 0);
755 src1 = emit_fetch(bld, inst, 1);
756 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
757 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
758 break;
759
760 case TGSI_OPCODE_SFL:
761 dst0 = bld->base.zero;
762 break;
763
764 case TGSI_OPCODE_SGT:
765 src0 = emit_fetch(bld, inst, 0);
766 src1 = emit_fetch(bld, inst, 1);
767 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
768 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
769 break;
770
771 case TGSI_OPCODE_SIN:
772 src0 = emit_fetch(bld, inst, 0);
773 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
774 dst0 = lp_build_sin(&bld->base, tmp0);
775 break;
776
777 case TGSI_OPCODE_SLE:
778 src0 = emit_fetch(bld, inst, 0);
779 src1 = emit_fetch(bld, inst, 1);
780 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
781 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
782 break;
783
784 case TGSI_OPCODE_SNE:
785 src0 = emit_fetch(bld, inst, 0);
786 src1 = emit_fetch(bld, inst, 1);
787 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
788 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
789 break;
790
791 case TGSI_OPCODE_STR:
792 dst0 = bld->base.one;
793 break;
794
795 case TGSI_OPCODE_TEX:
796 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
797 break;
798
799 case TGSI_OPCODE_TXD:
800 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
801 break;
802
803 case TGSI_OPCODE_UP2H:
804 /* deprecated */
805 assert (0);
806 return FALSE;
807 break;
808
809 case TGSI_OPCODE_UP2US:
810 /* deprecated */
811 assert(0);
812 return FALSE;
813 break;
814
815 case TGSI_OPCODE_UP4B:
816 /* deprecated */
817 assert(0);
818 return FALSE;
819 break;
820
821 case TGSI_OPCODE_UP4UB:
822 /* deprecated */
823 assert(0);
824 return FALSE;
825 break;
826
827 case TGSI_OPCODE_X2D:
828 /* deprecated? */
829 assert(0);
830 return FALSE;
831 break;
832
833 case TGSI_OPCODE_ARA:
834 /* deprecated */
835 assert(0);
836 return FALSE;
837 break;
838
839 case TGSI_OPCODE_ARR:
840 src0 = emit_fetch(bld, inst, 0);
841 dst0 = lp_build_round(&bld->base, src0);
842 break;
843
844 case TGSI_OPCODE_BRA:
845 /* deprecated */
846 assert(0);
847 return FALSE;
848 break;
849
850 case TGSI_OPCODE_CAL:
851 return FALSE;
852
853 case TGSI_OPCODE_RET:
854 return FALSE;
855
856 case TGSI_OPCODE_END:
857 *pc = -1;
858 break;
859
860 case TGSI_OPCODE_SSG:
861 /* TGSI_OPCODE_SGN */
862 tmp0 = emit_fetch(bld, inst, 0);
863 dst0 = lp_build_sgn(&bld->base, tmp0);
864 break;
865
866 case TGSI_OPCODE_CMP:
867 src0 = emit_fetch(bld, inst, 0);
868 src1 = emit_fetch(bld, inst, 1);
869 src2 = emit_fetch(bld, inst, 2);
870 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
871 dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
872 break;
873
874 case TGSI_OPCODE_SCS:
875 return FALSE;
876
877 case TGSI_OPCODE_TXB:
878 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
879 break;
880
881 case TGSI_OPCODE_NRM:
882 /* fall-through */
883 case TGSI_OPCODE_NRM4:
884 return FALSE;
885
886 case TGSI_OPCODE_DIV:
887 /* deprecated */
888 assert(0);
889 return FALSE;
890 break;
891
892 case TGSI_OPCODE_DP2:
893 return FALSE;
894
895 case TGSI_OPCODE_TXL:
896 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
897 break;
898
899 case TGSI_OPCODE_TXP:
900 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
901 break;
902
903 case TGSI_OPCODE_BRK:
904 return FALSE;
905
906 case TGSI_OPCODE_IF:
907 return FALSE;
908
909 case TGSI_OPCODE_BGNLOOP:
910 return FALSE;
911
912 case TGSI_OPCODE_BGNSUB:
913 return FALSE;
914
915 case TGSI_OPCODE_ELSE:
916 return FALSE;
917
918 case TGSI_OPCODE_ENDIF:
919 return FALSE;
920
921 case TGSI_OPCODE_ENDLOOP:
922 return FALSE;
923
924 case TGSI_OPCODE_ENDSUB:
925 return FALSE;
926
927 case TGSI_OPCODE_PUSHA:
928 /* deprecated? */
929 assert(0);
930 return FALSE;
931 break;
932
933 case TGSI_OPCODE_POPA:
934 /* deprecated? */
935 assert(0);
936 return FALSE;
937 break;
938
939 case TGSI_OPCODE_CEIL:
940 src0 = emit_fetch(bld, inst, 0);
941 dst0 = lp_build_ceil(&bld->base, src0);
942 break;
943
944 case TGSI_OPCODE_I2F:
945 /* deprecated? */
946 assert(0);
947 return FALSE;
948 break;
949
950 case TGSI_OPCODE_NOT:
951 /* deprecated? */
952 assert(0);
953 return FALSE;
954 break;
955
956 case TGSI_OPCODE_TRUNC:
957 src0 = emit_fetch(bld, inst, 0);
958 dst0 = lp_build_trunc(&bld->base, src0);
959 break;
960
961 case TGSI_OPCODE_SHL:
962 /* deprecated? */
963 assert(0);
964 return FALSE;
965 break;
966
967 case TGSI_OPCODE_ISHR:
968 /* deprecated? */
969 assert(0);
970 return FALSE;
971 break;
972
973 case TGSI_OPCODE_AND:
974 /* deprecated? */
975 assert(0);
976 return FALSE;
977 break;
978
979 case TGSI_OPCODE_OR:
980 /* deprecated? */
981 assert(0);
982 return FALSE;
983 break;
984
985 case TGSI_OPCODE_MOD:
986 /* deprecated? */
987 assert(0);
988 return FALSE;
989 break;
990
991 case TGSI_OPCODE_XOR:
992 /* deprecated? */
993 assert(0);
994 return FALSE;
995 break;
996
997 case TGSI_OPCODE_SAD:
998 /* deprecated? */
999 assert(0);
1000 return FALSE;
1001 break;
1002
1003 case TGSI_OPCODE_TXF:
1004 /* deprecated? */
1005 assert(0);
1006 return FALSE;
1007 break;
1008
1009 case TGSI_OPCODE_TXQ:
1010 /* deprecated? */
1011 assert(0);
1012 return FALSE;
1013 break;
1014
1015 case TGSI_OPCODE_CONT:
1016 return FALSE;
1017
1018 case TGSI_OPCODE_EMIT:
1019 return FALSE;
1020 break;
1021
1022 case TGSI_OPCODE_ENDPRIM:
1023 return FALSE;
1024 break;
1025
1026 case TGSI_OPCODE_NOP:
1027 break;
1028
1029 default:
1030 return FALSE;
1031 }
1032
1033 if (info->num_dst) {
1034 emit_store(bld, inst, 0, dst0);
1035 }
1036
1037 return TRUE;
1038 }
1039
1040
1041 void
1042 lp_build_tgsi_aos(LLVMBuilderRef builder,
1043 const struct tgsi_token *tokens,
1044 struct lp_type type,
1045 const unsigned char swizzles[4],
1046 LLVMValueRef consts_ptr,
1047 const LLVMValueRef *inputs,
1048 LLVMValueRef *outputs,
1049 struct lp_build_sampler_aos *sampler,
1050 const struct tgsi_shader_info *info)
1051 {
1052 struct lp_build_tgsi_aos_context bld;
1053 struct tgsi_parse_context parse;
1054 uint num_immediates = 0;
1055 uint num_instructions = 0;
1056 unsigned chan;
1057 int pc = 0;
1058
1059 /* Setup build context */
1060 memset(&bld, 0, sizeof bld);
1061 lp_build_context_init(&bld.base, builder, type);
1062 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1063
1064 for (chan = 0; chan < 4; ++chan) {
1065 bld.swizzles[chan] = swizzles[chan];
1066 bld.inv_swizzles[swizzles[chan]] = chan;
1067 }
1068
1069 bld.inputs = inputs;
1070 bld.outputs = outputs;
1071 bld.consts_ptr = consts_ptr;
1072 bld.sampler = sampler;
1073 bld.indirect_files = info->indirect_files;
1074 bld.instructions = (struct tgsi_full_instruction *)
1075 MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
1076 bld.max_instructions = LP_MAX_INSTRUCTIONS;
1077
1078 if (!bld.instructions) {
1079 return;
1080 }
1081
1082 tgsi_parse_init(&parse, tokens);
1083
1084 while (!tgsi_parse_end_of_tokens(&parse)) {
1085 tgsi_parse_token(&parse);
1086
1087 switch(parse.FullToken.Token.Type) {
1088 case TGSI_TOKEN_TYPE_DECLARATION:
1089 /* Inputs already interpolated */
1090 emit_declaration(&bld, &parse.FullToken.FullDeclaration);
1091 break;
1092
1093 case TGSI_TOKEN_TYPE_INSTRUCTION:
1094 {
1095 /* save expanded instruction */
1096 if (num_instructions == bld.max_instructions) {
1097 struct tgsi_full_instruction *instructions;
1098 instructions = REALLOC(bld.instructions,
1099 bld.max_instructions
1100 * sizeof(struct tgsi_full_instruction),
1101 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1102 * sizeof(struct tgsi_full_instruction));
1103 if (!instructions) {
1104 break;
1105 }
1106 bld.instructions = instructions;
1107 bld.max_instructions += LP_MAX_INSTRUCTIONS;
1108 }
1109
1110 memcpy(bld.instructions + num_instructions,
1111 &parse.FullToken.FullInstruction,
1112 sizeof(bld.instructions[0]));
1113
1114 num_instructions++;
1115 }
1116
1117 break;
1118
1119 case TGSI_TOKEN_TYPE_IMMEDIATE:
1120 /* simply copy the immediate values into the next immediates[] slot */
1121 {
1122 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1123 float imm[4];
1124 assert(size <= 4);
1125 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1126 for (chan = 0; chan < 4; ++chan) {
1127 imm[chan] = 0.0f;
1128 }
1129 for (chan = 0; chan < size; ++chan) {
1130 unsigned swizzle = bld.swizzles[chan];
1131 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1132 }
1133 bld.immediates[num_immediates] =
1134 lp_build_const_aos(type,
1135 imm[0], imm[1], imm[2], imm[3],
1136 NULL);
1137 num_immediates++;
1138 }
1139 break;
1140
1141 case TGSI_TOKEN_TYPE_PROPERTY:
1142 break;
1143
1144 default:
1145 assert(0);
1146 }
1147 }
1148
1149 while (pc != -1) {
1150 struct tgsi_full_instruction *instr = bld.instructions + pc;
1151 const struct tgsi_opcode_info *opcode_info =
1152 tgsi_get_opcode_info(instr->Instruction.Opcode);
1153 if (!emit_instruction(&bld, instr, opcode_info, &pc))
1154 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1155 opcode_info->mnemonic);
1156 }
1157
1158 if (0) {
1159 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1160 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1161 debug_printf("11111111111111111111111111111 \n");
1162 tgsi_dump(tokens, 0);
1163 lp_debug_dump_value(function);
1164 debug_printf("2222222222222222222222222222 \n");
1165 }
1166 tgsi_parse_free(&parse);
1167
1168 if (0) {
1169 LLVMModuleRef module = LLVMGetGlobalParent(
1170 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
1171 LLVMDumpModule(module);
1172 }
1173
1174 FREE(bld.instructions);
1175 }
1176