1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * TGSI to LLVM IR translation -- AoS.
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
37 * @author Jose Fonseca <jfonseca@vmware.com>
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
/*
 * Initial capacity (in instructions) of the saved-instruction array built by
 * lp_build_tgsi_aos(), and also the number of slots added each time that
 * array has to grow.
 */
#define LP_MAX_INSTRUCTIONS 256
/**
 * State carried around while translating one TGSI shader to AoS LLVM IR.
 *
 * The whole structure is zero-filled by lp_build_tgsi_aos() before use.
 */
struct lp_build_tgsi_aos_context
{
   /* Builder for the shader's vector type */
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   /* Pointer that constant-file scalars are loaded from (index*4 + chan) */
   LLVMValueRef consts_ptr;
   /* Caller-supplied input values, indexed by register index */
   const LLVMValueRef *inputs;
   /* Caller-supplied array; emit_declaration() fills in storage per output */
   LLVMValueRef *outputs;

   /* Texture sampling code generator supplied by the caller */
   struct lp_build_sampler_aos *sampler;

   /* Immediate values, in the order they appear in the token stream */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
   /* Storage pointers for temporary, address and predicate registers */
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   /* Heap copy of the shader's instructions, and its allocated capacity */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
/**
 * Register fetch.
 *
 * Builds the IR that reads source operand \p src_op of \p inst, applies the
 * absolute/negate modifiers and finally the operand's swizzle.
 *
 * Indirect addressing is not supported (asserted).
 *
 * \param bld     translation context
 * \param inst    instruction whose operand is fetched
 * \param src_op  index into inst->Src[]
 * \return the fetched vector, or bld->base.undef for an unsupported register
 *         file or invalid swizzle
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   struct lp_type type = bld->base.type;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   unsigned char swizzles[4];
   LLVMValueRef res;
   unsigned chan;

   assert(!reg->Register.Indirect);

   /*
    * Fetch the value from the register file.
    */

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      /*
       * Get the constants components
       */

      res = bld->base.undef;
      for (chan = 0; chan < 4; ++chan) {
         LLVMValueRef index;
         LLVMValueRef scalar_ptr;
         LLVMValueRef scalar;

         /* Constants are laid out as 4 consecutive scalars per register. */
         index = LLVMConstInt(LLVMInt32Type(),
                              reg->Register.Index*4 + chan,
                              0);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");

         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);

         /* Insert the scalar into lane `chan` of the result vector. */
         index = LLVMConstInt(LLVMInt32Type(), chan, 0);

         res = LLVMBuildInsertElement(bld->base.builder, res, scalar, index, "");
      }

      /*
       * Broadcast the first quaternion to all others.
       *
       * XXX: could be factored into a reusable function.
       */

      if (type.length > 4) {
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
         unsigned i;

         for (chan = 0; chan < 4; ++chan) {
            shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
         }

         /* Lanes 4.. repeat the 0,1,2,3 pattern. */
         for (i = 4; i < type.length; ++i) {
            shuffles[i] = shuffles[i % 4];
         }

         res = LLVMBuildShuffleVector(bld->base.builder,
                                      res, bld->base.undef,
                                      LLVMConstVector(shuffles, type.length),
                                      "");
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index];
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index];
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef temp_ptr;
         temp_ptr = bld->temps[reg->Register.Index];
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         /* NOTE(review): the guard on this return is not visible in the
          * extract; presumably it checks for a failed load -- confirm. */
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /*
    * Apply sign modifier.
    */

   if (reg->Register.Absolute) {
      res = lp_build_abs(&bld->base, res);
   }

   if(reg->Register.Negate) {
      res = lp_build_negate(&bld->base, res);
   }

   /*
    * Swizzle the argument
    */

   for (chan = 0; chan < 4; ++chan) {
      const unsigned swizzle =
         tgsi_util_get_full_src_register_swizzle(reg, chan);
      /* NOTE(review): exact range check is elided in the extract; a valid
       * swizzle selects one of four components -- confirm. */
      if (swizzle > 3) {
         assert(0 && "invalid swizzle in emit_fetch()");
         return bld->base.undef;
      }
      swizzles[chan] = swizzle;
   }

   res = lp_build_swizzle_aos(&bld->base, res, swizzles);

   return res;
}
233 struct lp_build_tgsi_aos_context
*bld
,
234 const struct tgsi_full_instruction
*inst
,
238 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[index
];
239 LLVMValueRef mask
= NULL
;
246 switch (inst
->Instruction
.Saturate
) {
250 case TGSI_SAT_ZERO_ONE
:
251 value
= lp_build_max(&bld
->base
, value
, bld
->base
.zero
);
252 value
= lp_build_min(&bld
->base
, value
, bld
->base
.one
);
255 case TGSI_SAT_MINUS_PLUS_ONE
:
256 value
= lp_build_max(&bld
->base
, value
, lp_build_const_vec(bld
->base
.type
, -1.0));
257 value
= lp_build_min(&bld
->base
, value
, bld
->base
.one
);
265 * Translate the register file
268 assert(!reg
->Register
.Indirect
);
270 switch (reg
->Register
.File
) {
271 case TGSI_FILE_OUTPUT
:
272 ptr
= bld
->outputs
[reg
->Register
.Index
];
275 case TGSI_FILE_TEMPORARY
:
276 ptr
= bld
->temps
[reg
->Register
.Index
];
279 case TGSI_FILE_ADDRESS
:
280 ptr
= bld
->addr
[reg
->Indirect
.Index
];
283 case TGSI_FILE_PREDICATE
:
284 ptr
= bld
->preds
[reg
->Register
.Index
];
296 if (inst
->Instruction
.Predicate
) {
297 unsigned char swizzles
[4];
300 assert(inst
->Predicate
.Index
< LP_MAX_TGSI_PREDS
);
302 pred
= LLVMBuildLoad(bld
->base
.builder
,
303 bld
->preds
[inst
->Predicate
.Index
], "");
306 * Convert the value to an integer mask.
308 pred
= lp_build_compare(bld
->base
.builder
,
314 if (inst
->Predicate
.Negate
) {
315 pred
= LLVMBuildNot(bld
->base
.builder
, pred
, "");
318 swizzles
[0] = inst
->Predicate
.SwizzleX
;
319 swizzles
[1] = inst
->Predicate
.SwizzleY
;
320 swizzles
[2] = inst
->Predicate
.SwizzleZ
;
321 swizzles
[3] = inst
->Predicate
.SwizzleW
;
323 pred
= lp_build_swizzle_aos(&bld
->base
, pred
, swizzles
);
326 mask
= LLVMBuildAnd(bld
->base
.builder
, mask
, pred
, "");
336 if (reg
->Register
.WriteMask
!= TGSI_WRITEMASK_XYZW
) {
337 LLVMValueRef writemask
;
339 writemask
= lp_build_const_mask_aos(bld
->base
.type
, reg
->Register
.WriteMask
);
342 mask
= LLVMBuildAnd(bld
->base
.builder
, mask
, writemask
, "");
349 LLVMValueRef orig_value
;
351 orig_value
= LLVMBuildLoad(bld
->base
.builder
, ptr
, "");
352 value
= lp_build_select(&bld
->base
,
353 mask
, value
, orig_value
);
356 LLVMBuildStore(bld
->base
.builder
, value
, ptr
);
361 * High-level instruction translators.
/**
 * Emit a texture fetch through the caller-supplied sampler generator.
 *
 * Coordinates come from source operand 0.  For explicit-derivative sampling
 * the derivatives come from operands 1 and 2 and the sampler unit from
 * operand 3; otherwise the sampler unit comes from operand 1.
 *
 * \param bld       translation context
 * \param inst      texture instruction
 * \param modifier  which texture opcode variant is being emitted
 * \return the sampled texel, or bld->base.undef (with a warning) when no
 *         sampler generator is available
 */
static LLVMValueRef
emit_tex(struct lp_build_tgsi_aos_context *bld,
         const struct tgsi_full_instruction *inst,
         enum lp_build_tex_modifier modifier)
{
   unsigned target;
   unsigned unit;
   LLVMValueRef coords;
   LLVMValueRef ddx;
   LLVMValueRef ddy;

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      return bld->base.undef;
   }

   target = inst->Texture.Texture;

   coords = emit_fetch( bld, inst , 0 );

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      ddx = emit_fetch( bld, inst , 1 );
      ddy = emit_fetch( bld, inst , 2 );
      unit = inst->Src[3].Register.Index;
   }  else {
      /* NOTE(review): the preprocessor lines and the fallback assignments
       * around the lp_build_ddx/ddy calls are elided in the extract;
       * reconstructed with implicit derivatives disabled -- confirm. */
#if 0
      ddx = lp_build_ddx( &bld->base, coords );
      ddy = lp_build_ddy( &bld->base, coords );
#else
      /* TODO */
      ddx = bld->base.one;
      ddy = bld->base.one;
#endif
      unit = inst->Src[1].Register.Index;
   }

   /* NOTE(review): argument list after bld->sampler is elided in the
    * extract -- confirm against the lp_build_sampler_aos interface. */
   return bld->sampler->emit_fetch_texel(bld->sampler,
                                         &bld->base,
                                         target, unit,
                                         coords, ddx, ddy,
                                         modifier);
}
/**
 * Allocate storage for every register covered by a TGSI declaration.
 *
 * Temporaries, outputs, address and predicate registers each get stack
 * storage of the shader's vector type; other register files need nothing.
 * When temporaries are accessed indirectly a single array is allocated
 * instead of one slot per register.
 *
 * \param bld   translation context; temps/outputs/addr/preds are filled in
 * \param decl  declaration token giving the register file and index range
 */
static void
emit_declaration(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_declaration *decl)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx;

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
            /* NOTE(review): the array length argument is elided in the
             * extract; reconstructed as covering indices 0..last -- confirm. */
            LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                                   last + 1, 0);
            bld->temps_array = lp_build_array_alloca(bld->base.builder,
                                                     vec_type, array_size, "");
         } else {
            bld->temps[idx] = lp_build_alloca(bld->base.builder,
                                              vec_type, "");
         }
         break;

      /* NOTE(review): the trailing (type, name) arguments of the three
       * lp_build_alloca() calls below are elided in the extract; the value
       * names used here are placeholders -- confirm. */
      case TGSI_FILE_OUTPUT:
         bld->outputs[idx] = lp_build_alloca(bld->base.builder,
                                             vec_type, "");
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         bld->addr[idx] = lp_build_alloca(bld->base.builder,
                                          vec_type, "");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         bld->preds[idx] = lp_build_alloca(bld->base.builder,
                                           vec_type, "");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
460 * Emit LLVM for one TGSI instruction.
461 * \param return TRUE for success, FALSE otherwise
465 struct lp_build_tgsi_aos_context
*bld
,
466 const struct tgsi_full_instruction
*inst
,
467 const struct tgsi_opcode_info
*info
,
470 LLVMValueRef src0
, src1
, src2
;
471 LLVMValueRef tmp0
, tmp1
;
475 * Stores and write masks are handled in a general fashion after the long
476 * instruction opcode switch statement.
478 * Although not stricitly necessary, we avoid generating instructions for
479 * channels which won't be stored, in cases where's that easy. For some
480 * complex instructions, like texture sampling, it is more convenient to
481 * assume a full writemask and then let LLVM optimization passes eliminate
487 assert(info
->num_dst
<= 1);
489 dst0
= bld
->base
.undef
;
492 switch (inst
->Instruction
.Opcode
) {
493 case TGSI_OPCODE_ARL
:
494 src0
= emit_fetch(bld
, inst
, 0);
495 dst0
= lp_build_floor(&bld
->base
, src0
);
498 case TGSI_OPCODE_MOV
:
499 dst0
= emit_fetch(bld
, inst
, 0);
502 case TGSI_OPCODE_LIT
:
505 case TGSI_OPCODE_RCP
:
506 /* TGSI_OPCODE_RECIP */
507 src0
= emit_fetch(bld
, inst
, 0);
508 dst0
= lp_build_rcp(&bld
->base
, src0
);
511 case TGSI_OPCODE_RSQ
:
512 /* TGSI_OPCODE_RECIPSQRT */
513 src0
= emit_fetch(bld
, inst
, 0);
514 tmp0
= lp_build_abs(&bld
->base
, src0
);
515 dst0
= lp_build_rsqrt(&bld
->base
, tmp0
);
518 case TGSI_OPCODE_EXP
:
521 case TGSI_OPCODE_LOG
:
524 case TGSI_OPCODE_MUL
:
525 src0
= emit_fetch(bld
, inst
, 0);
526 src1
= emit_fetch(bld
, inst
, 1);
527 dst0
= lp_build_mul(&bld
->base
, src0
, src1
);
530 case TGSI_OPCODE_ADD
:
531 src0
= emit_fetch(bld
, inst
, 0);
532 src1
= emit_fetch(bld
, inst
, 1);
533 dst0
= lp_build_add(&bld
->base
, src0
, src1
);
536 case TGSI_OPCODE_DP3
:
537 /* TGSI_OPCODE_DOT3 */
540 case TGSI_OPCODE_DP4
:
541 /* TGSI_OPCODE_DOT4 */
544 case TGSI_OPCODE_DST
:
547 case TGSI_OPCODE_MIN
:
548 src0
= emit_fetch(bld
, inst
, 0);
549 src1
= emit_fetch(bld
, inst
, 1);
550 dst0
= lp_build_max(&bld
->base
, src0
, src1
);
553 case TGSI_OPCODE_MAX
:
554 src0
= emit_fetch(bld
, inst
, 0);
555 src1
= emit_fetch(bld
, inst
, 1);
556 dst0
= lp_build_max(&bld
->base
, src0
, src1
);
559 case TGSI_OPCODE_SLT
:
560 /* TGSI_OPCODE_SETLT */
561 src0
= emit_fetch(bld
, inst
, 0);
562 src1
= emit_fetch(bld
, inst
, 1);
563 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_LESS
, src0
, src1
);
564 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
567 case TGSI_OPCODE_SGE
:
568 /* TGSI_OPCODE_SETGE */
569 src0
= emit_fetch(bld
, inst
, 0);
570 src1
= emit_fetch(bld
, inst
, 1);
571 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GEQUAL
, src0
, src1
);
572 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
575 case TGSI_OPCODE_MAD
:
576 /* TGSI_OPCODE_MADD */
577 src0
= emit_fetch(bld
, inst
, 0);
578 src1
= emit_fetch(bld
, inst
, 1);
579 src2
= emit_fetch(bld
, inst
, 2);
580 tmp0
= lp_build_mul(&bld
->base
, src0
, src1
);
581 dst0
= lp_build_add(&bld
->base
, tmp0
, src2
);
584 case TGSI_OPCODE_SUB
:
585 src0
= emit_fetch(bld
, inst
, 0);
586 src1
= emit_fetch(bld
, inst
, 1);
587 dst0
= lp_build_sub(&bld
->base
, src0
, src1
);
590 case TGSI_OPCODE_LRP
:
591 src0
= emit_fetch(bld
, inst
, 0);
592 src1
= emit_fetch(bld
, inst
, 1);
593 src2
= emit_fetch(bld
, inst
, 2);
594 tmp0
= lp_build_sub(&bld
->base
, src1
, src2
);
595 tmp0
= lp_build_mul(&bld
->base
, src0
, tmp0
);
596 dst0
= lp_build_add(&bld
->base
, tmp0
, src2
);
599 case TGSI_OPCODE_CND
:
600 src0
= emit_fetch(bld
, inst
, 0);
601 src1
= emit_fetch(bld
, inst
, 1);
602 src2
= emit_fetch(bld
, inst
, 2);
603 tmp1
= lp_build_const_vec(bld
->base
.type
, 0.5);
604 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GREATER
, src2
, tmp1
);
605 dst0
= lp_build_select(&bld
->base
, tmp0
, src0
, src1
);
608 case TGSI_OPCODE_DP2A
:
611 case TGSI_OPCODE_FRC
:
612 src0
= emit_fetch(bld
, inst
, 0);
613 tmp0
= lp_build_floor(&bld
->base
, src0
);
614 dst0
= lp_build_sub(&bld
->base
, src0
, tmp0
);
617 case TGSI_OPCODE_CLAMP
:
618 src0
= emit_fetch(bld
, inst
, 0);
619 src1
= emit_fetch(bld
, inst
, 1);
620 src2
= emit_fetch(bld
, inst
, 2);
621 tmp0
= lp_build_max(&bld
->base
, src0
, src1
);
622 dst0
= lp_build_min(&bld
->base
, tmp0
, src2
);
625 case TGSI_OPCODE_FLR
:
626 src0
= emit_fetch(bld
, inst
, 0);
627 dst0
= lp_build_floor(&bld
->base
, src0
);
630 case TGSI_OPCODE_ROUND
:
631 src0
= emit_fetch(bld
, inst
, 0);
632 dst0
= lp_build_round(&bld
->base
, src0
);
635 case TGSI_OPCODE_EX2
:
636 src0
= emit_fetch(bld
, inst
, 0);
637 tmp0
= lp_build_swizzle_scalar_aos(&bld
->base
, src0
, TGSI_SWIZZLE_X
);
638 dst0
= lp_build_exp2(&bld
->base
, tmp0
);
641 case TGSI_OPCODE_LG2
:
642 src0
= emit_fetch(bld
, inst
, 0);
643 tmp0
= lp_build_swizzle_scalar_aos(&bld
->base
, src0
, TGSI_SWIZZLE_X
);
644 dst0
= lp_build_log2(&bld
->base
, tmp0
);
647 case TGSI_OPCODE_POW
:
648 src0
= emit_fetch(bld
, inst
, 0);
649 src0
= lp_build_swizzle_scalar_aos(&bld
->base
, src0
, TGSI_SWIZZLE_X
);
650 src1
= emit_fetch(bld
, inst
, 1);
651 src1
= lp_build_swizzle_scalar_aos(&bld
->base
, src1
, TGSI_SWIZZLE_X
);
652 dst0
= lp_build_pow(&bld
->base
, src0
, src1
);
655 case TGSI_OPCODE_XPD
:
658 case TGSI_OPCODE_ABS
:
659 src0
= emit_fetch(bld
, inst
, 0);
660 dst0
= lp_build_abs(&bld
->base
, src0
);
663 case TGSI_OPCODE_RCC
:
668 case TGSI_OPCODE_DPH
:
671 case TGSI_OPCODE_COS
:
672 src0
= emit_fetch(bld
, inst
, 0);
673 tmp0
= lp_build_swizzle_scalar_aos(&bld
->base
, src0
, TGSI_SWIZZLE_X
);
674 dst0
= lp_build_cos(&bld
->base
, tmp0
);
677 case TGSI_OPCODE_DDX
:
680 case TGSI_OPCODE_DDY
:
683 case TGSI_OPCODE_KILP
:
684 /* predicated kill */
687 case TGSI_OPCODE_KIL
:
688 /* conditional kill */
691 case TGSI_OPCODE_PK2H
:
695 case TGSI_OPCODE_PK2US
:
699 case TGSI_OPCODE_PK4B
:
703 case TGSI_OPCODE_PK4UB
:
706 case TGSI_OPCODE_RFL
:
709 case TGSI_OPCODE_SEQ
:
710 src0
= emit_fetch(bld
, inst
, 0);
711 src1
= emit_fetch(bld
, inst
, 1);
712 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_EQUAL
, src0
, src1
);
713 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
716 case TGSI_OPCODE_SFL
:
717 dst0
= bld
->base
.zero
;
720 case TGSI_OPCODE_SGT
:
721 src0
= emit_fetch(bld
, inst
, 0);
722 src1
= emit_fetch(bld
, inst
, 1);
723 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GREATER
, src0
, src1
);
724 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
727 case TGSI_OPCODE_SIN
:
728 src0
= emit_fetch(bld
, inst
, 0);
729 tmp0
= lp_build_swizzle_scalar_aos(&bld
->base
, src0
, TGSI_SWIZZLE_X
);
730 dst0
= lp_build_sin(&bld
->base
, tmp0
);
733 case TGSI_OPCODE_SLE
:
734 src0
= emit_fetch(bld
, inst
, 0);
735 src1
= emit_fetch(bld
, inst
, 1);
736 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_LEQUAL
, src0
, src1
);
737 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
740 case TGSI_OPCODE_SNE
:
741 src0
= emit_fetch(bld
, inst
, 0);
742 src1
= emit_fetch(bld
, inst
, 1);
743 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_NOTEQUAL
, src0
, src1
);
744 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
747 case TGSI_OPCODE_STR
:
748 dst0
= bld
->base
.one
;
751 case TGSI_OPCODE_TEX
:
752 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_NONE
);
755 case TGSI_OPCODE_TXD
:
756 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
);
759 case TGSI_OPCODE_UP2H
:
765 case TGSI_OPCODE_UP2US
:
771 case TGSI_OPCODE_UP4B
:
777 case TGSI_OPCODE_UP4UB
:
783 case TGSI_OPCODE_X2D
:
789 case TGSI_OPCODE_ARA
:
795 case TGSI_OPCODE_ARR
:
796 src0
= emit_fetch(bld
, inst
, 0);
797 dst0
= lp_build_round(&bld
->base
, src0
);
800 case TGSI_OPCODE_BRA
:
806 case TGSI_OPCODE_CAL
:
809 case TGSI_OPCODE_RET
:
812 case TGSI_OPCODE_END
:
816 case TGSI_OPCODE_SSG
:
817 /* TGSI_OPCODE_SGN */
818 tmp0
= emit_fetch(bld
, inst
, 0);
819 dst0
= lp_build_sgn(&bld
->base
, tmp0
);
822 case TGSI_OPCODE_CMP
:
823 src0
= emit_fetch(bld
, inst
, 0);
824 src1
= emit_fetch(bld
, inst
, 1);
825 src2
= emit_fetch(bld
, inst
, 2);
826 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_LESS
, src0
, bld
->base
.zero
);
827 dst0
= lp_build_select(&bld
->base
, tmp0
, src1
, src2
);
830 case TGSI_OPCODE_SCS
:
833 case TGSI_OPCODE_TXB
:
834 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_LOD_BIAS
);
837 case TGSI_OPCODE_NRM
:
839 case TGSI_OPCODE_NRM4
:
842 case TGSI_OPCODE_DIV
:
848 case TGSI_OPCODE_DP2
:
851 case TGSI_OPCODE_TXL
:
852 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD
);
855 case TGSI_OPCODE_TXP
:
856 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_PROJECTED
);
859 case TGSI_OPCODE_BRK
:
865 case TGSI_OPCODE_BGNLOOP
:
868 case TGSI_OPCODE_BGNSUB
:
871 case TGSI_OPCODE_ELSE
:
874 case TGSI_OPCODE_ENDIF
:
877 case TGSI_OPCODE_ENDLOOP
:
880 case TGSI_OPCODE_ENDSUB
:
883 case TGSI_OPCODE_PUSHA
:
889 case TGSI_OPCODE_POPA
:
895 case TGSI_OPCODE_CEIL
:
896 src0
= emit_fetch(bld
, inst
, 0);
897 dst0
= lp_build_ceil(&bld
->base
, src0
);
900 case TGSI_OPCODE_I2F
:
906 case TGSI_OPCODE_NOT
:
912 case TGSI_OPCODE_TRUNC
:
913 src0
= emit_fetch(bld
, inst
, 0);
914 dst0
= lp_build_trunc(&bld
->base
, src0
);
917 case TGSI_OPCODE_SHL
:
923 case TGSI_OPCODE_ISHR
:
929 case TGSI_OPCODE_AND
:
941 case TGSI_OPCODE_MOD
:
947 case TGSI_OPCODE_XOR
:
953 case TGSI_OPCODE_SAD
:
959 case TGSI_OPCODE_TXF
:
965 case TGSI_OPCODE_TXQ
:
971 case TGSI_OPCODE_CONT
:
974 case TGSI_OPCODE_EMIT
:
978 case TGSI_OPCODE_ENDPRIM
:
982 case TGSI_OPCODE_NOP
:
990 emit_store(bld
, inst
, 0, dst0
);
/**
 * Translate a TGSI token stream into AoS LLVM IR.
 *
 * The token stream is walked once: declarations allocate register storage,
 * immediates are turned into constant vectors, and instructions are copied
 * into a growable array.  The saved instructions are then translated one by
 * one; opcodes that cannot be translated only produce a debug warning.
 *
 * \param builder     LLVM builder positioned where the code should be emitted
 * \param tokens      TGSI shader tokens
 * \param type        vector type the shader is translated with
 * \param consts_ptr  pointer the constant registers are loaded from
 * \param inputs      input register values
 * \param outputs     array that receives the storage of each declared output
 * \param sampler     texture sampling code generator
 * \param info        shader scan results (only indirect_files is read here)
 */
void
lp_build_tgsi_aos(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *inputs,
                  LLVMValueRef *outputs,
                  struct lp_build_sampler_aos *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_aos_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned chan;
   int pc = 0;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.indirect_files = info->indirect_files;
   bld.instructions = (struct tgsi_full_instruction *)
                      MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   /* NOTE(review): the body of this out-of-memory check is elided in the
    * extract; reconstructed as an early return -- confirm. */
   if (!bld.instructions) {
      return;
   }

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch(parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration(&bld, &parse.FullToken.FullDeclaration);
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               /* Array is full: grow it by another LP_MAX_INSTRUCTIONS. */
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               /* NOTE(review): failure handling is elided in the extract;
                * reconstructed as skipping this instruction -- confirm. */
               if (!instructions) {
                  break;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            float rgba[4];
            assert(size <= 4);
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            for (chan = 0; chan < size; ++chan) {
               rgba[chan] = parse.FullToken.FullImmediate.u[chan].Float;
            }
            /* NOTE(review): fill value for the unspecified components is
             * elided in the extract; reconstructed as zero -- confirm. */
            for (chan = size; chan < 4; ++chan) {
               rgba[chan] = 0.0f;
            }
            bld.immediates[num_immediates] =
                     lp_build_const_aos(type,
                                        rgba[0], rgba[1], rgba[2], rgba[3],
                                        NULL);
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert(0);
      }
   }

   /* Translate the saved instructions.
    * NOTE(review): the loop header is elided in the extract; reconstructed
    * as running until emit_instruction() sets pc to -1 -- confirm. */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction(&bld, instr, opcode_info, &pc))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* Debug aid, disabled: dump the TGSI and the generated function.
    * NOTE(review): the guard is elided in the extract -- confirm. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free(&parse);

   /* Debug aid, disabled: dump the whole module.
    * NOTE(review): the guard is elided in the extract -- confirm. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);
   }

   FREE(bld.instructions);
}