1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * TGSI to LLVM IR translation -- AoS.
33 * - No control flow support: the existing control flow code should be factored
34 *   out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
37 * @author Jose Fonseca <jfonseca@vmware.com>
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
62 #define LP_MAX_INSTRUCTIONS 256
65 struct lp_build_tgsi_aos_context
67 struct lp_build_context base
;
69 /* Builder for integer masks and indices */
70 struct lp_build_context int_bld
;
74 * - swizzles[0] = red index
75 * - swizzles[1] = green index
76 * - swizzles[2] = blue index
77 * - swizzles[3] = alpha index
79 unsigned char swizzles
[4];
80 unsigned char inv_swizzles
[4];
82 LLVMValueRef consts_ptr
;
83 const LLVMValueRef
*inputs
;
84 LLVMValueRef
*outputs
;
86 struct lp_build_sampler_aos
*sampler
;
88 LLVMValueRef immediates
[LP_MAX_TGSI_IMMEDIATES
];
89 LLVMValueRef temps
[LP_MAX_TGSI_TEMPS
];
90 LLVMValueRef addr
[LP_MAX_TGSI_ADDRS
];
91 LLVMValueRef preds
[LP_MAX_TGSI_PREDS
];
93 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
94 * set in the indirect_files field.
95 * The temps[] array above is unused then.
97 LLVMValueRef temps_array
;
99 /** bitmask indicating which register files are accessed indirectly */
100 unsigned indirect_files
;
102 struct tgsi_full_instruction
*instructions
;
103 uint max_instructions
;
108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
112 swizzle_aos(struct lp_build_tgsi_aos_context
*bld
,
119 unsigned char swizzles
[4];
121 assert(swizzle_x
< 4);
122 assert(swizzle_y
< 4);
123 assert(swizzle_z
< 4);
124 assert(swizzle_w
< 4);
126 swizzles
[bld
->inv_swizzles
[0]] = bld
->swizzles
[swizzle_x
];
127 swizzles
[bld
->inv_swizzles
[1]] = bld
->swizzles
[swizzle_y
];
128 swizzles
[bld
->inv_swizzles
[2]] = bld
->swizzles
[swizzle_z
];
129 swizzles
[bld
->inv_swizzles
[3]] = bld
->swizzles
[swizzle_w
];
131 return lp_build_swizzle_aos(&bld
->base
, a
, swizzles
);
136 swizzle_scalar_aos(struct lp_build_tgsi_aos_context
*bld
,
140 chan
= bld
->swizzles
[chan
];
141 return lp_build_swizzle_scalar_aos(&bld
->base
, a
, chan
);
150 struct lp_build_tgsi_aos_context
*bld
,
151 const struct tgsi_full_instruction
*inst
,
154 struct lp_type type
= bld
->base
.type
;
155 const struct tgsi_full_src_register
*reg
= &inst
->Src
[src_op
];
159 assert(!reg
->Register
.Indirect
);
162 * Fetch the from the register file.
165 switch (reg
->Register
.File
) {
166 case TGSI_FILE_CONSTANT
:
168 * Get the constants components
171 res
= bld
->base
.undef
;
172 for (chan
= 0; chan
< 4; ++chan
) {
174 LLVMValueRef scalar_ptr
;
176 LLVMValueRef swizzle
;
178 index
= lp_build_const_int32(bld
->base
.gallivm
, reg
->Register
.Index
* 4 + chan
);
180 scalar_ptr
= LLVMBuildGEP(bld
->base
.builder
, bld
->consts_ptr
,
183 scalar
= LLVMBuildLoad(bld
->base
.builder
, scalar_ptr
, "");
185 lp_build_name(scalar
, "const[%u].%c", reg
->Register
.Index
, "xyzw"[chan
]);
188 * NOTE: constants array is always assumed to be RGBA
191 swizzle
= lp_build_const_int32(bld
->base
.gallivm
, chan
);
193 res
= LLVMBuildInsertElement(bld
->base
.builder
, res
, scalar
, swizzle
, "");
197 * Broadcast the first quaternion to all others.
199 * XXX: could be factored into a reusable function.
202 if (type
.length
> 4) {
203 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
206 for (chan
= 0; chan
< 4; ++chan
) {
207 shuffles
[chan
] = lp_build_const_int32(bld
->base
.gallivm
, chan
);
210 for (i
= 4; i
< type
.length
; ++i
) {
211 shuffles
[i
] = shuffles
[i
% 4];
214 res
= LLVMBuildShuffleVector(bld
->base
.builder
,
215 res
, bld
->base
.undef
,
216 LLVMConstVector(shuffles
, type
.length
),
221 case TGSI_FILE_IMMEDIATE
:
222 res
= bld
->immediates
[reg
->Register
.Index
];
226 case TGSI_FILE_INPUT
:
227 res
= bld
->inputs
[reg
->Register
.Index
];
231 case TGSI_FILE_TEMPORARY
:
233 LLVMValueRef temp_ptr
;
234 temp_ptr
= bld
->temps
[reg
->Register
.Index
];
235 res
= LLVMBuildLoad(bld
->base
.builder
, temp_ptr
, "");
237 return bld
->base
.undef
;
242 assert(0 && "invalid src register in emit_fetch()");
243 return bld
->base
.undef
;
247 * Apply sign modifier.
250 if (reg
->Register
.Absolute
) {
251 res
= lp_build_abs(&bld
->base
, res
);
254 if(reg
->Register
.Negate
) {
255 res
= lp_build_negate(&bld
->base
, res
);
259 * Swizzle the argument
262 res
= swizzle_aos(bld
, res
,
263 reg
->Register
.SwizzleX
,
264 reg
->Register
.SwizzleY
,
265 reg
->Register
.SwizzleZ
,
266 reg
->Register
.SwizzleW
);
277 struct lp_build_tgsi_aos_context
*bld
,
278 const struct tgsi_full_instruction
*inst
,
282 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[index
];
283 LLVMValueRef mask
= NULL
;
290 switch (inst
->Instruction
.Saturate
) {
294 case TGSI_SAT_ZERO_ONE
:
295 value
= lp_build_max(&bld
->base
, value
, bld
->base
.zero
);
296 value
= lp_build_min(&bld
->base
, value
, bld
->base
.one
);
299 case TGSI_SAT_MINUS_PLUS_ONE
:
300 value
= lp_build_max(&bld
->base
, value
, lp_build_const_vec(bld
->base
.gallivm
, bld
->base
.type
, -1.0));
301 value
= lp_build_min(&bld
->base
, value
, bld
->base
.one
);
309 * Translate the register file
312 assert(!reg
->Register
.Indirect
);
314 switch (reg
->Register
.File
) {
315 case TGSI_FILE_OUTPUT
:
316 ptr
= bld
->outputs
[reg
->Register
.Index
];
319 case TGSI_FILE_TEMPORARY
:
320 ptr
= bld
->temps
[reg
->Register
.Index
];
323 case TGSI_FILE_ADDRESS
:
324 ptr
= bld
->addr
[reg
->Indirect
.Index
];
327 case TGSI_FILE_PREDICATE
:
328 ptr
= bld
->preds
[reg
->Register
.Index
];
340 if (inst
->Instruction
.Predicate
) {
343 assert(inst
->Predicate
.Index
< LP_MAX_TGSI_PREDS
);
345 pred
= LLVMBuildLoad(bld
->base
.builder
,
346 bld
->preds
[inst
->Predicate
.Index
], "");
349 * Convert the value to an integer mask.
351 pred
= lp_build_compare(bld
->base
.gallivm
,
357 if (inst
->Predicate
.Negate
) {
358 pred
= LLVMBuildNot(bld
->base
.builder
, pred
, "");
361 pred
= swizzle_aos(bld
, pred
,
362 inst
->Predicate
.SwizzleX
,
363 inst
->Predicate
.SwizzleY
,
364 inst
->Predicate
.SwizzleZ
,
365 inst
->Predicate
.SwizzleW
);
368 mask
= LLVMBuildAnd(bld
->base
.builder
, mask
, pred
, "");
378 if (reg
->Register
.WriteMask
!= TGSI_WRITEMASK_XYZW
) {
379 LLVMValueRef writemask
;
381 writemask
= lp_build_const_mask_aos(bld
->base
.gallivm
, bld
->base
.type
,
382 reg
->Register
.WriteMask
);
385 mask
= LLVMBuildAnd(bld
->base
.builder
, mask
, writemask
, "");
392 LLVMValueRef orig_value
;
394 orig_value
= LLVMBuildLoad(bld
->base
.builder
, ptr
, "");
395 value
= lp_build_select(&bld
->base
,
396 mask
, value
, orig_value
);
399 LLVMBuildStore(bld
->base
.builder
, value
, ptr
);
404 * High-level instruction translators.
408 emit_tex(struct lp_build_tgsi_aos_context
*bld
,
409 const struct tgsi_full_instruction
*inst
,
410 enum lp_build_tex_modifier modifier
)
419 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
420 return bld
->base
.undef
;
423 target
= inst
->Texture
.Texture
;
425 coords
= emit_fetch( bld
, inst
, 0 );
427 if (modifier
== LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
) {
428 ddx
= emit_fetch( bld
, inst
, 1 );
429 ddy
= emit_fetch( bld
, inst
, 2 );
430 unit
= inst
->Src
[3].Register
.Index
;
433 ddx
= lp_build_ddx( &bld
->base
, coords
);
434 ddy
= lp_build_ddy( &bld
->base
, coords
);
440 unit
= inst
->Src
[1].Register
.Index
;
443 return bld
->sampler
->emit_fetch_texel(bld
->sampler
,
453 struct lp_build_tgsi_aos_context
*bld
,
454 const struct tgsi_full_declaration
*decl
)
456 struct gallivm_state
*gallivm
= bld
->base
.gallivm
;
457 LLVMTypeRef vec_type
= lp_build_vec_type(bld
->base
.gallivm
, bld
->base
.type
);
459 unsigned first
= decl
->Range
.First
;
460 unsigned last
= decl
->Range
.Last
;
463 for (idx
= first
; idx
<= last
; ++idx
) {
464 switch (decl
->Declaration
.File
) {
465 case TGSI_FILE_TEMPORARY
:
466 assert(idx
< LP_MAX_TGSI_TEMPS
);
467 if (bld
->indirect_files
& (1 << TGSI_FILE_TEMPORARY
)) {
468 LLVMValueRef array_size
= lp_build_const_int32(gallivm
, last
+ 1);
469 bld
->temps_array
= lp_build_array_alloca(bld
->base
.gallivm
,
470 vec_type
, array_size
, "");
472 bld
->temps
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
476 case TGSI_FILE_OUTPUT
:
477 bld
->outputs
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
480 case TGSI_FILE_ADDRESS
:
481 assert(idx
< LP_MAX_TGSI_ADDRS
);
482 bld
->addr
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
485 case TGSI_FILE_PREDICATE
:
486 assert(idx
< LP_MAX_TGSI_PREDS
);
487 bld
->preds
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
491 /* don't need to declare other vars */
499 * Emit LLVM for one TGSI instruction.
500 * \param return TRUE for success, FALSE otherwise
504 struct lp_build_tgsi_aos_context
*bld
,
505 const struct tgsi_full_instruction
*inst
,
506 const struct tgsi_opcode_info
*info
,
509 LLVMValueRef src0
, src1
, src2
;
510 LLVMValueRef tmp0
, tmp1
;
511 LLVMValueRef dst0
= NULL
;
514 * Stores and write masks are handled in a general fashion after the long
515 * instruction opcode switch statement.
517 * Although not stricitly necessary, we avoid generating instructions for
518 * channels which won't be stored, in cases where's that easy. For some
519 * complex instructions, like texture sampling, it is more convenient to
520 * assume a full writemask and then let LLVM optimization passes eliminate
526 assert(info
->num_dst
<= 1);
528 dst0
= bld
->base
.undef
;
531 switch (inst
->Instruction
.Opcode
) {
532 case TGSI_OPCODE_ARL
:
533 src0
= emit_fetch(bld
, inst
, 0);
534 dst0
= lp_build_floor(&bld
->base
, src0
);
537 case TGSI_OPCODE_MOV
:
538 dst0
= emit_fetch(bld
, inst
, 0);
541 case TGSI_OPCODE_LIT
:
544 case TGSI_OPCODE_RCP
:
545 /* TGSI_OPCODE_RECIP */
546 src0
= emit_fetch(bld
, inst
, 0);
547 dst0
= lp_build_rcp(&bld
->base
, src0
);
550 case TGSI_OPCODE_RSQ
:
551 /* TGSI_OPCODE_RECIPSQRT */
552 src0
= emit_fetch(bld
, inst
, 0);
553 tmp0
= lp_build_abs(&bld
->base
, src0
);
554 dst0
= lp_build_rsqrt(&bld
->base
, tmp0
);
557 case TGSI_OPCODE_EXP
:
560 case TGSI_OPCODE_LOG
:
563 case TGSI_OPCODE_MUL
:
564 src0
= emit_fetch(bld
, inst
, 0);
565 src1
= emit_fetch(bld
, inst
, 1);
566 dst0
= lp_build_mul(&bld
->base
, src0
, src1
);
569 case TGSI_OPCODE_ADD
:
570 src0
= emit_fetch(bld
, inst
, 0);
571 src1
= emit_fetch(bld
, inst
, 1);
572 dst0
= lp_build_add(&bld
->base
, src0
, src1
);
575 case TGSI_OPCODE_DP3
:
576 /* TGSI_OPCODE_DOT3 */
579 case TGSI_OPCODE_DP4
:
580 /* TGSI_OPCODE_DOT4 */
583 case TGSI_OPCODE_DST
:
586 case TGSI_OPCODE_MIN
:
587 src0
= emit_fetch(bld
, inst
, 0);
588 src1
= emit_fetch(bld
, inst
, 1);
589 dst0
= lp_build_max(&bld
->base
, src0
, src1
);
592 case TGSI_OPCODE_MAX
:
593 src0
= emit_fetch(bld
, inst
, 0);
594 src1
= emit_fetch(bld
, inst
, 1);
595 dst0
= lp_build_max(&bld
->base
, src0
, src1
);
598 case TGSI_OPCODE_SLT
:
599 /* TGSI_OPCODE_SETLT */
600 src0
= emit_fetch(bld
, inst
, 0);
601 src1
= emit_fetch(bld
, inst
, 1);
602 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_LESS
, src0
, src1
);
603 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
606 case TGSI_OPCODE_SGE
:
607 /* TGSI_OPCODE_SETGE */
608 src0
= emit_fetch(bld
, inst
, 0);
609 src1
= emit_fetch(bld
, inst
, 1);
610 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GEQUAL
, src0
, src1
);
611 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
614 case TGSI_OPCODE_MAD
:
615 /* TGSI_OPCODE_MADD */
616 src0
= emit_fetch(bld
, inst
, 0);
617 src1
= emit_fetch(bld
, inst
, 1);
618 src2
= emit_fetch(bld
, inst
, 2);
619 tmp0
= lp_build_mul(&bld
->base
, src0
, src1
);
620 dst0
= lp_build_add(&bld
->base
, tmp0
, src2
);
623 case TGSI_OPCODE_SUB
:
624 src0
= emit_fetch(bld
, inst
, 0);
625 src1
= emit_fetch(bld
, inst
, 1);
626 dst0
= lp_build_sub(&bld
->base
, src0
, src1
);
629 case TGSI_OPCODE_LRP
:
630 src0
= emit_fetch(bld
, inst
, 0);
631 src1
= emit_fetch(bld
, inst
, 1);
632 src2
= emit_fetch(bld
, inst
, 2);
633 tmp0
= lp_build_sub(&bld
->base
, src1
, src2
);
634 tmp0
= lp_build_mul(&bld
->base
, src0
, tmp0
);
635 dst0
= lp_build_add(&bld
->base
, tmp0
, src2
);
638 case TGSI_OPCODE_CND
:
639 src0
= emit_fetch(bld
, inst
, 0);
640 src1
= emit_fetch(bld
, inst
, 1);
641 src2
= emit_fetch(bld
, inst
, 2);
642 tmp1
= lp_build_const_vec(bld
->base
.gallivm
, bld
->base
.type
, 0.5);
643 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GREATER
, src2
, tmp1
);
644 dst0
= lp_build_select(&bld
->base
, tmp0
, src0
, src1
);
647 case TGSI_OPCODE_DP2A
:
650 case TGSI_OPCODE_FRC
:
651 src0
= emit_fetch(bld
, inst
, 0);
652 tmp0
= lp_build_floor(&bld
->base
, src0
);
653 dst0
= lp_build_sub(&bld
->base
, src0
, tmp0
);
656 case TGSI_OPCODE_CLAMP
:
657 src0
= emit_fetch(bld
, inst
, 0);
658 src1
= emit_fetch(bld
, inst
, 1);
659 src2
= emit_fetch(bld
, inst
, 2);
660 tmp0
= lp_build_max(&bld
->base
, src0
, src1
);
661 dst0
= lp_build_min(&bld
->base
, tmp0
, src2
);
664 case TGSI_OPCODE_FLR
:
665 src0
= emit_fetch(bld
, inst
, 0);
666 dst0
= lp_build_floor(&bld
->base
, src0
);
669 case TGSI_OPCODE_ROUND
:
670 src0
= emit_fetch(bld
, inst
, 0);
671 dst0
= lp_build_round(&bld
->base
, src0
);
674 case TGSI_OPCODE_EX2
:
675 src0
= emit_fetch(bld
, inst
, 0);
676 tmp0
= lp_build_swizzle_scalar_aos(&bld
->base
, src0
, TGSI_SWIZZLE_X
);
677 dst0
= lp_build_exp2(&bld
->base
, tmp0
);
680 case TGSI_OPCODE_LG2
:
681 src0
= emit_fetch(bld
, inst
, 0);
682 tmp0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
683 dst0
= lp_build_log2(&bld
->base
, tmp0
);
686 case TGSI_OPCODE_POW
:
687 src0
= emit_fetch(bld
, inst
, 0);
688 src0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
689 src1
= emit_fetch(bld
, inst
, 1);
690 src1
= swizzle_scalar_aos(bld
, src1
, TGSI_SWIZZLE_X
);
691 dst0
= lp_build_pow(&bld
->base
, src0
, src1
);
694 case TGSI_OPCODE_XPD
:
697 case TGSI_OPCODE_ABS
:
698 src0
= emit_fetch(bld
, inst
, 0);
699 dst0
= lp_build_abs(&bld
->base
, src0
);
702 case TGSI_OPCODE_RCC
:
707 case TGSI_OPCODE_DPH
:
710 case TGSI_OPCODE_COS
:
711 src0
= emit_fetch(bld
, inst
, 0);
712 tmp0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
713 dst0
= lp_build_cos(&bld
->base
, tmp0
);
716 case TGSI_OPCODE_DDX
:
719 case TGSI_OPCODE_DDY
:
722 case TGSI_OPCODE_KILP
:
723 /* predicated kill */
726 case TGSI_OPCODE_KIL
:
727 /* conditional kill */
730 case TGSI_OPCODE_PK2H
:
734 case TGSI_OPCODE_PK2US
:
738 case TGSI_OPCODE_PK4B
:
742 case TGSI_OPCODE_PK4UB
:
745 case TGSI_OPCODE_RFL
:
748 case TGSI_OPCODE_SEQ
:
749 src0
= emit_fetch(bld
, inst
, 0);
750 src1
= emit_fetch(bld
, inst
, 1);
751 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_EQUAL
, src0
, src1
);
752 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
755 case TGSI_OPCODE_SFL
:
756 dst0
= bld
->base
.zero
;
759 case TGSI_OPCODE_SGT
:
760 src0
= emit_fetch(bld
, inst
, 0);
761 src1
= emit_fetch(bld
, inst
, 1);
762 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GREATER
, src0
, src1
);
763 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
766 case TGSI_OPCODE_SIN
:
767 src0
= emit_fetch(bld
, inst
, 0);
768 tmp0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
769 dst0
= lp_build_sin(&bld
->base
, tmp0
);
772 case TGSI_OPCODE_SLE
:
773 src0
= emit_fetch(bld
, inst
, 0);
774 src1
= emit_fetch(bld
, inst
, 1);
775 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_LEQUAL
, src0
, src1
);
776 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
779 case TGSI_OPCODE_SNE
:
780 src0
= emit_fetch(bld
, inst
, 0);
781 src1
= emit_fetch(bld
, inst
, 1);
782 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_NOTEQUAL
, src0
, src1
);
783 dst0
= lp_build_select(&bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
786 case TGSI_OPCODE_STR
:
787 dst0
= bld
->base
.one
;
790 case TGSI_OPCODE_TEX
:
791 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_NONE
);
794 case TGSI_OPCODE_TXD
:
795 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
);
798 case TGSI_OPCODE_UP2H
:
804 case TGSI_OPCODE_UP2US
:
810 case TGSI_OPCODE_UP4B
:
816 case TGSI_OPCODE_UP4UB
:
822 case TGSI_OPCODE_X2D
:
828 case TGSI_OPCODE_ARA
:
834 case TGSI_OPCODE_ARR
:
835 src0
= emit_fetch(bld
, inst
, 0);
836 dst0
= lp_build_round(&bld
->base
, src0
);
839 case TGSI_OPCODE_BRA
:
845 case TGSI_OPCODE_CAL
:
848 case TGSI_OPCODE_RET
:
851 case TGSI_OPCODE_END
:
855 case TGSI_OPCODE_SSG
:
856 /* TGSI_OPCODE_SGN */
857 tmp0
= emit_fetch(bld
, inst
, 0);
858 dst0
= lp_build_sgn(&bld
->base
, tmp0
);
861 case TGSI_OPCODE_CMP
:
862 src0
= emit_fetch(bld
, inst
, 0);
863 src1
= emit_fetch(bld
, inst
, 1);
864 src2
= emit_fetch(bld
, inst
, 2);
865 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_LESS
, src0
, bld
->base
.zero
);
866 dst0
= lp_build_select(&bld
->base
, tmp0
, src1
, src2
);
869 case TGSI_OPCODE_SCS
:
872 case TGSI_OPCODE_TXB
:
873 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_LOD_BIAS
);
876 case TGSI_OPCODE_NRM
:
878 case TGSI_OPCODE_NRM4
:
881 case TGSI_OPCODE_DIV
:
887 case TGSI_OPCODE_DP2
:
890 case TGSI_OPCODE_TXL
:
891 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD
);
894 case TGSI_OPCODE_TXP
:
895 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_PROJECTED
);
898 case TGSI_OPCODE_BRK
:
904 case TGSI_OPCODE_BGNLOOP
:
907 case TGSI_OPCODE_BGNSUB
:
910 case TGSI_OPCODE_ELSE
:
913 case TGSI_OPCODE_ENDIF
:
916 case TGSI_OPCODE_ENDLOOP
:
919 case TGSI_OPCODE_ENDSUB
:
922 case TGSI_OPCODE_PUSHA
:
928 case TGSI_OPCODE_POPA
:
934 case TGSI_OPCODE_CEIL
:
935 src0
= emit_fetch(bld
, inst
, 0);
936 dst0
= lp_build_ceil(&bld
->base
, src0
);
939 case TGSI_OPCODE_I2F
:
945 case TGSI_OPCODE_NOT
:
951 case TGSI_OPCODE_TRUNC
:
952 src0
= emit_fetch(bld
, inst
, 0);
953 dst0
= lp_build_trunc(&bld
->base
, src0
);
956 case TGSI_OPCODE_SHL
:
962 case TGSI_OPCODE_ISHR
:
968 case TGSI_OPCODE_AND
:
980 case TGSI_OPCODE_MOD
:
986 case TGSI_OPCODE_XOR
:
992 case TGSI_OPCODE_SAD
:
998 case TGSI_OPCODE_TXF
:
1004 case TGSI_OPCODE_TXQ
:
1010 case TGSI_OPCODE_CONT
:
1013 case TGSI_OPCODE_EMIT
:
1017 case TGSI_OPCODE_ENDPRIM
:
1021 case TGSI_OPCODE_NOP
:
1028 if (info
->num_dst
) {
1029 emit_store(bld
, inst
, 0, dst0
);
1037 lp_build_tgsi_aos(struct gallivm_state
*gallivm
,
1038 const struct tgsi_token
*tokens
,
1039 struct lp_type type
,
1040 const unsigned char swizzles
[4],
1041 LLVMValueRef consts_ptr
,
1042 const LLVMValueRef
*inputs
,
1043 LLVMValueRef
*outputs
,
1044 struct lp_build_sampler_aos
*sampler
,
1045 const struct tgsi_shader_info
*info
)
1047 struct lp_build_tgsi_aos_context bld
;
1048 struct tgsi_parse_context parse
;
1049 uint num_immediates
= 0;
1050 uint num_instructions
= 0;
1054 /* Setup build context */
1055 memset(&bld
, 0, sizeof bld
);
1056 lp_build_context_init(&bld
.base
, gallivm
, type
);
1057 lp_build_context_init(&bld
.int_bld
, gallivm
, lp_int_type(type
));
1059 for (chan
= 0; chan
< 4; ++chan
) {
1060 bld
.swizzles
[chan
] = swizzles
[chan
];
1061 bld
.inv_swizzles
[swizzles
[chan
]] = chan
;
1064 bld
.inputs
= inputs
;
1065 bld
.outputs
= outputs
;
1066 bld
.consts_ptr
= consts_ptr
;
1067 bld
.sampler
= sampler
;
1068 bld
.indirect_files
= info
->indirect_files
;
1069 bld
.instructions
= (struct tgsi_full_instruction
*)
1070 MALLOC(LP_MAX_INSTRUCTIONS
* sizeof(struct tgsi_full_instruction
));
1071 bld
.max_instructions
= LP_MAX_INSTRUCTIONS
;
1073 if (!bld
.instructions
) {
1077 tgsi_parse_init(&parse
, tokens
);
1079 while (!tgsi_parse_end_of_tokens(&parse
)) {
1080 tgsi_parse_token(&parse
);
1082 switch(parse
.FullToken
.Token
.Type
) {
1083 case TGSI_TOKEN_TYPE_DECLARATION
:
1084 /* Inputs already interpolated */
1085 emit_declaration(&bld
, &parse
.FullToken
.FullDeclaration
);
1088 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1090 /* save expanded instruction */
1091 if (num_instructions
== bld
.max_instructions
) {
1092 struct tgsi_full_instruction
*instructions
;
1093 instructions
= REALLOC(bld
.instructions
,
1094 bld
.max_instructions
1095 * sizeof(struct tgsi_full_instruction
),
1096 (bld
.max_instructions
+ LP_MAX_INSTRUCTIONS
)
1097 * sizeof(struct tgsi_full_instruction
));
1098 if (!instructions
) {
1101 bld
.instructions
= instructions
;
1102 bld
.max_instructions
+= LP_MAX_INSTRUCTIONS
;
1105 memcpy(bld
.instructions
+ num_instructions
,
1106 &parse
.FullToken
.FullInstruction
,
1107 sizeof(bld
.instructions
[0]));
1114 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1115 /* simply copy the immediate values into the next immediates[] slot */
1117 const uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1120 assert(num_immediates
< LP_MAX_TGSI_IMMEDIATES
);
1121 for (chan
= 0; chan
< 4; ++chan
) {
1124 for (chan
= 0; chan
< size
; ++chan
) {
1125 unsigned swizzle
= bld
.swizzles
[chan
];
1126 imm
[swizzle
] = parse
.FullToken
.FullImmediate
.u
[chan
].Float
;
1128 bld
.immediates
[num_immediates
] =
1129 lp_build_const_aos(gallivm
, type
,
1130 imm
[0], imm
[1], imm
[2], imm
[3],
1136 case TGSI_TOKEN_TYPE_PROPERTY
:
1145 struct tgsi_full_instruction
*instr
= bld
.instructions
+ pc
;
1146 const struct tgsi_opcode_info
*opcode_info
=
1147 tgsi_get_opcode_info(instr
->Instruction
.Opcode
);
1148 if (!emit_instruction(&bld
, instr
, opcode_info
, &pc
))
1149 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1150 opcode_info
->mnemonic
);
1154 LLVMBasicBlockRef block
= LLVMGetInsertBlock(gallivm
->builder
);
1155 LLVMValueRef function
= LLVMGetBasicBlockParent(block
);
1156 debug_printf("11111111111111111111111111111 \n");
1157 tgsi_dump(tokens
, 0);
1158 lp_debug_dump_value(function
);
1159 debug_printf("2222222222222222222222222222 \n");
1161 tgsi_parse_free(&parse
);
1164 LLVMModuleRef module
= LLVMGetGlobalParent(
1165 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld
.base
.builder
)));
1166 LLVMDumpModule(module
);
1169 FREE(bld
.instructions
);