1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * TGSI to LLVM IR translation -- AoS.
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
37 * @author Jose Fonseca <jfonseca@vmware.com>
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
66 swizzle_aos(struct lp_build_tgsi_context
*bld_base
,
73 unsigned char swizzles
[4];
74 struct lp_build_tgsi_aos_context
*bld
= lp_aos_context(bld_base
);
76 assert(swizzle_x
< 4);
77 assert(swizzle_y
< 4);
78 assert(swizzle_z
< 4);
79 assert(swizzle_w
< 4);
81 swizzles
[bld
->inv_swizzles
[0]] = bld
->swizzles
[swizzle_x
];
82 swizzles
[bld
->inv_swizzles
[1]] = bld
->swizzles
[swizzle_y
];
83 swizzles
[bld
->inv_swizzles
[2]] = bld
->swizzles
[swizzle_z
];
84 swizzles
[bld
->inv_swizzles
[3]] = bld
->swizzles
[swizzle_w
];
86 return lp_build_swizzle_aos(&bld
->bld_base
.base
, a
, swizzles
);
91 swizzle_scalar_aos(struct lp_build_tgsi_aos_context
*bld
,
95 chan
= bld
->swizzles
[chan
];
96 return lp_build_swizzle_scalar_aos(&bld
->bld_base
.base
, a
, chan
);
102 struct lp_build_tgsi_context
* bld_base
,
103 const struct tgsi_full_src_register
* reg
,
104 enum tgsi_opcode_type stype
,
107 struct lp_build_tgsi_aos_context
* bld
= lp_aos_context(bld_base
);
108 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
109 struct lp_type type
= bld_base
->base
.type
;
113 assert(!reg
->Register
.Indirect
);
116 * Get the constants components
119 res
= bld
->bld_base
.base
.undef
;
120 for (chan
= 0; chan
< 4; ++chan
) {
122 LLVMValueRef scalar_ptr
;
124 LLVMValueRef swizzle
;
126 index
= lp_build_const_int32(bld
->bld_base
.base
.gallivm
,
127 reg
->Register
.Index
* 4 + chan
);
129 scalar_ptr
= LLVMBuildGEP(builder
, bld
->consts_ptr
, &index
, 1, "");
131 scalar
= LLVMBuildLoad(builder
, scalar_ptr
, "");
133 lp_build_name(scalar
, "const[%u].%c", reg
->Register
.Index
, "xyzw"[chan
]);
136 * NOTE: constants array is always assumed to be RGBA
139 swizzle
= lp_build_const_int32(bld
->bld_base
.base
.gallivm
,
140 bld
->swizzles
[chan
]);
142 res
= LLVMBuildInsertElement(builder
, res
, scalar
, swizzle
, "");
146 * Broadcast the first quaternion to all others.
148 * XXX: could be factored into a reusable function.
151 if (type
.length
> 4) {
152 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
155 for (chan
= 0; chan
< 4; ++chan
) {
156 shuffles
[chan
] = lp_build_const_int32(bld
->bld_base
.base
.gallivm
, chan
);
159 for (i
= 4; i
< type
.length
; ++i
) {
160 shuffles
[i
] = shuffles
[i
% 4];
163 res
= LLVMBuildShuffleVector(builder
,
164 res
, bld
->bld_base
.base
.undef
,
165 LLVMConstVector(shuffles
, type
.length
),
172 emit_fetch_immediate(
173 struct lp_build_tgsi_context
* bld_base
,
174 const struct tgsi_full_src_register
* reg
,
175 enum tgsi_opcode_type stype
,
178 struct lp_build_tgsi_aos_context
* bld
= lp_aos_context(bld_base
);
179 LLVMValueRef res
= bld
->immediates
[reg
->Register
.Index
];
186 struct lp_build_tgsi_context
* bld_base
,
187 const struct tgsi_full_src_register
* reg
,
188 enum tgsi_opcode_type stype
,
191 struct lp_build_tgsi_aos_context
* bld
= lp_aos_context(bld_base
);
192 LLVMValueRef res
= bld
->inputs
[reg
->Register
.Index
];
193 assert(!reg
->Register
.Indirect
);
199 emit_fetch_temporary(
200 struct lp_build_tgsi_context
* bld_base
,
201 const struct tgsi_full_src_register
* reg
,
202 enum tgsi_opcode_type stype
,
205 struct lp_build_tgsi_aos_context
* bld
= lp_aos_context(bld_base
);
206 LLVMBuilderRef builder
= bld_base
->base
.gallivm
->builder
;
207 LLVMValueRef temp_ptr
= bld
->temps
[reg
->Register
.Index
];
208 LLVMValueRef res
= LLVMBuildLoad(builder
, temp_ptr
, "");
209 assert(!reg
->Register
.Indirect
);
211 return bld
->bld_base
.base
.undef
;
221 struct lp_build_tgsi_aos_context
*bld
,
222 const struct tgsi_full_instruction
*inst
,
226 LLVMBuilderRef builder
= bld
->bld_base
.base
.gallivm
->builder
;
227 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[index
];
228 LLVMValueRef mask
= NULL
;
235 switch (inst
->Instruction
.Saturate
) {
239 case TGSI_SAT_ZERO_ONE
:
240 value
= lp_build_max(&bld
->bld_base
.base
, value
, bld
->bld_base
.base
.zero
);
241 value
= lp_build_min(&bld
->bld_base
.base
, value
, bld
->bld_base
.base
.one
);
244 case TGSI_SAT_MINUS_PLUS_ONE
:
245 value
= lp_build_max(&bld
->bld_base
.base
, value
, lp_build_const_vec(bld
->bld_base
.base
.gallivm
, bld
->bld_base
.base
.type
, -1.0));
246 value
= lp_build_min(&bld
->bld_base
.base
, value
, bld
->bld_base
.base
.one
);
254 * Translate the register file
257 assert(!reg
->Register
.Indirect
);
259 switch (reg
->Register
.File
) {
260 case TGSI_FILE_OUTPUT
:
261 ptr
= bld
->outputs
[reg
->Register
.Index
];
264 case TGSI_FILE_TEMPORARY
:
265 ptr
= bld
->temps
[reg
->Register
.Index
];
268 case TGSI_FILE_ADDRESS
:
269 ptr
= bld
->addr
[reg
->Indirect
.Index
];
272 case TGSI_FILE_PREDICATE
:
273 ptr
= bld
->preds
[reg
->Register
.Index
];
287 if (inst
->Instruction
.Predicate
) {
290 assert(inst
->Predicate
.Index
< LP_MAX_TGSI_PREDS
);
292 pred
= LLVMBuildLoad(builder
,
293 bld
->preds
[inst
->Predicate
.Index
], "");
296 * Convert the value to an integer mask.
298 pred
= lp_build_compare(bld
->bld_base
.base
.gallivm
,
299 bld
->bld_base
.base
.type
,
302 bld
->bld_base
.base
.zero
);
304 if (inst
->Predicate
.Negate
) {
305 pred
= LLVMBuildNot(builder
, pred
, "");
308 pred
= bld
->bld_base
.emit_swizzle(&bld
->bld_base
, pred
,
309 inst
->Predicate
.SwizzleX
,
310 inst
->Predicate
.SwizzleY
,
311 inst
->Predicate
.SwizzleZ
,
312 inst
->Predicate
.SwizzleW
);
315 mask
= LLVMBuildAnd(builder
, mask
, pred
, "");
325 if (reg
->Register
.WriteMask
!= TGSI_WRITEMASK_XYZW
) {
326 LLVMValueRef writemask
;
328 writemask
= lp_build_const_mask_aos(bld
->bld_base
.base
.gallivm
, bld
->bld_base
.base
.type
,
329 reg
->Register
.WriteMask
);
332 mask
= LLVMBuildAnd(builder
, mask
, writemask
, "");
339 LLVMValueRef orig_value
;
341 orig_value
= LLVMBuildLoad(builder
, ptr
, "");
342 value
= lp_build_select(&bld
->bld_base
.base
,
343 mask
, value
, orig_value
);
346 LLVMBuildStore(builder
, value
, ptr
);
351 * High-level instruction translators.
355 emit_tex(struct lp_build_tgsi_aos_context
*bld
,
356 const struct tgsi_full_instruction
*inst
,
357 enum lp_build_tex_modifier modifier
)
366 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
367 return bld
->bld_base
.base
.undef
;
370 target
= inst
->Texture
.Texture
;
372 coords
= lp_build_emit_fetch( &bld
->bld_base
, inst
, 0 , LP_CHAN_ALL
);
374 if (modifier
== LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
) {
375 ddx
= lp_build_emit_fetch( &bld
->bld_base
, inst
, 1 , LP_CHAN_ALL
);
376 ddy
= lp_build_emit_fetch( &bld
->bld_base
, inst
, 2 , LP_CHAN_ALL
);
377 unit
= inst
->Src
[3].Register
.Index
;
380 ddx
= lp_build_ddx( &bld
->bld_base
.base
, coords
);
381 ddy
= lp_build_ddy( &bld
->bld_base
.base
, coords
);
384 ddx
= bld
->bld_base
.base
.one
;
385 ddy
= bld
->bld_base
.base
.one
;
387 unit
= inst
->Src
[1].Register
.Index
;
390 return bld
->sampler
->emit_fetch_texel(bld
->sampler
,
399 lp_emit_declaration_aos(
400 struct lp_build_tgsi_aos_context
*bld
,
401 const struct tgsi_full_declaration
*decl
)
403 struct gallivm_state
*gallivm
= bld
->bld_base
.base
.gallivm
;
404 LLVMTypeRef vec_type
= lp_build_vec_type(bld
->bld_base
.base
.gallivm
, bld
->bld_base
.base
.type
);
406 unsigned first
= decl
->Range
.First
;
407 unsigned last
= decl
->Range
.Last
;
410 for (idx
= first
; idx
<= last
; ++idx
) {
411 switch (decl
->Declaration
.File
) {
412 case TGSI_FILE_TEMPORARY
:
413 assert(idx
< LP_MAX_TGSI_TEMPS
);
414 if (bld
->indirect_files
& (1 << TGSI_FILE_TEMPORARY
)) {
415 LLVMValueRef array_size
= lp_build_const_int32(gallivm
, last
+ 1);
416 bld
->temps_array
= lp_build_array_alloca(bld
->bld_base
.base
.gallivm
,
417 vec_type
, array_size
, "");
419 bld
->temps
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
423 case TGSI_FILE_OUTPUT
:
424 bld
->outputs
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
427 case TGSI_FILE_ADDRESS
:
428 assert(idx
< LP_MAX_TGSI_ADDRS
);
429 bld
->addr
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
432 case TGSI_FILE_PREDICATE
:
433 assert(idx
< LP_MAX_TGSI_PREDS
);
434 bld
->preds
[idx
] = lp_build_alloca(gallivm
, vec_type
, "");
438 /* don't need to declare other vars */
446 * Emit LLVM for one TGSI instruction.
447 * \return TRUE for success, FALSE otherwise
450 lp_emit_instruction_aos(
451 struct lp_build_tgsi_aos_context
*bld
,
452 const struct tgsi_full_instruction
*inst
,
453 const struct tgsi_opcode_info
*info
,
456 LLVMValueRef src0
, src1
, src2
;
457 LLVMValueRef tmp0
, tmp1
;
458 LLVMValueRef dst0
= NULL
;
461 * Stores and write masks are handled in a general fashion after the long
462 * instruction opcode switch statement.
464 * Although not strictly necessary, we avoid generating instructions for
465 * channels which won't be stored, in cases where that's easy. For some
466 * complex instructions, like texture sampling, it is more convenient to
467 * assume a full writemask and then let LLVM optimization passes eliminate
473 assert(info
->num_dst
<= 1);
475 dst0
= bld
->bld_base
.base
.undef
;
478 switch (inst
->Instruction
.Opcode
) {
479 case TGSI_OPCODE_ARL
:
480 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
481 dst0
= lp_build_floor(&bld
->bld_base
.base
, src0
);
484 case TGSI_OPCODE_MOV
:
485 dst0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
488 case TGSI_OPCODE_LIT
:
491 case TGSI_OPCODE_RCP
:
492 /* TGSI_OPCODE_RECIP */
493 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
494 dst0
= lp_build_rcp(&bld
->bld_base
.base
, src0
);
497 case TGSI_OPCODE_RSQ
:
498 /* TGSI_OPCODE_RECIPSQRT */
499 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
500 tmp0
= lp_build_emit_llvm_unary(&bld
->bld_base
, TGSI_OPCODE_ABS
, src0
);
501 dst0
= lp_build_rsqrt(&bld
->bld_base
.base
, tmp0
);
504 case TGSI_OPCODE_EXP
:
507 case TGSI_OPCODE_LOG
:
510 case TGSI_OPCODE_MUL
:
511 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
512 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
513 dst0
= lp_build_mul(&bld
->bld_base
.base
, src0
, src1
);
516 case TGSI_OPCODE_ADD
:
517 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
518 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
519 dst0
= lp_build_add(&bld
->bld_base
.base
, src0
, src1
);
522 case TGSI_OPCODE_DP3
:
523 /* TGSI_OPCODE_DOT3 */
526 case TGSI_OPCODE_DP4
:
527 /* TGSI_OPCODE_DOT4 */
530 case TGSI_OPCODE_DST
:
533 case TGSI_OPCODE_MIN
:
534 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
535 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
536 dst0
= lp_build_max(&bld
->bld_base
.base
, src0
, src1
);
539 case TGSI_OPCODE_MAX
:
540 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
541 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
542 dst0
= lp_build_max(&bld
->bld_base
.base
, src0
, src1
);
545 case TGSI_OPCODE_SLT
:
546 /* TGSI_OPCODE_SETLT */
547 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
548 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
549 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_LESS
, src0
, src1
);
550 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, bld
->bld_base
.base
.one
, bld
->bld_base
.base
.zero
);
553 case TGSI_OPCODE_SGE
:
554 /* TGSI_OPCODE_SETGE */
555 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
556 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
557 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_GEQUAL
, src0
, src1
);
558 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, bld
->bld_base
.base
.one
, bld
->bld_base
.base
.zero
);
561 case TGSI_OPCODE_MAD
:
562 /* TGSI_OPCODE_MADD */
563 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
564 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
565 src2
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 2, LP_CHAN_ALL
);
566 tmp0
= lp_build_mul(&bld
->bld_base
.base
, src0
, src1
);
567 dst0
= lp_build_add(&bld
->bld_base
.base
, tmp0
, src2
);
570 case TGSI_OPCODE_SUB
:
571 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
572 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
573 dst0
= lp_build_sub(&bld
->bld_base
.base
, src0
, src1
);
576 case TGSI_OPCODE_LRP
:
577 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
578 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
579 src2
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 2, LP_CHAN_ALL
);
580 tmp0
= lp_build_sub(&bld
->bld_base
.base
, src1
, src2
);
581 tmp0
= lp_build_mul(&bld
->bld_base
.base
, src0
, tmp0
);
582 dst0
= lp_build_add(&bld
->bld_base
.base
, tmp0
, src2
);
585 case TGSI_OPCODE_CND
:
586 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
587 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
588 src2
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 2, LP_CHAN_ALL
);
589 tmp1
= lp_build_const_vec(bld
->bld_base
.base
.gallivm
, bld
->bld_base
.base
.type
, 0.5);
590 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_GREATER
, src2
, tmp1
);
591 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, src0
, src1
);
594 case TGSI_OPCODE_DP2A
:
597 case TGSI_OPCODE_FRC
:
598 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
599 tmp0
= lp_build_floor(&bld
->bld_base
.base
, src0
);
600 dst0
= lp_build_sub(&bld
->bld_base
.base
, src0
, tmp0
);
603 case TGSI_OPCODE_CLAMP
:
604 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
605 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
606 src2
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 2, LP_CHAN_ALL
);
607 tmp0
= lp_build_max(&bld
->bld_base
.base
, src0
, src1
);
608 dst0
= lp_build_min(&bld
->bld_base
.base
, tmp0
, src2
);
611 case TGSI_OPCODE_FLR
:
612 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
613 dst0
= lp_build_floor(&bld
->bld_base
.base
, src0
);
616 case TGSI_OPCODE_ROUND
:
617 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
618 dst0
= lp_build_round(&bld
->bld_base
.base
, src0
);
621 case TGSI_OPCODE_EX2
:
622 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
623 tmp0
= lp_build_swizzle_scalar_aos(&bld
->bld_base
.base
, src0
, TGSI_SWIZZLE_X
);
624 dst0
= lp_build_exp2(&bld
->bld_base
.base
, tmp0
);
627 case TGSI_OPCODE_LG2
:
628 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
629 tmp0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
630 dst0
= lp_build_log2(&bld
->bld_base
.base
, tmp0
);
633 case TGSI_OPCODE_POW
:
634 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
635 src0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
636 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
637 src1
= swizzle_scalar_aos(bld
, src1
, TGSI_SWIZZLE_X
);
638 dst0
= lp_build_pow(&bld
->bld_base
.base
, src0
, src1
);
641 case TGSI_OPCODE_XPD
:
644 case TGSI_OPCODE_RCC
:
649 case TGSI_OPCODE_DPH
:
652 case TGSI_OPCODE_COS
:
653 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
654 tmp0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
655 dst0
= lp_build_cos(&bld
->bld_base
.base
, tmp0
);
658 case TGSI_OPCODE_DDX
:
661 case TGSI_OPCODE_DDY
:
664 case TGSI_OPCODE_KILP
:
665 /* predicated kill */
668 case TGSI_OPCODE_KIL
:
669 /* conditional kill */
672 case TGSI_OPCODE_PK2H
:
676 case TGSI_OPCODE_PK2US
:
680 case TGSI_OPCODE_PK4B
:
684 case TGSI_OPCODE_PK4UB
:
687 case TGSI_OPCODE_RFL
:
690 case TGSI_OPCODE_SEQ
:
691 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
692 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
693 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_EQUAL
, src0
, src1
);
694 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, bld
->bld_base
.base
.one
, bld
->bld_base
.base
.zero
);
697 case TGSI_OPCODE_SFL
:
698 dst0
= bld
->bld_base
.base
.zero
;
701 case TGSI_OPCODE_SGT
:
702 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
703 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
704 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_GREATER
, src0
, src1
);
705 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, bld
->bld_base
.base
.one
, bld
->bld_base
.base
.zero
);
708 case TGSI_OPCODE_SIN
:
709 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
710 tmp0
= swizzle_scalar_aos(bld
, src0
, TGSI_SWIZZLE_X
);
711 dst0
= lp_build_sin(&bld
->bld_base
.base
, tmp0
);
714 case TGSI_OPCODE_SLE
:
715 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
716 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
717 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_LEQUAL
, src0
, src1
);
718 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, bld
->bld_base
.base
.one
, bld
->bld_base
.base
.zero
);
721 case TGSI_OPCODE_SNE
:
722 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
723 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
724 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_NOTEQUAL
, src0
, src1
);
725 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, bld
->bld_base
.base
.one
, bld
->bld_base
.base
.zero
);
728 case TGSI_OPCODE_STR
:
729 dst0
= bld
->bld_base
.base
.one
;
732 case TGSI_OPCODE_TEX
:
733 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_NONE
);
736 case TGSI_OPCODE_TXD
:
737 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
);
740 case TGSI_OPCODE_UP2H
:
746 case TGSI_OPCODE_UP2US
:
752 case TGSI_OPCODE_UP4B
:
758 case TGSI_OPCODE_UP4UB
:
764 case TGSI_OPCODE_X2D
:
770 case TGSI_OPCODE_ARA
:
776 case TGSI_OPCODE_ARR
:
777 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
778 dst0
= lp_build_round(&bld
->bld_base
.base
, src0
);
781 case TGSI_OPCODE_BRA
:
787 case TGSI_OPCODE_CAL
:
790 case TGSI_OPCODE_RET
:
793 case TGSI_OPCODE_END
:
797 case TGSI_OPCODE_SSG
:
798 /* TGSI_OPCODE_SGN */
799 tmp0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
800 dst0
= lp_build_sgn(&bld
->bld_base
.base
, tmp0
);
803 case TGSI_OPCODE_CMP
:
804 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
805 src1
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 1, LP_CHAN_ALL
);
806 src2
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 2, LP_CHAN_ALL
);
807 tmp0
= lp_build_cmp(&bld
->bld_base
.base
, PIPE_FUNC_LESS
, src0
, bld
->bld_base
.base
.zero
);
808 dst0
= lp_build_select(&bld
->bld_base
.base
, tmp0
, src1
, src2
);
811 case TGSI_OPCODE_SCS
:
814 case TGSI_OPCODE_TXB
:
815 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_LOD_BIAS
);
818 case TGSI_OPCODE_NRM
:
820 case TGSI_OPCODE_NRM4
:
823 case TGSI_OPCODE_DIV
:
829 case TGSI_OPCODE_DP2
:
832 case TGSI_OPCODE_TXL
:
833 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD
);
836 case TGSI_OPCODE_TXP
:
837 dst0
= emit_tex(bld
, inst
, LP_BLD_TEX_MODIFIER_PROJECTED
);
840 case TGSI_OPCODE_BRK
:
846 case TGSI_OPCODE_BGNLOOP
:
849 case TGSI_OPCODE_BGNSUB
:
852 case TGSI_OPCODE_ELSE
:
855 case TGSI_OPCODE_ENDIF
:
858 case TGSI_OPCODE_ENDLOOP
:
861 case TGSI_OPCODE_ENDSUB
:
864 case TGSI_OPCODE_PUSHA
:
870 case TGSI_OPCODE_POPA
:
876 case TGSI_OPCODE_CEIL
:
877 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
878 dst0
= lp_build_ceil(&bld
->bld_base
.base
, src0
);
881 case TGSI_OPCODE_I2F
:
887 case TGSI_OPCODE_NOT
:
893 case TGSI_OPCODE_TRUNC
:
894 src0
= lp_build_emit_fetch(&bld
->bld_base
, inst
, 0, LP_CHAN_ALL
);
895 dst0
= lp_build_trunc(&bld
->bld_base
.base
, src0
);
898 case TGSI_OPCODE_SHL
:
904 case TGSI_OPCODE_ISHR
:
910 case TGSI_OPCODE_AND
:
922 case TGSI_OPCODE_MOD
:
928 case TGSI_OPCODE_XOR
:
934 case TGSI_OPCODE_SAD
:
940 case TGSI_OPCODE_TXF
:
946 case TGSI_OPCODE_TXQ
:
952 case TGSI_OPCODE_CONT
:
955 case TGSI_OPCODE_EMIT
:
959 case TGSI_OPCODE_ENDPRIM
:
963 case TGSI_OPCODE_NOP
:
971 lp_emit_store_aos(bld
, inst
, 0, dst0
);
979 lp_build_tgsi_aos(struct gallivm_state
*gallivm
,
980 const struct tgsi_token
*tokens
,
982 const unsigned char swizzles
[4],
983 LLVMValueRef consts_ptr
,
984 const LLVMValueRef
*inputs
,
985 LLVMValueRef
*outputs
,
986 struct lp_build_sampler_aos
*sampler
,
987 const struct tgsi_shader_info
*info
)
989 struct lp_build_tgsi_aos_context bld
;
990 struct tgsi_parse_context parse
;
991 uint num_immediates
= 0;
995 /* Setup build context */
996 memset(&bld
, 0, sizeof bld
);
997 lp_build_context_init(&bld
.bld_base
.base
, gallivm
, type
);
998 lp_build_context_init(&bld
.bld_base
.uint_bld
, gallivm
, lp_uint_type(type
));
999 lp_build_context_init(&bld
.bld_base
.int_bld
, gallivm
, lp_int_type(type
));
1000 lp_build_context_init(&bld
.int_bld
, gallivm
, lp_int_type(type
));
1002 for (chan
= 0; chan
< 4; ++chan
) {
1003 bld
.swizzles
[chan
] = swizzles
[chan
];
1004 bld
.inv_swizzles
[swizzles
[chan
]] = chan
;
1007 bld
.inputs
= inputs
;
1008 bld
.outputs
= outputs
;
1009 bld
.consts_ptr
= consts_ptr
;
1010 bld
.sampler
= sampler
;
1011 bld
.indirect_files
= info
->indirect_files
;
1012 bld
.bld_base
.emit_swizzle
= swizzle_aos
;
1013 bld
.bld_base
.info
= info
;
1015 bld
.bld_base
.emit_fetch_funcs
[TGSI_FILE_CONSTANT
] = emit_fetch_constant
;
1016 bld
.bld_base
.emit_fetch_funcs
[TGSI_FILE_IMMEDIATE
] = emit_fetch_immediate
;
1017 bld
.bld_base
.emit_fetch_funcs
[TGSI_FILE_INPUT
] = emit_fetch_input
;
1018 bld
.bld_base
.emit_fetch_funcs
[TGSI_FILE_TEMPORARY
] = emit_fetch_temporary
;
1020 /* Set opcode actions */
1021 lp_set_default_actions_cpu(&bld
.bld_base
);
1023 if (!lp_bld_tgsi_list_init(&bld
.bld_base
)) {
1027 tgsi_parse_init(&parse
, tokens
);
1029 while (!tgsi_parse_end_of_tokens(&parse
)) {
1030 tgsi_parse_token(&parse
);
1032 switch(parse
.FullToken
.Token
.Type
) {
1033 case TGSI_TOKEN_TYPE_DECLARATION
:
1034 /* Inputs already interpolated */
1035 lp_emit_declaration_aos(&bld
, &parse
.FullToken
.FullDeclaration
);
1038 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1039 /* save expanded instruction */
1040 lp_bld_tgsi_add_instruction(&bld
.bld_base
,
1041 &parse
.FullToken
.FullInstruction
);
1044 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1045 /* simply copy the immediate values into the next immediates[] slot */
1047 const uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1050 assert(num_immediates
< LP_MAX_TGSI_IMMEDIATES
);
1051 for (chan
= 0; chan
< 4; ++chan
) {
1054 for (chan
= 0; chan
< size
; ++chan
) {
1055 unsigned swizzle
= bld
.swizzles
[chan
];
1056 imm
[swizzle
] = parse
.FullToken
.FullImmediate
.u
[chan
].Float
;
1058 bld
.immediates
[num_immediates
] =
1059 lp_build_const_aos(gallivm
, type
,
1060 imm
[0], imm
[1], imm
[2], imm
[3],
1066 case TGSI_TOKEN_TYPE_PROPERTY
:
1075 struct tgsi_full_instruction
*instr
= bld
.bld_base
.instructions
+ pc
;
1076 const struct tgsi_opcode_info
*opcode_info
=
1077 tgsi_get_opcode_info(instr
->Instruction
.Opcode
);
1078 if (!lp_emit_instruction_aos(&bld
, instr
, opcode_info
, &pc
))
1079 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1080 opcode_info
->mnemonic
);
1084 LLVMBasicBlockRef block
= LLVMGetInsertBlock(gallivm
->builder
);
1085 LLVMValueRef function
= LLVMGetBasicBlockParent(block
);
1086 debug_printf("11111111111111111111111111111 \n");
1087 tgsi_dump(tokens
, 0);
1088 lp_debug_dump_value(function
);
1089 debug_printf("2222222222222222222222222222 \n");
1091 tgsi_parse_free(&parse
);
1094 LLVMModuleRef module
= LLVMGetGlobalParent(
1095 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm
->builder
)));
1096 LLVMDumpModule(module
);