1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
31 * TGSI to LLVM IR translation -- SoA.
33 * @author Jose Fonseca <jfonseca@vmware.com>
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61 #include "lp_bld_printf.h"
64 #define FOR_EACH_CHANNEL( CHAN )\
65 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
67 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
70 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
71 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
73 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
74 FOR_EACH_CHANNEL( CHAN )\
75 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
81 #define NUM_CHANNELS 4
83 #define LP_MAX_INSTRUCTIONS 256
87 struct lp_build_context
*bld
;
91 LLVMTypeRef int_vec_type
;
93 LLVMValueRef cond_stack
[LP_MAX_TGSI_NESTING
];
95 LLVMValueRef cond_mask
;
97 LLVMBasicBlockRef loop_block
;
98 LLVMValueRef cont_mask
;
99 LLVMValueRef break_mask
;
100 LLVMValueRef break_var
;
102 LLVMBasicBlockRef loop_block
;
103 LLVMValueRef cont_mask
;
104 LLVMValueRef break_mask
;
105 LLVMValueRef break_var
;
106 } loop_stack
[LP_MAX_TGSI_NESTING
];
109 LLVMValueRef ret_mask
;
112 LLVMValueRef ret_mask
;
113 } call_stack
[LP_MAX_TGSI_NESTING
];
116 LLVMValueRef exec_mask
;
119 struct lp_build_tgsi_soa_context
121 struct lp_build_context base
;
123 /* Builder for vector integer masks and indices */
124 struct lp_build_context uint_bld
;
126 /* Builder for scalar elements of shader's data type (float) */
127 struct lp_build_context elem_bld
;
129 LLVMValueRef consts_ptr
;
130 const LLVMValueRef
*pos
;
131 const LLVMValueRef (*inputs
)[NUM_CHANNELS
];
132 LLVMValueRef (*outputs
)[NUM_CHANNELS
];
134 const struct lp_build_sampler_soa
*sampler
;
136 LLVMValueRef immediates
[LP_MAX_TGSI_IMMEDIATES
][NUM_CHANNELS
];
137 LLVMValueRef temps
[LP_MAX_TGSI_TEMPS
][NUM_CHANNELS
];
138 LLVMValueRef addr
[LP_MAX_TGSI_ADDRS
][NUM_CHANNELS
];
139 LLVMValueRef preds
[LP_MAX_TGSI_PREDS
][NUM_CHANNELS
];
141 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
142 * set in the indirect_files field.
143 * The temps[] array above is unused then.
145 LLVMValueRef temps_array
;
147 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
148 * set in the indirect_files field.
149 * The outputs[] array above is unused then.
151 LLVMValueRef outputs_array
;
153 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
154 * set in the indirect_files field.
155 * The inputs[] array above is unused then.
157 LLVMValueRef inputs_array
;
159 const struct tgsi_shader_info
*info
;
160 /** bitmask indicating which register files are accessed indirectly */
161 unsigned indirect_files
;
163 struct lp_build_mask_context
*mask
;
164 struct lp_exec_mask exec_mask
;
166 struct tgsi_full_instruction
*instructions
;
167 uint max_instructions
;
170 static void lp_exec_mask_init(struct lp_exec_mask
*mask
, struct lp_build_context
*bld
)
173 mask
->has_mask
= FALSE
;
174 mask
->cond_stack_size
= 0;
175 mask
->loop_stack_size
= 0;
176 mask
->call_stack_size
= 0;
178 mask
->int_vec_type
= lp_build_int_vec_type(mask
->bld
->type
);
179 mask
->exec_mask
= mask
->ret_mask
= mask
->break_mask
= mask
->cont_mask
= mask
->cond_mask
=
180 LLVMConstAllOnes(mask
->int_vec_type
);
183 static void lp_exec_mask_update(struct lp_exec_mask
*mask
)
185 if (mask
->loop_stack_size
) {
186 /*for loops we need to update the entire mask at runtime */
188 assert(mask
->break_mask
);
189 tmp
= LLVMBuildAnd(mask
->bld
->builder
,
193 mask
->exec_mask
= LLVMBuildAnd(mask
->bld
->builder
,
198 mask
->exec_mask
= mask
->cond_mask
;
200 if (mask
->call_stack_size
) {
201 mask
->exec_mask
= LLVMBuildAnd(mask
->bld
->builder
,
207 mask
->has_mask
= (mask
->cond_stack_size
> 0 ||
208 mask
->loop_stack_size
> 0 ||
209 mask
->call_stack_size
> 0);
212 static void lp_exec_mask_cond_push(struct lp_exec_mask
*mask
,
215 assert(mask
->cond_stack_size
< LP_MAX_TGSI_NESTING
);
216 if (mask
->cond_stack_size
== 0) {
217 assert(mask
->cond_mask
== LLVMConstAllOnes(mask
->int_vec_type
));
219 mask
->cond_stack
[mask
->cond_stack_size
++] = mask
->cond_mask
;
220 assert(LLVMTypeOf(val
) == mask
->int_vec_type
);
221 mask
->cond_mask
= LLVMBuildAnd(mask
->bld
->builder
,
225 lp_exec_mask_update(mask
);
228 static void lp_exec_mask_cond_invert(struct lp_exec_mask
*mask
)
230 LLVMValueRef prev_mask
;
231 LLVMValueRef inv_mask
;
233 assert(mask
->cond_stack_size
);
234 prev_mask
= mask
->cond_stack
[mask
->cond_stack_size
- 1];
235 if (mask
->cond_stack_size
== 1) {
236 assert(prev_mask
== LLVMConstAllOnes(mask
->int_vec_type
));
239 inv_mask
= LLVMBuildNot(mask
->bld
->builder
, mask
->cond_mask
, "");
241 mask
->cond_mask
= LLVMBuildAnd(mask
->bld
->builder
,
244 lp_exec_mask_update(mask
);
247 static void lp_exec_mask_cond_pop(struct lp_exec_mask
*mask
)
249 assert(mask
->cond_stack_size
);
250 mask
->cond_mask
= mask
->cond_stack
[--mask
->cond_stack_size
];
251 lp_exec_mask_update(mask
);
254 static void lp_exec_bgnloop(struct lp_exec_mask
*mask
)
256 if (mask
->loop_stack_size
== 0) {
257 assert(mask
->loop_block
== NULL
);
258 assert(mask
->cont_mask
== LLVMConstAllOnes(mask
->int_vec_type
));
259 assert(mask
->break_mask
== LLVMConstAllOnes(mask
->int_vec_type
));
260 assert(mask
->break_var
== NULL
);
263 assert(mask
->loop_stack_size
< LP_MAX_TGSI_NESTING
);
265 mask
->loop_stack
[mask
->loop_stack_size
].loop_block
= mask
->loop_block
;
266 mask
->loop_stack
[mask
->loop_stack_size
].cont_mask
= mask
->cont_mask
;
267 mask
->loop_stack
[mask
->loop_stack_size
].break_mask
= mask
->break_mask
;
268 mask
->loop_stack
[mask
->loop_stack_size
].break_var
= mask
->break_var
;
269 ++mask
->loop_stack_size
;
271 mask
->break_var
= lp_build_alloca(mask
->bld
->builder
, mask
->int_vec_type
, "");
272 LLVMBuildStore(mask
->bld
->builder
, mask
->break_mask
, mask
->break_var
);
274 mask
->loop_block
= lp_build_insert_new_block(mask
->bld
->builder
, "bgnloop");
275 LLVMBuildBr(mask
->bld
->builder
, mask
->loop_block
);
276 LLVMPositionBuilderAtEnd(mask
->bld
->builder
, mask
->loop_block
);
278 mask
->break_mask
= LLVMBuildLoad(mask
->bld
->builder
, mask
->break_var
, "");
280 lp_exec_mask_update(mask
);
283 static void lp_exec_break(struct lp_exec_mask
*mask
)
285 LLVMValueRef exec_mask
= LLVMBuildNot(mask
->bld
->builder
,
289 mask
->break_mask
= LLVMBuildAnd(mask
->bld
->builder
,
291 exec_mask
, "break_full");
293 lp_exec_mask_update(mask
);
296 static void lp_exec_continue(struct lp_exec_mask
*mask
)
298 LLVMValueRef exec_mask
= LLVMBuildNot(mask
->bld
->builder
,
302 mask
->cont_mask
= LLVMBuildAnd(mask
->bld
->builder
,
306 lp_exec_mask_update(mask
);
310 static void lp_exec_endloop(struct lp_exec_mask
*mask
)
312 LLVMBasicBlockRef endloop
;
313 LLVMTypeRef reg_type
= LLVMIntType(mask
->bld
->type
.width
*
314 mask
->bld
->type
.length
);
317 assert(mask
->break_mask
);
320 * Restore the cont_mask, but don't pop
322 assert(mask
->loop_stack_size
);
323 mask
->cont_mask
= mask
->loop_stack
[mask
->loop_stack_size
- 1].cont_mask
;
324 lp_exec_mask_update(mask
);
327 * Unlike the continue mask, the break_mask must be preserved across loop
330 LLVMBuildStore(mask
->bld
->builder
, mask
->break_mask
, mask
->break_var
);
332 /* i1cond = (mask == 0) */
333 i1cond
= LLVMBuildICmp(
336 LLVMBuildBitCast(mask
->bld
->builder
, mask
->exec_mask
, reg_type
, ""),
337 LLVMConstNull(reg_type
), "");
339 endloop
= lp_build_insert_new_block(mask
->bld
->builder
, "endloop");
341 LLVMBuildCondBr(mask
->bld
->builder
,
342 i1cond
, mask
->loop_block
, endloop
);
344 LLVMPositionBuilderAtEnd(mask
->bld
->builder
, endloop
);
346 assert(mask
->loop_stack_size
);
347 --mask
->loop_stack_size
;
348 mask
->loop_block
= mask
->loop_stack
[mask
->loop_stack_size
].loop_block
;
349 mask
->cont_mask
= mask
->loop_stack
[mask
->loop_stack_size
].cont_mask
;
350 mask
->break_mask
= mask
->loop_stack
[mask
->loop_stack_size
].break_mask
;
351 mask
->break_var
= mask
->loop_stack
[mask
->loop_stack_size
].break_var
;
353 lp_exec_mask_update(mask
);
356 /* stores val into an address pointed to by dst.
357 * mask->exec_mask is used to figure out which bits of val
358 * should be stored into the address
359 * (0 means don't store this bit, 1 means do store).
361 static void lp_exec_mask_store(struct lp_exec_mask
*mask
,
366 /* Mix the predicate and execution mask */
367 if (mask
->has_mask
) {
369 pred
= LLVMBuildAnd(mask
->bld
->builder
, pred
, mask
->exec_mask
, "");
371 pred
= mask
->exec_mask
;
376 LLVMValueRef real_val
, dst_val
;
378 dst_val
= LLVMBuildLoad(mask
->bld
->builder
, dst
, "");
379 real_val
= lp_build_select(mask
->bld
,
383 LLVMBuildStore(mask
->bld
->builder
, real_val
, dst
);
385 LLVMBuildStore(mask
->bld
->builder
, val
, dst
);
388 static void lp_exec_mask_call(struct lp_exec_mask
*mask
,
392 assert(mask
->call_stack_size
< LP_MAX_TGSI_NESTING
);
393 mask
->call_stack
[mask
->call_stack_size
].pc
= *pc
;
394 mask
->call_stack
[mask
->call_stack_size
].ret_mask
= mask
->ret_mask
;
395 mask
->call_stack_size
++;
399 static void lp_exec_mask_ret(struct lp_exec_mask
*mask
, int *pc
)
401 LLVMValueRef exec_mask
;
403 if (mask
->call_stack_size
== 0) {
404 /* returning from main() */
408 exec_mask
= LLVMBuildNot(mask
->bld
->builder
,
412 mask
->ret_mask
= LLVMBuildAnd(mask
->bld
->builder
,
414 exec_mask
, "ret_full");
416 lp_exec_mask_update(mask
);
419 static void lp_exec_mask_bgnsub(struct lp_exec_mask
*mask
)
423 static void lp_exec_mask_endsub(struct lp_exec_mask
*mask
, int *pc
)
425 assert(mask
->call_stack_size
);
426 mask
->call_stack_size
--;
427 *pc
= mask
->call_stack
[mask
->call_stack_size
].pc
;
428 mask
->ret_mask
= mask
->call_stack
[mask
->call_stack_size
].ret_mask
;
429 lp_exec_mask_update(mask
);
434 * Return pointer to a temporary register channel (src or dest).
435 * Note that indirect addressing cannot be handled here.
436 * \param index which temporary register
437 * \param chan which channel of the temp register.
440 get_temp_ptr(struct lp_build_tgsi_soa_context
*bld
,
445 if (bld
->indirect_files
& (1 << TGSI_FILE_TEMPORARY
)) {
446 LLVMValueRef lindex
= lp_build_const_int32(index
* 4 + chan
);
447 return LLVMBuildGEP(bld
->base
.builder
, bld
->temps_array
, &lindex
, 1, "");
450 return bld
->temps
[index
][chan
];
455 * Return pointer to a output register channel (src or dest).
456 * Note that indirect addressing cannot be handled here.
457 * \param index which output register
458 * \param chan which channel of the output register.
461 get_output_ptr(struct lp_build_tgsi_soa_context
*bld
,
466 if (bld
->indirect_files
& (1 << TGSI_FILE_OUTPUT
)) {
467 LLVMValueRef lindex
= lp_build_const_int32(index
* 4 + chan
);
468 return LLVMBuildGEP(bld
->base
.builder
, bld
->outputs_array
, &lindex
, 1, "");
471 return bld
->outputs
[index
][chan
];
477 * XXX the lp_build_gather() function should be capable of doing this
478 * with a little work.
481 build_gather(struct lp_build_tgsi_soa_context
*bld
,
482 LLVMValueRef base_ptr
,
483 LLVMValueRef indexes
)
485 LLVMValueRef res
= bld
->base
.undef
;
489 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
491 for (i
= 0; i
< bld
->base
.type
.length
; i
++) {
492 LLVMValueRef ii
= LLVMConstInt(LLVMInt32Type(), i
, 0);
493 LLVMValueRef index
= LLVMBuildExtractElement(bld
->base
.builder
,
495 LLVMValueRef scalar_ptr
= LLVMBuildGEP(bld
->base
.builder
, base_ptr
,
496 &index
, 1, "gather_ptr");
497 LLVMValueRef scalar
= LLVMBuildLoad(bld
->base
.builder
, scalar_ptr
, "");
499 res
= LLVMBuildInsertElement(bld
->base
.builder
, res
, scalar
, ii
, "");
507 * Scatter/store vector.
510 emit_mask_scatter(struct lp_build_tgsi_soa_context
*bld
,
511 LLVMValueRef base_ptr
,
512 LLVMValueRef indexes
,
514 struct lp_exec_mask
*mask
,
517 LLVMBuilderRef builder
= bld
->base
.builder
;
520 /* Mix the predicate and execution mask */
521 if (mask
->has_mask
) {
523 pred
= LLVMBuildAnd(mask
->bld
->builder
, pred
, mask
->exec_mask
, "");
526 pred
= mask
->exec_mask
;
531 * Loop over elements of index_vec, store scalar value.
533 for (i
= 0; i
< bld
->base
.type
.length
; i
++) {
534 LLVMValueRef ii
= LLVMConstInt(LLVMInt32Type(), i
, 0);
535 LLVMValueRef index
= LLVMBuildExtractElement(builder
, indexes
, ii
, "");
536 LLVMValueRef scalar_ptr
= LLVMBuildGEP(builder
, base_ptr
, &index
, 1, "scatter_ptr");
537 LLVMValueRef val
= LLVMBuildExtractElement(builder
, values
, ii
, "scatter_val");
538 LLVMValueRef scalar_pred
= pred
?
539 LLVMBuildExtractElement(builder
, pred
, ii
, "scatter_pred") : NULL
;
542 lp_build_printf(builder
, "scatter %d: val %f at %d %p\n",
543 ii
, val
, index
, scalar_ptr
);
546 LLVMValueRef real_val
, dst_val
;
547 dst_val
= LLVMBuildLoad(builder
, scalar_ptr
, "");
548 real_val
= lp_build_select(&bld
->elem_bld
, scalar_pred
, val
, dst_val
);
549 LLVMBuildStore(builder
, real_val
, scalar_ptr
);
552 LLVMBuildStore(builder
, val
, scalar_ptr
);
559 * Read the current value of the ADDR register, convert the floats to
560 * ints, add the base index and return the vector of offsets.
561 * The offsets will be used to index into the constant buffer or
562 * temporary register file.
565 get_indirect_index(struct lp_build_tgsi_soa_context
*bld
,
566 unsigned reg_file
, unsigned reg_index
,
567 const struct tgsi_src_register
*indirect_reg
)
569 struct lp_build_context
*uint_bld
= &bld
->uint_bld
;
570 /* always use X component of address register */
571 unsigned swizzle
= indirect_reg
->SwizzleX
;
574 LLVMValueRef max_index
;
577 assert(bld
->indirect_files
& (1 << reg_file
));
579 base
= lp_build_const_int_vec(uint_bld
->type
, reg_index
);
582 rel
= LLVMBuildLoad(bld
->base
.builder
,
583 bld
->addr
[indirect_reg
->Index
][swizzle
],
586 /* for indexing we want integers */
587 rel
= LLVMBuildFPToSI(bld
->base
.builder
,
589 uint_bld
->vec_type
, "");
591 index
= lp_build_add(uint_bld
, base
, rel
);
593 max_index
= lp_build_const_int_vec(uint_bld
->type
,
594 bld
->info
->file_max
[reg_file
]);
596 assert(!uint_bld
->type
.sign
);
597 index
= lp_build_min(uint_bld
, index
, max_index
);
608 struct lp_build_tgsi_soa_context
*bld
,
609 const struct tgsi_full_instruction
*inst
,
611 const unsigned chan_index
)
613 struct lp_build_context
*uint_bld
= &bld
->uint_bld
;
614 const struct tgsi_full_src_register
*reg
= &inst
->Src
[src_op
];
615 const unsigned swizzle
=
616 tgsi_util_get_full_src_register_swizzle(reg
, chan_index
);
618 LLVMValueRef indirect_index
= NULL
;
621 assert(0 && "invalid swizzle in emit_fetch()");
622 return bld
->base
.undef
;
625 if (reg
->Register
.Indirect
) {
626 indirect_index
= get_indirect_index(bld
,
631 assert(reg
->Register
.Index
<= bld
->info
->file_max
[reg
->Register
.File
]);
634 switch (reg
->Register
.File
) {
635 case TGSI_FILE_CONSTANT
:
636 if (reg
->Register
.Indirect
) {
637 LLVMValueRef swizzle_vec
=
638 lp_build_const_int_vec(uint_bld
->type
, swizzle
);
639 LLVMValueRef index_vec
; /* index into the const buffer */
641 /* index_vec = indirect_index * 4 + swizzle */
642 index_vec
= lp_build_shl_imm(uint_bld
, indirect_index
, 2);
643 index_vec
= lp_build_add(uint_bld
, index_vec
, swizzle_vec
);
645 /* Gather values from the constant buffer */
646 res
= build_gather(bld
, bld
->consts_ptr
, index_vec
);
649 LLVMValueRef index
; /* index into the const buffer */
650 LLVMValueRef scalar
, scalar_ptr
;
652 index
= lp_build_const_int32(reg
->Register
.Index
*4 + swizzle
);
654 scalar_ptr
= LLVMBuildGEP(bld
->base
.builder
, bld
->consts_ptr
,
656 scalar
= LLVMBuildLoad(bld
->base
.builder
, scalar_ptr
, "");
658 res
= lp_build_broadcast_scalar(&bld
->base
, scalar
);
662 case TGSI_FILE_IMMEDIATE
:
663 res
= bld
->immediates
[reg
->Register
.Index
][swizzle
];
667 case TGSI_FILE_INPUT
:
668 if (reg
->Register
.Indirect
) {
669 LLVMValueRef swizzle_vec
=
670 lp_build_const_int_vec(uint_bld
->type
, swizzle
);
671 LLVMValueRef length_vec
=
672 lp_build_const_int_vec(uint_bld
->type
, bld
->base
.type
.length
);
673 LLVMValueRef index_vec
; /* index into the const buffer */
674 LLVMValueRef inputs_array
;
675 LLVMTypeRef float4_ptr_type
;
677 /* index_vec = (indirect_index * 4 + swizzle) * length */
678 index_vec
= lp_build_shl_imm(uint_bld
, indirect_index
, 2);
679 index_vec
= lp_build_add(uint_bld
, index_vec
, swizzle_vec
);
680 index_vec
= lp_build_mul(uint_bld
, index_vec
, length_vec
);
682 /* cast inputs_array pointer to float* */
683 float4_ptr_type
= LLVMPointerType(LLVMFloatType(), 0);
684 inputs_array
= LLVMBuildBitCast(uint_bld
->builder
, bld
->inputs_array
,
685 float4_ptr_type
, "");
687 /* Gather values from the temporary register array */
688 res
= build_gather(bld
, inputs_array
, index_vec
);
690 if (bld
->indirect_files
& (1 << TGSI_FILE_INPUT
)) {
691 LLVMValueRef lindex
= lp_build_const_int32(reg
->Register
.Index
* 4 + swizzle
);
692 LLVMValueRef input_ptr
= LLVMBuildGEP(bld
->base
.builder
,
693 bld
->inputs_array
, &lindex
, 1, "");
694 res
= LLVMBuildLoad(bld
->base
.builder
, input_ptr
, "");
697 res
= bld
->inputs
[reg
->Register
.Index
][swizzle
];
703 case TGSI_FILE_TEMPORARY
:
704 if (reg
->Register
.Indirect
) {
705 LLVMValueRef swizzle_vec
=
706 lp_build_const_int_vec(uint_bld
->type
, swizzle
);
707 LLVMValueRef length_vec
=
708 lp_build_const_int_vec(uint_bld
->type
, bld
->base
.type
.length
);
709 LLVMValueRef index_vec
; /* index into the const buffer */
710 LLVMValueRef temps_array
;
711 LLVMTypeRef float4_ptr_type
;
713 /* index_vec = (indirect_index * 4 + swizzle) * length */
714 index_vec
= lp_build_shl_imm(uint_bld
, indirect_index
, 2);
715 index_vec
= lp_build_add(uint_bld
, index_vec
, swizzle_vec
);
716 index_vec
= lp_build_mul(uint_bld
, index_vec
, length_vec
);
718 /* cast temps_array pointer to float* */
719 float4_ptr_type
= LLVMPointerType(LLVMFloatType(), 0);
720 temps_array
= LLVMBuildBitCast(uint_bld
->builder
, bld
->temps_array
,
721 float4_ptr_type
, "");
723 /* Gather values from the temporary register array */
724 res
= build_gather(bld
, temps_array
, index_vec
);
727 LLVMValueRef temp_ptr
;
728 temp_ptr
= get_temp_ptr(bld
, reg
->Register
.Index
, swizzle
);
729 res
= LLVMBuildLoad(bld
->base
.builder
, temp_ptr
, "");
731 return bld
->base
.undef
;
736 assert(0 && "invalid src register in emit_fetch()");
737 return bld
->base
.undef
;
740 switch( tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
) ) {
741 case TGSI_UTIL_SIGN_CLEAR
:
742 res
= lp_build_abs( &bld
->base
, res
);
745 case TGSI_UTIL_SIGN_SET
:
746 res
= lp_build_abs( &bld
->base
, res
);
748 case TGSI_UTIL_SIGN_TOGGLE
:
749 res
= lp_build_negate( &bld
->base
, res
);
752 case TGSI_UTIL_SIGN_KEEP
:
761 * Register fetch with derivatives.
765 struct lp_build_tgsi_soa_context
*bld
,
766 const struct tgsi_full_instruction
*inst
,
768 const unsigned chan_index
,
775 src
= emit_fetch(bld
, inst
, index
, chan_index
);
780 /* TODO: use interpolation coeffs for inputs */
783 *ddx
= lp_build_ddx(&bld
->base
, src
);
786 *ddy
= lp_build_ddy(&bld
->base
, src
);
794 emit_fetch_predicate(
795 struct lp_build_tgsi_soa_context
*bld
,
796 const struct tgsi_full_instruction
*inst
,
800 unsigned char swizzles
[4];
801 LLVMValueRef unswizzled
[4] = {NULL
, NULL
, NULL
, NULL
};
805 if (!inst
->Instruction
.Predicate
) {
806 FOR_EACH_CHANNEL( chan
) {
812 swizzles
[0] = inst
->Predicate
.SwizzleX
;
813 swizzles
[1] = inst
->Predicate
.SwizzleY
;
814 swizzles
[2] = inst
->Predicate
.SwizzleZ
;
815 swizzles
[3] = inst
->Predicate
.SwizzleW
;
817 index
= inst
->Predicate
.Index
;
818 assert(index
< LP_MAX_TGSI_PREDS
);
820 FOR_EACH_CHANNEL( chan
) {
821 unsigned swizzle
= swizzles
[chan
];
824 * Only fetch the predicate register channels that are actually listed
827 if (!unswizzled
[swizzle
]) {
828 value
= LLVMBuildLoad(bld
->base
.builder
,
829 bld
->preds
[index
][swizzle
], "");
832 * Convert the value to an integer mask.
834 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
835 * is needlessly causing two comparisons due to storing the intermediate
836 * result as float vector instead of an integer mask vector.
838 value
= lp_build_compare(bld
->base
.builder
,
843 if (inst
->Predicate
.Negate
) {
844 value
= LLVMBuildNot(bld
->base
.builder
, value
, "");
847 unswizzled
[swizzle
] = value
;
849 value
= unswizzled
[swizzle
];
862 struct lp_build_tgsi_soa_context
*bld
,
863 const struct tgsi_full_instruction
*inst
,
869 const struct tgsi_full_dst_register
*reg
= &inst
->Dst
[index
];
870 struct lp_build_context
*uint_bld
= &bld
->uint_bld
;
871 LLVMValueRef indirect_index
= NULL
;
873 switch( inst
->Instruction
.Saturate
) {
877 case TGSI_SAT_ZERO_ONE
:
878 value
= lp_build_max(&bld
->base
, value
, bld
->base
.zero
);
879 value
= lp_build_min(&bld
->base
, value
, bld
->base
.one
);
882 case TGSI_SAT_MINUS_PLUS_ONE
:
883 value
= lp_build_max(&bld
->base
, value
, lp_build_const_vec(bld
->base
.type
, -1.0));
884 value
= lp_build_min(&bld
->base
, value
, bld
->base
.one
);
891 if (reg
->Register
.Indirect
) {
892 indirect_index
= get_indirect_index(bld
,
897 assert(reg
->Register
.Index
<= bld
->info
->file_max
[reg
->Register
.File
]);
900 switch( reg
->Register
.File
) {
901 case TGSI_FILE_OUTPUT
:
902 if (reg
->Register
.Indirect
) {
903 LLVMBuilderRef builder
= bld
->base
.builder
;
904 LLVMValueRef chan_vec
=
905 lp_build_const_int_vec(uint_bld
->type
, chan_index
);
906 LLVMValueRef length_vec
=
907 lp_build_const_int_vec(uint_bld
->type
, bld
->base
.type
.length
);
908 LLVMValueRef index_vec
; /* indexes into the temp registers */
909 LLVMValueRef outputs_array
;
910 LLVMValueRef pixel_offsets
;
911 LLVMTypeRef float_ptr_type
;
914 /* build pixel offset vector: {0, 1, 2, 3, ...} */
915 pixel_offsets
= uint_bld
->undef
;
916 for (i
= 0; i
< bld
->base
.type
.length
; i
++) {
917 LLVMValueRef ii
= lp_build_const_int32(i
);
918 pixel_offsets
= LLVMBuildInsertElement(builder
, pixel_offsets
,
922 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
923 index_vec
= lp_build_shl_imm(uint_bld
, indirect_index
, 2);
924 index_vec
= lp_build_add(uint_bld
, index_vec
, chan_vec
);
925 index_vec
= lp_build_mul(uint_bld
, index_vec
, length_vec
);
926 index_vec
= lp_build_add(uint_bld
, index_vec
, pixel_offsets
);
928 float_ptr_type
= LLVMPointerType(LLVMFloatType(), 0);
929 outputs_array
= LLVMBuildBitCast(builder
, bld
->outputs_array
,
932 /* Scatter store values into temp registers */
933 emit_mask_scatter(bld
, outputs_array
, index_vec
, value
,
934 &bld
->exec_mask
, pred
);
937 LLVMValueRef out_ptr
= get_output_ptr(bld
, reg
->Register
.Index
,
939 lp_exec_mask_store(&bld
->exec_mask
, pred
, value
, out_ptr
);
943 case TGSI_FILE_TEMPORARY
:
944 if (reg
->Register
.Indirect
) {
945 LLVMBuilderRef builder
= bld
->base
.builder
;
946 LLVMValueRef chan_vec
=
947 lp_build_const_int_vec(uint_bld
->type
, chan_index
);
948 LLVMValueRef length_vec
=
949 lp_build_const_int_vec(uint_bld
->type
, bld
->base
.type
.length
);
950 LLVMValueRef index_vec
; /* indexes into the temp registers */
951 LLVMValueRef temps_array
;
952 LLVMValueRef pixel_offsets
;
953 LLVMTypeRef float_ptr_type
;
956 /* build pixel offset vector: {0, 1, 2, 3, ...} */
957 pixel_offsets
= uint_bld
->undef
;
958 for (i
= 0; i
< bld
->base
.type
.length
; i
++) {
959 LLVMValueRef ii
= lp_build_const_int32(i
);
960 pixel_offsets
= LLVMBuildInsertElement(builder
, pixel_offsets
,
964 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
965 index_vec
= lp_build_shl_imm(uint_bld
, indirect_index
, 2);
966 index_vec
= lp_build_add(uint_bld
, index_vec
, chan_vec
);
967 index_vec
= lp_build_mul(uint_bld
, index_vec
, length_vec
);
968 index_vec
= lp_build_add(uint_bld
, index_vec
, pixel_offsets
);
970 float_ptr_type
= LLVMPointerType(LLVMFloatType(), 0);
971 temps_array
= LLVMBuildBitCast(builder
, bld
->temps_array
,
974 /* Scatter store values into temp registers */
975 emit_mask_scatter(bld
, temps_array
, index_vec
, value
,
976 &bld
->exec_mask
, pred
);
979 LLVMValueRef temp_ptr
= get_temp_ptr(bld
, reg
->Register
.Index
,
981 lp_exec_mask_store(&bld
->exec_mask
, pred
, value
, temp_ptr
);
985 case TGSI_FILE_ADDRESS
:
986 lp_exec_mask_store(&bld
->exec_mask
, pred
, value
,
987 bld
->addr
[reg
->Indirect
.Index
][chan_index
]);
990 case TGSI_FILE_PREDICATE
:
991 lp_exec_mask_store(&bld
->exec_mask
, pred
, value
,
992 bld
->preds
[reg
->Register
.Index
][chan_index
]);
1002 * High-level instruction translators.
1006 emit_tex( struct lp_build_tgsi_soa_context
*bld
,
1007 const struct tgsi_full_instruction
*inst
,
1008 enum lp_build_tex_modifier modifier
,
1009 LLVMValueRef
*texel
)
1012 LLVMValueRef lod_bias
, explicit_lod
;
1013 LLVMValueRef oow
= NULL
;
1014 LLVMValueRef coords
[3];
1015 LLVMValueRef ddx
[3];
1016 LLVMValueRef ddy
[3];
1017 unsigned num_coords
;
1020 if (!bld
->sampler
) {
1021 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1022 for (i
= 0; i
< 4; i
++) {
1023 texel
[i
] = bld
->base
.undef
;
1028 switch (inst
->Texture
.Texture
) {
1029 case TGSI_TEXTURE_1D
:
1032 case TGSI_TEXTURE_2D
:
1033 case TGSI_TEXTURE_RECT
:
1036 case TGSI_TEXTURE_SHADOW1D
:
1037 case TGSI_TEXTURE_SHADOW2D
:
1038 case TGSI_TEXTURE_SHADOWRECT
:
1039 case TGSI_TEXTURE_3D
:
1040 case TGSI_TEXTURE_CUBE
:
1048 if (modifier
== LP_BLD_TEX_MODIFIER_LOD_BIAS
) {
1049 lod_bias
= emit_fetch( bld
, inst
, 0, 3 );
1050 explicit_lod
= NULL
;
1052 else if (modifier
== LP_BLD_TEX_MODIFIER_EXPLICIT_LOD
) {
1054 explicit_lod
= emit_fetch( bld
, inst
, 0, 3 );
1058 explicit_lod
= NULL
;
1061 if (modifier
== LP_BLD_TEX_MODIFIER_PROJECTED
) {
1062 oow
= emit_fetch( bld
, inst
, 0, 3 );
1063 oow
= lp_build_rcp(&bld
->base
, oow
);
1066 for (i
= 0; i
< num_coords
; i
++) {
1067 coords
[i
] = emit_fetch( bld
, inst
, 0, i
);
1068 if (modifier
== LP_BLD_TEX_MODIFIER_PROJECTED
)
1069 coords
[i
] = lp_build_mul(&bld
->base
, coords
[i
], oow
);
1071 for (i
= num_coords
; i
< 3; i
++) {
1072 coords
[i
] = bld
->base
.undef
;
1075 if (modifier
== LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
) {
1076 LLVMTypeRef i32t
= LLVMInt32Type();
1077 LLVMValueRef index0
= LLVMConstInt(i32t
, 0, 0);
1078 for (i
= 0; i
< num_coords
; i
++) {
1079 LLVMValueRef src1
= emit_fetch( bld
, inst
, 1, i
);
1080 LLVMValueRef src2
= emit_fetch( bld
, inst
, 2, i
);
1081 ddx
[i
] = LLVMBuildExtractElement(bld
->base
.builder
, src1
, index0
, "");
1082 ddy
[i
] = LLVMBuildExtractElement(bld
->base
.builder
, src2
, index0
, "");
1084 unit
= inst
->Src
[3].Register
.Index
;
1086 for (i
= 0; i
< num_coords
; i
++) {
1087 ddx
[i
] = lp_build_scalar_ddx( &bld
->base
, coords
[i
] );
1088 ddy
[i
] = lp_build_scalar_ddy( &bld
->base
, coords
[i
] );
1090 unit
= inst
->Src
[1].Register
.Index
;
1092 for (i
= num_coords
; i
< 3; i
++) {
1093 ddx
[i
] = LLVMGetUndef(bld
->base
.elem_type
);
1094 ddy
[i
] = LLVMGetUndef(bld
->base
.elem_type
);
1097 bld
->sampler
->emit_fetch_texel(bld
->sampler
,
1100 unit
, num_coords
, coords
,
1102 lod_bias
, explicit_lod
,
1107 near_end_of_shader(struct lp_build_tgsi_soa_context
*bld
,
1112 for (i
= 0; i
< 5; i
++) {
1115 if (pc
+ i
>= bld
->info
->num_instructions
)
1118 opcode
= bld
->instructions
[pc
+ i
].Instruction
.Opcode
;
1120 if (opcode
== TGSI_OPCODE_END
)
1123 if (opcode
== TGSI_OPCODE_TEX
||
1124 opcode
== TGSI_OPCODE_TXP
||
1125 opcode
== TGSI_OPCODE_TXD
||
1126 opcode
== TGSI_OPCODE_TXB
||
1127 opcode
== TGSI_OPCODE_TXL
||
1128 opcode
== TGSI_OPCODE_TXF
||
1129 opcode
== TGSI_OPCODE_TXQ
||
1130 opcode
== TGSI_OPCODE_CAL
||
1131 opcode
== TGSI_OPCODE_CALLNZ
||
1132 opcode
== TGSI_OPCODE_IF
||
1133 opcode
== TGSI_OPCODE_IFC
||
1134 opcode
== TGSI_OPCODE_BGNLOOP
||
1135 opcode
== TGSI_OPCODE_SWITCH
)
1145 * Kill fragment if any of the src register values are negative.
1149 struct lp_build_tgsi_soa_context
*bld
,
1150 const struct tgsi_full_instruction
*inst
,
1153 const struct tgsi_full_src_register
*reg
= &inst
->Src
[0];
1154 LLVMValueRef terms
[NUM_CHANNELS
];
1156 unsigned chan_index
;
1158 memset(&terms
, 0, sizeof terms
);
1160 FOR_EACH_CHANNEL( chan_index
) {
1163 /* Unswizzle channel */
1164 swizzle
= tgsi_util_get_full_src_register_swizzle( reg
, chan_index
);
1166 /* Check if the component has not been already tested. */
1167 assert(swizzle
< NUM_CHANNELS
);
1168 if( !terms
[swizzle
] )
1169 /* TODO: change the comparison operator instead of setting the sign */
1170 terms
[swizzle
] = emit_fetch(bld
, inst
, 0, chan_index
);
1174 FOR_EACH_CHANNEL( chan_index
) {
1175 if(terms
[chan_index
]) {
1176 LLVMValueRef chan_mask
;
1179 * If term < 0 then mask = 0 else mask = ~0.
1181 chan_mask
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GEQUAL
, terms
[chan_index
], bld
->base
.zero
);
1184 mask
= LLVMBuildAnd(bld
->base
.builder
, mask
, chan_mask
, "");
1191 lp_build_mask_update(bld
->mask
, mask
);
1193 if (!near_end_of_shader(bld
, pc
))
1194 lp_build_mask_check(bld
->mask
);
1200 * Predicated fragment kill.
1201 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1202 * The only predication is the execution mask which will apply if
1203 * we're inside a loop or conditional.
1206 emit_kilp(struct lp_build_tgsi_soa_context
*bld
,
1207 const struct tgsi_full_instruction
*inst
,
1212 /* For those channels which are "alive", disable fragment shader
1215 if (bld
->exec_mask
.has_mask
) {
1216 mask
= LLVMBuildNot(bld
->base
.builder
, bld
->exec_mask
.exec_mask
, "kilp");
1219 LLVMValueRef zero
= LLVMConstNull(bld
->base
.int_vec_type
);
1223 lp_build_mask_update(bld
->mask
, mask
);
1225 if (!near_end_of_shader(bld
, pc
))
1226 lp_build_mask_check(bld
->mask
);
1231 * Emit code which will dump the value of all the temporary registers
1235 emit_dump_temps(struct lp_build_tgsi_soa_context
*bld
)
1237 LLVMBuilderRef builder
= bld
->base
.builder
;
1238 LLVMValueRef temp_ptr
;
1239 LLVMValueRef i0
= lp_build_const_int32(0);
1240 LLVMValueRef i1
= lp_build_const_int32(1);
1241 LLVMValueRef i2
= lp_build_const_int32(2);
1242 LLVMValueRef i3
= lp_build_const_int32(3);
1244 int n
= bld
->info
->file_max
[TGSI_FILE_TEMPORARY
];
1246 for (index
= 0; index
< n
; index
++) {
1247 LLVMValueRef idx
= lp_build_const_int32(index
);
1248 LLVMValueRef v
[4][4], res
;
1251 lp_build_printf(builder
, "TEMP[%d]:\n", idx
);
1253 for (chan
= 0; chan
< 4; chan
++) {
1254 temp_ptr
= get_temp_ptr(bld
, index
, chan
);
1255 res
= LLVMBuildLoad(bld
->base
.builder
, temp_ptr
, "");
1256 v
[chan
][0] = LLVMBuildExtractElement(builder
, res
, i0
, "");
1257 v
[chan
][1] = LLVMBuildExtractElement(builder
, res
, i1
, "");
1258 v
[chan
][2] = LLVMBuildExtractElement(builder
, res
, i2
, "");
1259 v
[chan
][3] = LLVMBuildExtractElement(builder
, res
, i3
, "");
1262 lp_build_printf(builder
, " X: %f %f %f %f\n",
1263 v
[0][0], v
[0][1], v
[0][2], v
[0][3]);
1264 lp_build_printf(builder
, " Y: %f %f %f %f\n",
1265 v
[1][0], v
[1][1], v
[1][2], v
[1][3]);
1266 lp_build_printf(builder
, " Z: %f %f %f %f\n",
1267 v
[2][0], v
[2][1], v
[2][2], v
[2][3]);
1268 lp_build_printf(builder
, " W: %f %f %f %f\n",
1269 v
[3][0], v
[3][1], v
[3][2], v
[3][3]);
1277 struct lp_build_tgsi_soa_context
*bld
,
1278 const struct tgsi_full_declaration
*decl
)
1280 LLVMTypeRef vec_type
= bld
->base
.vec_type
;
1281 const unsigned first
= decl
->Range
.First
;
1282 const unsigned last
= decl
->Range
.Last
;
1285 for (idx
= first
; idx
<= last
; ++idx
) {
1286 assert(last
<= bld
->info
->file_max
[decl
->Declaration
.File
]);
1287 switch (decl
->Declaration
.File
) {
1288 case TGSI_FILE_TEMPORARY
:
1289 assert(idx
< LP_MAX_TGSI_TEMPS
);
1290 if (!(bld
->indirect_files
& (1 << TGSI_FILE_TEMPORARY
))) {
1291 for (i
= 0; i
< NUM_CHANNELS
; i
++)
1292 bld
->temps
[idx
][i
] = lp_build_alloca(bld
->base
.builder
,
1297 case TGSI_FILE_OUTPUT
:
1298 if (!(bld
->indirect_files
& (1 << TGSI_FILE_OUTPUT
))) {
1299 for (i
= 0; i
< NUM_CHANNELS
; i
++)
1300 bld
->outputs
[idx
][i
] = lp_build_alloca(bld
->base
.builder
,
1301 vec_type
, "output");
1305 case TGSI_FILE_ADDRESS
:
1306 assert(idx
< LP_MAX_TGSI_ADDRS
);
1307 for (i
= 0; i
< NUM_CHANNELS
; i
++)
1308 bld
->addr
[idx
][i
] = lp_build_alloca(bld
->base
.builder
,
1312 case TGSI_FILE_PREDICATE
:
1313 assert(idx
< LP_MAX_TGSI_PREDS
);
1314 for (i
= 0; i
< NUM_CHANNELS
; i
++)
1315 bld
->preds
[idx
][i
] = lp_build_alloca(bld
->base
.builder
,
1316 vec_type
, "predicate");
1320 /* don't need to declare other vars */
1328 * Emit LLVM for one TGSI instruction.
1329 * \param return TRUE for success, FALSE otherwise
1333 struct lp_build_tgsi_soa_context
*bld
,
1334 const struct tgsi_full_instruction
*inst
,
1335 const struct tgsi_opcode_info
*info
,
1338 unsigned chan_index
;
1339 LLVMValueRef src0
, src1
, src2
;
1340 LLVMValueRef tmp0
, tmp1
, tmp2
;
1341 LLVMValueRef tmp3
= NULL
;
1342 LLVMValueRef tmp4
= NULL
;
1343 LLVMValueRef tmp5
= NULL
;
1344 LLVMValueRef tmp6
= NULL
;
1345 LLVMValueRef tmp7
= NULL
;
1347 LLVMValueRef dst0
[NUM_CHANNELS
];
1350 * Stores and write masks are handled in a general fashion after the long
1351 * instruction opcode switch statement.
1353 * Although not stricitly necessary, we avoid generating instructions for
1354 * channels which won't be stored, in cases where's that easy. For some
1355 * complex instructions, like texture sampling, it is more convenient to
1356 * assume a full writemask and then let LLVM optimization passes eliminate
1362 assert(info
->num_dst
<= 1);
1363 if (info
->num_dst
) {
1364 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1365 dst0
[chan_index
] = bld
->base
.undef
;
1369 switch (inst
->Instruction
.Opcode
) {
1370 case TGSI_OPCODE_ARL
:
1371 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1372 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1373 tmp0
= lp_build_floor(&bld
->base
, tmp0
);
1374 dst0
[chan_index
] = tmp0
;
1378 case TGSI_OPCODE_MOV
:
1379 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1380 dst0
[chan_index
] = emit_fetch( bld
, inst
, 0, chan_index
);
1384 case TGSI_OPCODE_LIT
:
1385 if( IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) ) {
1386 dst0
[CHAN_X
] = bld
->base
.one
;
1388 if( IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) ) {
1389 src0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1390 dst0
[CHAN_Y
] = lp_build_max( &bld
->base
, src0
, bld
->base
.zero
);
1392 if( IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
) ) {
1393 /* XMM[1] = SrcReg[0].yyyy */
1394 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Y
);
1395 /* XMM[1] = max(XMM[1], 0) */
1396 tmp1
= lp_build_max( &bld
->base
, tmp1
, bld
->base
.zero
);
1397 /* XMM[2] = SrcReg[0].wwww */
1398 tmp2
= emit_fetch( bld
, inst
, 0, CHAN_W
);
1399 tmp1
= lp_build_pow( &bld
->base
, tmp1
, tmp2
);
1400 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1401 tmp2
= lp_build_cmp(&bld
->base
, PIPE_FUNC_GREATER
, tmp0
, bld
->base
.zero
);
1402 dst0
[CHAN_Z
] = lp_build_select(&bld
->base
, tmp2
, tmp1
, bld
->base
.zero
);
1404 if( IS_DST0_CHANNEL_ENABLED( inst
, CHAN_W
) ) {
1405 dst0
[CHAN_W
] = bld
->base
.one
;
1409 case TGSI_OPCODE_RCP
:
1410 /* TGSI_OPCODE_RECIP */
1411 src0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1412 res
= lp_build_rcp(&bld
->base
, src0
);
1413 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1414 dst0
[chan_index
] = res
;
1418 case TGSI_OPCODE_RSQ
:
1419 /* TGSI_OPCODE_RECIPSQRT */
1420 src0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1421 src0
= lp_build_abs(&bld
->base
, src0
);
1422 res
= lp_build_rsqrt(&bld
->base
, src0
);
1423 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1424 dst0
[chan_index
] = res
;
1428 case TGSI_OPCODE_EXP
:
1429 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) ||
1430 IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) ||
1431 IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
)) {
1432 LLVMValueRef
*p_exp2_int_part
= NULL
;
1433 LLVMValueRef
*p_frac_part
= NULL
;
1434 LLVMValueRef
*p_exp2
= NULL
;
1436 src0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1438 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
))
1439 p_exp2_int_part
= &tmp0
;
1440 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
))
1441 p_frac_part
= &tmp1
;
1442 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
))
1445 lp_build_exp2_approx(&bld
->base
, src0
, p_exp2_int_part
, p_frac_part
, p_exp2
);
1447 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
))
1448 dst0
[CHAN_X
] = tmp0
;
1449 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
))
1450 dst0
[CHAN_Y
] = tmp1
;
1451 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
))
1452 dst0
[CHAN_Z
] = tmp2
;
1455 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_W
)) {
1456 dst0
[CHAN_W
] = bld
->base
.one
;
1460 case TGSI_OPCODE_LOG
:
1461 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) ||
1462 IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) ||
1463 IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
)) {
1464 LLVMValueRef
*p_floor_log2
= NULL
;
1465 LLVMValueRef
*p_exp
= NULL
;
1466 LLVMValueRef
*p_log2
= NULL
;
1468 src0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1469 src0
= lp_build_abs( &bld
->base
, src0
);
1471 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
))
1472 p_floor_log2
= &tmp0
;
1473 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
))
1475 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
))
1478 lp_build_log2_approx(&bld
->base
, src0
, p_exp
, p_floor_log2
, p_log2
);
1480 /* dst.x = floor(lg2(abs(src.x))) */
1481 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
))
1482 dst0
[CHAN_X
] = tmp0
;
1483 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1484 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
)) {
1485 dst0
[CHAN_Y
] = lp_build_div( &bld
->base
, src0
, tmp1
);
1487 /* dst.z = lg2(abs(src.x)) */
1488 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
))
1489 dst0
[CHAN_Z
] = tmp2
;
1492 if (IS_DST0_CHANNEL_ENABLED( inst
, CHAN_W
)) {
1493 dst0
[CHAN_W
] = bld
->base
.one
;
1497 case TGSI_OPCODE_MUL
:
1498 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1499 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1500 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1501 dst0
[chan_index
] = lp_build_mul(&bld
->base
, src0
, src1
);
1505 case TGSI_OPCODE_ADD
:
1506 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1507 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1508 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1509 dst0
[chan_index
] = lp_build_add(&bld
->base
, src0
, src1
);
1513 case TGSI_OPCODE_DP3
:
1514 /* TGSI_OPCODE_DOT3 */
1515 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1516 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_X
);
1517 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp1
);
1518 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Y
);
1519 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Y
);
1520 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
);
1521 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1522 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Z
);
1523 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Z
);
1524 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
);
1525 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1526 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1527 dst0
[chan_index
] = tmp0
;
1531 case TGSI_OPCODE_DP4
:
1532 /* TGSI_OPCODE_DOT4 */
1533 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1534 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_X
);
1535 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp1
);
1536 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Y
);
1537 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Y
);
1538 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
);
1539 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1540 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Z
);
1541 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Z
);
1542 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
);
1543 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1544 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_W
);
1545 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_W
);
1546 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
);
1547 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1548 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1549 dst0
[chan_index
] = tmp0
;
1553 case TGSI_OPCODE_DST
:
1554 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) {
1555 dst0
[CHAN_X
] = bld
->base
.one
;
1557 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) {
1558 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_Y
);
1559 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_Y
);
1560 dst0
[CHAN_Y
] = lp_build_mul( &bld
->base
, tmp0
, tmp1
);
1562 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
) {
1563 dst0
[CHAN_Z
] = emit_fetch( bld
, inst
, 0, CHAN_Z
);
1565 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_W
) {
1566 dst0
[CHAN_W
] = emit_fetch( bld
, inst
, 1, CHAN_W
);
1570 case TGSI_OPCODE_MIN
:
1571 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1572 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1573 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1574 dst0
[chan_index
] = lp_build_min( &bld
->base
, src0
, src1
);
1578 case TGSI_OPCODE_MAX
:
1579 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1580 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1581 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1582 dst0
[chan_index
] = lp_build_max( &bld
->base
, src0
, src1
);
1586 case TGSI_OPCODE_SLT
:
1587 /* TGSI_OPCODE_SETLT */
1588 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1589 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1590 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1591 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_LESS
, src0
, src1
);
1592 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
1596 case TGSI_OPCODE_SGE
:
1597 /* TGSI_OPCODE_SETGE */
1598 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1599 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1600 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1601 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_GEQUAL
, src0
, src1
);
1602 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
1606 case TGSI_OPCODE_MAD
:
1607 /* TGSI_OPCODE_MADD */
1608 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1609 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1610 tmp1
= emit_fetch( bld
, inst
, 1, chan_index
);
1611 tmp2
= emit_fetch( bld
, inst
, 2, chan_index
);
1612 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp1
);
1613 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp2
);
1614 dst0
[chan_index
] = tmp0
;
1618 case TGSI_OPCODE_SUB
:
1619 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1620 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1621 tmp1
= emit_fetch( bld
, inst
, 1, chan_index
);
1622 dst0
[chan_index
] = lp_build_sub( &bld
->base
, tmp0
, tmp1
);
1626 case TGSI_OPCODE_LRP
:
1627 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1628 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1629 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1630 src2
= emit_fetch( bld
, inst
, 2, chan_index
);
1631 tmp0
= lp_build_sub( &bld
->base
, src1
, src2
);
1632 tmp0
= lp_build_mul( &bld
->base
, src0
, tmp0
);
1633 dst0
[chan_index
] = lp_build_add( &bld
->base
, tmp0
, src2
);
1637 case TGSI_OPCODE_CND
:
1638 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1639 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1640 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1641 src2
= emit_fetch( bld
, inst
, 2, chan_index
);
1642 tmp1
= lp_build_const_vec(bld
->base
.type
, 0.5);
1643 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_GREATER
, src2
, tmp1
);
1644 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, src0
, src1
);
1648 case TGSI_OPCODE_DP2A
:
1649 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
); /* xmm0 = src[0].x */
1650 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_X
); /* xmm1 = src[1].x */
1651 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp1
); /* xmm0 = xmm0 * xmm1 */
1652 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Y
); /* xmm1 = src[0].y */
1653 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Y
); /* xmm2 = src[1].y */
1654 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
); /* xmm1 = xmm1 * xmm2 */
1655 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
); /* xmm0 = xmm0 + xmm1 */
1656 tmp1
= emit_fetch( bld
, inst
, 2, CHAN_X
); /* xmm1 = src[2].x */
1657 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
); /* xmm0 = xmm0 + xmm1 */
1658 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1659 dst0
[chan_index
] = tmp0
; /* dest[ch] = xmm0 */
1663 case TGSI_OPCODE_FRC
:
1664 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1665 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1666 tmp0
= lp_build_floor(&bld
->base
, src0
);
1667 tmp0
= lp_build_sub(&bld
->base
, src0
, tmp0
);
1668 dst0
[chan_index
] = tmp0
;
1672 case TGSI_OPCODE_CLAMP
:
1673 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1674 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1675 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1676 src2
= emit_fetch( bld
, inst
, 2, chan_index
);
1677 tmp0
= lp_build_max(&bld
->base
, tmp0
, src1
);
1678 tmp0
= lp_build_min(&bld
->base
, tmp0
, src2
);
1679 dst0
[chan_index
] = tmp0
;
1683 case TGSI_OPCODE_FLR
:
1684 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1685 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1686 dst0
[chan_index
] = lp_build_floor(&bld
->base
, tmp0
);
1690 case TGSI_OPCODE_ROUND
:
1691 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1692 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1693 dst0
[chan_index
] = lp_build_round(&bld
->base
, tmp0
);
1697 case TGSI_OPCODE_EX2
: {
1698 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1699 tmp0
= lp_build_exp2( &bld
->base
, tmp0
);
1700 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1701 dst0
[chan_index
] = tmp0
;
1706 case TGSI_OPCODE_LG2
:
1707 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1708 tmp0
= lp_build_log2( &bld
->base
, tmp0
);
1709 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1710 dst0
[chan_index
] = tmp0
;
1714 case TGSI_OPCODE_POW
:
1715 src0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1716 src1
= emit_fetch( bld
, inst
, 1, CHAN_X
);
1717 res
= lp_build_pow( &bld
->base
, src0
, src1
);
1718 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1719 dst0
[chan_index
] = res
;
1723 case TGSI_OPCODE_XPD
:
1724 if( IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) ||
1725 IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) ) {
1726 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_Z
);
1727 tmp3
= emit_fetch( bld
, inst
, 0, CHAN_Z
);
1729 if( IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) ||
1730 IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
) ) {
1731 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_Y
);
1732 tmp4
= emit_fetch( bld
, inst
, 1, CHAN_Y
);
1734 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) {
1736 tmp2
= lp_build_mul( &bld
->base
, tmp2
, tmp1
);
1738 tmp5
= lp_build_mul( &bld
->base
, tmp5
, tmp4
);
1739 tmp2
= lp_build_sub( &bld
->base
, tmp2
, tmp5
);
1740 dst0
[CHAN_X
] = tmp2
;
1742 if( IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) ||
1743 IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
) ) {
1744 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_X
);
1745 tmp5
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1747 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) {
1748 tmp3
= lp_build_mul( &bld
->base
, tmp3
, tmp2
);
1749 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp5
);
1750 tmp3
= lp_build_sub( &bld
->base
, tmp3
, tmp1
);
1751 dst0
[CHAN_Y
] = tmp3
;
1753 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
) {
1754 tmp5
= lp_build_mul( &bld
->base
, tmp5
, tmp4
);
1755 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp2
);
1756 tmp5
= lp_build_sub( &bld
->base
, tmp5
, tmp0
);
1757 dst0
[CHAN_Z
] = tmp5
;
1759 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_W
) {
1760 dst0
[CHAN_W
] = bld
->base
.one
;
1764 case TGSI_OPCODE_ABS
:
1765 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1766 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1767 dst0
[chan_index
] = lp_build_abs( &bld
->base
, tmp0
);
1771 case TGSI_OPCODE_RCC
:
1776 case TGSI_OPCODE_DPH
:
1777 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1778 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_X
);
1779 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp1
);
1780 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Y
);
1781 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Y
);
1782 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
);
1783 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1784 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Z
);
1785 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Z
);
1786 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
);
1787 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1788 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_W
);
1789 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
1790 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1791 dst0
[chan_index
] = tmp0
;
1795 case TGSI_OPCODE_COS
:
1796 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1797 tmp0
= lp_build_cos( &bld
->base
, tmp0
);
1798 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1799 dst0
[chan_index
] = tmp0
;
1803 case TGSI_OPCODE_DDX
:
1804 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1805 emit_fetch_deriv( bld
, inst
, 0, chan_index
, NULL
, &dst0
[chan_index
], NULL
);
1809 case TGSI_OPCODE_DDY
:
1810 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1811 emit_fetch_deriv( bld
, inst
, 0, chan_index
, NULL
, NULL
, &dst0
[chan_index
]);
1815 case TGSI_OPCODE_KILP
:
1816 /* predicated kill */
1817 emit_kilp( bld
, inst
, (*pc
)-1 );
1820 case TGSI_OPCODE_KIL
:
1821 /* conditional kill */
1822 emit_kil( bld
, inst
, (*pc
)-1 );
1825 case TGSI_OPCODE_PK2H
:
1829 case TGSI_OPCODE_PK2US
:
1833 case TGSI_OPCODE_PK4B
:
1837 case TGSI_OPCODE_PK4UB
:
1841 case TGSI_OPCODE_RFL
:
1845 case TGSI_OPCODE_SEQ
:
1846 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1847 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1848 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1849 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_EQUAL
, src0
, src1
);
1850 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
1854 case TGSI_OPCODE_SFL
:
1855 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1856 dst0
[chan_index
] = bld
->base
.zero
;
1860 case TGSI_OPCODE_SGT
:
1861 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1862 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1863 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1864 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_GREATER
, src0
, src1
);
1865 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
1869 case TGSI_OPCODE_SIN
:
1870 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1871 tmp0
= lp_build_sin( &bld
->base
, tmp0
);
1872 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1873 dst0
[chan_index
] = tmp0
;
1877 case TGSI_OPCODE_SLE
:
1878 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1879 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1880 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1881 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_LEQUAL
, src0
, src1
);
1882 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
1886 case TGSI_OPCODE_SNE
:
1887 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1888 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1889 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1890 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_NOTEQUAL
, src0
, src1
);
1891 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, bld
->base
.one
, bld
->base
.zero
);
1895 case TGSI_OPCODE_STR
:
1896 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1897 dst0
[chan_index
] = bld
->base
.one
;
1901 case TGSI_OPCODE_TEX
:
1902 emit_tex( bld
, inst
, LP_BLD_TEX_MODIFIER_NONE
, dst0
);
1905 case TGSI_OPCODE_TXD
:
1906 emit_tex( bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
, dst0
);
1909 case TGSI_OPCODE_UP2H
:
1915 case TGSI_OPCODE_UP2US
:
1921 case TGSI_OPCODE_UP4B
:
1927 case TGSI_OPCODE_UP4UB
:
1933 case TGSI_OPCODE_X2D
:
1939 case TGSI_OPCODE_ARA
:
1945 case TGSI_OPCODE_ARR
:
1946 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1947 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1948 tmp0
= lp_build_round(&bld
->base
, tmp0
);
1949 dst0
[chan_index
] = tmp0
;
1953 case TGSI_OPCODE_BRA
:
1959 case TGSI_OPCODE_CAL
:
1960 lp_exec_mask_call(&bld
->exec_mask
,
1966 case TGSI_OPCODE_RET
:
1967 lp_exec_mask_ret(&bld
->exec_mask
, pc
);
1970 case TGSI_OPCODE_END
:
1973 emit_dump_temps(bld
);
1978 case TGSI_OPCODE_SSG
:
1979 /* TGSI_OPCODE_SGN */
1980 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1981 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
1982 dst0
[chan_index
] = lp_build_sgn( &bld
->base
, tmp0
);
1986 case TGSI_OPCODE_CMP
:
1987 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
1988 src0
= emit_fetch( bld
, inst
, 0, chan_index
);
1989 src1
= emit_fetch( bld
, inst
, 1, chan_index
);
1990 src2
= emit_fetch( bld
, inst
, 2, chan_index
);
1991 tmp0
= lp_build_cmp( &bld
->base
, PIPE_FUNC_LESS
, src0
, bld
->base
.zero
);
1992 dst0
[chan_index
] = lp_build_select( &bld
->base
, tmp0
, src1
, src2
);
1996 case TGSI_OPCODE_SCS
:
1997 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_X
) {
1998 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
1999 dst0
[CHAN_X
] = lp_build_cos( &bld
->base
, tmp0
);
2001 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Y
) {
2002 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
);
2003 dst0
[CHAN_Y
] = lp_build_sin( &bld
->base
, tmp0
);
2005 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_Z
) {
2006 dst0
[CHAN_Z
] = bld
->base
.zero
;
2008 IF_IS_DST0_CHANNEL_ENABLED( inst
, CHAN_W
) {
2009 dst0
[CHAN_W
] = bld
->base
.one
;
2013 case TGSI_OPCODE_TXB
:
2014 emit_tex( bld
, inst
, LP_BLD_TEX_MODIFIER_LOD_BIAS
, dst0
);
2017 case TGSI_OPCODE_NRM
:
2019 case TGSI_OPCODE_NRM4
:
2020 /* 3 or 4-component normalization */
2022 uint dims
= (inst
->Instruction
.Opcode
== TGSI_OPCODE_NRM
) ? 3 : 4;
2024 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_X
) ||
2025 IS_DST0_CHANNEL_ENABLED(inst
, CHAN_Y
) ||
2026 IS_DST0_CHANNEL_ENABLED(inst
, CHAN_Z
) ||
2027 (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_W
) && dims
== 4)) {
2029 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2032 /* xmm0 = src.x * src.x */
2033 tmp0
= emit_fetch(bld
, inst
, 0, CHAN_X
);
2034 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_X
)) {
2037 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp0
);
2040 /* xmm0 = xmm0 + src.y * src.y */
2041 tmp1
= emit_fetch(bld
, inst
, 0, CHAN_Y
);
2042 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_Y
)) {
2045 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp1
);
2046 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
2049 /* xmm0 = xmm0 + src.z * src.z */
2050 tmp1
= emit_fetch(bld
, inst
, 0, CHAN_Z
);
2051 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_Z
)) {
2054 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp1
);
2055 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
2059 /* xmm0 = xmm0 + src.w * src.w */
2060 tmp1
= emit_fetch(bld
, inst
, 0, CHAN_W
);
2061 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_W
)) {
2064 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp1
);
2065 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
);
2068 /* xmm1 = 1 / sqrt(xmm0) */
2069 tmp1
= lp_build_rsqrt( &bld
->base
, tmp0
);
2071 /* dst.x = xmm1 * src.x */
2072 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_X
)) {
2073 dst0
[CHAN_X
] = lp_build_mul( &bld
->base
, tmp4
, tmp1
);
2076 /* dst.y = xmm1 * src.y */
2077 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_Y
)) {
2078 dst0
[CHAN_Y
] = lp_build_mul( &bld
->base
, tmp5
, tmp1
);
2081 /* dst.z = xmm1 * src.z */
2082 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_Z
)) {
2083 dst0
[CHAN_Z
] = lp_build_mul( &bld
->base
, tmp6
, tmp1
);
2086 /* dst.w = xmm1 * src.w */
2087 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_X
) && dims
== 4) {
2088 dst0
[CHAN_W
] = lp_build_mul( &bld
->base
, tmp7
, tmp1
);
2093 if (IS_DST0_CHANNEL_ENABLED(inst
, CHAN_W
) && dims
== 3) {
2094 dst0
[CHAN_W
] = bld
->base
.one
;
2099 case TGSI_OPCODE_DIV
:
2105 case TGSI_OPCODE_DP2
:
2106 tmp0
= emit_fetch( bld
, inst
, 0, CHAN_X
); /* xmm0 = src[0].x */
2107 tmp1
= emit_fetch( bld
, inst
, 1, CHAN_X
); /* xmm1 = src[1].x */
2108 tmp0
= lp_build_mul( &bld
->base
, tmp0
, tmp1
); /* xmm0 = xmm0 * xmm1 */
2109 tmp1
= emit_fetch( bld
, inst
, 0, CHAN_Y
); /* xmm1 = src[0].y */
2110 tmp2
= emit_fetch( bld
, inst
, 1, CHAN_Y
); /* xmm2 = src[1].y */
2111 tmp1
= lp_build_mul( &bld
->base
, tmp1
, tmp2
); /* xmm1 = xmm1 * xmm2 */
2112 tmp0
= lp_build_add( &bld
->base
, tmp0
, tmp1
); /* xmm0 = xmm0 + xmm1 */
2113 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
2114 dst0
[chan_index
] = tmp0
; /* dest[ch] = xmm0 */
2118 case TGSI_OPCODE_TXL
:
2119 emit_tex( bld
, inst
, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD
, dst0
);
2122 case TGSI_OPCODE_TXP
:
2123 emit_tex( bld
, inst
, LP_BLD_TEX_MODIFIER_PROJECTED
, dst0
);
2126 case TGSI_OPCODE_BRK
:
2127 lp_exec_break(&bld
->exec_mask
);
2130 case TGSI_OPCODE_IF
:
2131 tmp0
= emit_fetch(bld
, inst
, 0, CHAN_X
);
2132 tmp0
= lp_build_cmp(&bld
->base
, PIPE_FUNC_NOTEQUAL
,
2133 tmp0
, bld
->base
.zero
);
2134 lp_exec_mask_cond_push(&bld
->exec_mask
, tmp0
);
2137 case TGSI_OPCODE_BGNLOOP
:
2138 lp_exec_bgnloop(&bld
->exec_mask
);
2141 case TGSI_OPCODE_BGNSUB
:
2142 lp_exec_mask_bgnsub(&bld
->exec_mask
);
2145 case TGSI_OPCODE_ELSE
:
2146 lp_exec_mask_cond_invert(&bld
->exec_mask
);
2149 case TGSI_OPCODE_ENDIF
:
2150 lp_exec_mask_cond_pop(&bld
->exec_mask
);
2153 case TGSI_OPCODE_ENDLOOP
:
2154 lp_exec_endloop(&bld
->exec_mask
);
2157 case TGSI_OPCODE_ENDSUB
:
2158 lp_exec_mask_endsub(&bld
->exec_mask
, pc
);
2161 case TGSI_OPCODE_PUSHA
:
2167 case TGSI_OPCODE_POPA
:
2173 case TGSI_OPCODE_CEIL
:
2174 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
2175 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
2176 dst0
[chan_index
] = lp_build_ceil(&bld
->base
, tmp0
);
2180 case TGSI_OPCODE_I2F
:
2186 case TGSI_OPCODE_NOT
:
2192 case TGSI_OPCODE_TRUNC
:
2193 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
2194 tmp0
= emit_fetch( bld
, inst
, 0, chan_index
);
2195 dst0
[chan_index
] = lp_build_trunc(&bld
->base
, tmp0
);
2199 case TGSI_OPCODE_SHL
:
2205 case TGSI_OPCODE_ISHR
:
2211 case TGSI_OPCODE_AND
:
2217 case TGSI_OPCODE_OR
:
2223 case TGSI_OPCODE_MOD
:
2229 case TGSI_OPCODE_XOR
:
2235 case TGSI_OPCODE_SAD
:
2241 case TGSI_OPCODE_TXF
:
2247 case TGSI_OPCODE_TXQ
:
2253 case TGSI_OPCODE_CONT
:
2254 lp_exec_continue(&bld
->exec_mask
);
2257 case TGSI_OPCODE_EMIT
:
2261 case TGSI_OPCODE_ENDPRIM
:
2265 case TGSI_OPCODE_NOP
:
2273 LLVMValueRef pred
[NUM_CHANNELS
];
2275 emit_fetch_predicate( bld
, inst
, pred
);
2277 FOR_EACH_DST0_ENABLED_CHANNEL( inst
, chan_index
) {
2278 emit_store( bld
, inst
, 0, chan_index
, pred
[chan_index
], dst0
[chan_index
]);
2287 lp_build_tgsi_soa(LLVMBuilderRef builder
,
2288 const struct tgsi_token
*tokens
,
2289 struct lp_type type
,
2290 struct lp_build_mask_context
*mask
,
2291 LLVMValueRef consts_ptr
,
2292 const LLVMValueRef
*pos
,
2293 const LLVMValueRef (*inputs
)[NUM_CHANNELS
],
2294 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
2295 struct lp_build_sampler_soa
*sampler
,
2296 const struct tgsi_shader_info
*info
)
2298 struct lp_build_tgsi_soa_context bld
;
2299 struct tgsi_parse_context parse
;
2300 uint num_immediates
= 0;
2301 uint num_instructions
= 0;
2305 struct lp_type res_type
;
2307 assert(type
.length
<= LP_MAX_VECTOR_LENGTH
);
2308 memset(&res_type
, 0, sizeof res_type
);
2309 res_type
.width
= type
.width
;
2310 res_type
.length
= type
.length
;
2313 /* Setup build context */
2314 memset(&bld
, 0, sizeof bld
);
2315 lp_build_context_init(&bld
.base
, builder
, type
);
2316 lp_build_context_init(&bld
.uint_bld
, builder
, lp_uint_type(type
));
2317 lp_build_context_init(&bld
.elem_bld
, builder
, lp_elem_type(type
));
2320 bld
.inputs
= inputs
;
2321 bld
.outputs
= outputs
;
2322 bld
.consts_ptr
= consts_ptr
;
2323 bld
.sampler
= sampler
;
2325 bld
.indirect_files
= info
->indirect_files
;
2326 bld
.instructions
= (struct tgsi_full_instruction
*)
2327 MALLOC( LP_MAX_INSTRUCTIONS
* sizeof(struct tgsi_full_instruction
) );
2328 bld
.max_instructions
= LP_MAX_INSTRUCTIONS
;
2330 if (!bld
.instructions
) {
2334 lp_exec_mask_init(&bld
.exec_mask
, &bld
.base
);
2336 if (bld
.indirect_files
& (1 << TGSI_FILE_TEMPORARY
)) {
2337 LLVMValueRef array_size
= LLVMConstInt(LLVMInt32Type(),
2338 info
->file_max
[TGSI_FILE_TEMPORARY
]*4 + 4, 0);
2339 bld
.temps_array
= lp_build_array_alloca(bld
.base
.builder
,
2340 bld
.base
.vec_type
, array_size
,
2344 if (bld
.indirect_files
& (1 << TGSI_FILE_OUTPUT
)) {
2345 LLVMValueRef array_size
= LLVMConstInt(LLVMInt32Type(),
2346 info
->file_max
[TGSI_FILE_OUTPUT
]*4 + 4, 0);
2347 bld
.outputs_array
= lp_build_array_alloca(bld
.base
.builder
,
2348 bld
.base
.vec_type
, array_size
,
2352 /* If we have indirect addressing in inputs we need to copy them into
2353 * our alloca array to be able to iterate over them */
2354 if (bld
.indirect_files
& (1 << TGSI_FILE_INPUT
)) {
2355 unsigned index
, chan
;
2356 LLVMTypeRef vec_type
= bld
.base
.vec_type
;
2357 LLVMValueRef array_size
= LLVMConstInt(LLVMInt32Type(),
2358 info
->file_max
[TGSI_FILE_INPUT
]*4 + 4, 0);
2359 bld
.inputs_array
= lp_build_array_alloca(bld
.base
.builder
,
2360 vec_type
, array_size
,
2363 assert(info
->num_inputs
<= info
->file_max
[TGSI_FILE_INPUT
] + 1);
2365 for (index
= 0; index
< info
->num_inputs
; ++index
) {
2366 for (chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
2367 LLVMValueRef lindex
= lp_build_const_int32(index
* 4 + chan
);
2368 LLVMValueRef input_ptr
=
2369 LLVMBuildGEP(bld
.base
.builder
, bld
.inputs_array
,
2371 LLVMValueRef value
= bld
.inputs
[index
][chan
];
2373 LLVMBuildStore(bld
.base
.builder
, value
, input_ptr
);
2378 tgsi_parse_init( &parse
, tokens
);
2380 while( !tgsi_parse_end_of_tokens( &parse
) ) {
2381 tgsi_parse_token( &parse
);
2383 switch( parse
.FullToken
.Token
.Type
) {
2384 case TGSI_TOKEN_TYPE_DECLARATION
:
2385 /* Inputs already interpolated */
2386 emit_declaration( &bld
, &parse
.FullToken
.FullDeclaration
);
2389 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2391 /* save expanded instruction */
2392 if (num_instructions
== bld
.max_instructions
) {
2393 struct tgsi_full_instruction
*instructions
;
2394 instructions
= REALLOC(bld
.instructions
,
2395 bld
.max_instructions
2396 * sizeof(struct tgsi_full_instruction
),
2397 (bld
.max_instructions
+ LP_MAX_INSTRUCTIONS
)
2398 * sizeof(struct tgsi_full_instruction
));
2399 if (!instructions
) {
2402 bld
.instructions
= instructions
;
2403 bld
.max_instructions
+= LP_MAX_INSTRUCTIONS
;
2406 memcpy(bld
.instructions
+ num_instructions
,
2407 &parse
.FullToken
.FullInstruction
,
2408 sizeof(bld
.instructions
[0]));
2415 case TGSI_TOKEN_TYPE_IMMEDIATE
:
2416 /* simply copy the immediate values into the next immediates[] slot */
2418 const uint size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
2420 assert(num_immediates
< LP_MAX_TGSI_IMMEDIATES
);
2421 for( i
= 0; i
< size
; ++i
)
2422 bld
.immediates
[num_immediates
][i
] =
2423 lp_build_const_vec(type
, parse
.FullToken
.FullImmediate
.u
[i
].Float
);
2424 for( i
= size
; i
< 4; ++i
)
2425 bld
.immediates
[num_immediates
][i
] = bld
.base
.undef
;
2430 case TGSI_TOKEN_TYPE_PROPERTY
:
2439 struct tgsi_full_instruction
*instr
= bld
.instructions
+ pc
;
2440 const struct tgsi_opcode_info
*opcode_info
=
2441 tgsi_get_opcode_info(instr
->Instruction
.Opcode
);
2442 if (!emit_instruction( &bld
, instr
, opcode_info
, &pc
))
2443 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2444 opcode_info
->mnemonic
);
2447 /* If we have indirect addressing in outputs we need to copy our alloca array
2448 * to the output slots specified by the caller */
2449 if (bld
.indirect_files
& (1 << TGSI_FILE_OUTPUT
)) {
2450 unsigned index
, chan
;
2451 assert(info
->num_outputs
<= info
->file_max
[TGSI_FILE_OUTPUT
] + 1);
2452 for (index
= 0; index
< info
->num_outputs
; ++index
) {
2453 for (chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
2454 bld
.outputs
[index
][chan
] = get_output_ptr(&bld
, index
, chan
);
2460 LLVMBasicBlockRef block
= LLVMGetInsertBlock(builder
);
2461 LLVMValueRef function
= LLVMGetBasicBlockParent(block
);
2462 debug_printf("11111111111111111111111111111 \n");
2463 tgsi_dump(tokens
, 0);
2464 lp_debug_dump_value(function
);
2465 debug_printf("2222222222222222222222222222 \n");
2467 tgsi_parse_free( &parse
);
2470 LLVMModuleRef module
= LLVMGetGlobalParent(
2471 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld
.base
.builder
)));
2472 LLVMDumpModule(module
);
2476 FREE( bld
.instructions
);