gallivm: use our util_snprintf
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "cso_cache/cso_hash.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "util/u_string.h"
46 #include "tgsi/tgsi_dump.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_exec.h"
51 #include "tgsi/tgsi_scan.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_logic.h"
56 #include "lp_bld_swizzle.h"
57 #include "lp_bld_flow.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61
62
/* Iterate CHAN over every channel of a TGSI register. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when channel CHAN is enabled in the writemask of dst register 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst register 0's writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* TGSI register channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Pixel positions within the 2x2 quad processed per SoA vector. */
#define QUAD_TOP_LEFT 0
#define QUAD_TOP_RIGHT 1
#define QUAD_BOTTOM_LEFT 2
#define QUAD_BOTTOM_RIGHT 3
86
/**
 * Runtime execution-mask state used to emulate per-channel control flow
 * (IF/ELSE, loops, subroutine RET) over SoA vectors.  Each mask is an
 * integer vector with ~0 in active channels and 0 in inactive ones.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any cond/loop/ret mask is currently in effect */
   boolean has_mask;

   /* integer vector type shared by all masks below */
   LLVMTypeRef int_vec_type;

   /* saved condition masks for nested IF/ELSE */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* current innermost loop header block */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   /* alloca holding break_mask so it survives loop back-edges */
   LLVMValueRef break_var;
   /* channels that have not executed RET inside the current subroutine */
   LLVMValueRef ret_mask;
   /* saved per-loop state for nested loops */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* combination of the masks above; see lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
113
/**
 * Translation context for one TGSI shader being converted to LLVM IR
 * in SoA form.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;

   /* generates texture-fetch code; may be NULL (see emit_tex) */
   const struct lp_build_sampler_soa *sampler;

   /* per-register, per-channel SSA values (used when no spilling needed) */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* we allocate an array of allocas if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;

   /* spilled register arrays, allocated in emit_preamble() when the
    * shader makes subroutine calls (see has_function_calls) */
   LLVMValueRef inputs_array;
   LLVMValueRef outputs_array;
   LLVMValueRef immediates_array;
   LLVMValueRef addrs_array;
   LLVMValueRef preds_array;

   boolean has_indirect_addressing;
   boolean has_function_calls;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* maps TGSI subroutine labels to LLVM functions (lp_get_function) */
   struct cso_hash *func_hash;
   unsigned instno;
   LLVMBasicBlockRef main_block;

   /* highest declared register index per file, used to size the arrays */
   struct {
      struct tgsi_declaration_range inputs;
      struct tgsi_declaration_range outputs;
      struct tgsi_declaration_range temps;
      struct tgsi_declaration_range addrs;
      struct tgsi_declaration_range preds;
   } full_range;
};
161
/* Quad swizzle: replicate the left column of the 2x2 quad. */
static const unsigned char
swizzle_left[4] = {
   QUAD_TOP_LEFT,    QUAD_TOP_LEFT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
};

/* Quad swizzle: replicate the right column of the 2x2 quad. */
static const unsigned char
swizzle_right[4] = {
   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
};

/* Quad swizzle: replicate the top row of the 2x2 quad. */
static const unsigned char
swizzle_top[4] = {
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT
};

/* Quad swizzle: replicate the bottom row of the 2x2 quad. */
static const unsigned char
swizzle_bottom[4] = {
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
};
185
186
187 static LLVMValueRef
188 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
189 unsigned index,
190 unsigned chan,
191 boolean is_indirect,
192 LLVMValueRef addr)
193 {
194 assert(chan < 4);
195 if (!bld->has_indirect_addressing &&
196 !bld->has_function_calls) {
197 return bld->temps[index][chan];
198 } else {
199 LLVMValueRef lindex =
200 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
201 if (is_indirect)
202 lindex = lp_build_add(&bld->base, lindex, addr);
203 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "temp_ptr");
204 }
205 }
206
207 static LLVMValueRef
208 get_input_ptr(struct lp_build_tgsi_soa_context *bld,
209 unsigned index,
210 unsigned swizzle,
211 boolean is_indirect,
212 LLVMValueRef addr)
213 {
214 LLVMValueRef lindex =
215 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
216 assert(bld->has_function_calls);
217 if (is_indirect)
218 lindex = lp_build_add(&bld->base, lindex, addr);
219 return LLVMBuildGEP(bld->base.builder, bld->inputs_array, &lindex, 1, "input_ptr");
220 }
221
222 static LLVMValueRef
223 get_output_ptr(struct lp_build_tgsi_soa_context *bld,
224 unsigned index,
225 unsigned swizzle,
226 boolean is_indirect,
227 LLVMValueRef addr)
228 {
229 if (!bld->has_function_calls) {
230 return bld->outputs[index][swizzle];
231 } else {
232 LLVMValueRef lindex =
233 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
234 if (is_indirect)
235 lindex = lp_build_add(&bld->base, lindex, addr);
236 return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "output_ptr");
237 }
238 }
239
240 static LLVMValueRef
241 get_immediates_ptr(struct lp_build_tgsi_soa_context *bld,
242 unsigned index,
243 unsigned swizzle,
244 boolean is_indirect,
245 LLVMValueRef addr)
246 {
247 LLVMValueRef lindex =
248 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
249 assert(bld->has_function_calls);
250 if (is_indirect)
251 lindex = lp_build_add(&bld->base, lindex, addr);
252 return LLVMBuildGEP(bld->base.builder, bld->immediates_array, &lindex, 1, "immediates_ptr");
253 }
254
255 static LLVMValueRef
256 get_addr_ptr(struct lp_build_tgsi_soa_context *bld,
257 unsigned index,
258 unsigned swizzle,
259 boolean is_indirect,
260 LLVMValueRef addr)
261 {
262 if (!bld->has_function_calls) {
263 return bld->addr[index][swizzle];
264 } else {
265 LLVMValueRef lindex =
266 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
267 if (is_indirect)
268 lindex = lp_build_add(&bld->base, lindex, addr);
269 return LLVMBuildGEP(bld->base.builder, bld->addrs_array, &lindex, 1, "addrs_ptr");
270 }
271 }
272
273 static LLVMValueRef
274 get_preds_ptr(struct lp_build_tgsi_soa_context *bld,
275 unsigned index,
276 unsigned swizzle,
277 boolean is_indirect,
278 LLVMValueRef addr)
279 {
280 if (!bld->has_function_calls) {
281 return bld->preds[index][swizzle];
282 } else {
283 LLVMValueRef lindex =
284 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
285 if (is_indirect)
286 lindex = lp_build_add(&bld->base, lindex, addr);
287 return LLVMBuildGEP(bld->base.builder, bld->preds_array, &lindex, 1, "preds_ptr");
288 }
289 }
290
291 static LLVMValueRef lp_get_function(struct lp_build_tgsi_soa_context *bld,
292 int label)
293 {
294 struct cso_hash *hash = bld->func_hash;
295 struct cso_hash_iter iter = cso_hash_find(hash, label);
296 LLVMValueRef func;
297 LLVMModuleRef module = LLVMGetGlobalParent(
298 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld->base.builder)));
299
300 if (cso_hash_iter_is_null(iter)) {
301 LLVMTypeRef func_type;
302 LLVMTypeRef arg_types[7];
303 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
304 int i;
305 char func_name[32];
306
307 util_snprintf(func_name, 31, "func%d", label);
308
309 arg_types[0] = LLVMPointerType(vec_type, 0); /* inputs */
310 arg_types[1] = LLVMPointerType(vec_type, 0); /* outpus */
311 arg_types[2] = LLVMTypeOf(bld->consts_ptr); /* consts */
312 arg_types[3] = LLVMPointerType(vec_type, 0); /* temps */
313 arg_types[4] = LLVMPointerType(vec_type, 0); /* addrs */
314 arg_types[5] = LLVMPointerType(vec_type, 0); /* preds */
315 arg_types[6] = LLVMPointerType(vec_type, 0); /* immediates */
316
317 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
318
319 func = LLVMAddFunction(module, func_name, func_type);
320 LLVMSetFunctionCallConv(func, LLVMCCallConv);
321 for(i = 0; i < Elements(arg_types); ++i)
322 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
323 LLVMAddAttribute(LLVMGetParam(func, i), LLVMNoAliasAttribute);
324
325 cso_hash_insert(hash, label, func);
326 } else {
327 func = (LLVMValueRef)cso_hash_iter_data(iter);
328 }
329
330 return func;
331 }
332
333 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
334 {
335 mask->bld = bld;
336 mask->has_mask = FALSE;
337 mask->cond_stack_size = 0;
338 mask->loop_stack_size = 0;
339 mask->ret_mask = 0;
340
341 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
342 mask->break_mask = mask->cont_mask = mask->cond_mask =
343 LLVMConstAllOnes(mask->int_vec_type);
344 }
345
346 static void lp_exec_mask_update(struct lp_exec_mask *mask)
347 {
348 if (mask->loop_stack_size) {
349 /*for loops we need to update the entire mask at runtime */
350 LLVMValueRef tmp;
351 assert(mask->break_mask);
352 tmp = LLVMBuildAnd(mask->bld->builder,
353 mask->cont_mask,
354 mask->break_mask,
355 "maskcb");
356 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
357 mask->cond_mask,
358 tmp,
359 "maskfull");
360 } else
361 mask->exec_mask = mask->cond_mask;
362
363 if (mask->ret_mask) {
364 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
365 mask->exec_mask,
366 mask->ret_mask,
367 "retmask");
368 }
369
370
371 mask->has_mask = (mask->cond_stack_size > 0 ||
372 mask->loop_stack_size > 0 ||
373 mask->ret_mask);
374 }
375
376 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
377 LLVMValueRef val)
378 {
379 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
380 if (mask->cond_stack_size == 0) {
381 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
382 }
383 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
384 assert(LLVMTypeOf(val) == mask->int_vec_type);
385 mask->cond_mask = val;
386
387 lp_exec_mask_update(mask);
388 }
389
390 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
391 {
392 LLVMValueRef prev_mask;
393 LLVMValueRef inv_mask;
394
395 assert(mask->cond_stack_size);
396 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
397 if (mask->cond_stack_size == 1) {
398 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
399 }
400
401 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
402
403 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
404 inv_mask,
405 prev_mask, "");
406 lp_exec_mask_update(mask);
407 }
408
409 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
410 {
411 assert(mask->cond_stack_size);
412 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
413 lp_exec_mask_update(mask);
414 }
415
416
/* Start of a subroutine body: all channels execute and none has
 * returned yet.
 */
static void lp_exec_bgnsub(struct lp_exec_mask *mask)
{
   mask->exec_mask = LLVMConstAllOnes(mask->int_vec_type);
   mask->ret_mask = LLVMConstAllOnes(mask->int_vec_type);
}
422
/*
 * BGNLOOP: push the enclosing loop's state, start a fresh break mask
 * kept in an alloca (so it survives the loop back-edge), and branch
 * into a new "bgnloop" block that serves as the loop header.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* outermost loop: all loop state must still be at its defaults */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* the break mask lives in memory so it persists across iterations */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload the (possibly updated) break mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
451
452 static void lp_exec_break(struct lp_exec_mask *mask)
453 {
454 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
455 mask->exec_mask,
456 "break");
457
458 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
459 mask->break_mask,
460 exec_mask, "break_full");
461
462 lp_exec_mask_update(mask);
463 }
464
465
466 static void lp_exec_ret(struct lp_exec_mask *mask)
467 {
468 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
469 mask->exec_mask,
470 "ret");
471
472 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
473 mask->ret_mask,
474 exec_mask, "ret_full");
475
476 lp_exec_mask_update(mask);
477 }
478
479
480 static void lp_exec_continue(struct lp_exec_mask *mask)
481 {
482 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
483 mask->exec_mask,
484 "");
485
486 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
487 mask->cont_mask,
488 exec_mask, "");
489
490 lp_exec_mask_update(mask);
491 }
492
493
/*
 * ENDLOOP: emit the loop back-edge.  The loop repeats while any channel
 * is still active; then the enclosing loop's state is popped.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* one wide integer covering the whole mask vector, for a single compare */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. some channel is still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop again while active, otherwise fall through to endloop */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the enclosing loop's state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
539
540 /* stores val into an address pointed to by dst.
541 * mask->exec_mask is used to figure out which bits of val
542 * should be stored into the address
543 * (0 means don't store this bit, 1 means do store).
544 */
545 static void lp_exec_mask_store(struct lp_exec_mask *mask,
546 LLVMValueRef pred,
547 LLVMValueRef val,
548 LLVMValueRef dst)
549 {
550 /* Mix the predicate and execution mask */
551 if (mask->has_mask) {
552 if (pred) {
553 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
554 } else {
555 pred = mask->exec_mask;
556 }
557 }
558
559 if (pred) {
560 LLVMValueRef real_val, dst_val;
561
562 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
563 real_val = lp_build_select(mask->bld,
564 pred,
565 val, dst_val);
566
567 LLVMBuildStore(mask->bld->builder, real_val, dst);
568 } else
569 LLVMBuildStore(mask->bld->builder, val, dst);
570 }
571
/*
 * Stack-allocate an array of `size` registers x 4 channel vectors.
 * NOTE(review): one extra channel quad is allocated ("+ 4") beyond
 * size*4 — presumably slack for out-of-range indirect addressing;
 * confirm against callers.
 */
static LLVMValueRef
emit_vec_alloca_array(struct lp_build_tgsi_soa_context *bld,
                      LLVMTypeRef vec_type,
                      int size)
{
   LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
                                   size * 4 + 4, 0);
   return lp_build_array_alloca(bld->base.builder,
                                vec_type, val, "");
}
582
/*
 * Emit the allocas and copies needed before translating the shader body.
 *
 * With indirect addressing or subroutine calls, registers cannot stay in
 * per-channel values: they are spilled into stack arrays so they can be
 * indexed dynamically and passed to subroutine functions.  Immediates and
 * inputs already have values, so those are copied into their arrays here.
 */
static void
emit_preamble(struct lp_build_tgsi_soa_context *bld, uint num_immediates)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   /* temps */
   if (bld->has_indirect_addressing ||
       bld->has_function_calls) {
      int size = bld->full_range.temps.Last + 1;
      bld->temps_array = emit_vec_alloca_array(bld, vec_type, size);
   }
   if (bld->has_function_calls) {
      int i;
      int size = bld->full_range.outputs.Last + 1;
      bld->outputs_array = emit_vec_alloca_array(bld, vec_type, size);

      /* we need to insert the created immediates into our array */
      size = num_immediates;
      if (size > 0)
         bld->immediates_array = emit_vec_alloca_array(bld, vec_type, size);
      for (i = 0; i < size; ++i) {
         int j;
         for (j = 0; j < NUM_CHANNELS; ++j) {
            LLVMValueRef ptr = get_immediates_ptr(bld,
                                                  i, j,
                                                  FALSE, 0);
            LLVMBuildStore(bld->base.builder,
                           bld->immediates[i][j],
                           ptr);
         }
      }

      size = bld->full_range.addrs.Last + 1;
      bld->addrs_array = emit_vec_alloca_array(bld, vec_type, size);

      size = bld->full_range.preds.Last + 1;
      bld->preds_array = emit_vec_alloca_array(bld, vec_type, size);

      /*inputs also need to be copied*/
      size = bld->full_range.inputs.Last + 1;
      bld->inputs_array = emit_vec_alloca_array(bld, vec_type, size);
      for (i = bld->full_range.inputs.First; i < size; ++i) {
         int j;
         for (j = 0; j < NUM_CHANNELS; ++j) {
            LLVMValueRef ptr = get_input_ptr(bld,
                                             i, j,
                                             FALSE, 0);
            LLVMBuildStore(bld->base.builder,
                           bld->inputs[i][j],
                           ptr);
         }
      }
   }
}
637
638 static void
639 emit_end(struct lp_build_tgsi_soa_context *bld)
640 {
641 int i, j;
642
643 bld->main_block = LLVMGetInsertBlock(bld->base.builder);
644
645 /* if we had function calls we want to propagate the
646 * outputs from the array to the values */
647 if (bld->has_function_calls) {
648 int size = bld->full_range.outputs.Last + 1;
649 for (i = bld->full_range.outputs.First; i < size; ++i) {
650 for (j = 0; j < NUM_CHANNELS; ++j) {
651 LLVMValueRef ptr = get_output_ptr(bld, i, j,
652 FALSE, 0);
653 bld->outputs[i][j] = ptr;
654 }
655 }
656 }
657 }
658
/*
 * BGNSUB: start emitting a subroutine.  Looks up/creates the function
 * for the current instruction number, repoints all register arrays at
 * the function's parameters, and positions the builder in a fresh
 * entry block.  Note this overwrites the bld->*_array/consts_ptr
 * pointers that referred to the main function's storage.
 */
static void
emit_bgnsub(struct lp_build_tgsi_soa_context *bld)
{
   LLVMValueRef func = lp_get_function(bld, bld->instno);
   LLVMBasicBlockRef block;
   LLVMValueRef inputs_ptr, outputs_ptr,
      consts_ptr, temps_ptr, addrs_ptr, preds_ptr, imms_ptr;

   /* parameter order fixed by lp_get_function's signature */
   inputs_ptr  = LLVMGetParam(func, 0);
   outputs_ptr = LLVMGetParam(func, 1);
   consts_ptr  = LLVMGetParam(func, 2);
   temps_ptr   = LLVMGetParam(func, 3);
   addrs_ptr   = LLVMGetParam(func, 4);
   preds_ptr   = LLVMGetParam(func, 5);
   imms_ptr    = LLVMGetParam(func, 6);

   lp_build_name(inputs_ptr, "inputs");
   lp_build_name(outputs_ptr, "outputs");
   lp_build_name(consts_ptr, "consts");
   lp_build_name(temps_ptr, "temps");
   lp_build_name(addrs_ptr, "addrs");
   lp_build_name(preds_ptr, "preds");
   lp_build_name(imms_ptr, "immediates");

   bld->inputs_array = inputs_ptr;
   bld->outputs_array = outputs_ptr;
   bld->consts_ptr = consts_ptr;
   bld->temps_array = temps_ptr;
   bld->addrs_array = addrs_ptr;
   bld->preds_array = preds_ptr;
   bld->immediates_array = imms_ptr;

   block = LLVMAppendBasicBlock(func, "entry");
   LLVMPositionBuilderAtEnd(bld->base.builder, block);

   lp_exec_bgnsub(&bld->exec_mask);
}
696
/* ENDSUB: terminate the subroutine with a void return. */
static void
emit_endsub(struct lp_build_tgsi_soa_context *bld)
{
   LLVMBuildRetVoid(bld->base.builder);
}
702
703 static LLVMValueRef
704 emit_ddx(struct lp_build_tgsi_soa_context *bld,
705 LLVMValueRef src)
706 {
707 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
708 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
709 return lp_build_sub(&bld->base, src_right, src_left);
710 }
711
712
713 static LLVMValueRef
714 emit_ddy(struct lp_build_tgsi_soa_context *bld,
715 LLVMValueRef src)
716 {
717 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
718 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
719 return lp_build_sub(&bld->base, src_top, src_bottom);
720 }
721
/**
 * Register fetch.
 *
 * Fetch one channel of source operand `index` of `inst`, applying the
 * operand's swizzle, indirect addressing (CONSTANT/TEMPORARY files),
 * and the abs/negate sign modifiers.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[index];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
      /* NOTE(review): this `swizzle` intentionally shadows the outer one;
       * it selects the channel of the ADDR register, not of the operand. */
      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
      addr = LLVMBuildLoad(bld->base.builder,
                           get_addr_ptr(bld, reg->Indirect.Index, swizzle, FALSE, 0),
                           "");
      /* for indexing we want integers */
      addr = LLVMBuildFPToSI(bld->base.builder, addr,
                             int_vec_type, "");
      /* only the first element of the address vector is used for all channels */
      addr = LLVMBuildExtractElement(bld->base.builder,
                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
                                     "");
      /* registers are laid out as 4-channel groups: scale the offset */
      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
                                           reg->Register.Index*4 + swizzle, 0);
         LLVMValueRef scalar, scalar_ptr;

         if (reg->Register.Indirect) {
            /*lp_build_printf(bld->base.builder,
              "\taddr = %d\n", addr);*/
            index = lp_build_add(&bld->base, index, addr);
         }
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         /* constants are stored as scalars: splat across the SoA vector */
         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      /* with subroutine calls immediates live in a spilled array */
      if (bld->has_function_calls) {
         LLVMValueRef ptr = get_immediates_ptr(bld,
                                               reg->Register.Index,
                                               swizzle,
                                               FALSE, 0);
         res = LLVMBuildLoad(bld->base.builder, ptr, "");
      } else
         res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      /* likewise, inputs are copied to an array when calls are present */
      if (bld->has_function_calls) {
         LLVMValueRef ptr = get_input_ptr(bld,
                                          reg->Register.Index,
                                          swizzle,
                                          FALSE, 0);
         res = LLVMBuildLoad(bld->base.builder, ptr, "");
      } else
         res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              swizzle,
                                              reg->Register.Indirect,
                                              addr);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if(!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the operand's sign modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_TOGGLE:
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
840
841
842 /**
843 * Register fetch with derivatives.
844 */
845 static void
846 emit_fetch_deriv(
847 struct lp_build_tgsi_soa_context *bld,
848 const struct tgsi_full_instruction *inst,
849 unsigned index,
850 const unsigned chan_index,
851 LLVMValueRef *res,
852 LLVMValueRef *ddx,
853 LLVMValueRef *ddy)
854 {
855 LLVMValueRef src;
856
857 src = emit_fetch(bld, inst, index, chan_index);
858
859 if(res)
860 *res = src;
861
862 /* TODO: use interpolation coeffs for inputs */
863
864 if(ddx)
865 *ddx = emit_ddx(bld, src);
866
867 if(ddy)
868 *ddy = emit_ddy(bld, src);
869 }
870
871
/**
 * Predicate.
 *
 * Build the per-channel predicate masks for `inst`.  If the instruction
 * has no predicate, all four entries of `pred` are set to NULL.
 * Otherwise each swizzled predicate-register channel is loaded, compared
 * against zero to form an integer mask, and optionally negated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* unpredicated instruction: no masks */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         LLVMValueRef pred_ptr = get_preds_ptr(bld, index, swizzle,
                                               FALSE, 0);
         value = LLVMBuildLoad(bld->base.builder,
                               pred_ptr, "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         /* reuse the mask already built for this source channel */
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
939
940
941 /**
942 * Register store.
943 */
944 static void
945 emit_store(
946 struct lp_build_tgsi_soa_context *bld,
947 const struct tgsi_full_instruction *inst,
948 unsigned index,
949 unsigned chan_index,
950 LLVMValueRef pred,
951 LLVMValueRef value)
952 {
953 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
954 LLVMValueRef addr = NULL;
955
956 switch( inst->Instruction.Saturate ) {
957 case TGSI_SAT_NONE:
958 break;
959
960 case TGSI_SAT_ZERO_ONE:
961 value = lp_build_max(&bld->base, value, bld->base.zero);
962 value = lp_build_min(&bld->base, value, bld->base.one);
963 break;
964
965 case TGSI_SAT_MINUS_PLUS_ONE:
966 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
967 value = lp_build_min(&bld->base, value, bld->base.one);
968 break;
969
970 default:
971 assert(0);
972 }
973
974 if (reg->Register.Indirect) {
975 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
976 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
977 addr = LLVMBuildLoad(bld->base.builder,
978 get_addr_ptr(bld, reg->Indirect.Index, swizzle, FALSE, 0),
979 "");
980 /* for indexing we want integers */
981 addr = LLVMBuildFPToSI(bld->base.builder, addr,
982 int_vec_type, "");
983 addr = LLVMBuildExtractElement(bld->base.builder,
984 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
985 "");
986 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
987 }
988
989 switch( reg->Register.File ) {
990 case TGSI_FILE_OUTPUT:
991 lp_exec_mask_store(&bld->exec_mask, pred, value,
992 get_output_ptr(bld, reg->Register.Index, chan_index,
993 FALSE, 0));
994 break;
995
996 case TGSI_FILE_TEMPORARY: {
997 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
998 chan_index,
999 reg->Register.Indirect,
1000 addr);
1001 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
1002 break;
1003 }
1004
1005 case TGSI_FILE_ADDRESS:
1006 lp_exec_mask_store(&bld->exec_mask, pred, value,
1007 get_addr_ptr(bld, reg->Indirect.Index, chan_index,
1008 FALSE, 0));
1009 break;
1010
1011 case TGSI_FILE_PREDICATE:
1012 lp_exec_mask_store(&bld->exec_mask, pred, value,
1013 get_preds_ptr(bld, index, chan_index,
1014 FALSE, 0));
1015 break;
1016
1017 default:
1018 assert( 0 );
1019 }
1020 }
1021
1022
1023 /**
1024 * High-level instruction translators.
1025 */
1026
/* What extra datum (beyond the coordinates) a texture opcode supplies. */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* divide coords by the w component */
   TEX_MODIFIER_LOD_BIAS,       /* per-quad LOD bias in src0.w */
   TEX_MODIFIER_EXPLICIT_LOD,   /* explicit LOD in src0.w */
   TEX_MODIFIER_EXPLICIT_DERIV  /* explicit ddx/ddy in src1/src2 */
};
1034
/*
 * Translate a texture-sampling instruction.
 *
 * Gathers coordinates (projected if requested), LOD bias or explicit
 * LOD, and derivatives (explicit or computed from the quad), then
 * delegates the actual fetch to the sampler generator.  Writes the four
 * resulting channels into texel[0..3].
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator: return undefs rather than crash */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinate components consumed by the target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias and explicit LOD both come from src0.w */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projective texturing: scale coords by 1/w */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   /* derivatives: explicit from src1/src2, or computed from the quad;
    * the sampler unit index comes from the following operand */
   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_ddx( bld, coords[i] );
         ddy[i] = emit_ddy( bld, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
1131
1132
1133 /**
1134 * Kill fragment if any of the src register values are negative.
1135 */
1136 static void
1137 emit_kil(
1138 struct lp_build_tgsi_soa_context *bld,
1139 const struct tgsi_full_instruction *inst )
1140 {
1141 const struct tgsi_full_src_register *reg = &inst->Src[0];
1142 LLVMValueRef terms[NUM_CHANNELS];
1143 LLVMValueRef mask;
1144 unsigned chan_index;
1145
1146 memset(&terms, 0, sizeof terms);
1147
1148 FOR_EACH_CHANNEL( chan_index ) {
1149 unsigned swizzle;
1150
1151 /* Unswizzle channel */
1152 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1153
1154 /* Check if the component has not been already tested. */
1155 assert(swizzle < NUM_CHANNELS);
1156 if( !terms[swizzle] )
1157 /* TODO: change the comparison operator instead of setting the sign */
1158 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
1159 }
1160
1161 mask = NULL;
1162 FOR_EACH_CHANNEL( chan_index ) {
1163 if(terms[chan_index]) {
1164 LLVMValueRef chan_mask;
1165
1166 /*
1167 * If term < 0 then mask = 0 else mask = ~0.
1168 */
1169 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1170
1171 if(mask)
1172 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
1173 else
1174 mask = chan_mask;
1175 }
1176 }
1177
1178 if(mask)
1179 lp_build_mask_update(bld->mask, mask);
1180 }
1181
1182
1183 /**
1184 * Predicated fragment kill.
1185 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1186 * The only predication is the execution mask which will apply if
1187 * we're inside a loop or conditional.
1188 */
1189 static void
1190 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1191 const struct tgsi_full_instruction *inst)
1192 {
1193 LLVMValueRef mask;
1194
1195 /* For those channels which are "alive", disable fragment shader
1196 * execution.
1197 */
1198 if (bld->exec_mask.has_mask) {
1199 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1200 }
1201 else {
1202 mask = bld->base.zero;
1203 }
1204
1205 lp_build_mask_update(bld->mask, mask);
1206 }
1207
1208 static void
1209 range_check(struct tgsi_declaration_range *range,
1210 unsigned new_first, unsigned new_last)
1211 {
1212 range->First = MIN2(range->First, new_first);
1213 range->Last = MAX2(range->Last, new_last);
1214 }
1215
1216 static void
1217 emit_declaration(
1218 struct lp_build_tgsi_soa_context *bld,
1219 const struct tgsi_full_declaration *decl)
1220 {
1221 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
1222
1223 unsigned first = decl->Range.First;
1224 unsigned last = decl->Range.Last;
1225 unsigned idx, i;
1226
1227 for (idx = first; idx <= last; ++idx) {
1228 switch (decl->Declaration.File) {
1229 case TGSI_FILE_TEMPORARY:
1230 assert(idx < LP_MAX_TGSI_TEMPS);
1231 range_check(&bld->full_range.temps,
1232 first, last);
1233 if (!bld->has_indirect_addressing &&
1234 !bld->has_function_calls) {
1235 for (i = 0; i < NUM_CHANNELS; i++)
1236 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1237 vec_type, "");
1238 }
1239 break;
1240
1241 case TGSI_FILE_OUTPUT:
1242 range_check(&bld->full_range.outputs,
1243 first, last);
1244 if (!bld->has_function_calls) {
1245 for (i = 0; i < NUM_CHANNELS; i++)
1246 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1247 vec_type, "");
1248 }
1249 break;
1250
1251 case TGSI_FILE_ADDRESS:
1252 assert(idx < LP_MAX_TGSI_ADDRS);
1253 range_check(&bld->full_range.addrs,
1254 first, last);
1255 if (!bld->has_function_calls) {
1256 for (i = 0; i < NUM_CHANNELS; i++)
1257 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1258 vec_type, "");
1259 }
1260 break;
1261
1262 case TGSI_FILE_PREDICATE:
1263 assert(idx < LP_MAX_TGSI_PREDS);
1264 range_check(&bld->full_range.preds,
1265 first, last);
1266 if (!bld->has_function_calls) {
1267 for (i = 0; i < NUM_CHANNELS; i++)
1268 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1269 vec_type, "");
1270 }
1271 break;
1272
1273 case TGSI_FILE_INPUT:
1274 range_check(&bld->full_range.inputs,
1275 first, last);
1276 break;
1277
1278 default:
1279 /* don't need to declare other vars */
1280 break;
1281 }
1282 }
1283 }
1284
1285
1286 /**
1287 * Emit LLVM for one TGSI instruction.
1288 * \param return TRUE for success, FALSE otherwise
1289 */
1290 static boolean
1291 emit_instruction(
1292 struct lp_build_tgsi_soa_context *bld,
1293 const struct tgsi_full_instruction *inst,
1294 const struct tgsi_opcode_info *info)
1295 {
1296 unsigned chan_index;
1297 LLVMValueRef src0, src1, src2;
1298 LLVMValueRef tmp0, tmp1, tmp2;
1299 LLVMValueRef tmp3 = NULL;
1300 LLVMValueRef tmp4 = NULL;
1301 LLVMValueRef tmp5 = NULL;
1302 LLVMValueRef tmp6 = NULL;
1303 LLVMValueRef tmp7 = NULL;
1304 LLVMValueRef res;
1305 LLVMValueRef dst0[NUM_CHANNELS];
1306
1307 /*
1308 * Stores and write masks are handled in a general fashion after the long
1309 * instruction opcode switch statement.
1310 *
1311 * Although not stricitly necessary, we avoid generating instructions for
1312 * channels which won't be stored, in cases where's that easy. For some
1313 * complex instructions, like texture sampling, it is more convenient to
1314 * assume a full writemask and then let LLVM optimization passes eliminate
1315 * redundant code.
1316 */
1317
1318 assert(info->num_dst <= 1);
1319 if (info->num_dst) {
1320 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1321 dst0[chan_index] = bld->base.undef;
1322 }
1323 }
1324
1325 switch (inst->Instruction.Opcode) {
1326 case TGSI_OPCODE_ARL:
1327 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1328 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1329 tmp0 = lp_build_floor(&bld->base, tmp0);
1330 dst0[chan_index] = tmp0;
1331 }
1332 break;
1333
1334 case TGSI_OPCODE_MOV:
1335 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1336 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1337 }
1338 break;
1339
1340 case TGSI_OPCODE_LIT:
1341 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1342 dst0[CHAN_X] = bld->base.one;
1343 }
1344 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1345 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1346 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1347 }
1348 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1349 /* XMM[1] = SrcReg[0].yyyy */
1350 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1351 /* XMM[1] = max(XMM[1], 0) */
1352 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1353 /* XMM[2] = SrcReg[0].wwww */
1354 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1355 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1356 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1357 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1358 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1359 }
1360 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1361 dst0[CHAN_W] = bld->base.one;
1362 }
1363 break;
1364
1365 case TGSI_OPCODE_RCP:
1366 /* TGSI_OPCODE_RECIP */
1367 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1368 res = lp_build_rcp(&bld->base, src0);
1369 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1370 dst0[chan_index] = res;
1371 }
1372 break;
1373
1374 case TGSI_OPCODE_RSQ:
1375 /* TGSI_OPCODE_RECIPSQRT */
1376 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1377 src0 = lp_build_abs(&bld->base, src0);
1378 res = lp_build_rsqrt(&bld->base, src0);
1379 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1380 dst0[chan_index] = res;
1381 }
1382 break;
1383
1384 case TGSI_OPCODE_EXP:
1385 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1386 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1387 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1388 LLVMValueRef *p_exp2_int_part = NULL;
1389 LLVMValueRef *p_frac_part = NULL;
1390 LLVMValueRef *p_exp2 = NULL;
1391
1392 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1393
1394 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1395 p_exp2_int_part = &tmp0;
1396 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1397 p_frac_part = &tmp1;
1398 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1399 p_exp2 = &tmp2;
1400
1401 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1402
1403 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1404 dst0[CHAN_X] = tmp0;
1405 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1406 dst0[CHAN_Y] = tmp1;
1407 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1408 dst0[CHAN_Z] = tmp2;
1409 }
1410 /* dst.w = 1.0 */
1411 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1412 dst0[CHAN_W] = bld->base.one;
1413 }
1414 break;
1415
1416 case TGSI_OPCODE_LOG:
1417 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1418 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1419 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1420 LLVMValueRef *p_floor_log2 = NULL;
1421 LLVMValueRef *p_exp = NULL;
1422 LLVMValueRef *p_log2 = NULL;
1423
1424 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1425 src0 = lp_build_abs( &bld->base, src0 );
1426
1427 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1428 p_floor_log2 = &tmp0;
1429 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1430 p_exp = &tmp1;
1431 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1432 p_log2 = &tmp2;
1433
1434 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1435
1436 /* dst.x = floor(lg2(abs(src.x))) */
1437 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1438 dst0[CHAN_X] = tmp0;
1439 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1440 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1441 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1442 }
1443 /* dst.z = lg2(abs(src.x)) */
1444 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1445 dst0[CHAN_Z] = tmp2;
1446 }
1447 /* dst.w = 1.0 */
1448 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1449 dst0[CHAN_W] = bld->base.one;
1450 }
1451 break;
1452
1453 case TGSI_OPCODE_MUL:
1454 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1455 src0 = emit_fetch( bld, inst, 0, chan_index );
1456 src1 = emit_fetch( bld, inst, 1, chan_index );
1457 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1458 }
1459 break;
1460
1461 case TGSI_OPCODE_ADD:
1462 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1463 src0 = emit_fetch( bld, inst, 0, chan_index );
1464 src1 = emit_fetch( bld, inst, 1, chan_index );
1465 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1466 }
1467 break;
1468
1469 case TGSI_OPCODE_DP3:
1470 /* TGSI_OPCODE_DOT3 */
1471 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1472 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1473 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1474 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1475 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1476 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1477 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1478 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1479 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1480 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1481 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1482 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1483 dst0[chan_index] = tmp0;
1484 }
1485 break;
1486
1487 case TGSI_OPCODE_DP4:
1488 /* TGSI_OPCODE_DOT4 */
1489 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1490 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1491 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1492 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1493 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1494 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1495 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1496 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1497 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1498 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1499 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1500 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1501 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1502 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1503 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1504 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1505 dst0[chan_index] = tmp0;
1506 }
1507 break;
1508
1509 case TGSI_OPCODE_DST:
1510 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1511 dst0[CHAN_X] = bld->base.one;
1512 }
1513 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1514 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1515 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1516 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1517 }
1518 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1519 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1520 }
1521 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1522 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1523 }
1524 break;
1525
1526 case TGSI_OPCODE_MIN:
1527 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1528 src0 = emit_fetch( bld, inst, 0, chan_index );
1529 src1 = emit_fetch( bld, inst, 1, chan_index );
1530 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1531 }
1532 break;
1533
1534 case TGSI_OPCODE_MAX:
1535 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1536 src0 = emit_fetch( bld, inst, 0, chan_index );
1537 src1 = emit_fetch( bld, inst, 1, chan_index );
1538 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1539 }
1540 break;
1541
1542 case TGSI_OPCODE_SLT:
1543 /* TGSI_OPCODE_SETLT */
1544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1545 src0 = emit_fetch( bld, inst, 0, chan_index );
1546 src1 = emit_fetch( bld, inst, 1, chan_index );
1547 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1548 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1549 }
1550 break;
1551
1552 case TGSI_OPCODE_SGE:
1553 /* TGSI_OPCODE_SETGE */
1554 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1555 src0 = emit_fetch( bld, inst, 0, chan_index );
1556 src1 = emit_fetch( bld, inst, 1, chan_index );
1557 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1558 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1559 }
1560 break;
1561
1562 case TGSI_OPCODE_MAD:
1563 /* TGSI_OPCODE_MADD */
1564 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1565 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1566 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1567 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1568 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1569 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1570 dst0[chan_index] = tmp0;
1571 }
1572 break;
1573
1574 case TGSI_OPCODE_SUB:
1575 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1576 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1577 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1578 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1579 }
1580 break;
1581
1582 case TGSI_OPCODE_LRP:
1583 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1584 src0 = emit_fetch( bld, inst, 0, chan_index );
1585 src1 = emit_fetch( bld, inst, 1, chan_index );
1586 src2 = emit_fetch( bld, inst, 2, chan_index );
1587 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1588 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1589 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1590 }
1591 break;
1592
1593 case TGSI_OPCODE_CND:
1594 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1595 src0 = emit_fetch( bld, inst, 0, chan_index );
1596 src1 = emit_fetch( bld, inst, 1, chan_index );
1597 src2 = emit_fetch( bld, inst, 2, chan_index );
1598 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1599 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1600 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1601 }
1602 break;
1603
1604 case TGSI_OPCODE_DP2A:
1605 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1606 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1607 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1608 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1609 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1610 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1611 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1612 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1613 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1614 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1615 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1616 }
1617 break;
1618
1619 case TGSI_OPCODE_FRC:
1620 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1621 src0 = emit_fetch( bld, inst, 0, chan_index );
1622 tmp0 = lp_build_floor(&bld->base, src0);
1623 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1624 dst0[chan_index] = tmp0;
1625 }
1626 break;
1627
1628 case TGSI_OPCODE_CLAMP:
1629 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1630 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1631 src1 = emit_fetch( bld, inst, 1, chan_index );
1632 src2 = emit_fetch( bld, inst, 2, chan_index );
1633 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1634 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1635 dst0[chan_index] = tmp0;
1636 }
1637 break;
1638
1639 case TGSI_OPCODE_FLR:
1640 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1641 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1642 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1643 }
1644 break;
1645
1646 case TGSI_OPCODE_ROUND:
1647 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1648 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1649 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1650 }
1651 break;
1652
1653 case TGSI_OPCODE_EX2: {
1654 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1655 tmp0 = lp_build_exp2( &bld->base, tmp0);
1656 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1657 dst0[chan_index] = tmp0;
1658 }
1659 break;
1660 }
1661
1662 case TGSI_OPCODE_LG2:
1663 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1664 tmp0 = lp_build_log2( &bld->base, tmp0);
1665 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1666 dst0[chan_index] = tmp0;
1667 }
1668 break;
1669
1670 case TGSI_OPCODE_POW:
1671 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1672 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1673 res = lp_build_pow( &bld->base, src0, src1 );
1674 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1675 dst0[chan_index] = res;
1676 }
1677 break;
1678
1679 case TGSI_OPCODE_XPD:
1680 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1681 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1682 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1683 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1684 }
1685 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1686 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1687 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1688 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1689 }
1690 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1691 tmp2 = tmp0;
1692 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1693 tmp5 = tmp3;
1694 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1695 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1696 dst0[CHAN_X] = tmp2;
1697 }
1698 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1699 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1700 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1701 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1702 }
1703 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1704 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1705 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1706 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1707 dst0[CHAN_Y] = tmp3;
1708 }
1709 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1710 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1711 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1712 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1713 dst0[CHAN_Z] = tmp5;
1714 }
1715 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1716 dst0[CHAN_W] = bld->base.one;
1717 }
1718 break;
1719
1720 case TGSI_OPCODE_ABS:
1721 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1722 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1723 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1724 }
1725 break;
1726
1727 case TGSI_OPCODE_RCC:
1728 /* deprecated? */
1729 assert(0);
1730 return FALSE;
1731
1732 case TGSI_OPCODE_DPH:
1733 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1734 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1735 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1736 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1737 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1738 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1739 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1740 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1741 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1742 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1743 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1744 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1745 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1746 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1747 dst0[chan_index] = tmp0;
1748 }
1749 break;
1750
1751 case TGSI_OPCODE_COS:
1752 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1753 tmp0 = lp_build_cos( &bld->base, tmp0 );
1754 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1755 dst0[chan_index] = tmp0;
1756 }
1757 break;
1758
1759 case TGSI_OPCODE_DDX:
1760 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1761 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1762 }
1763 break;
1764
1765 case TGSI_OPCODE_DDY:
1766 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1767 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1768 }
1769 break;
1770
1771 case TGSI_OPCODE_KILP:
1772 /* predicated kill */
1773 emit_kilp( bld, inst );
1774 break;
1775
1776 case TGSI_OPCODE_KIL:
1777 /* conditional kill */
1778 emit_kil( bld, inst );
1779 break;
1780
1781 case TGSI_OPCODE_PK2H:
1782 return FALSE;
1783 break;
1784
1785 case TGSI_OPCODE_PK2US:
1786 return FALSE;
1787 break;
1788
1789 case TGSI_OPCODE_PK4B:
1790 return FALSE;
1791 break;
1792
1793 case TGSI_OPCODE_PK4UB:
1794 return FALSE;
1795 break;
1796
1797 case TGSI_OPCODE_RFL:
1798 return FALSE;
1799 break;
1800
1801 case TGSI_OPCODE_SEQ:
1802 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1803 src0 = emit_fetch( bld, inst, 0, chan_index );
1804 src1 = emit_fetch( bld, inst, 1, chan_index );
1805 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1806 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1807 }
1808 break;
1809
1810 case TGSI_OPCODE_SFL:
1811 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1812 dst0[chan_index] = bld->base.zero;
1813 }
1814 break;
1815
1816 case TGSI_OPCODE_SGT:
1817 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1818 src0 = emit_fetch( bld, inst, 0, chan_index );
1819 src1 = emit_fetch( bld, inst, 1, chan_index );
1820 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1821 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1822 }
1823 break;
1824
1825 case TGSI_OPCODE_SIN:
1826 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1827 tmp0 = lp_build_sin( &bld->base, tmp0 );
1828 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1829 dst0[chan_index] = tmp0;
1830 }
1831 break;
1832
1833 case TGSI_OPCODE_SLE:
1834 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1835 src0 = emit_fetch( bld, inst, 0, chan_index );
1836 src1 = emit_fetch( bld, inst, 1, chan_index );
1837 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1838 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1839 }
1840 break;
1841
1842 case TGSI_OPCODE_SNE:
1843 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1844 src0 = emit_fetch( bld, inst, 0, chan_index );
1845 src1 = emit_fetch( bld, inst, 1, chan_index );
1846 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1847 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1848 }
1849 break;
1850
1851 case TGSI_OPCODE_STR:
1852 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1853 dst0[chan_index] = bld->base.one;
1854 }
1855 break;
1856
1857 case TGSI_OPCODE_TEX:
1858 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1859 break;
1860
1861 case TGSI_OPCODE_TXD:
1862 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1863 break;
1864
1865 case TGSI_OPCODE_UP2H:
1866 /* deprecated */
1867 assert (0);
1868 return FALSE;
1869 break;
1870
1871 case TGSI_OPCODE_UP2US:
1872 /* deprecated */
1873 assert(0);
1874 return FALSE;
1875 break;
1876
1877 case TGSI_OPCODE_UP4B:
1878 /* deprecated */
1879 assert(0);
1880 return FALSE;
1881 break;
1882
1883 case TGSI_OPCODE_UP4UB:
1884 /* deprecated */
1885 assert(0);
1886 return FALSE;
1887 break;
1888
1889 case TGSI_OPCODE_X2D:
1890 /* deprecated? */
1891 assert(0);
1892 return FALSE;
1893 break;
1894
1895 case TGSI_OPCODE_ARA:
1896 /* deprecated */
1897 assert(0);
1898 return FALSE;
1899 break;
1900
1901 case TGSI_OPCODE_ARR:
1902 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1903 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1904 tmp0 = lp_build_round(&bld->base, tmp0);
1905 dst0[chan_index] = tmp0;
1906 }
1907 break;
1908
1909 case TGSI_OPCODE_BRA:
1910 /* deprecated */
1911 assert(0);
1912 return FALSE;
1913 break;
1914
1915 case TGSI_OPCODE_CAL: {
1916 LLVMValueRef args[7];
1917 LLVMValueRef func = lp_get_function(bld, inst->Label.Label);
1918 args[0] = bld->inputs_array;
1919 args[1] = bld->outputs_array;
1920 args[2] = bld->consts_ptr;
1921 args[3] = bld->temps_array;
1922 args[4] = bld->addrs_array;
1923 args[5] = bld->preds_array;
1924 args[6] = bld->immediates_array;
1925 LLVMBuildCall(bld->base.builder, func, args, Elements(args), "");
1926 }
1927 break;
1928
1929 case TGSI_OPCODE_RET:
1930 lp_exec_ret(&bld->exec_mask);
1931 break;
1932
1933 case TGSI_OPCODE_END:
1934 emit_end(bld);
1935 break;
1936
1937 case TGSI_OPCODE_SSG:
1938 /* TGSI_OPCODE_SGN */
1939 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1940 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1941 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1942 }
1943 break;
1944
1945 case TGSI_OPCODE_CMP:
1946 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1947 src0 = emit_fetch( bld, inst, 0, chan_index );
1948 src1 = emit_fetch( bld, inst, 1, chan_index );
1949 src2 = emit_fetch( bld, inst, 2, chan_index );
1950 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1951 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1952 }
1953 break;
1954
1955 case TGSI_OPCODE_SCS:
1956 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1957 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1958 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1959 }
1960 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1961 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1962 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1963 }
1964 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1965 dst0[CHAN_Z] = bld->base.zero;
1966 }
1967 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1968 dst0[CHAN_W] = bld->base.one;
1969 }
1970 break;
1971
1972 case TGSI_OPCODE_TXB:
1973 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1974 break;
1975
1976 case TGSI_OPCODE_NRM:
1977 /* fall-through */
1978 case TGSI_OPCODE_NRM4:
1979 /* 3 or 4-component normalization */
1980 {
1981 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1982
1983 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1984 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1985 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1986 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1987
1988 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1989
1990 /* xmm4 = src.x */
1991 /* xmm0 = src.x * src.x */
1992 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1993 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1994 tmp4 = tmp0;
1995 }
1996 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1997
1998 /* xmm5 = src.y */
1999 /* xmm0 = xmm0 + src.y * src.y */
2000 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2001 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2002 tmp5 = tmp1;
2003 }
2004 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2005 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2006
2007 /* xmm6 = src.z */
2008 /* xmm0 = xmm0 + src.z * src.z */
2009 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2010 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2011 tmp6 = tmp1;
2012 }
2013 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2014 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2015
2016 if (dims == 4) {
2017 /* xmm7 = src.w */
2018 /* xmm0 = xmm0 + src.w * src.w */
2019 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2020 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2021 tmp7 = tmp1;
2022 }
2023 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2024 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2025 }
2026
2027 /* xmm1 = 1 / sqrt(xmm0) */
2028 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2029
2030 /* dst.x = xmm1 * src.x */
2031 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2032 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2033 }
2034
2035 /* dst.y = xmm1 * src.y */
2036 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2037 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2038 }
2039
2040 /* dst.z = xmm1 * src.z */
2041 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2042 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2043 }
2044
2045 /* dst.w = xmm1 * src.w */
2046 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2047 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2048 }
2049 }
2050
2051 /* dst.w = 1.0 */
2052 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2053 dst0[CHAN_W] = bld->base.one;
2054 }
2055 }
2056 break;
2057
2058 case TGSI_OPCODE_DIV:
2059 /* deprecated */
2060 assert( 0 );
2061 return FALSE;
2062 break;
2063
2064 case TGSI_OPCODE_DP2:
2065 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
2066 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
2067 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2068 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
2069 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
2070 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2071 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2072 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2073 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2074 }
2075 break;
2076
2077 case TGSI_OPCODE_TXL:
2078 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2079 break;
2080
2081 case TGSI_OPCODE_TXP:
2082 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
2083 break;
2084
2085 case TGSI_OPCODE_BRK:
2086 lp_exec_break(&bld->exec_mask);
2087 break;
2088
2089 case TGSI_OPCODE_IF:
2090 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2091 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2092 tmp0, bld->base.zero);
2093 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2094 break;
2095
2096 case TGSI_OPCODE_BGNLOOP:
2097 lp_exec_bgnloop(&bld->exec_mask);
2098 break;
2099
2100 case TGSI_OPCODE_BGNSUB:
2101 emit_bgnsub(bld);
2102 break;
2103
2104 case TGSI_OPCODE_ELSE:
2105 lp_exec_mask_cond_invert(&bld->exec_mask);
2106 break;
2107
2108 case TGSI_OPCODE_ENDIF:
2109 lp_exec_mask_cond_pop(&bld->exec_mask);
2110 break;
2111
2112 case TGSI_OPCODE_ENDLOOP:
2113 lp_exec_endloop(&bld->exec_mask);
2114 break;
2115
2116 case TGSI_OPCODE_ENDSUB:
2117 emit_endsub(bld);
2118 break;
2119
2120 case TGSI_OPCODE_PUSHA:
2121 /* deprecated? */
2122 assert(0);
2123 return FALSE;
2124 break;
2125
2126 case TGSI_OPCODE_POPA:
2127 /* deprecated? */
2128 assert(0);
2129 return FALSE;
2130 break;
2131
2132 case TGSI_OPCODE_CEIL:
2133 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2134 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2135 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2136 }
2137 break;
2138
2139 case TGSI_OPCODE_I2F:
2140 /* deprecated? */
2141 assert(0);
2142 return FALSE;
2143 break;
2144
2145 case TGSI_OPCODE_NOT:
2146 /* deprecated? */
2147 assert(0);
2148 return FALSE;
2149 break;
2150
2151 case TGSI_OPCODE_TRUNC:
2152 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2153 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2154 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2155 }
2156 break;
2157
2158 case TGSI_OPCODE_SHL:
2159 /* deprecated? */
2160 assert(0);
2161 return FALSE;
2162 break;
2163
2164 case TGSI_OPCODE_ISHR:
2165 /* deprecated? */
2166 assert(0);
2167 return FALSE;
2168 break;
2169
2170 case TGSI_OPCODE_AND:
2171 /* deprecated? */
2172 assert(0);
2173 return FALSE;
2174 break;
2175
2176 case TGSI_OPCODE_OR:
2177 /* deprecated? */
2178 assert(0);
2179 return FALSE;
2180 break;
2181
2182 case TGSI_OPCODE_MOD:
2183 /* deprecated? */
2184 assert(0);
2185 return FALSE;
2186 break;
2187
2188 case TGSI_OPCODE_XOR:
2189 /* deprecated? */
2190 assert(0);
2191 return FALSE;
2192 break;
2193
2194 case TGSI_OPCODE_SAD:
2195 /* deprecated? */
2196 assert(0);
2197 return FALSE;
2198 break;
2199
2200 case TGSI_OPCODE_TXF:
2201 /* deprecated? */
2202 assert(0);
2203 return FALSE;
2204 break;
2205
2206 case TGSI_OPCODE_TXQ:
2207 /* deprecated? */
2208 assert(0);
2209 return FALSE;
2210 break;
2211
2212 case TGSI_OPCODE_CONT:
2213 lp_exec_continue(&bld->exec_mask);
2214 break;
2215
2216 case TGSI_OPCODE_EMIT:
2217 return FALSE;
2218 break;
2219
2220 case TGSI_OPCODE_ENDPRIM:
2221 return FALSE;
2222 break;
2223
2224 case TGSI_OPCODE_NOP:
2225 break;
2226
2227 default:
2228 return FALSE;
2229 }
2230
2231 if(info->num_dst) {
2232 LLVMValueRef pred[NUM_CHANNELS];
2233
2234 emit_fetch_predicate( bld, inst, pred );
2235
2236 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2237 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2238 }
2239 }
2240
2241 return TRUE;
2242 }
2243
2244
2245 void
2246 lp_build_tgsi_soa(LLVMBuilderRef builder,
2247 const struct tgsi_token *tokens,
2248 struct lp_type type,
2249 struct lp_build_mask_context *mask,
2250 LLVMValueRef consts_ptr,
2251 const LLVMValueRef *pos,
2252 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2253 LLVMValueRef (*outputs)[NUM_CHANNELS],
2254 struct lp_build_sampler_soa *sampler,
2255 const struct tgsi_shader_info *info)
2256 {
2257 struct lp_build_tgsi_soa_context bld;
2258 struct tgsi_parse_context parse;
2259 uint num_immediates = 0;
2260 unsigned i;
2261
2262 /* Setup build context */
2263 memset(&bld, 0, sizeof bld);
2264 lp_build_context_init(&bld.base, builder, type);
2265 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
2266 bld.mask = mask;
2267 bld.pos = pos;
2268 bld.inputs = inputs;
2269 bld.outputs = outputs;
2270 bld.consts_ptr = consts_ptr;
2271 bld.sampler = sampler;
2272 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
2273 info->opcode_count[TGSI_OPCODE_ARL] > 0;
2274 bld.has_function_calls = info->opcode_count[TGSI_OPCODE_CAL] > 0;
2275 bld.func_hash = cso_hash_create();
2276
2277 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2278
2279 tgsi_parse_init( &parse, tokens );
2280
2281 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2282 tgsi_parse_token( &parse );
2283
2284 switch( parse.FullToken.Token.Type ) {
2285 case TGSI_TOKEN_TYPE_DECLARATION:
2286 /* Inputs already interpolated */
2287 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2288 break;
2289
2290 case TGSI_TOKEN_TYPE_INSTRUCTION:
2291 {
2292 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
2293 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
2294 /* we finished processing declarations, emit anything that needs
2295 * to go before the first instruction */
2296 if (bld.instno == 0) {
2297 emit_preamble(&bld, num_immediates);
2298 }
2299 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
2300 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2301 opcode_info->mnemonic);
2302 bld.instno++;
2303 }
2304
2305 break;
2306
2307 case TGSI_TOKEN_TYPE_IMMEDIATE:
2308 /* simply copy the immediate values into the next immediates[] slot */
2309 {
2310 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2311 assert(size <= 4);
2312 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2313 for( i = 0; i < size; ++i )
2314 bld.immediates[num_immediates][i] =
2315 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2316 for( i = size; i < 4; ++i )
2317 bld.immediates[num_immediates][i] = bld.base.undef;
2318 num_immediates++;
2319 }
2320 break;
2321
2322 case TGSI_TOKEN_TYPE_PROPERTY:
2323 break;
2324
2325 default:
2326 assert( 0 );
2327 }
2328 }
2329 /* we have to make sure we're at the end of the main block
2330 * (which won't be the case if we had more than one TGSI function
2331 * in the given shader) to let the calling function append
2332 * whatever it needs at the end of the main function */
2333 LLVMPositionBuilderAtEnd(bld.base.builder, bld.main_block);
2334
2335 if (0) {
2336 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2337 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2338 tgsi_dump(tokens, 0);
2339 lp_debug_dump_value(function);
2340 }
2341 tgsi_parse_free( &parse );
2342
2343 cso_hash_delete(bld.func_hash);
2344 }
2345