gallivm: implement function calls
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "cso_cache/cso_hash.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_exec.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "lp_bld_type.h"
52 #include "lp_bld_const.h"
53 #include "lp_bld_arit.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_limits.h"
59 #include "lp_bld_debug.h"
60
61
62 #define FOR_EACH_CHANNEL( CHAN )\
63 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
64
65 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
66 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
67
68 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
69 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
70
71 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
72 FOR_EACH_CHANNEL( CHAN )\
73 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
74
75 #define CHAN_X 0
76 #define CHAN_Y 1
77 #define CHAN_Z 2
78 #define CHAN_W 3
79
80 #define QUAD_TOP_LEFT 0
81 #define QUAD_TOP_RIGHT 1
82 #define QUAD_BOTTOM_LEFT 2
83 #define QUAD_BOTTOM_RIGHT 3
84
85
/**
 * Execution mask state used to emulate TGSI control flow
 * (IF/ELSE, loops, subroutine RET) on SoA vectors: instead of
 * branching per channel, stores are predicated with exec_mask.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when exec_mask may differ from all-ones (see lp_exec_mask_update) */
   boolean has_mask;

   /* integer vector type all masks are built with */
   LLVMTypeRef int_vec_type;

   /* stack of saved conditional masks for nested IF/ELSE */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   LLVMBasicBlockRef loop_block;   /* header block of the current loop */
   LLVMValueRef cont_mask;         /* bits cleared for channels that hit CONT */
   LLVMValueRef break_mask;        /* bits cleared for channels that hit BRK */
   LLVMValueRef break_var;         /* alloca carrying break_mask across iterations */
   LLVMValueRef ret_mask;          /* bits cleared for channels that hit RET */
   /* saved per-loop state for nested loops */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* combined cond & cont & break & ret mask; kept current by
    * lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
112
/**
 * Context for translating a TGSI shader to LLVM IR in SoA form.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;

   const struct lp_build_sampler_soa *sampler;

   /* per-register, per-channel SSA values used in the simple
    * (no indirect addressing, no CAL/RET) case */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* we allocate an array of allocas if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;

   /* alloca-backed register files used when the shader has function
    * calls, so subroutines can receive them as pointer arguments */
   LLVMValueRef inputs_array;
   LLVMValueRef outputs_array;
   LLVMValueRef immediates_array;
   LLVMValueRef addrs_array;
   LLVMValueRef preds_array;

   boolean has_indirect_addressing;
   boolean has_function_calls;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* maps TGSI subroutine labels to LLVM functions (see lp_get_function) */
   struct cso_hash *func_hash;
   unsigned instno;                 /* current instruction number */
   LLVMBasicBlockRef main_block;

   /* register ranges actually declared by the shader; .Last+1 sizes
    * the alloca arrays above */
   struct {
      struct tgsi_declaration_range inputs;
      struct tgsi_declaration_range outputs;
      struct tgsi_declaration_range temps;
      struct tgsi_declaration_range addrs;
      struct tgsi_declaration_range preds;
   } full_range;
};
160
/*
 * Quad-replication swizzles used by emit_ddx/emit_ddy: each selects,
 * for every pixel of the 2x2 quad, the quad corner whose value to read,
 * so that a subtraction yields the screen-space derivative.
 */
static const unsigned char
swizzle_left[4] = {
   QUAD_TOP_LEFT, QUAD_TOP_LEFT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
};

static const unsigned char
swizzle_right[4] = {
   QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
};

static const unsigned char
swizzle_top[4] = {
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
   QUAD_TOP_LEFT, QUAD_TOP_RIGHT
};

static const unsigned char
swizzle_bottom[4] = {
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
   QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
};
184
185
186 static LLVMValueRef
187 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
188 unsigned index,
189 unsigned chan,
190 boolean is_indirect,
191 LLVMValueRef addr)
192 {
193 assert(chan < 4);
194 if (!bld->has_indirect_addressing &&
195 !bld->has_function_calls) {
196 return bld->temps[index][chan];
197 } else {
198 LLVMValueRef lindex =
199 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
200 if (is_indirect)
201 lindex = lp_build_add(&bld->base, lindex, addr);
202 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "temp_ptr");
203 }
204 }
205
206 static LLVMValueRef
207 get_input_ptr(struct lp_build_tgsi_soa_context *bld,
208 unsigned index,
209 unsigned swizzle,
210 boolean is_indirect,
211 LLVMValueRef addr)
212 {
213 LLVMValueRef lindex =
214 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
215 assert(bld->has_function_calls);
216 if (is_indirect)
217 lindex = lp_build_add(&bld->base, lindex, addr);
218 return LLVMBuildGEP(bld->base.builder, bld->inputs_array, &lindex, 1, "input_ptr");
219 }
220
221 static LLVMValueRef
222 get_output_ptr(struct lp_build_tgsi_soa_context *bld,
223 unsigned index,
224 unsigned swizzle,
225 boolean is_indirect,
226 LLVMValueRef addr)
227 {
228 if (!bld->has_function_calls) {
229 return bld->outputs[index][swizzle];
230 } else {
231 LLVMValueRef lindex =
232 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
233 if (is_indirect)
234 lindex = lp_build_add(&bld->base, lindex, addr);
235 return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "output_ptr");
236 }
237 }
238
239 static LLVMValueRef
240 get_immediates_ptr(struct lp_build_tgsi_soa_context *bld,
241 unsigned index,
242 unsigned swizzle,
243 boolean is_indirect,
244 LLVMValueRef addr)
245 {
246 LLVMValueRef lindex =
247 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
248 assert(bld->has_function_calls);
249 if (is_indirect)
250 lindex = lp_build_add(&bld->base, lindex, addr);
251 return LLVMBuildGEP(bld->base.builder, bld->immediates_array, &lindex, 1, "immediates_ptr");
252 }
253
254 static LLVMValueRef
255 get_addr_ptr(struct lp_build_tgsi_soa_context *bld,
256 unsigned index,
257 unsigned swizzle,
258 boolean is_indirect,
259 LLVMValueRef addr)
260 {
261 if (!bld->has_function_calls) {
262 return bld->addr[index][swizzle];
263 } else {
264 LLVMValueRef lindex =
265 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
266 if (is_indirect)
267 lindex = lp_build_add(&bld->base, lindex, addr);
268 return LLVMBuildGEP(bld->base.builder, bld->addrs_array, &lindex, 1, "addrs_ptr");
269 }
270 }
271
272 static LLVMValueRef
273 get_preds_ptr(struct lp_build_tgsi_soa_context *bld,
274 unsigned index,
275 unsigned swizzle,
276 boolean is_indirect,
277 LLVMValueRef addr)
278 {
279 if (!bld->has_function_calls) {
280 return bld->preds[index][swizzle];
281 } else {
282 LLVMValueRef lindex =
283 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
284 if (is_indirect)
285 lindex = lp_build_add(&bld->base, lindex, addr);
286 return LLVMBuildGEP(bld->base.builder, bld->preds_array, &lindex, 1, "preds_ptr");
287 }
288 }
289
290 static LLVMValueRef lp_get_function(struct lp_build_tgsi_soa_context *bld,
291 int label)
292 {
293 struct cso_hash *hash = bld->func_hash;
294 struct cso_hash_iter iter = cso_hash_find(hash, label);
295 LLVMValueRef func;
296 LLVMModuleRef module = LLVMGetGlobalParent(
297 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld->base.builder)));
298
299 if (cso_hash_iter_is_null(iter)) {
300 LLVMTypeRef func_type;
301 LLVMTypeRef arg_types[7];
302 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
303 int i;
304 char func_name[32];
305
306 snprintf(func_name, 31, "func%d", label);
307
308 arg_types[0] = LLVMPointerType(vec_type, 0); /* inputs */
309 arg_types[1] = LLVMPointerType(vec_type, 0); /* outpus */
310 arg_types[2] = LLVMTypeOf(bld->consts_ptr); /* consts */
311 arg_types[3] = LLVMPointerType(vec_type, 0); /* temps */
312 arg_types[4] = LLVMPointerType(vec_type, 0); /* addrs */
313 arg_types[5] = LLVMPointerType(vec_type, 0); /* preds */
314 arg_types[6] = LLVMPointerType(vec_type, 0); /* immediates */
315
316 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
317
318 func = LLVMAddFunction(module, func_name, func_type);
319 LLVMSetFunctionCallConv(func, LLVMCCallConv);
320 for(i = 0; i < Elements(arg_types); ++i)
321 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
322 LLVMAddAttribute(LLVMGetParam(func, i), LLVMNoAliasAttribute);
323
324 cso_hash_insert(hash, label, func);
325 } else {
326 func = (LLVMValueRef)cso_hash_iter_data(iter);
327 }
328
329 return func;
330 }
331
332 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
333 {
334 mask->bld = bld;
335 mask->has_mask = FALSE;
336 mask->cond_stack_size = 0;
337 mask->loop_stack_size = 0;
338 mask->ret_mask = 0;
339
340 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
341 mask->break_mask = mask->cont_mask = mask->cond_mask =
342 LLVMConstAllOnes(mask->int_vec_type);
343 }
344
/**
 * Recompute exec_mask from the component masks:
 * inside a loop it is cond & cont & break; otherwise just cond;
 * and if a RET mask exists (inside a subroutine) it is ANDed in too.
 * Also refreshes has_mask so stores know whether masking is needed.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   /* ret_mask is non-NULL only between BGNSUB and ENDSUB */
   if (mask->ret_mask) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "retmask");
   }


   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->ret_mask);
}
374
/**
 * Enter an IF: save the current conditional mask on the stack and
 * make 'val' (the IF condition as an integer mask) the new one.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* at top level the conditional mask must still be fully enabled */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = val;

   lp_exec_mask_update(mask);
}
388
/**
 * Handle ELSE: invert the current conditional mask, still limited
 * to the channels that were active before the matching IF
 * (i.e. new mask = ~cond & previous).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
407
408 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
409 {
410 assert(mask->cond_stack_size);
411 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
412 lp_exec_mask_update(mask);
413 }
414
415
/**
 * Enter a subroutine (BGNSUB): start with all channels executing
 * and an all-ones RET mask that lp_exec_ret() will clear per channel.
 */
static void lp_exec_bgnsub(struct lp_exec_mask *mask)
{
   mask->exec_mask = LLVMConstAllOnes(mask->int_vec_type);
   mask->ret_mask = LLVMConstAllOnes(mask->int_vec_type);
}
421
/**
 * Enter a loop (BGNLOOP): push the enclosing loop's state, allocate
 * a stack slot for break_mask (it must survive the back-edge), emit
 * the loop header block and branch into it.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask lives in memory so it persists across iterations */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
450
451 static void lp_exec_break(struct lp_exec_mask *mask)
452 {
453 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
454 mask->exec_mask,
455 "break");
456
457 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
458 mask->break_mask,
459 exec_mask, "break_full");
460
461 lp_exec_mask_update(mask);
462 }
463
464
465 static void lp_exec_ret(struct lp_exec_mask *mask)
466 {
467 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
468 mask->exec_mask,
469 "ret");
470
471 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
472 mask->ret_mask,
473 exec_mask, "ret_full");
474
475 lp_exec_mask_update(mask);
476 }
477
478
479 static void lp_exec_continue(struct lp_exec_mask *mask)
480 {
481 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
482 mask->exec_mask,
483 "");
484
485 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
486 mask->cont_mask,
487 exec_mask, "");
488
489 lp_exec_mask_update(mask);
490 }
491
492
/**
 * Handle ENDLOOP: restore the continue mask, persist the break mask,
 * then loop back to the header while any channel is still active;
 * finally pop the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* scalar integer wide enough to hold the whole mask vector,
    * so a single compare tests all channels at once */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. some channel still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* branch back to the loop header while any channel remains active */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
538
/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      /* read-modify-write: keep old value where pred is 0 */
      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* no mask and no predicate: plain unconditional store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}
570
/**
 * Allocate a flat array of 'size' vec4 registers (4 channel vectors
 * each) on the stack.
 * NOTE(review): the "+ 4" over-allocates one extra register's worth
 * of channels — presumably padding slop; confirm before removing.
 */
static LLVMValueRef
emit_vec_alloca_array(struct lp_build_tgsi_soa_context *bld,
                      LLVMTypeRef vec_type,
                      int size)
{
   LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
                                   size * 4 + 4, 0);
   return lp_build_array_alloca(bld->base.builder,
                                vec_type, val, "");
}
581
/**
 * Emit the allocas needed at the top of the main function.
 * With indirect addressing or subroutines, temps live in a flat
 * array; with subroutines, outputs/immediates/addrs/preds/inputs
 * also get array storage (so they can be passed by pointer), and
 * the already-built immediate and input values are copied in.
 */
static void
emit_preamble(struct lp_build_tgsi_soa_context *bld, uint num_immediates)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   /* temps */
   if (bld->has_indirect_addressing ||
       bld->has_function_calls) {
      int size = bld->full_range.temps.Last + 1;
      bld->temps_array = emit_vec_alloca_array(bld, vec_type, size);
   }
   if (bld->has_function_calls) {
      int i;
      int size = bld->full_range.outputs.Last + 1;
      bld->outputs_array = emit_vec_alloca_array(bld, vec_type, size);

      /* we need to insert the created immediates into our array */
      size = num_immediates;
      if (size > 0)
         bld->immediates_array = emit_vec_alloca_array(bld, vec_type, size);
      for (i = 0; i < size; ++i) {
         int j;
         for (j = 0; j < NUM_CHANNELS; ++j) {
            LLVMValueRef ptr = get_immediates_ptr(bld,
                                                  i, j,
                                                  FALSE, 0);
            LLVMBuildStore(bld->base.builder,
                           bld->immediates[i][j],
                           ptr);
         }
      }

      size = bld->full_range.addrs.Last + 1;
      bld->addrs_array = emit_vec_alloca_array(bld, vec_type, size);

      size = bld->full_range.preds.Last + 1;
      bld->preds_array = emit_vec_alloca_array(bld, vec_type, size);

      /*inputs also need to be copied*/
      size = bld->full_range.inputs.Last + 1;
      bld->inputs_array = emit_vec_alloca_array(bld, vec_type, size);
      for (i = bld->full_range.inputs.First; i < size; ++i) {
         int j;
         for (j = 0; j < NUM_CHANNELS; ++j) {
            LLVMValueRef ptr = get_input_ptr(bld,
                                             i, j,
                                             FALSE, 0);
            LLVMBuildStore(bld->base.builder,
                           bld->inputs[i][j],
                           ptr);
         }
      }
   }
}
636
/**
 * Called at the END of the main shader: remember the final main
 * block and, when subroutines were used, point bld->outputs at the
 * outputs-array slots so later code reads the array-backed values.
 */
static void
emit_end(struct lp_build_tgsi_soa_context *bld)
{
   int i, j;

   bld->main_block = LLVMGetInsertBlock(bld->base.builder);

   /* if we had function calls we want to propagate the
    * outputs from the array to the values */
   if (bld->has_function_calls) {
      int size = bld->full_range.outputs.Last + 1;
      for (i = bld->full_range.outputs.First; i < size; ++i) {
         for (j = 0; j < NUM_CHANNELS; ++j) {
            LLVMValueRef ptr = get_output_ptr(bld, i, j,
                                              FALSE, 0);
            bld->outputs[i][j] = ptr;
         }
      }
   }
}
657
/**
 * Handle BGNSUB: fetch/create the subroutine function for the
 * current instruction label, rebind the register-file pointers in
 * bld to the function's parameters, and start emitting into its
 * entry block with a fresh exec/ret mask.
 */
static void
emit_bgnsub(struct lp_build_tgsi_soa_context *bld)
{
   LLVMValueRef func = lp_get_function(bld, bld->instno);
   LLVMBasicBlockRef block;
   LLVMValueRef inputs_ptr, outputs_ptr,
      consts_ptr, temps_ptr, addrs_ptr, preds_ptr, imms_ptr;

   /* parameter order must match lp_get_function's arg_types */
   inputs_ptr  = LLVMGetParam(func, 0);
   outputs_ptr = LLVMGetParam(func, 1);
   consts_ptr  = LLVMGetParam(func, 2);
   temps_ptr   = LLVMGetParam(func, 3);
   addrs_ptr   = LLVMGetParam(func, 4);
   preds_ptr   = LLVMGetParam(func, 5);
   imms_ptr    = LLVMGetParam(func, 6);

   lp_build_name(inputs_ptr, "inputs");
   lp_build_name(outputs_ptr, "outputs");
   lp_build_name(consts_ptr, "consts");
   lp_build_name(temps_ptr, "temps");
   lp_build_name(addrs_ptr, "addrs");
   lp_build_name(preds_ptr, "preds");
   lp_build_name(imms_ptr, "immediates");

   /* from here on, register accesses go through the parameters */
   bld->inputs_array = inputs_ptr;
   bld->outputs_array = outputs_ptr;
   bld->consts_ptr = consts_ptr;
   bld->temps_array = temps_ptr;
   bld->addrs_array = addrs_ptr;
   bld->preds_array = preds_ptr;
   bld->immediates_array = imms_ptr;

   block = LLVMAppendBasicBlock(func, "entry");
   LLVMPositionBuilderAtEnd(bld->base.builder, block);

   lp_exec_bgnsub(&bld->exec_mask);
}
695
/**
 * Handle ENDSUB: terminate the subroutine with a void return.
 */
static void
emit_endsub(struct lp_build_tgsi_soa_context *bld)
{
   LLVMBuildRetVoid(bld->base.builder);
}
701
702 static LLVMValueRef
703 emit_ddx(struct lp_build_tgsi_soa_context *bld,
704 LLVMValueRef src)
705 {
706 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
707 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
708 return lp_build_sub(&bld->base, src_right, src_left);
709 }
710
711
712 static LLVMValueRef
713 emit_ddy(struct lp_build_tgsi_soa_context *bld,
714 LLVMValueRef src)
715 {
716 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
717 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
718 return lp_build_sub(&bld->base, src_top, src_bottom);
719 }
720
/**
 * Register fetch.
 * Reads one channel of source operand 'index' of 'inst', applying
 * the operand's swizzle, optional indirect addressing and sign mode.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[index];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
      /* NOTE: shadows the outer 'swizzle'; this one selects the ADDR
       * register channel used for the indirection */
      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
      addr = LLVMBuildLoad(bld->base.builder,
                           get_addr_ptr(bld, reg->Indirect.Index, swizzle, FALSE, 0),
                           "");
      /* for indexing we want integers */
      addr = LLVMBuildFPToSI(bld->base.builder, addr,
                             int_vec_type, "");
      /* the address is assumed uniform across the quad: use lane 0 */
      addr = LLVMBuildExtractElement(bld->base.builder,
                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
                                     "");
      /* scale register index to channel slots (4 channels per reg) */
      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      {
         /* NOTE: shadows the 'index' parameter */
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
                                           reg->Register.Index*4 + swizzle, 0);
         LLVMValueRef scalar, scalar_ptr;

         if (reg->Register.Indirect) {
            /*lp_build_printf(bld->base.builder,
              "\taddr = %d\n", addr);*/
            index = lp_build_add(&bld->base, index, addr);
         }
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         /* constants are scalar; replicate across all channels */
         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      if (bld->has_function_calls) {
         LLVMValueRef ptr = get_immediates_ptr(bld,
                                               reg->Register.Index,
                                               swizzle,
                                               FALSE, 0);
         res = LLVMBuildLoad(bld->base.builder, ptr, "");
      } else
         res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      if (bld->has_function_calls) {
         LLVMValueRef ptr = get_input_ptr(bld,
                                          reg->Register.Index,
                                          swizzle,
                                          FALSE, 0);
         res = LLVMBuildLoad(bld->base.builder, ptr, "");
      } else
         res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              swizzle,
                                              reg->Register.Indirect,
                                              addr);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if(!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the operand's absolute/negate modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* TODO: Use bitwise OR for floating point */
      res = lp_build_abs( &bld->base, res );
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_TOGGLE:
      res = LLVMBuildNeg( bld->base.builder, res, "" );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
839
840
841 /**
842 * Register fetch with derivatives.
843 */
844 static void
845 emit_fetch_deriv(
846 struct lp_build_tgsi_soa_context *bld,
847 const struct tgsi_full_instruction *inst,
848 unsigned index,
849 const unsigned chan_index,
850 LLVMValueRef *res,
851 LLVMValueRef *ddx,
852 LLVMValueRef *ddy)
853 {
854 LLVMValueRef src;
855
856 src = emit_fetch(bld, inst, index, chan_index);
857
858 if(res)
859 *res = src;
860
861 /* TODO: use interpolation coeffs for inputs */
862
863 if(ddx)
864 *ddx = emit_ddx(bld, src);
865
866 if(ddy)
867 *ddy = emit_ddy(bld, src);
868 }
869
/**
 * Predicate.
 * Fills pred[0..3] with per-channel integer masks derived from the
 * instruction's predicate register (or NULLs when unpredicated).
 * Each referenced predicate channel is converted once via a
 * value != 0 comparison, optionally negated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   /* unpredicated instruction: no masks */
   if (!inst->Instruction.Predicate) {
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         LLVMValueRef pred_ptr = get_preds_ptr(bld, index, swizzle,
                                               FALSE, 0);
         value = LLVMBuildLoad(bld->base.builder,
                               pred_ptr, "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
938
939
940 /**
941 * Register store.
942 */
943 static void
944 emit_store(
945 struct lp_build_tgsi_soa_context *bld,
946 const struct tgsi_full_instruction *inst,
947 unsigned index,
948 unsigned chan_index,
949 LLVMValueRef pred,
950 LLVMValueRef value)
951 {
952 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
953 LLVMValueRef addr = NULL;
954
955 switch( inst->Instruction.Saturate ) {
956 case TGSI_SAT_NONE:
957 break;
958
959 case TGSI_SAT_ZERO_ONE:
960 value = lp_build_max(&bld->base, value, bld->base.zero);
961 value = lp_build_min(&bld->base, value, bld->base.one);
962 break;
963
964 case TGSI_SAT_MINUS_PLUS_ONE:
965 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
966 value = lp_build_min(&bld->base, value, bld->base.one);
967 break;
968
969 default:
970 assert(0);
971 }
972
973 if (reg->Register.Indirect) {
974 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
975 unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
976 addr = LLVMBuildLoad(bld->base.builder,
977 get_addr_ptr(bld, reg->Indirect.Index, swizzle, FALSE, 0),
978 "");
979 /* for indexing we want integers */
980 addr = LLVMBuildFPToSI(bld->base.builder, addr,
981 int_vec_type, "");
982 addr = LLVMBuildExtractElement(bld->base.builder,
983 addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
984 "");
985 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
986 }
987
988 switch( reg->Register.File ) {
989 case TGSI_FILE_OUTPUT:
990 lp_exec_mask_store(&bld->exec_mask, pred, value,
991 get_output_ptr(bld, reg->Register.Index, chan_index,
992 FALSE, 0));
993 break;
994
995 case TGSI_FILE_TEMPORARY: {
996 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
997 chan_index,
998 reg->Register.Indirect,
999 addr);
1000 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
1001 break;
1002 }
1003
1004 case TGSI_FILE_ADDRESS:
1005 lp_exec_mask_store(&bld->exec_mask, pred, value,
1006 get_addr_ptr(bld, reg->Indirect.Index, chan_index,
1007 FALSE, 0));
1008 break;
1009
1010 case TGSI_FILE_PREDICATE:
1011 lp_exec_mask_store(&bld->exec_mask, pred, value,
1012 get_preds_ptr(bld, index, chan_index,
1013 FALSE, 0));
1014 break;
1015
1016 default:
1017 assert( 0 );
1018 }
1019 }
1020
1021
1022 /**
1023 * High-level instruction translators.
1024 */
1025
/* How the texture opcode modifies the basic sample operation. */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,
   TEX_MODIFIER_PROJECTED,      /* coords divided by q (TXP) */
   TEX_MODIFIER_LOD_BIAS,       /* lod bias in src0.w (TXB) */
   TEX_MODIFIER_EXPLICIT_LOD,   /* explicit lod in src0.w (TXL) */
   TEX_MODIFIER_EXPLICIT_DERIV  /* ddx/ddy in src1/src2 (TXD) */
};
1033
/**
 * Emit a texture sample instruction: gather coordinates (and lod
 * bias / explicit lod / derivatives per the modifier), then hand
 * off to the sampler generator; texel[0..3] receives RGBA.
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   /* no sampler generator: return undef texels rather than crash */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias and explicit lod both live in src0.w */
   if (modifier == TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projection: scale coords by 1/q (src0.w) */
   if (modifier == TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   /* derivatives: explicit from src1/src2 (TXD, sampler in src3),
    * otherwise computed from quad differences (sampler in src1) */
   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_fetch( bld, inst, 1, i );
         ddy[i] = emit_fetch( bld, inst, 2, i );
      }
      unit = inst->Src[3].Register.Index;
   } else {
      for (i = 0; i < num_coords; i++) {
         ddx[i] = emit_ddx( bld, coords[i] );
         ddy[i] = emit_ddy( bld, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = bld->base.undef;
      ddy[i] = bld->base.undef;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
1130
1131
1132 /**
1133 * Kill fragment if any of the src register values are negative.
1134 */
1135 static void
1136 emit_kil(
1137 struct lp_build_tgsi_soa_context *bld,
1138 const struct tgsi_full_instruction *inst )
1139 {
1140 const struct tgsi_full_src_register *reg = &inst->Src[0];
1141 LLVMValueRef terms[NUM_CHANNELS];
1142 LLVMValueRef mask;
1143 unsigned chan_index;
1144
1145 memset(&terms, 0, sizeof terms);
1146
1147 FOR_EACH_CHANNEL( chan_index ) {
1148 unsigned swizzle;
1149
1150 /* Unswizzle channel */
1151 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1152
1153 /* Check if the component has not been already tested. */
1154 assert(swizzle < NUM_CHANNELS);
1155 if( !terms[swizzle] )
1156 /* TODO: change the comparison operator instead of setting the sign */
1157 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
1158 }
1159
1160 mask = NULL;
1161 FOR_EACH_CHANNEL( chan_index ) {
1162 if(terms[chan_index]) {
1163 LLVMValueRef chan_mask;
1164
1165 /*
1166 * If term < 0 then mask = 0 else mask = ~0.
1167 */
1168 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1169
1170 if(mask)
1171 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
1172 else
1173 mask = chan_mask;
1174 }
1175 }
1176
1177 if(mask)
1178 lp_build_mask_update(bld->mask, mask);
1179 }
1180
1181
1182 /**
1183 * Predicated fragment kill.
1184 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1185 * The only predication is the execution mask which will apply if
1186 * we're inside a loop or conditional.
1187 */
1188 static void
1189 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1190 const struct tgsi_full_instruction *inst)
1191 {
1192 LLVMValueRef mask;
1193
1194 /* For those channels which are "alive", disable fragment shader
1195 * execution.
1196 */
1197 if (bld->exec_mask.has_mask) {
1198 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1199 }
1200 else {
1201 mask = bld->base.zero;
1202 }
1203
1204 lp_build_mask_update(bld->mask, mask);
1205 }
1206
1207 static void
1208 range_check(struct tgsi_declaration_range *range,
1209 unsigned new_first, unsigned new_last)
1210 {
1211 range->First = MIN2(range->First, new_first);
1212 range->Last = MAX2(range->Last, new_last);
1213 }
1214
1215 static void
1216 emit_declaration(
1217 struct lp_build_tgsi_soa_context *bld,
1218 const struct tgsi_full_declaration *decl)
1219 {
1220 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
1221
1222 unsigned first = decl->Range.First;
1223 unsigned last = decl->Range.Last;
1224 unsigned idx, i;
1225
1226 for (idx = first; idx <= last; ++idx) {
1227 switch (decl->Declaration.File) {
1228 case TGSI_FILE_TEMPORARY:
1229 assert(idx < LP_MAX_TGSI_TEMPS);
1230 range_check(&bld->full_range.temps,
1231 first, last);
1232 if (!bld->has_indirect_addressing &&
1233 !bld->has_function_calls) {
1234 for (i = 0; i < NUM_CHANNELS; i++)
1235 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1236 vec_type, "");
1237 }
1238 break;
1239
1240 case TGSI_FILE_OUTPUT:
1241 range_check(&bld->full_range.outputs,
1242 first, last);
1243 if (!bld->has_function_calls) {
1244 for (i = 0; i < NUM_CHANNELS; i++)
1245 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1246 vec_type, "");
1247 }
1248 break;
1249
1250 case TGSI_FILE_ADDRESS:
1251 assert(idx < LP_MAX_TGSI_ADDRS);
1252 range_check(&bld->full_range.addrs,
1253 first, last);
1254 if (!bld->has_function_calls) {
1255 for (i = 0; i < NUM_CHANNELS; i++)
1256 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1257 vec_type, "");
1258 }
1259 break;
1260
1261 case TGSI_FILE_PREDICATE:
1262 assert(idx < LP_MAX_TGSI_PREDS);
1263 range_check(&bld->full_range.preds,
1264 first, last);
1265 if (!bld->has_function_calls) {
1266 for (i = 0; i < NUM_CHANNELS; i++)
1267 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1268 vec_type, "");
1269 }
1270 break;
1271
1272 case TGSI_FILE_INPUT:
1273 range_check(&bld->full_range.inputs,
1274 first, last);
1275 break;
1276
1277 default:
1278 /* don't need to declare other vars */
1279 break;
1280 }
1281 }
1282 }
1283
1284
1285 /**
1286 * Emit LLVM for one TGSI instruction.
 * \return TRUE for success, FALSE otherwise
1288 */
1289 static boolean
1290 emit_instruction(
1291 struct lp_build_tgsi_soa_context *bld,
1292 const struct tgsi_full_instruction *inst,
1293 const struct tgsi_opcode_info *info)
1294 {
1295 unsigned chan_index;
1296 LLVMValueRef src0, src1, src2;
1297 LLVMValueRef tmp0, tmp1, tmp2;
1298 LLVMValueRef tmp3 = NULL;
1299 LLVMValueRef tmp4 = NULL;
1300 LLVMValueRef tmp5 = NULL;
1301 LLVMValueRef tmp6 = NULL;
1302 LLVMValueRef tmp7 = NULL;
1303 LLVMValueRef res;
1304 LLVMValueRef dst0[NUM_CHANNELS];
1305
1306 /*
1307 * Stores and write masks are handled in a general fashion after the long
1308 * instruction opcode switch statement.
1309 *
1310 * Although not stricitly necessary, we avoid generating instructions for
1311 * channels which won't be stored, in cases where's that easy. For some
1312 * complex instructions, like texture sampling, it is more convenient to
1313 * assume a full writemask and then let LLVM optimization passes eliminate
1314 * redundant code.
1315 */
1316
1317 assert(info->num_dst <= 1);
1318 if (info->num_dst) {
1319 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1320 dst0[chan_index] = bld->base.undef;
1321 }
1322 }
1323
1324 switch (inst->Instruction.Opcode) {
1325 case TGSI_OPCODE_ARL:
1326 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1327 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1328 tmp0 = lp_build_floor(&bld->base, tmp0);
1329 dst0[chan_index] = tmp0;
1330 }
1331 break;
1332
1333 case TGSI_OPCODE_MOV:
1334 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1335 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1336 }
1337 break;
1338
1339 case TGSI_OPCODE_LIT:
1340 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1341 dst0[CHAN_X] = bld->base.one;
1342 }
1343 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1344 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1345 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1346 }
1347 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1348 /* XMM[1] = SrcReg[0].yyyy */
1349 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1350 /* XMM[1] = max(XMM[1], 0) */
1351 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1352 /* XMM[2] = SrcReg[0].wwww */
1353 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1354 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1355 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1356 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1357 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1358 }
1359 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1360 dst0[CHAN_W] = bld->base.one;
1361 }
1362 break;
1363
1364 case TGSI_OPCODE_RCP:
1365 /* TGSI_OPCODE_RECIP */
1366 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1367 res = lp_build_rcp(&bld->base, src0);
1368 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1369 dst0[chan_index] = res;
1370 }
1371 break;
1372
1373 case TGSI_OPCODE_RSQ:
1374 /* TGSI_OPCODE_RECIPSQRT */
1375 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1376 src0 = lp_build_abs(&bld->base, src0);
1377 res = lp_build_rsqrt(&bld->base, src0);
1378 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1379 dst0[chan_index] = res;
1380 }
1381 break;
1382
1383 case TGSI_OPCODE_EXP:
1384 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1385 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1386 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1387 LLVMValueRef *p_exp2_int_part = NULL;
1388 LLVMValueRef *p_frac_part = NULL;
1389 LLVMValueRef *p_exp2 = NULL;
1390
1391 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1392
1393 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1394 p_exp2_int_part = &tmp0;
1395 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1396 p_frac_part = &tmp1;
1397 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1398 p_exp2 = &tmp2;
1399
1400 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1401
1402 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1403 dst0[CHAN_X] = tmp0;
1404 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1405 dst0[CHAN_Y] = tmp1;
1406 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1407 dst0[CHAN_Z] = tmp2;
1408 }
1409 /* dst.w = 1.0 */
1410 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1411 dst0[CHAN_W] = bld->base.one;
1412 }
1413 break;
1414
1415 case TGSI_OPCODE_LOG:
1416 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1417 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1418 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1419 LLVMValueRef *p_floor_log2 = NULL;
1420 LLVMValueRef *p_exp = NULL;
1421 LLVMValueRef *p_log2 = NULL;
1422
1423 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1424 src0 = lp_build_abs( &bld->base, src0 );
1425
1426 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1427 p_floor_log2 = &tmp0;
1428 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1429 p_exp = &tmp1;
1430 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1431 p_log2 = &tmp2;
1432
1433 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1434
1435 /* dst.x = floor(lg2(abs(src.x))) */
1436 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1437 dst0[CHAN_X] = tmp0;
1438 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1439 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1440 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1441 }
1442 /* dst.z = lg2(abs(src.x)) */
1443 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1444 dst0[CHAN_Z] = tmp2;
1445 }
1446 /* dst.w = 1.0 */
1447 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1448 dst0[CHAN_W] = bld->base.one;
1449 }
1450 break;
1451
1452 case TGSI_OPCODE_MUL:
1453 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1454 src0 = emit_fetch( bld, inst, 0, chan_index );
1455 src1 = emit_fetch( bld, inst, 1, chan_index );
1456 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1457 }
1458 break;
1459
1460 case TGSI_OPCODE_ADD:
1461 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1462 src0 = emit_fetch( bld, inst, 0, chan_index );
1463 src1 = emit_fetch( bld, inst, 1, chan_index );
1464 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1465 }
1466 break;
1467
1468 case TGSI_OPCODE_DP3:
1469 /* TGSI_OPCODE_DOT3 */
1470 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1471 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1472 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1473 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1474 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1475 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1476 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1477 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1478 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1479 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1480 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1481 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1482 dst0[chan_index] = tmp0;
1483 }
1484 break;
1485
1486 case TGSI_OPCODE_DP4:
1487 /* TGSI_OPCODE_DOT4 */
1488 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1489 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1490 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1491 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1492 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1493 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1494 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1495 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1496 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1497 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1498 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1499 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1500 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1501 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1502 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1503 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1504 dst0[chan_index] = tmp0;
1505 }
1506 break;
1507
1508 case TGSI_OPCODE_DST:
1509 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1510 dst0[CHAN_X] = bld->base.one;
1511 }
1512 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1513 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1514 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1515 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1516 }
1517 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1518 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1519 }
1520 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1521 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1522 }
1523 break;
1524
1525 case TGSI_OPCODE_MIN:
1526 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1527 src0 = emit_fetch( bld, inst, 0, chan_index );
1528 src1 = emit_fetch( bld, inst, 1, chan_index );
1529 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1530 }
1531 break;
1532
1533 case TGSI_OPCODE_MAX:
1534 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1535 src0 = emit_fetch( bld, inst, 0, chan_index );
1536 src1 = emit_fetch( bld, inst, 1, chan_index );
1537 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1538 }
1539 break;
1540
1541 case TGSI_OPCODE_SLT:
1542 /* TGSI_OPCODE_SETLT */
1543 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1544 src0 = emit_fetch( bld, inst, 0, chan_index );
1545 src1 = emit_fetch( bld, inst, 1, chan_index );
1546 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1547 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1548 }
1549 break;
1550
1551 case TGSI_OPCODE_SGE:
1552 /* TGSI_OPCODE_SETGE */
1553 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1554 src0 = emit_fetch( bld, inst, 0, chan_index );
1555 src1 = emit_fetch( bld, inst, 1, chan_index );
1556 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1557 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1558 }
1559 break;
1560
1561 case TGSI_OPCODE_MAD:
1562 /* TGSI_OPCODE_MADD */
1563 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1564 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1565 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1566 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1567 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1568 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1569 dst0[chan_index] = tmp0;
1570 }
1571 break;
1572
1573 case TGSI_OPCODE_SUB:
1574 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1575 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1576 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1577 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1578 }
1579 break;
1580
1581 case TGSI_OPCODE_LRP:
1582 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1583 src0 = emit_fetch( bld, inst, 0, chan_index );
1584 src1 = emit_fetch( bld, inst, 1, chan_index );
1585 src2 = emit_fetch( bld, inst, 2, chan_index );
1586 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1587 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1588 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1589 }
1590 break;
1591
1592 case TGSI_OPCODE_CND:
1593 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1594 src0 = emit_fetch( bld, inst, 0, chan_index );
1595 src1 = emit_fetch( bld, inst, 1, chan_index );
1596 src2 = emit_fetch( bld, inst, 2, chan_index );
1597 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1598 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1599 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1600 }
1601 break;
1602
1603 case TGSI_OPCODE_DP2A:
1604 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1605 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1606 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1607 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1608 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1609 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1610 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1611 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1612 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1613 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1614 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1615 }
1616 break;
1617
1618 case TGSI_OPCODE_FRC:
1619 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1620 src0 = emit_fetch( bld, inst, 0, chan_index );
1621 tmp0 = lp_build_floor(&bld->base, src0);
1622 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1623 dst0[chan_index] = tmp0;
1624 }
1625 break;
1626
1627 case TGSI_OPCODE_CLAMP:
1628 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1629 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1630 src1 = emit_fetch( bld, inst, 1, chan_index );
1631 src2 = emit_fetch( bld, inst, 2, chan_index );
1632 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1633 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1634 dst0[chan_index] = tmp0;
1635 }
1636 break;
1637
1638 case TGSI_OPCODE_FLR:
1639 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1640 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1641 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1642 }
1643 break;
1644
1645 case TGSI_OPCODE_ROUND:
1646 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1647 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1648 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1649 }
1650 break;
1651
1652 case TGSI_OPCODE_EX2: {
1653 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1654 tmp0 = lp_build_exp2( &bld->base, tmp0);
1655 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1656 dst0[chan_index] = tmp0;
1657 }
1658 break;
1659 }
1660
1661 case TGSI_OPCODE_LG2:
1662 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1663 tmp0 = lp_build_log2( &bld->base, tmp0);
1664 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1665 dst0[chan_index] = tmp0;
1666 }
1667 break;
1668
1669 case TGSI_OPCODE_POW:
1670 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1671 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1672 res = lp_build_pow( &bld->base, src0, src1 );
1673 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1674 dst0[chan_index] = res;
1675 }
1676 break;
1677
1678 case TGSI_OPCODE_XPD:
1679 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1680 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1681 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1682 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1683 }
1684 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1685 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1686 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1687 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1688 }
1689 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1690 tmp2 = tmp0;
1691 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1692 tmp5 = tmp3;
1693 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1694 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1695 dst0[CHAN_X] = tmp2;
1696 }
1697 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1698 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1699 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1700 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1701 }
1702 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1703 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1704 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1705 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1706 dst0[CHAN_Y] = tmp3;
1707 }
1708 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1709 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1710 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1711 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1712 dst0[CHAN_Z] = tmp5;
1713 }
1714 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1715 dst0[CHAN_W] = bld->base.one;
1716 }
1717 break;
1718
1719 case TGSI_OPCODE_ABS:
1720 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1721 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1722 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1723 }
1724 break;
1725
1726 case TGSI_OPCODE_RCC:
1727 /* deprecated? */
1728 assert(0);
1729 return FALSE;
1730
1731 case TGSI_OPCODE_DPH:
1732 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1733 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1734 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1735 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1736 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1737 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1738 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1739 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1740 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1741 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1742 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1743 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1744 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1745 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1746 dst0[chan_index] = tmp0;
1747 }
1748 break;
1749
1750 case TGSI_OPCODE_COS:
1751 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1752 tmp0 = lp_build_cos( &bld->base, tmp0 );
1753 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1754 dst0[chan_index] = tmp0;
1755 }
1756 break;
1757
1758 case TGSI_OPCODE_DDX:
1759 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1760 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1761 }
1762 break;
1763
1764 case TGSI_OPCODE_DDY:
1765 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1766 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1767 }
1768 break;
1769
1770 case TGSI_OPCODE_KILP:
1771 /* predicated kill */
1772 emit_kilp( bld, inst );
1773 break;
1774
1775 case TGSI_OPCODE_KIL:
1776 /* conditional kill */
1777 emit_kil( bld, inst );
1778 break;
1779
1780 case TGSI_OPCODE_PK2H:
1781 return FALSE;
1782 break;
1783
1784 case TGSI_OPCODE_PK2US:
1785 return FALSE;
1786 break;
1787
1788 case TGSI_OPCODE_PK4B:
1789 return FALSE;
1790 break;
1791
1792 case TGSI_OPCODE_PK4UB:
1793 return FALSE;
1794 break;
1795
1796 case TGSI_OPCODE_RFL:
1797 return FALSE;
1798 break;
1799
1800 case TGSI_OPCODE_SEQ:
1801 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1802 src0 = emit_fetch( bld, inst, 0, chan_index );
1803 src1 = emit_fetch( bld, inst, 1, chan_index );
1804 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1805 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1806 }
1807 break;
1808
1809 case TGSI_OPCODE_SFL:
1810 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1811 dst0[chan_index] = bld->base.zero;
1812 }
1813 break;
1814
1815 case TGSI_OPCODE_SGT:
1816 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1817 src0 = emit_fetch( bld, inst, 0, chan_index );
1818 src1 = emit_fetch( bld, inst, 1, chan_index );
1819 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1820 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1821 }
1822 break;
1823
1824 case TGSI_OPCODE_SIN:
1825 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1826 tmp0 = lp_build_sin( &bld->base, tmp0 );
1827 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1828 dst0[chan_index] = tmp0;
1829 }
1830 break;
1831
1832 case TGSI_OPCODE_SLE:
1833 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1834 src0 = emit_fetch( bld, inst, 0, chan_index );
1835 src1 = emit_fetch( bld, inst, 1, chan_index );
1836 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1837 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1838 }
1839 break;
1840
1841 case TGSI_OPCODE_SNE:
1842 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1843 src0 = emit_fetch( bld, inst, 0, chan_index );
1844 src1 = emit_fetch( bld, inst, 1, chan_index );
1845 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1846 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1847 }
1848 break;
1849
1850 case TGSI_OPCODE_STR:
1851 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1852 dst0[chan_index] = bld->base.one;
1853 }
1854 break;
1855
1856 case TGSI_OPCODE_TEX:
1857 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1858 break;
1859
1860 case TGSI_OPCODE_TXD:
1861 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1862 break;
1863
1864 case TGSI_OPCODE_UP2H:
1865 /* deprecated */
1866 assert (0);
1867 return FALSE;
1868 break;
1869
1870 case TGSI_OPCODE_UP2US:
1871 /* deprecated */
1872 assert(0);
1873 return FALSE;
1874 break;
1875
1876 case TGSI_OPCODE_UP4B:
1877 /* deprecated */
1878 assert(0);
1879 return FALSE;
1880 break;
1881
1882 case TGSI_OPCODE_UP4UB:
1883 /* deprecated */
1884 assert(0);
1885 return FALSE;
1886 break;
1887
1888 case TGSI_OPCODE_X2D:
1889 /* deprecated? */
1890 assert(0);
1891 return FALSE;
1892 break;
1893
1894 case TGSI_OPCODE_ARA:
1895 /* deprecated */
1896 assert(0);
1897 return FALSE;
1898 break;
1899
1900 case TGSI_OPCODE_ARR:
1901 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1902 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1903 tmp0 = lp_build_round(&bld->base, tmp0);
1904 dst0[chan_index] = tmp0;
1905 }
1906 break;
1907
1908 case TGSI_OPCODE_BRA:
1909 /* deprecated */
1910 assert(0);
1911 return FALSE;
1912 break;
1913
1914 case TGSI_OPCODE_CAL: {
1915 LLVMValueRef args[7];
1916 LLVMValueRef func = lp_get_function(bld, inst->Label.Label);
1917 args[0] = bld->inputs_array;
1918 args[1] = bld->outputs_array;
1919 args[2] = bld->consts_ptr;
1920 args[3] = bld->temps_array;
1921 args[4] = bld->addrs_array;
1922 args[5] = bld->preds_array;
1923 args[6] = bld->immediates_array;
1924 LLVMBuildCall(bld->base.builder, func, args, Elements(args), "");
1925 }
1926 break;
1927
1928 case TGSI_OPCODE_RET:
1929 lp_exec_ret(&bld->exec_mask);
1930 break;
1931
1932 case TGSI_OPCODE_END:
1933 emit_end(bld);
1934 break;
1935
1936 case TGSI_OPCODE_SSG:
1937 /* TGSI_OPCODE_SGN */
1938 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1939 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1940 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1941 }
1942 break;
1943
1944 case TGSI_OPCODE_CMP:
1945 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1946 src0 = emit_fetch( bld, inst, 0, chan_index );
1947 src1 = emit_fetch( bld, inst, 1, chan_index );
1948 src2 = emit_fetch( bld, inst, 2, chan_index );
1949 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1950 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1951 }
1952 break;
1953
1954 case TGSI_OPCODE_SCS:
1955 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1956 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1957 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1958 }
1959 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1960 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1961 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1962 }
1963 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1964 dst0[CHAN_Z] = bld->base.zero;
1965 }
1966 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1967 dst0[CHAN_W] = bld->base.one;
1968 }
1969 break;
1970
1971 case TGSI_OPCODE_TXB:
1972 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1973 break;
1974
1975 case TGSI_OPCODE_NRM:
1976 /* fall-through */
1977 case TGSI_OPCODE_NRM4:
1978 /* 3 or 4-component normalization */
1979 {
1980 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1981
1982 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1983 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1984 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1985 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1986
1987 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1988
1989 /* xmm4 = src.x */
1990 /* xmm0 = src.x * src.x */
1991 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1992 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1993 tmp4 = tmp0;
1994 }
1995 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1996
1997 /* xmm5 = src.y */
1998 /* xmm0 = xmm0 + src.y * src.y */
1999 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2000 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2001 tmp5 = tmp1;
2002 }
2003 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2004 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2005
2006 /* xmm6 = src.z */
2007 /* xmm0 = xmm0 + src.z * src.z */
2008 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2009 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2010 tmp6 = tmp1;
2011 }
2012 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2013 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2014
2015 if (dims == 4) {
2016 /* xmm7 = src.w */
2017 /* xmm0 = xmm0 + src.w * src.w */
2018 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2019 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2020 tmp7 = tmp1;
2021 }
2022 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2023 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2024 }
2025
2026 /* xmm1 = 1 / sqrt(xmm0) */
2027 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2028
2029 /* dst.x = xmm1 * src.x */
2030 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2031 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2032 }
2033
2034 /* dst.y = xmm1 * src.y */
2035 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2036 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2037 }
2038
2039 /* dst.z = xmm1 * src.z */
2040 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2041 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2042 }
2043
2044 /* dst.w = xmm1 * src.w */
2045 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2046 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2047 }
2048 }
2049
2050 /* dst.w = 1.0 */
2051 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2052 dst0[CHAN_W] = bld->base.one;
2053 }
2054 }
2055 break;
2056
2057 case TGSI_OPCODE_DIV:
2058 /* deprecated */
2059 assert( 0 );
2060 return FALSE;
2061 break;
2062
2063 case TGSI_OPCODE_DP2:
2064 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
2065 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
2066 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2067 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
2068 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
2069 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2070 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2071 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2072 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2073 }
2074 break;
2075
2076 case TGSI_OPCODE_TXL:
2077 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2078 break;
2079
2080 case TGSI_OPCODE_TXP:
2081 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
2082 break;
2083
2084 case TGSI_OPCODE_BRK:
2085 lp_exec_break(&bld->exec_mask);
2086 break;
2087
2088 case TGSI_OPCODE_IF:
2089 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2090 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2091 tmp0, bld->base.zero);
2092 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2093 break;
2094
2095 case TGSI_OPCODE_BGNLOOP:
2096 lp_exec_bgnloop(&bld->exec_mask);
2097 break;
2098
2099 case TGSI_OPCODE_BGNSUB:
2100 emit_bgnsub(bld);
2101 break;
2102
2103 case TGSI_OPCODE_ELSE:
2104 lp_exec_mask_cond_invert(&bld->exec_mask);
2105 break;
2106
2107 case TGSI_OPCODE_ENDIF:
2108 lp_exec_mask_cond_pop(&bld->exec_mask);
2109 break;
2110
2111 case TGSI_OPCODE_ENDLOOP:
2112 lp_exec_endloop(&bld->exec_mask);
2113 break;
2114
2115 case TGSI_OPCODE_ENDSUB:
2116 emit_endsub(bld);
2117 break;
2118
2119 case TGSI_OPCODE_PUSHA:
2120 /* deprecated? */
2121 assert(0);
2122 return FALSE;
2123 break;
2124
2125 case TGSI_OPCODE_POPA:
2126 /* deprecated? */
2127 assert(0);
2128 return FALSE;
2129 break;
2130
2131 case TGSI_OPCODE_CEIL:
2132 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2133 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2134 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2135 }
2136 break;
2137
2138 case TGSI_OPCODE_I2F:
2139 /* deprecated? */
2140 assert(0);
2141 return FALSE;
2142 break;
2143
2144 case TGSI_OPCODE_NOT:
2145 /* deprecated? */
2146 assert(0);
2147 return FALSE;
2148 break;
2149
2150 case TGSI_OPCODE_TRUNC:
2151 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2152 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2153 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2154 }
2155 break;
2156
2157 case TGSI_OPCODE_SHL:
2158 /* deprecated? */
2159 assert(0);
2160 return FALSE;
2161 break;
2162
2163 case TGSI_OPCODE_ISHR:
2164 /* deprecated? */
2165 assert(0);
2166 return FALSE;
2167 break;
2168
2169 case TGSI_OPCODE_AND:
2170 /* deprecated? */
2171 assert(0);
2172 return FALSE;
2173 break;
2174
2175 case TGSI_OPCODE_OR:
2176 /* deprecated? */
2177 assert(0);
2178 return FALSE;
2179 break;
2180
2181 case TGSI_OPCODE_MOD:
2182 /* deprecated? */
2183 assert(0);
2184 return FALSE;
2185 break;
2186
2187 case TGSI_OPCODE_XOR:
2188 /* deprecated? */
2189 assert(0);
2190 return FALSE;
2191 break;
2192
2193 case TGSI_OPCODE_SAD:
2194 /* deprecated? */
2195 assert(0);
2196 return FALSE;
2197 break;
2198
2199 case TGSI_OPCODE_TXF:
2200 /* deprecated? */
2201 assert(0);
2202 return FALSE;
2203 break;
2204
2205 case TGSI_OPCODE_TXQ:
2206 /* deprecated? */
2207 assert(0);
2208 return FALSE;
2209 break;
2210
2211 case TGSI_OPCODE_CONT:
2212 lp_exec_continue(&bld->exec_mask);
2213 break;
2214
2215 case TGSI_OPCODE_EMIT:
2216 return FALSE;
2217 break;
2218
2219 case TGSI_OPCODE_ENDPRIM:
2220 return FALSE;
2221 break;
2222
2223 case TGSI_OPCODE_NOP:
2224 break;
2225
2226 default:
2227 return FALSE;
2228 }
2229
2230 if(info->num_dst) {
2231 LLVMValueRef pred[NUM_CHANNELS];
2232
2233 emit_fetch_predicate( bld, inst, pred );
2234
2235 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2236 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2237 }
2238 }
2239
2240 return TRUE;
2241 }
2242
2243
2244 void
2245 lp_build_tgsi_soa(LLVMBuilderRef builder,
2246 const struct tgsi_token *tokens,
2247 struct lp_type type,
2248 struct lp_build_mask_context *mask,
2249 LLVMValueRef consts_ptr,
2250 const LLVMValueRef *pos,
2251 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2252 LLVMValueRef (*outputs)[NUM_CHANNELS],
2253 struct lp_build_sampler_soa *sampler,
2254 const struct tgsi_shader_info *info)
2255 {
2256 struct lp_build_tgsi_soa_context bld;
2257 struct tgsi_parse_context parse;
2258 uint num_immediates = 0;
2259 unsigned i;
2260
2261 /* Setup build context */
2262 memset(&bld, 0, sizeof bld);
2263 lp_build_context_init(&bld.base, builder, type);
2264 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
2265 bld.mask = mask;
2266 bld.pos = pos;
2267 bld.inputs = inputs;
2268 bld.outputs = outputs;
2269 bld.consts_ptr = consts_ptr;
2270 bld.sampler = sampler;
2271 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
2272 info->opcode_count[TGSI_OPCODE_ARL] > 0;
2273 bld.has_function_calls = info->opcode_count[TGSI_OPCODE_CAL] > 0;
2274 bld.func_hash = cso_hash_create();
2275
2276 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2277
2278 tgsi_parse_init( &parse, tokens );
2279
2280 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2281 tgsi_parse_token( &parse );
2282
2283 switch( parse.FullToken.Token.Type ) {
2284 case TGSI_TOKEN_TYPE_DECLARATION:
2285 /* Inputs already interpolated */
2286 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2287 break;
2288
2289 case TGSI_TOKEN_TYPE_INSTRUCTION:
2290 {
2291 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
2292 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
2293 /* we finished processing declarations, emit anything that needs
2294 * to go before the first instruction */
2295 if (bld.instno == 0) {
2296 emit_preamble(&bld, num_immediates);
2297 }
2298 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
2299 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2300 opcode_info->mnemonic);
2301 bld.instno++;
2302 }
2303
2304 break;
2305
2306 case TGSI_TOKEN_TYPE_IMMEDIATE:
2307 /* simply copy the immediate values into the next immediates[] slot */
2308 {
2309 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2310 assert(size <= 4);
2311 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2312 for( i = 0; i < size; ++i )
2313 bld.immediates[num_immediates][i] =
2314 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2315 for( i = size; i < 4; ++i )
2316 bld.immediates[num_immediates][i] = bld.base.undef;
2317 num_immediates++;
2318 }
2319 break;
2320
2321 case TGSI_TOKEN_TYPE_PROPERTY:
2322 break;
2323
2324 default:
2325 assert( 0 );
2326 }
2327 }
2328 /* we have to make sure we're at the end of the main block
2329 * (which won't be the case if we had more than one TGSI function
2330 * in the given shader) to let the calling function append
2331 * whatever it needs at the end of the main function */
2332 LLVMPositionBuilderAtEnd(bld.base.builder, bld.main_block);
2333
2334 if (0) {
2335 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2336 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2337 tgsi_dump(tokens, 0);
2338 lp_debug_dump_value(function);
2339 }
2340 tgsi_parse_free( &parse );
2341
2342 cso_hash_delete(bld.func_hash);
2343 }
2344