Merge remote branch 'origin/master' into nvc0-new
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_init.h"
55 #include "lp_bld_logic.h"
56 #include "lp_bld_swizzle.h"
57 #include "lp_bld_flow.h"
58 #include "lp_bld_quad.h"
59 #include "lp_bld_tgsi.h"
60 #include "lp_bld_limits.h"
61 #include "lp_bld_debug.h"
62 #include "lp_bld_printf.h"
63
64
/* Iterate CHAN over all four vector channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Test whether instruction INST writes channel CHAN of its first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst[0]'s writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* TGSI register channel indices */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial capacity of the instruction array; grown on demand */
#define LP_MAX_INSTRUCTIONS 256
85
86
/**
 * Runtime execution-mask state for structured control flow
 * (IF/ELSE/ENDIF, loops, subroutine calls) in SoA mode.
 * Each mask is an integer vector whose all-ones lanes mark the
 * channels still executing.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE whenever any control-flow construct is active; kept in sync
    * by lp_exec_mask_update().
    */
   boolean has_mask;

   LLVMTypeRef int_vec_type;

   /* IF/ELSE/ENDIF state: one saved mask per nesting level */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Loop state: current loop header block, continue/break masks, and
    * the alloca (break_var) that carries the break mask across the
    * loop back-edge.
    */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Subroutine call state: return mask plus saved pc/return-mask per level */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* Combined mask: AND of cond/cont/break/ret masks (see lp_exec_mask_update) */
   LLVMValueRef exec_mask;
};
119
/**
 * Full translation context for converting one TGSI shader to LLVM IR
 * in SoA (structure-of-arrays) form.
 */
struct lp_build_tgsi_soa_context
{
   /* Builder for the shader's native (float vector) data type */
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Callbacks used to emit texture sampling code */
   const struct lp_build_sampler_soa *sampler;

   /* Per-register-file storage; addr/preds/temps hold allocas */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* Parsed instruction buffer (grown on demand up to max_instructions) */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
170
171 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
172 {
173 mask->bld = bld;
174 mask->has_mask = FALSE;
175 mask->cond_stack_size = 0;
176 mask->loop_stack_size = 0;
177 mask->call_stack_size = 0;
178
179 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
180 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
181 LLVMConstAllOnes(mask->int_vec_type);
182 }
183
184 static void lp_exec_mask_update(struct lp_exec_mask *mask)
185 {
186 LLVMBuilderRef builder = mask->bld->gallivm->builder;
187
188 if (mask->loop_stack_size) {
189 /*for loops we need to update the entire mask at runtime */
190 LLVMValueRef tmp;
191 assert(mask->break_mask);
192 tmp = LLVMBuildAnd(builder,
193 mask->cont_mask,
194 mask->break_mask,
195 "maskcb");
196 mask->exec_mask = LLVMBuildAnd(builder,
197 mask->cond_mask,
198 tmp,
199 "maskfull");
200 } else
201 mask->exec_mask = mask->cond_mask;
202
203 if (mask->call_stack_size) {
204 mask->exec_mask = LLVMBuildAnd(builder,
205 mask->exec_mask,
206 mask->ret_mask,
207 "callmask");
208 }
209
210 mask->has_mask = (mask->cond_stack_size > 0 ||
211 mask->loop_stack_size > 0 ||
212 mask->call_stack_size > 0);
213 }
214
215 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
216 LLVMValueRef val)
217 {
218 LLVMBuilderRef builder = mask->bld->gallivm->builder;
219
220 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
221 if (mask->cond_stack_size == 0) {
222 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
223 }
224 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
225 assert(LLVMTypeOf(val) == mask->int_vec_type);
226 mask->cond_mask = LLVMBuildAnd(builder,
227 mask->cond_mask,
228 val,
229 "");
230 lp_exec_mask_update(mask);
231 }
232
233 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
234 {
235 LLVMBuilderRef builder = mask->bld->gallivm->builder;
236 LLVMValueRef prev_mask;
237 LLVMValueRef inv_mask;
238
239 assert(mask->cond_stack_size);
240 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
241 if (mask->cond_stack_size == 1) {
242 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
243 }
244
245 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
246
247 mask->cond_mask = LLVMBuildAnd(builder,
248 inv_mask,
249 prev_mask, "");
250 lp_exec_mask_update(mask);
251 }
252
253 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
254 {
255 assert(mask->cond_stack_size);
256 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
257 lp_exec_mask_update(mask);
258 }
259
/**
 * Handle BGNLOOP: push the enclosing loop's state, spill the break
 * mask to memory (it must survive the loop back-edge), then open the
 * loop header block and branch into it.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size == 0) {
      /* Outermost loop: all loop state must be in its quiescent form. */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* Save the enclosing loop's state so ENDLOOP can restore it. */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* The break mask is carried across iterations via this alloca. */
   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* Start the loop header block; subsequent code is emitted inside it. */
   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
   LLVMBuildBr(builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(builder, mask->loop_block);

   /* Reload the break mask at the top of each iteration. */
   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
290
291 static void lp_exec_break(struct lp_exec_mask *mask)
292 {
293 LLVMBuilderRef builder = mask->bld->gallivm->builder;
294 LLVMValueRef exec_mask = LLVMBuildNot(builder,
295 mask->exec_mask,
296 "break");
297
298 mask->break_mask = LLVMBuildAnd(builder,
299 mask->break_mask,
300 exec_mask, "break_full");
301
302 lp_exec_mask_update(mask);
303 }
304
305 static void lp_exec_continue(struct lp_exec_mask *mask)
306 {
307 LLVMBuilderRef builder = mask->bld->gallivm->builder;
308 LLVMValueRef exec_mask = LLVMBuildNot(builder,
309 mask->exec_mask,
310 "");
311
312 mask->cont_mask = LLVMBuildAnd(builder,
313 mask->cont_mask,
314 exec_mask, "");
315
316 lp_exec_mask_update(mask);
317 }
318
319
/**
 * Handle ENDLOOP: close the loop body, emit the back-edge branch
 * (taken while any channel is still active), then pop and restore the
 * enclosing loop's state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMBasicBlockRef endloop;
   /* Integer type wide enough to view the whole exec mask as one scalar */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. at least one channel still active */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* Loop again while any channel is active, otherwise fall through. */
   LLVMBuildCondBr(builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* Pop and restore the enclosing loop's state. */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
368
369 /* stores val into an address pointed to by dst.
370 * mask->exec_mask is used to figure out which bits of val
371 * should be stored into the address
372 * (0 means don't store this bit, 1 means do store).
373 */
374 static void lp_exec_mask_store(struct lp_exec_mask *mask,
375 LLVMValueRef pred,
376 LLVMValueRef val,
377 LLVMValueRef dst)
378 {
379 LLVMBuilderRef builder = mask->bld->gallivm->builder;
380
381 /* Mix the predicate and execution mask */
382 if (mask->has_mask) {
383 if (pred) {
384 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
385 } else {
386 pred = mask->exec_mask;
387 }
388 }
389
390 if (pred) {
391 LLVMValueRef real_val, dst_val;
392
393 dst_val = LLVMBuildLoad(builder, dst, "");
394 real_val = lp_build_select(mask->bld,
395 pred,
396 val, dst_val);
397
398 LLVMBuildStore(builder, real_val, dst);
399 } else
400 LLVMBuildStore(builder, val, dst);
401 }
402
403 static void lp_exec_mask_call(struct lp_exec_mask *mask,
404 int func,
405 int *pc)
406 {
407 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
408 mask->call_stack[mask->call_stack_size].pc = *pc;
409 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
410 mask->call_stack_size++;
411 *pc = func;
412 }
413
414 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
415 {
416 LLVMBuilderRef builder = mask->bld->gallivm->builder;
417 LLVMValueRef exec_mask;
418
419 if (mask->call_stack_size == 0) {
420 /* returning from main() */
421 *pc = -1;
422 return;
423 }
424 exec_mask = LLVMBuildNot(builder,
425 mask->exec_mask,
426 "ret");
427
428 mask->ret_mask = LLVMBuildAnd(builder,
429 mask->ret_mask,
430 exec_mask, "ret_full");
431
432 lp_exec_mask_update(mask);
433 }
434
/**
 * Handle BGNSUB: nothing to do here -- call setup/teardown is handled
 * entirely by lp_exec_mask_call() and lp_exec_mask_endsub().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
438
439 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
440 {
441 assert(mask->call_stack_size);
442 mask->call_stack_size--;
443 *pc = mask->call_stack[mask->call_stack_size].pc;
444 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
445 lp_exec_mask_update(mask);
446 }
447
448
449 /**
450 * Return pointer to a temporary register channel (src or dest).
451 * Note that indirect addressing cannot be handled here.
452 * \param index which temporary register
453 * \param chan which channel of the temp register.
454 */
455 static LLVMValueRef
456 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
457 unsigned index,
458 unsigned chan)
459 {
460 LLVMBuilderRef builder = bld->base.gallivm->builder;
461 assert(chan < 4);
462 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
463 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
464 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
465 }
466 else {
467 return bld->temps[index][chan];
468 }
469 }
470
471 /**
472 * Return pointer to a output register channel (src or dest).
473 * Note that indirect addressing cannot be handled here.
474 * \param index which output register
475 * \param chan which channel of the output register.
476 */
477 static LLVMValueRef
478 get_output_ptr(struct lp_build_tgsi_soa_context *bld,
479 unsigned index,
480 unsigned chan)
481 {
482 LLVMBuilderRef builder = bld->base.gallivm->builder;
483 assert(chan < 4);
484 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
485 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
486 index * 4 + chan);
487 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
488 }
489 else {
490 return bld->outputs[index][chan];
491 }
492 }
493
494 /**
495 * Gather vector.
496 * XXX the lp_build_gather() function should be capable of doing this
497 * with a little work.
498 */
499 static LLVMValueRef
500 build_gather(struct lp_build_tgsi_soa_context *bld,
501 LLVMValueRef base_ptr,
502 LLVMValueRef indexes)
503 {
504 LLVMBuilderRef builder = bld->base.gallivm->builder;
505 LLVMValueRef res = bld->base.undef;
506 unsigned i;
507
508 /*
509 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
510 */
511 for (i = 0; i < bld->base.type.length; i++) {
512 LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
513 LLVMValueRef index = LLVMBuildExtractElement(builder,
514 indexes, ii, "");
515 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
516 &index, 1, "gather_ptr");
517 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
518
519 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
520 }
521
522 return res;
523 }
524
525
526 /**
527 * Scatter/store vector.
528 */
529 static void
530 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
531 LLVMValueRef base_ptr,
532 LLVMValueRef indexes,
533 LLVMValueRef values,
534 struct lp_exec_mask *mask,
535 LLVMValueRef pred)
536 {
537 struct gallivm_state *gallivm = bld->base.gallivm;
538 LLVMBuilderRef builder = gallivm->builder;
539 unsigned i;
540
541 /* Mix the predicate and execution mask */
542 if (mask->has_mask) {
543 if (pred) {
544 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
545 }
546 else {
547 pred = mask->exec_mask;
548 }
549 }
550
551 /*
552 * Loop over elements of index_vec, store scalar value.
553 */
554 for (i = 0; i < bld->base.type.length; i++) {
555 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
556 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
557 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
558 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
559 LLVMValueRef scalar_pred = pred ?
560 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
561
562 if (0)
563 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
564 ii, val, index, scalar_ptr);
565
566 if (scalar_pred) {
567 LLVMValueRef real_val, dst_val;
568 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
569 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
570 LLVMBuildStore(builder, real_val, scalar_ptr);
571 }
572 else {
573 LLVMBuildStore(builder, val, scalar_ptr);
574 }
575 }
576 }
577
578
579 /**
580 * Read the current value of the ADDR register, convert the floats to
581 * ints, add the base index and return the vector of offsets.
582 * The offsets will be used to index into the constant buffer or
583 * temporary register file.
584 */
585 static LLVMValueRef
586 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
587 unsigned reg_file, unsigned reg_index,
588 const struct tgsi_src_register *indirect_reg)
589 {
590 LLVMBuilderRef builder = bld->base.gallivm->builder;
591 struct lp_build_context *uint_bld = &bld->uint_bld;
592 /* always use X component of address register */
593 unsigned swizzle = indirect_reg->SwizzleX;
594 LLVMValueRef base;
595 LLVMValueRef rel;
596 LLVMValueRef max_index;
597 LLVMValueRef index;
598
599 assert(bld->indirect_files & (1 << reg_file));
600
601 base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);
602
603 assert(swizzle < 4);
604 rel = LLVMBuildLoad(builder,
605 bld->addr[indirect_reg->Index][swizzle],
606 "load addr reg");
607
608 /* for indexing we want integers */
609 rel = LLVMBuildFPToSI(builder,
610 rel,
611 uint_bld->vec_type, "");
612
613 index = lp_build_add(uint_bld, base, rel);
614
615 max_index = lp_build_const_int_vec(bld->base.gallivm,
616 uint_bld->type,
617 bld->info->file_max[reg_file]);
618
619 assert(!uint_bld->type.sign);
620 index = lp_build_min(uint_bld, index, max_index);
621
622 return index;
623 }
624
625
/**
 * Register fetch.
 * Load one channel (selected by the source swizzle) of a source
 * operand as a vector, applying the operand's sign mode (abs/negate).
 * \param src_op which source operand of the instruction
 * \param chan_index which destination channel is being computed
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      /* Per-channel (vector) register index computed from the ADDR reg */
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);

         /* Constants are scalar in memory: load one and broadcast it. */
         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      /* Immediates were pre-built as vectors at declaration time. */
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the const buffer */
         LLVMValueRef inputs_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast inputs_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
                                         float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, inputs_array, index_vec);
      } else {
         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
            /* Inputs are in the flat array form; load directly. */
            LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle);
            LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
                                                   bld->inputs_array, &lindex, 1, "");
            res = LLVMBuildLoad(builder, input_ptr, "");
         }
         else {
            res = bld->inputs[reg->Register.Index][swizzle];
         }
      }
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the const buffer */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(builder, temp_ptr, "");
         /* NOTE(review): LLVMBuildLoad shouldn't return NULL; this looks
          * like a defensive leftover -- confirm before removing. */
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* Apply the operand's sign mode (absolute value and/or negation). */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
785
786
787 /**
788 * Register fetch with derivatives.
789 */
790 static void
791 emit_fetch_deriv(
792 struct lp_build_tgsi_soa_context *bld,
793 const struct tgsi_full_instruction *inst,
794 unsigned index,
795 const unsigned chan_index,
796 LLVMValueRef *res,
797 LLVMValueRef *ddx,
798 LLVMValueRef *ddy)
799 {
800 LLVMValueRef src;
801
802 src = emit_fetch(bld, inst, index, chan_index);
803
804 if(res)
805 *res = src;
806
807 /* TODO: use interpolation coeffs for inputs */
808
809 if(ddx)
810 *ddx = lp_build_ddx(&bld->base, src);
811
812 if(ddy)
813 *ddy = lp_build_ddy(&bld->base, src);
814 }
815
816
/**
 * Predicate.
 * Build per-channel predicate masks for a predicated instruction.
 * On return pred[chan] is either NULL (no predication) or an integer
 * mask vector; each predicate register channel is converted at most
 * once and reused across swizzled channels.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* Unpredicated instruction: all channels store unconditionally. */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.gallivm,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
883
884
/**
 * Register store.
 * Store one channel of 'value' into the destination register, after
 * applying the instruction's saturate mode, honoring the execution
 * mask and optional per-channel predicate.
 * \param index which destination operand of the instruction
 * \param chan_index which channel to store
 * \param pred per-channel predicate mask or NULL
 */
static void
emit_store(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *uint_bld = &bld->uint_bld;
   LLVMValueRef indirect_index = NULL;

   /* Apply the instruction's saturate mode before storing. */
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      /* clamp to [0, 1] */
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      /* clamp to [-1, 1] */
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      /* Per-channel (vector) register index computed from the ADDR reg */
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef outputs_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
                                          float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
                                               chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }
}
1030
1031
1032 /**
1033 * High-level instruction translators.
1034 */
1035
/**
 * Emit code to sample a texture for a TGSI TEX/TXP/TXB/TXL/TXD instruction.
 *
 * \param bld       shader translation context
 * \param inst      the full TGSI instruction (texture target, sources)
 * \param modifier  how to interpret the extra operand: none, projected (TXP),
 *                  LOD bias (TXB), explicit LOD (TXL) or explicit
 *                  derivatives (TXD)
 * \param texel     output: the four resulting texel channels (RGBA)
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   /* Without a sampler generator we cannot emit the fetch; return undefs
    * so the rest of the shader still builds.
    */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* Determine how many coordinate components the target consumes.
    * Shadow targets use a third component for the comparison value.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* The LOD bias (TXB) or explicit LOD (TXL) lives in src0.w; at most one
    * of the two is non-NULL.
    */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* For TXP, compute 1/w once and scale each coordinate by it below. */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   /* Unused coordinate slots are left undefined. */
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives come from src1 (ddx) and src2 (ddy); only the
       * first element of each SoA vector is used (scalar derivative per
       * quad), and the sampler unit is src3.
       */
      LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
      for (i = 0; i < num_coords; i++) {
         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
         ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
         ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* Otherwise derive ddx/ddy from the coordinates themselves and the
       * sampler unit is src1.
       */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = LLVMGetUndef(bld->base.elem_type);
      ddy[i] = LLVMGetUndef(bld->base.elem_type);
   }

   /* Delegate the actual texel fetch to the sampler code generator. */
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.gallivm,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
1136
1137 static boolean
1138 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1139 int pc)
1140 {
1141 int i;
1142
1143 for (i = 0; i < 5; i++) {
1144 unsigned opcode;
1145
1146 if (pc + i >= bld->info->num_instructions)
1147 return TRUE;
1148
1149 opcode = bld->instructions[pc + i].Instruction.Opcode;
1150
1151 if (opcode == TGSI_OPCODE_END)
1152 return TRUE;
1153
1154 if (opcode == TGSI_OPCODE_TEX ||
1155 opcode == TGSI_OPCODE_TXP ||
1156 opcode == TGSI_OPCODE_TXD ||
1157 opcode == TGSI_OPCODE_TXB ||
1158 opcode == TGSI_OPCODE_TXL ||
1159 opcode == TGSI_OPCODE_TXF ||
1160 opcode == TGSI_OPCODE_TXQ ||
1161 opcode == TGSI_OPCODE_CAL ||
1162 opcode == TGSI_OPCODE_CALLNZ ||
1163 opcode == TGSI_OPCODE_IF ||
1164 opcode == TGSI_OPCODE_IFC ||
1165 opcode == TGSI_OPCODE_BGNLOOP ||
1166 opcode == TGSI_OPCODE_SWITCH)
1167 return FALSE;
1168 }
1169
1170 return TRUE;
1171 }
1172
1173
1174
1175 /**
1176 * Kill fragment if any of the src register values are negative.
1177 */
1178 static void
1179 emit_kil(
1180 struct lp_build_tgsi_soa_context *bld,
1181 const struct tgsi_full_instruction *inst,
1182 int pc)
1183 {
1184 LLVMBuilderRef builder = bld->base.gallivm->builder;
1185 const struct tgsi_full_src_register *reg = &inst->Src[0];
1186 LLVMValueRef terms[NUM_CHANNELS];
1187 LLVMValueRef mask;
1188 unsigned chan_index;
1189
1190 memset(&terms, 0, sizeof terms);
1191
1192 FOR_EACH_CHANNEL( chan_index ) {
1193 unsigned swizzle;
1194
1195 /* Unswizzle channel */
1196 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1197
1198 /* Check if the component has not been already tested. */
1199 assert(swizzle < NUM_CHANNELS);
1200 if( !terms[swizzle] )
1201 /* TODO: change the comparison operator instead of setting the sign */
1202 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
1203 }
1204
1205 mask = NULL;
1206 FOR_EACH_CHANNEL( chan_index ) {
1207 if(terms[chan_index]) {
1208 LLVMValueRef chan_mask;
1209
1210 /*
1211 * If term < 0 then mask = 0 else mask = ~0.
1212 */
1213 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1214
1215 if(mask)
1216 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1217 else
1218 mask = chan_mask;
1219 }
1220 }
1221
1222 if(mask) {
1223 lp_build_mask_update(bld->mask, mask);
1224
1225 if (!near_end_of_shader(bld, pc))
1226 lp_build_mask_check(bld->mask);
1227 }
1228 }
1229
1230
1231 /**
1232 * Predicated fragment kill.
1233 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1234 * The only predication is the execution mask which will apply if
1235 * we're inside a loop or conditional.
1236 */
1237 static void
1238 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1239 const struct tgsi_full_instruction *inst,
1240 int pc)
1241 {
1242 LLVMBuilderRef builder = bld->base.gallivm->builder;
1243 LLVMValueRef mask;
1244
1245 /* For those channels which are "alive", disable fragment shader
1246 * execution.
1247 */
1248 if (bld->exec_mask.has_mask) {
1249 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1250 }
1251 else {
1252 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1253 mask = zero;
1254 }
1255
1256 lp_build_mask_update(bld->mask, mask);
1257
1258 if (!near_end_of_shader(bld, pc))
1259 lp_build_mask_check(bld->mask);
1260 }
1261
1262
1263 /**
1264 * Emit code which will dump the value of all the temporary registers
1265 * to stdout.
1266 */
1267 static void
1268 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1269 {
1270 struct gallivm_state *gallivm = bld->base.gallivm;
1271 LLVMBuilderRef builder = gallivm->builder;
1272 LLVMValueRef temp_ptr;
1273 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1274 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1275 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1276 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1277 int index;
1278 int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1279
1280 for (index = 0; index < n; index++) {
1281 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1282 LLVMValueRef v[4][4], res;
1283 int chan;
1284
1285 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1286
1287 for (chan = 0; chan < 4; chan++) {
1288 temp_ptr = get_temp_ptr(bld, index, chan);
1289 res = LLVMBuildLoad(builder, temp_ptr, "");
1290 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1291 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1292 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1293 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1294 }
1295
1296 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1297 v[0][0], v[0][1], v[0][2], v[0][3]);
1298 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1299 v[1][0], v[1][1], v[1][2], v[1][3]);
1300 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1301 v[2][0], v[2][1], v[2][2], v[2][3]);
1302 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1303 v[3][0], v[3][1], v[3][2], v[3][3]);
1304 }
1305 }
1306
1307
1308
1309 static void
1310 emit_declaration(
1311 struct lp_build_tgsi_soa_context *bld,
1312 const struct tgsi_full_declaration *decl)
1313 {
1314 struct gallivm_state *gallivm = bld->base.gallivm;
1315 LLVMTypeRef vec_type = bld->base.vec_type;
1316 const unsigned first = decl->Range.First;
1317 const unsigned last = decl->Range.Last;
1318 unsigned idx, i;
1319
1320 for (idx = first; idx <= last; ++idx) {
1321 assert(last <= bld->info->file_max[decl->Declaration.File]);
1322 switch (decl->Declaration.File) {
1323 case TGSI_FILE_TEMPORARY:
1324 assert(idx < LP_MAX_TGSI_TEMPS);
1325 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1326 for (i = 0; i < NUM_CHANNELS; i++)
1327 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1328 }
1329 break;
1330
1331 case TGSI_FILE_OUTPUT:
1332 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1333 for (i = 0; i < NUM_CHANNELS; i++)
1334 bld->outputs[idx][i] = lp_build_alloca(gallivm,
1335 vec_type, "output");
1336 }
1337 break;
1338
1339 case TGSI_FILE_ADDRESS:
1340 assert(idx < LP_MAX_TGSI_ADDRS);
1341 for (i = 0; i < NUM_CHANNELS; i++)
1342 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
1343 break;
1344
1345 case TGSI_FILE_PREDICATE:
1346 assert(idx < LP_MAX_TGSI_PREDS);
1347 for (i = 0; i < NUM_CHANNELS; i++)
1348 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1349 "predicate");
1350 break;
1351
1352 default:
1353 /* don't need to declare other vars */
1354 break;
1355 }
1356 }
1357 }
1358
1359
1360 /**
1361 * Emit LLVM for one TGSI instruction.
1362 * \param return TRUE for success, FALSE otherwise
1363 */
1364 static boolean
1365 emit_instruction(
1366 struct lp_build_tgsi_soa_context *bld,
1367 const struct tgsi_full_instruction *inst,
1368 const struct tgsi_opcode_info *info,
1369 int *pc)
1370 {
1371 unsigned chan_index;
1372 LLVMValueRef src0, src1, src2;
1373 LLVMValueRef tmp0, tmp1, tmp2;
1374 LLVMValueRef tmp3 = NULL;
1375 LLVMValueRef tmp4 = NULL;
1376 LLVMValueRef tmp5 = NULL;
1377 LLVMValueRef tmp6 = NULL;
1378 LLVMValueRef tmp7 = NULL;
1379 LLVMValueRef res;
1380 LLVMValueRef dst0[NUM_CHANNELS];
1381
1382 /*
1383 * Stores and write masks are handled in a general fashion after the long
1384 * instruction opcode switch statement.
1385 *
1386 * Although not stricitly necessary, we avoid generating instructions for
1387 * channels which won't be stored, in cases where's that easy. For some
1388 * complex instructions, like texture sampling, it is more convenient to
1389 * assume a full writemask and then let LLVM optimization passes eliminate
1390 * redundant code.
1391 */
1392
1393 (*pc)++;
1394
1395 assert(info->num_dst <= 1);
1396 if (info->num_dst) {
1397 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1398 dst0[chan_index] = bld->base.undef;
1399 }
1400 }
1401
1402 switch (inst->Instruction.Opcode) {
1403 case TGSI_OPCODE_ARL:
1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1405 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1406 tmp0 = lp_build_floor(&bld->base, tmp0);
1407 dst0[chan_index] = tmp0;
1408 }
1409 break;
1410
1411 case TGSI_OPCODE_MOV:
1412 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1413 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1414 }
1415 break;
1416
1417 case TGSI_OPCODE_LIT:
1418 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1419 dst0[CHAN_X] = bld->base.one;
1420 }
1421 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1422 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1423 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1424 }
1425 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1426 /* XMM[1] = SrcReg[0].yyyy */
1427 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1428 /* XMM[1] = max(XMM[1], 0) */
1429 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1430 /* XMM[2] = SrcReg[0].wwww */
1431 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1432 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1433 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1434 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1435 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1436 }
1437 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1438 dst0[CHAN_W] = bld->base.one;
1439 }
1440 break;
1441
1442 case TGSI_OPCODE_RCP:
1443 /* TGSI_OPCODE_RECIP */
1444 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1445 res = lp_build_rcp(&bld->base, src0);
1446 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1447 dst0[chan_index] = res;
1448 }
1449 break;
1450
1451 case TGSI_OPCODE_RSQ:
1452 /* TGSI_OPCODE_RECIPSQRT */
1453 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1454 src0 = lp_build_abs(&bld->base, src0);
1455 res = lp_build_rsqrt(&bld->base, src0);
1456 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1457 dst0[chan_index] = res;
1458 }
1459 break;
1460
1461 case TGSI_OPCODE_EXP:
1462 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1463 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1464 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1465 LLVMValueRef *p_exp2_int_part = NULL;
1466 LLVMValueRef *p_frac_part = NULL;
1467 LLVMValueRef *p_exp2 = NULL;
1468
1469 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1470
1471 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1472 p_exp2_int_part = &tmp0;
1473 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1474 p_frac_part = &tmp1;
1475 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1476 p_exp2 = &tmp2;
1477
1478 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1479
1480 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1481 dst0[CHAN_X] = tmp0;
1482 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1483 dst0[CHAN_Y] = tmp1;
1484 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1485 dst0[CHAN_Z] = tmp2;
1486 }
1487 /* dst.w = 1.0 */
1488 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1489 dst0[CHAN_W] = bld->base.one;
1490 }
1491 break;
1492
1493 case TGSI_OPCODE_LOG:
1494 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1495 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1496 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1497 LLVMValueRef *p_floor_log2 = NULL;
1498 LLVMValueRef *p_exp = NULL;
1499 LLVMValueRef *p_log2 = NULL;
1500
1501 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1502 src0 = lp_build_abs( &bld->base, src0 );
1503
1504 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1505 p_floor_log2 = &tmp0;
1506 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1507 p_exp = &tmp1;
1508 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1509 p_log2 = &tmp2;
1510
1511 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1512
1513 /* dst.x = floor(lg2(abs(src.x))) */
1514 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1515 dst0[CHAN_X] = tmp0;
1516 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1517 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1518 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1519 }
1520 /* dst.z = lg2(abs(src.x)) */
1521 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1522 dst0[CHAN_Z] = tmp2;
1523 }
1524 /* dst.w = 1.0 */
1525 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1526 dst0[CHAN_W] = bld->base.one;
1527 }
1528 break;
1529
1530 case TGSI_OPCODE_MUL:
1531 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1532 src0 = emit_fetch( bld, inst, 0, chan_index );
1533 src1 = emit_fetch( bld, inst, 1, chan_index );
1534 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1535 }
1536 break;
1537
1538 case TGSI_OPCODE_ADD:
1539 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1540 src0 = emit_fetch( bld, inst, 0, chan_index );
1541 src1 = emit_fetch( bld, inst, 1, chan_index );
1542 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1543 }
1544 break;
1545
1546 case TGSI_OPCODE_DP3:
1547 /* TGSI_OPCODE_DOT3 */
1548 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1549 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1550 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1551 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1552 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1553 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1554 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1555 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1556 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1557 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1558 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1559 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1560 dst0[chan_index] = tmp0;
1561 }
1562 break;
1563
1564 case TGSI_OPCODE_DP4:
1565 /* TGSI_OPCODE_DOT4 */
1566 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1567 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1568 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1569 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1570 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1571 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1572 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1573 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1574 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1575 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1576 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1577 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1578 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1579 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1580 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1581 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1582 dst0[chan_index] = tmp0;
1583 }
1584 break;
1585
1586 case TGSI_OPCODE_DST:
1587 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1588 dst0[CHAN_X] = bld->base.one;
1589 }
1590 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1591 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1592 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1593 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1594 }
1595 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1596 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1597 }
1598 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1599 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1600 }
1601 break;
1602
1603 case TGSI_OPCODE_MIN:
1604 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1605 src0 = emit_fetch( bld, inst, 0, chan_index );
1606 src1 = emit_fetch( bld, inst, 1, chan_index );
1607 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1608 }
1609 break;
1610
1611 case TGSI_OPCODE_MAX:
1612 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1613 src0 = emit_fetch( bld, inst, 0, chan_index );
1614 src1 = emit_fetch( bld, inst, 1, chan_index );
1615 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1616 }
1617 break;
1618
1619 case TGSI_OPCODE_SLT:
1620 /* TGSI_OPCODE_SETLT */
1621 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1622 src0 = emit_fetch( bld, inst, 0, chan_index );
1623 src1 = emit_fetch( bld, inst, 1, chan_index );
1624 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1625 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1626 }
1627 break;
1628
1629 case TGSI_OPCODE_SGE:
1630 /* TGSI_OPCODE_SETGE */
1631 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1632 src0 = emit_fetch( bld, inst, 0, chan_index );
1633 src1 = emit_fetch( bld, inst, 1, chan_index );
1634 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1635 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1636 }
1637 break;
1638
1639 case TGSI_OPCODE_MAD:
1640 /* TGSI_OPCODE_MADD */
1641 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1642 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1643 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1644 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1645 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1646 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1647 dst0[chan_index] = tmp0;
1648 }
1649 break;
1650
1651 case TGSI_OPCODE_SUB:
1652 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1653 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1654 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1655 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1656 }
1657 break;
1658
1659 case TGSI_OPCODE_LRP:
1660 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1661 src0 = emit_fetch( bld, inst, 0, chan_index );
1662 src1 = emit_fetch( bld, inst, 1, chan_index );
1663 src2 = emit_fetch( bld, inst, 2, chan_index );
1664 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1665 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1666 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1667 }
1668 break;
1669
1670 case TGSI_OPCODE_CND:
1671 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1672 src0 = emit_fetch( bld, inst, 0, chan_index );
1673 src1 = emit_fetch( bld, inst, 1, chan_index );
1674 src2 = emit_fetch( bld, inst, 2, chan_index );
1675 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
1676 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1677 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1678 }
1679 break;
1680
1681 case TGSI_OPCODE_DP2A:
1682 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1683 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1684 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1685 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1686 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1687 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1688 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1689 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1690 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1691 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1692 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1693 }
1694 break;
1695
1696 case TGSI_OPCODE_FRC:
1697 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1698 src0 = emit_fetch( bld, inst, 0, chan_index );
1699 tmp0 = lp_build_floor(&bld->base, src0);
1700 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1701 dst0[chan_index] = tmp0;
1702 }
1703 break;
1704
1705 case TGSI_OPCODE_CLAMP:
1706 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1707 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1708 src1 = emit_fetch( bld, inst, 1, chan_index );
1709 src2 = emit_fetch( bld, inst, 2, chan_index );
1710 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1711 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1712 dst0[chan_index] = tmp0;
1713 }
1714 break;
1715
1716 case TGSI_OPCODE_FLR:
1717 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1718 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1719 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1720 }
1721 break;
1722
1723 case TGSI_OPCODE_ROUND:
1724 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1725 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1726 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1727 }
1728 break;
1729
1730 case TGSI_OPCODE_EX2: {
1731 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1732 tmp0 = lp_build_exp2( &bld->base, tmp0);
1733 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1734 dst0[chan_index] = tmp0;
1735 }
1736 break;
1737 }
1738
1739 case TGSI_OPCODE_LG2:
1740 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1741 tmp0 = lp_build_log2( &bld->base, tmp0);
1742 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1743 dst0[chan_index] = tmp0;
1744 }
1745 break;
1746
1747 case TGSI_OPCODE_POW:
1748 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1749 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1750 res = lp_build_pow( &bld->base, src0, src1 );
1751 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1752 dst0[chan_index] = res;
1753 }
1754 break;
1755
1756 case TGSI_OPCODE_XPD:
1757 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1758 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1759 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1760 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1761 }
1762 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1763 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1764 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1765 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1766 }
1767 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1768 tmp2 = tmp0;
1769 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1770 tmp5 = tmp3;
1771 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1772 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1773 dst0[CHAN_X] = tmp2;
1774 }
1775 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1776 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1777 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1778 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1779 }
1780 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1781 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1782 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1783 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1784 dst0[CHAN_Y] = tmp3;
1785 }
1786 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1787 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1788 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1789 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1790 dst0[CHAN_Z] = tmp5;
1791 }
1792 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1793 dst0[CHAN_W] = bld->base.one;
1794 }
1795 break;
1796
1797 case TGSI_OPCODE_ABS:
1798 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1799 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1800 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1801 }
1802 break;
1803
1804 case TGSI_OPCODE_RCC:
1805 /* deprecated? */
1806 assert(0);
1807 return FALSE;
1808
1809 case TGSI_OPCODE_DPH:
1810 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1811 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1812 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1813 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1814 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1815 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1816 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1817 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1818 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1819 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1820 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1821 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1822 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1823 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1824 dst0[chan_index] = tmp0;
1825 }
1826 break;
1827
1828 case TGSI_OPCODE_COS:
1829 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1830 tmp0 = lp_build_cos( &bld->base, tmp0 );
1831 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1832 dst0[chan_index] = tmp0;
1833 }
1834 break;
1835
1836 case TGSI_OPCODE_DDX:
1837 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1838 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1839 }
1840 break;
1841
1842 case TGSI_OPCODE_DDY:
1843 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1844 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1845 }
1846 break;
1847
1848 case TGSI_OPCODE_KILP:
1849 /* predicated kill */
1850 emit_kilp( bld, inst, (*pc)-1 );
1851 break;
1852
1853 case TGSI_OPCODE_KIL:
1854 /* conditional kill */
1855 emit_kil( bld, inst, (*pc)-1 );
1856 break;
1857
1858 case TGSI_OPCODE_PK2H:
1859 return FALSE;
1860 break;
1861
1862 case TGSI_OPCODE_PK2US:
1863 return FALSE;
1864 break;
1865
1866 case TGSI_OPCODE_PK4B:
1867 return FALSE;
1868 break;
1869
1870 case TGSI_OPCODE_PK4UB:
1871 return FALSE;
1872 break;
1873
1874 case TGSI_OPCODE_RFL:
1875 return FALSE;
1876 break;
1877
1878 case TGSI_OPCODE_SEQ:
1879 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1880 src0 = emit_fetch( bld, inst, 0, chan_index );
1881 src1 = emit_fetch( bld, inst, 1, chan_index );
1882 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1883 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1884 }
1885 break;
1886
1887 case TGSI_OPCODE_SFL:
1888 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1889 dst0[chan_index] = bld->base.zero;
1890 }
1891 break;
1892
1893 case TGSI_OPCODE_SGT:
1894 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1895 src0 = emit_fetch( bld, inst, 0, chan_index );
1896 src1 = emit_fetch( bld, inst, 1, chan_index );
1897 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1898 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1899 }
1900 break;
1901
1902 case TGSI_OPCODE_SIN:
1903 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1904 tmp0 = lp_build_sin( &bld->base, tmp0 );
1905 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1906 dst0[chan_index] = tmp0;
1907 }
1908 break;
1909
1910 case TGSI_OPCODE_SLE:
1911 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1912 src0 = emit_fetch( bld, inst, 0, chan_index );
1913 src1 = emit_fetch( bld, inst, 1, chan_index );
1914 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1915 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1916 }
1917 break;
1918
1919 case TGSI_OPCODE_SNE:
1920 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1921 src0 = emit_fetch( bld, inst, 0, chan_index );
1922 src1 = emit_fetch( bld, inst, 1, chan_index );
1923 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1924 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1925 }
1926 break;
1927
1928 case TGSI_OPCODE_STR:
1929 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1930 dst0[chan_index] = bld->base.one;
1931 }
1932 break;
1933
1934 case TGSI_OPCODE_TEX:
1935 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1936 break;
1937
1938 case TGSI_OPCODE_TXD:
1939 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1940 break;
1941
1942 case TGSI_OPCODE_UP2H:
1943 /* deprecated */
1944 assert (0);
1945 return FALSE;
1946 break;
1947
1948 case TGSI_OPCODE_UP2US:
1949 /* deprecated */
1950 assert(0);
1951 return FALSE;
1952 break;
1953
1954 case TGSI_OPCODE_UP4B:
1955 /* deprecated */
1956 assert(0);
1957 return FALSE;
1958 break;
1959
1960 case TGSI_OPCODE_UP4UB:
1961 /* deprecated */
1962 assert(0);
1963 return FALSE;
1964 break;
1965
1966 case TGSI_OPCODE_X2D:
1967 /* deprecated? */
1968 assert(0);
1969 return FALSE;
1970 break;
1971
1972 case TGSI_OPCODE_ARA:
1973 /* deprecated */
1974 assert(0);
1975 return FALSE;
1976 break;
1977
1978 case TGSI_OPCODE_ARR:
1979 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1980 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1981 tmp0 = lp_build_round(&bld->base, tmp0);
1982 dst0[chan_index] = tmp0;
1983 }
1984 break;
1985
1986 case TGSI_OPCODE_BRA:
1987 /* deprecated */
1988 assert(0);
1989 return FALSE;
1990 break;
1991
1992 case TGSI_OPCODE_CAL:
1993 lp_exec_mask_call(&bld->exec_mask,
1994 inst->Label.Label,
1995 pc);
1996
1997 break;
1998
1999 case TGSI_OPCODE_RET:
2000 lp_exec_mask_ret(&bld->exec_mask, pc);
2001 break;
2002
2003 case TGSI_OPCODE_END:
2004 if (0) {
2005 /* for debugging */
2006 emit_dump_temps(bld);
2007 }
2008 *pc = -1;
2009 break;
2010
2011 case TGSI_OPCODE_SSG:
2012 /* TGSI_OPCODE_SGN */
2013 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2014 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2015 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
2016 }
2017 break;
2018
2019 case TGSI_OPCODE_CMP:
2020 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2021 src0 = emit_fetch( bld, inst, 0, chan_index );
2022 src1 = emit_fetch( bld, inst, 1, chan_index );
2023 src2 = emit_fetch( bld, inst, 2, chan_index );
2024 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
2025 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
2026 }
2027 break;
2028
2029 case TGSI_OPCODE_SCS:
2030 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
2031 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2032 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2033 }
2034 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
2035 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2036 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2037 }
2038 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
2039 dst0[CHAN_Z] = bld->base.zero;
2040 }
2041 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
2042 dst0[CHAN_W] = bld->base.one;
2043 }
2044 break;
2045
2046 case TGSI_OPCODE_TXB:
2047 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2048 break;
2049
2050 case TGSI_OPCODE_NRM:
2051 /* fall-through */
2052 case TGSI_OPCODE_NRM4:
2053 /* 3 or 4-component normalization */
2054 {
2055 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2056
2057 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
2058 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
2059 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
2060 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
2061
2062 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2063
2064 /* xmm4 = src.x */
2065 /* xmm0 = src.x * src.x */
2066 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2067 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2068 tmp4 = tmp0;
2069 }
2070 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2071
2072 /* xmm5 = src.y */
2073 /* xmm0 = xmm0 + src.y * src.y */
2074 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2075 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2076 tmp5 = tmp1;
2077 }
2078 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2079 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2080
2081 /* xmm6 = src.z */
2082 /* xmm0 = xmm0 + src.z * src.z */
2083 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2084 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2085 tmp6 = tmp1;
2086 }
2087 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2088 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2089
2090 if (dims == 4) {
2091 /* xmm7 = src.w */
2092 /* xmm0 = xmm0 + src.w * src.w */
2093 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2094 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2095 tmp7 = tmp1;
2096 }
2097 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2098 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2099 }
2100
2101 /* xmm1 = 1 / sqrt(xmm0) */
2102 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2103
2104 /* dst.x = xmm1 * src.x */
2105 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2106 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2107 }
2108
2109 /* dst.y = xmm1 * src.y */
2110 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2111 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2112 }
2113
2114 /* dst.z = xmm1 * src.z */
2115 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2116 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2117 }
2118
2119 /* dst.w = xmm1 * src.w */
2120 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2121 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2122 }
2123 }
2124
2125 /* dst.w = 1.0 */
2126 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2127 dst0[CHAN_W] = bld->base.one;
2128 }
2129 }
2130 break;
2131
2132 case TGSI_OPCODE_DIV:
2133 /* deprecated */
2134 assert( 0 );
2135 return FALSE;
2136 break;
2137
2138 case TGSI_OPCODE_DP2:
2139 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
2140 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
2141 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2142 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
2143 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
2144 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2145 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2146 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2147 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2148 }
2149 break;
2150
2151 case TGSI_OPCODE_TXL:
2152 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2153 break;
2154
2155 case TGSI_OPCODE_TXP:
2156 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2157 break;
2158
2159 case TGSI_OPCODE_BRK:
2160 lp_exec_break(&bld->exec_mask);
2161 break;
2162
2163 case TGSI_OPCODE_IF:
2164 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2165 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2166 tmp0, bld->base.zero);
2167 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2168 break;
2169
2170 case TGSI_OPCODE_BGNLOOP:
2171 lp_exec_bgnloop(&bld->exec_mask);
2172 break;
2173
2174 case TGSI_OPCODE_BGNSUB:
2175 lp_exec_mask_bgnsub(&bld->exec_mask);
2176 break;
2177
2178 case TGSI_OPCODE_ELSE:
2179 lp_exec_mask_cond_invert(&bld->exec_mask);
2180 break;
2181
2182 case TGSI_OPCODE_ENDIF:
2183 lp_exec_mask_cond_pop(&bld->exec_mask);
2184 break;
2185
2186 case TGSI_OPCODE_ENDLOOP:
2187 lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
2188 break;
2189
2190 case TGSI_OPCODE_ENDSUB:
2191 lp_exec_mask_endsub(&bld->exec_mask, pc);
2192 break;
2193
2194 case TGSI_OPCODE_PUSHA:
2195 /* deprecated? */
2196 assert(0);
2197 return FALSE;
2198 break;
2199
2200 case TGSI_OPCODE_POPA:
2201 /* deprecated? */
2202 assert(0);
2203 return FALSE;
2204 break;
2205
2206 case TGSI_OPCODE_CEIL:
2207 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2208 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2209 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2210 }
2211 break;
2212
2213 case TGSI_OPCODE_I2F:
2214 /* deprecated? */
2215 assert(0);
2216 return FALSE;
2217 break;
2218
2219 case TGSI_OPCODE_NOT:
2220 /* deprecated? */
2221 assert(0);
2222 return FALSE;
2223 break;
2224
2225 case TGSI_OPCODE_TRUNC:
2226 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2227 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2228 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2229 }
2230 break;
2231
2232 case TGSI_OPCODE_SHL:
2233 /* deprecated? */
2234 assert(0);
2235 return FALSE;
2236 break;
2237
2238 case TGSI_OPCODE_ISHR:
2239 /* deprecated? */
2240 assert(0);
2241 return FALSE;
2242 break;
2243
2244 case TGSI_OPCODE_AND:
2245 /* deprecated? */
2246 assert(0);
2247 return FALSE;
2248 break;
2249
2250 case TGSI_OPCODE_OR:
2251 /* deprecated? */
2252 assert(0);
2253 return FALSE;
2254 break;
2255
2256 case TGSI_OPCODE_MOD:
2257 /* deprecated? */
2258 assert(0);
2259 return FALSE;
2260 break;
2261
2262 case TGSI_OPCODE_XOR:
2263 /* deprecated? */
2264 assert(0);
2265 return FALSE;
2266 break;
2267
2268 case TGSI_OPCODE_SAD:
2269 /* deprecated? */
2270 assert(0);
2271 return FALSE;
2272 break;
2273
2274 case TGSI_OPCODE_TXF:
2275 /* deprecated? */
2276 assert(0);
2277 return FALSE;
2278 break;
2279
2280 case TGSI_OPCODE_TXQ:
2281 /* deprecated? */
2282 assert(0);
2283 return FALSE;
2284 break;
2285
2286 case TGSI_OPCODE_CONT:
2287 lp_exec_continue(&bld->exec_mask);
2288 break;
2289
2290 case TGSI_OPCODE_EMIT:
2291 return FALSE;
2292 break;
2293
2294 case TGSI_OPCODE_ENDPRIM:
2295 return FALSE;
2296 break;
2297
2298 case TGSI_OPCODE_NOP:
2299 break;
2300
2301 default:
2302 return FALSE;
2303 }
2304
2305 if(info->num_dst) {
2306 LLVMValueRef pred[NUM_CHANNELS];
2307
2308 emit_fetch_predicate( bld, inst, pred );
2309
2310 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2311 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2312 }
2313 }
2314
2315 return TRUE;
2316 }
2317
2318
2319 void
2320 lp_build_tgsi_soa(struct gallivm_state *gallivm,
2321 const struct tgsi_token *tokens,
2322 struct lp_type type,
2323 struct lp_build_mask_context *mask,
2324 LLVMValueRef consts_ptr,
2325 const LLVMValueRef *pos,
2326 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2327 LLVMValueRef (*outputs)[NUM_CHANNELS],
2328 struct lp_build_sampler_soa *sampler,
2329 const struct tgsi_shader_info *info)
2330 {
2331 struct lp_build_tgsi_soa_context bld;
2332 struct tgsi_parse_context parse;
2333 uint num_immediates = 0;
2334 uint num_instructions = 0;
2335 unsigned i;
2336 int pc = 0;
2337
2338 struct lp_type res_type;
2339
2340 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2341 memset(&res_type, 0, sizeof res_type);
2342 res_type.width = type.width;
2343 res_type.length = type.length;
2344 res_type.sign = 1;
2345
2346 /* Setup build context */
2347 memset(&bld, 0, sizeof bld);
2348 lp_build_context_init(&bld.base, gallivm, type);
2349 lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
2350 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2351 bld.mask = mask;
2352 bld.pos = pos;
2353 bld.inputs = inputs;
2354 bld.outputs = outputs;
2355 bld.consts_ptr = consts_ptr;
2356 bld.sampler = sampler;
2357 bld.info = info;
2358 bld.indirect_files = info->indirect_files;
2359 bld.instructions = (struct tgsi_full_instruction *)
2360 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2361 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2362
2363 if (!bld.instructions) {
2364 return;
2365 }
2366
2367 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2368
2369 if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2370 LLVMValueRef array_size =
2371 lp_build_const_int32(gallivm,
2372 info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2373 bld.temps_array = lp_build_array_alloca(gallivm,
2374 bld.base.vec_type, array_size,
2375 "temp_array");
2376 }
2377
2378 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2379 LLVMValueRef array_size =
2380 lp_build_const_int32(gallivm,
2381 info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2382 bld.outputs_array = lp_build_array_alloca(gallivm,
2383 bld.base.vec_type, array_size,
2384 "output_array");
2385 }
2386
2387 /* If we have indirect addressing in inputs we need to copy them into
2388 * our alloca array to be able to iterate over them */
2389 if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
2390 unsigned index, chan;
2391 LLVMTypeRef vec_type = bld.base.vec_type;
2392 LLVMValueRef array_size =
2393 lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
2394 bld.inputs_array = lp_build_array_alloca(gallivm,
2395 vec_type, array_size,
2396 "input_array");
2397
2398 assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
2399
2400 for (index = 0; index < info->num_inputs; ++index) {
2401 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2402 LLVMValueRef lindex =
2403 lp_build_const_int32(gallivm, index * 4 + chan);
2404 LLVMValueRef input_ptr =
2405 LLVMBuildGEP(gallivm->builder, bld.inputs_array,
2406 &lindex, 1, "");
2407 LLVMValueRef value = bld.inputs[index][chan];
2408 if (value)
2409 LLVMBuildStore(gallivm->builder, value, input_ptr);
2410 }
2411 }
2412 }
2413
2414 tgsi_parse_init( &parse, tokens );
2415
2416 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2417 tgsi_parse_token( &parse );
2418
2419 switch( parse.FullToken.Token.Type ) {
2420 case TGSI_TOKEN_TYPE_DECLARATION:
2421 /* Inputs already interpolated */
2422 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2423 break;
2424
2425 case TGSI_TOKEN_TYPE_INSTRUCTION:
2426 {
2427 /* save expanded instruction */
2428 if (num_instructions == bld.max_instructions) {
2429 struct tgsi_full_instruction *instructions;
2430 instructions = REALLOC(bld.instructions,
2431 bld.max_instructions
2432 * sizeof(struct tgsi_full_instruction),
2433 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2434 * sizeof(struct tgsi_full_instruction));
2435 if (!instructions) {
2436 break;
2437 }
2438 bld.instructions = instructions;
2439 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2440 }
2441
2442 memcpy(bld.instructions + num_instructions,
2443 &parse.FullToken.FullInstruction,
2444 sizeof(bld.instructions[0]));
2445
2446 num_instructions++;
2447 }
2448
2449 break;
2450
2451 case TGSI_TOKEN_TYPE_IMMEDIATE:
2452 /* simply copy the immediate values into the next immediates[] slot */
2453 {
2454 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2455 assert(size <= 4);
2456 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2457 for( i = 0; i < size; ++i )
2458 bld.immediates[num_immediates][i] =
2459 lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
2460 for( i = size; i < 4; ++i )
2461 bld.immediates[num_immediates][i] = bld.base.undef;
2462 num_immediates++;
2463 }
2464 break;
2465
2466 case TGSI_TOKEN_TYPE_PROPERTY:
2467 break;
2468
2469 default:
2470 assert( 0 );
2471 }
2472 }
2473
2474 while (pc != -1) {
2475 struct tgsi_full_instruction *instr = bld.instructions + pc;
2476 const struct tgsi_opcode_info *opcode_info =
2477 tgsi_get_opcode_info(instr->Instruction.Opcode);
2478 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2479 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2480 opcode_info->mnemonic);
2481 }
2482
2483 /* If we have indirect addressing in outputs we need to copy our alloca array
2484 * to the outputs slots specified by the called */
2485 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2486 unsigned index, chan;
2487 assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2488 for (index = 0; index < info->num_outputs; ++index) {
2489 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2490 bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2491 }
2492 }
2493 }
2494
2495 if (0) {
2496 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2497 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2498 debug_printf("11111111111111111111111111111 \n");
2499 tgsi_dump(tokens, 0);
2500 lp_debug_dump_value(function);
2501 debug_printf("2222222222222222222222222222 \n");
2502 }
2503 tgsi_parse_free( &parse );
2504
2505 if (0) {
2506 LLVMModuleRef module = LLVMGetGlobalParent(
2507 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2508 LLVMDumpModule(module);
2509
2510 }
2511
2512 FREE( bld.instructions );
2513 }
2514