[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61 #include "lp_bld_printf.h"
62
63
64 #define FOR_EACH_CHANNEL( CHAN )\
65 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
66
67 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
69
70 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
71 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
72
73 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
74 FOR_EACH_CHANNEL( CHAN )\
75 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
76
77 #define CHAN_X 0
78 #define CHAN_Y 1
79 #define CHAN_Z 2
80 #define CHAN_W 3
81 #define NUM_CHANNELS 4
82
83 #define LP_MAX_INSTRUCTIONS 256
84
85
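/**
 * Execution-mask state used to implement TGSI control flow (IF/ELSE,
 * loops, subroutine calls) on SoA vectors.  The per-construct masks are
 * AND'ed together into exec_mask, which gates all register stores.
 */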
86 struct lp_exec_mask {
87 struct lp_build_context *bld;
88
89 boolean has_mask;
90
91 LLVMTypeRef int_vec_type;
92
93 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
94 int cond_stack_size;
95 LLVMValueRef cond_mask;
96
97 LLVMBasicBlockRef loop_block;
98 LLVMValueRef cont_mask;
99 LLVMValueRef break_mask;
100 LLVMValueRef break_var;
101 struct {
102 LLVMBasicBlockRef loop_block;
103 LLVMValueRef cont_mask;
104 LLVMValueRef break_mask;
105 LLVMValueRef break_var;
106 } loop_stack[LP_MAX_TGSI_NESTING];
107 int loop_stack_size;
108
109 LLVMValueRef ret_mask;
110 struct {
111 int pc;
112 LLVMValueRef ret_mask;
113 } call_stack[LP_MAX_TGSI_NESTING];
114 int call_stack_size;
115
116 LLVMValueRef exec_mask;
117 };
118
119 struct lp_build_tgsi_soa_context
120 {
121 struct lp_build_context base;
122
123 /* Builder for vector integer masks and indices */
124 struct lp_build_context uint_bld;
125
126 /* Builder for scalar elements of shader's data type (float) */
127 struct lp_build_context elem_bld;
128
129 LLVMValueRef consts_ptr;
130 const LLVMValueRef *pos;
131 const LLVMValueRef (*inputs)[NUM_CHANNELS];
132 LLVMValueRef (*outputs)[NUM_CHANNELS];
133
134 const struct lp_build_sampler_soa *sampler;
135
136 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
137 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
138 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
139 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
140
141 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
142 * set in the indirect_files field.
143 * The temps[] array above is unused then.
144 */
145 LLVMValueRef temps_array;
146
147    /* We allocate/use this array of outputs if (1 << TGSI_FILE_OUTPUT) is
148 * set in the indirect_files field.
149 * The outputs[] array above is unused then.
150 */
151 LLVMValueRef outputs_array;
152
153 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
154 * set in the indirect_files field.
155 * The inputs[] array above is unused then.
156 */
157 LLVMValueRef inputs_array;
158
159 const struct tgsi_shader_info *info;
160 /** bitmask indicating which register files are accessed indirectly */
161 unsigned indirect_files;
162
163 struct lp_build_mask_context *mask;
164 struct lp_exec_mask exec_mask;
165
166 struct tgsi_full_instruction *instructions;
167 uint max_instructions;
168 };
169
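/** Initialize the exec mask state: empty stacks, all masks set to all-ones. */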
170 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
171 {
172 mask->bld = bld;
173 mask->has_mask = FALSE;
174 mask->cond_stack_size = 0;
175 mask->loop_stack_size = 0;
176 mask->call_stack_size = 0;
177
178 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
179 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
180 LLVMConstAllOnes(mask->int_vec_type);
181 }
182
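/** Recompute exec_mask as the AND of the active condition, loop and call masks. */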
183 static void lp_exec_mask_update(struct lp_exec_mask *mask)
184 {
185 if (mask->loop_stack_size) {
186       /* For loops we need to update the entire mask at runtime */
187 LLVMValueRef tmp;
188 assert(mask->break_mask);
189 tmp = LLVMBuildAnd(mask->bld->builder,
190 mask->cont_mask,
191 mask->break_mask,
192 "maskcb");
193 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
194 mask->cond_mask,
195 tmp,
196 "maskfull");
197 } else
198 mask->exec_mask = mask->cond_mask;
199
200 if (mask->call_stack_size) {
201 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
202 mask->exec_mask,
203 mask->ret_mask,
204 "callmask");
205 }
206
207 mask->has_mask = (mask->cond_stack_size > 0 ||
208 mask->loop_stack_size > 0 ||
209 mask->call_stack_size > 0);
210 }
211
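/** IF: push the current condition mask and AND the new condition into it. */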
212 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
213 LLVMValueRef val)
214 {
215 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
216 if (mask->cond_stack_size == 0) {
217 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
218 }
219 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
220 assert(LLVMTypeOf(val) == mask->int_vec_type);
221 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
222 mask->cond_mask,
223 val,
224 "");
225 lp_exec_mask_update(mask);
226 }
227
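/** ELSE: invert the current condition mask within the enclosing condition. */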
228 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
229 {
230 LLVMValueRef prev_mask;
231 LLVMValueRef inv_mask;
232
233 assert(mask->cond_stack_size);
234 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
235 if (mask->cond_stack_size == 1) {
236 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
237 }
238
239 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
240
241 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
242 inv_mask,
243 prev_mask, "");
244 lp_exec_mask_update(mask);
245 }
246
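/** ENDIF: restore the condition mask saved by the matching IF. */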
247 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
248 {
249 assert(mask->cond_stack_size);
250 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
251 lp_exec_mask_update(mask);
252 }
253
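/** BGNLOOP: save the enclosing loop state and start a new loop basic block. */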
254 static void lp_exec_bgnloop(struct lp_exec_mask *mask)
255 {
256 if (mask->loop_stack_size == 0) {
257 assert(mask->loop_block == NULL);
258 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
259 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
260 assert(mask->break_var == NULL);
261 }
262
263 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
264
265 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
266 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
267 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
268 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
269 ++mask->loop_stack_size;
270
271 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
272 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
273
274 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
275 LLVMBuildBr(mask->bld->builder, mask->loop_block);
276 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
277
278 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
279
280 lp_exec_mask_update(mask);
281 }
282
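/** BRK: clear the break mask for all channels that are currently executing. */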
283 static void lp_exec_break(struct lp_exec_mask *mask)
284 {
285 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
286 mask->exec_mask,
287 "break");
288
289 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
290 mask->break_mask,
291 exec_mask, "break_full");
292
293 lp_exec_mask_update(mask);
294 }
295
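/** CONT: clear the continue mask for all channels that are currently executing. */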
296 static void lp_exec_continue(struct lp_exec_mask *mask)
297 {
298 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
299 mask->exec_mask,
300 "");
301
302 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
303 mask->cont_mask,
304 exec_mask, "");
305
306 lp_exec_mask_update(mask);
307 }
308
309
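/**
 * ENDLOOP: restore the continue mask and branch back to the loop header
 * while any channel still has a non-zero execution mask.
 */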
310 static void lp_exec_endloop(struct lp_exec_mask *mask)
311 {
312 LLVMBasicBlockRef endloop;
313 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
314 mask->bld->type.length);
315 LLVMValueRef i1cond;
316
317 assert(mask->break_mask);
318
319 /*
320 * Restore the cont_mask, but don't pop
321 */
322 assert(mask->loop_stack_size);
323 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
324 lp_exec_mask_update(mask);
325
326 /*
327 * Unlike the continue mask, the break_mask must be preserved across loop
328 * iterations
329 */
330 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
331
332    /* i1cond = (exec_mask != 0), i.e. keep looping while any channel is still active */
333 i1cond = LLVMBuildICmp(
334 mask->bld->builder,
335 LLVMIntNE,
336 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
337 LLVMConstNull(reg_type), "");
338
339 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
340
341 LLVMBuildCondBr(mask->bld->builder,
342 i1cond, mask->loop_block, endloop);
343
344 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
345
346 assert(mask->loop_stack_size);
347 --mask->loop_stack_size;
348 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
349 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
350 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
351 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
352
353 lp_exec_mask_update(mask);
354 }
355
356 /* Store 'val' to the memory pointed to by 'dst'.
357  * mask->exec_mask is used to figure out which channels of 'val'
358  * should be stored to that address
359  * (0 means don't store this channel, ~0 means do store it).
360  */
361 static void lp_exec_mask_store(struct lp_exec_mask *mask,
362 LLVMValueRef pred,
363 LLVMValueRef val,
364 LLVMValueRef dst)
365 {
366 /* Mix the predicate and execution mask */
367 if (mask->has_mask) {
368 if (pred) {
369 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
370 } else {
371 pred = mask->exec_mask;
372 }
373 }
374
375 if (pred) {
376 LLVMValueRef real_val, dst_val;
377
378 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
379 real_val = lp_build_select(mask->bld,
380 pred,
381 val, dst_val);
382
383 LLVMBuildStore(mask->bld->builder, real_val, dst);
384 } else
385 LLVMBuildStore(mask->bld->builder, val, dst);
386 }
387
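/** CAL: save the return pc and ret mask, then jump to the subroutine. */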
388 static void lp_exec_mask_call(struct lp_exec_mask *mask,
389 int func,
390 int *pc)
391 {
392 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
393 mask->call_stack[mask->call_stack_size].pc = *pc;
394 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
395 mask->call_stack_size++;
396 *pc = func;
397 }
398
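/** RET: returning from main ends the shader; otherwise mask off the returning channels. */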
399 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
400 {
401 LLVMValueRef exec_mask;
402
403 if (mask->call_stack_size == 0) {
404 /* returning from main() */
405 *pc = -1;
406 return;
407 }
408 exec_mask = LLVMBuildNot(mask->bld->builder,
409 mask->exec_mask,
410 "ret");
411
412 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
413 mask->ret_mask,
414 exec_mask, "ret_full");
415
416 lp_exec_mask_update(mask);
417 }
418
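/** BGNSUB: no mask work is needed when entering a subroutine. */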
419 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420 {
421 }
422
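/** ENDSUB: pop the call stack and resume execution at the saved pc. */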
423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425 assert(mask->call_stack_size);
426 mask->call_stack_size--;
427 *pc = mask->call_stack[mask->call_stack_size].pc;
428 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
429 lp_exec_mask_update(mask);
430 }
431
432
433 /**
434 * Return pointer to a temporary register channel (src or dest).
435 * Note that indirect addressing cannot be handled here.
436 * \param index which temporary register
437 * \param chan which channel of the temp register.
438 */
439 static LLVMValueRef
440 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
441 unsigned index,
442 unsigned chan)
443 {
444 assert(chan < 4);
445 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
446 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
447 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
448 }
449 else {
450 return bld->temps[index][chan];
451 }
452 }
453
454 /**
455  * Return pointer to an output register channel (src or dest).
456 * Note that indirect addressing cannot be handled here.
457 * \param index which output register
458 * \param chan which channel of the output register.
459 */
460 static LLVMValueRef
461 get_output_ptr(struct lp_build_tgsi_soa_context *bld,
462 unsigned index,
463 unsigned chan)
464 {
465 assert(chan < 4);
466 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
467 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
468 return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
469 }
470 else {
471 return bld->outputs[index][chan];
472 }
473 }
474
475 /**
476 * Gather vector.
477 * XXX the lp_build_gather() function should be capable of doing this
478 * with a little work.
479 */
480 static LLVMValueRef
481 build_gather(struct lp_build_tgsi_soa_context *bld,
482 LLVMValueRef base_ptr,
483 LLVMValueRef indexes)
484 {
485 LLVMValueRef res = bld->base.undef;
486 unsigned i;
487
488 /*
489 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
490 */
491 for (i = 0; i < bld->base.type.length; i++) {
492 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
493 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
494 indexes, ii, "");
495 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
496 &index, 1, "gather_ptr");
497 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
498
499 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
500 }
501
502 return res;
503 }
504
505
506 /**
507 * Scatter/store vector.
508 */
509 static void
510 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
511 LLVMValueRef base_ptr,
512 LLVMValueRef indexes,
513 LLVMValueRef values,
514 struct lp_exec_mask *mask,
515 LLVMValueRef pred)
516 {
517 LLVMBuilderRef builder = bld->base.builder;
518 unsigned i;
519
520 /* Mix the predicate and execution mask */
521 if (mask->has_mask) {
522 if (pred) {
523 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
524 }
525 else {
526 pred = mask->exec_mask;
527 }
528 }
529
530 /*
531 * Loop over elements of index_vec, store scalar value.
532 */
533 for (i = 0; i < bld->base.type.length; i++) {
534 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
535 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
536 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
537 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
538 LLVMValueRef scalar_pred = pred ?
539 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
540
541 if (0)
542 lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
543 ii, val, index, scalar_ptr);
544
545 if (scalar_pred) {
546 LLVMValueRef real_val, dst_val;
547 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
548 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
549 LLVMBuildStore(builder, real_val, scalar_ptr);
550 }
551 else {
552 LLVMBuildStore(builder, val, scalar_ptr);
553 }
554 }
555 }
556
557
558 /**
559 * Read the current value of the ADDR register, convert the floats to
560 * ints, add the base index and return the vector of offsets.
561 * The offsets will be used to index into the constant buffer or
562 * temporary register file.
563 */
564 static LLVMValueRef
565 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
566 unsigned reg_file, unsigned reg_index,
567 const struct tgsi_src_register *indirect_reg)
568 {
569 struct lp_build_context *uint_bld = &bld->uint_bld;
570    /* use the address register component selected by the indirect operand's X swizzle */
571 unsigned swizzle = indirect_reg->SwizzleX;
572 LLVMValueRef base;
573 LLVMValueRef rel;
574 LLVMValueRef max_index;
575 LLVMValueRef index;
576
577 assert(bld->indirect_files & (1 << reg_file));
578
579 base = lp_build_const_int_vec(uint_bld->type, reg_index);
580
581 assert(swizzle < 4);
582 rel = LLVMBuildLoad(bld->base.builder,
583 bld->addr[indirect_reg->Index][swizzle],
584 "load addr reg");
585
586 /* for indexing we want integers */
587 rel = LLVMBuildFPToSI(bld->base.builder,
588 rel,
589 uint_bld->vec_type, "");
590
591 index = lp_build_add(uint_bld, base, rel);
592
593 max_index = lp_build_const_int_vec(uint_bld->type,
594 bld->info->file_max[reg_file]);
595
596 assert(!uint_bld->type.sign);
597 index = lp_build_min(uint_bld, index, max_index);
598
599 return index;
600 }
601
602
603 /**
604 * Register fetch.
605 */
606 static LLVMValueRef
607 emit_fetch(
608 struct lp_build_tgsi_soa_context *bld,
609 const struct tgsi_full_instruction *inst,
610 unsigned src_op,
611 const unsigned chan_index )
612 {
613 struct lp_build_context *uint_bld = &bld->uint_bld;
614 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
615 const unsigned swizzle =
616 tgsi_util_get_full_src_register_swizzle(reg, chan_index);
617 LLVMValueRef res;
618 LLVMValueRef indirect_index = NULL;
619
620 if (swizzle > 3) {
621 assert(0 && "invalid swizzle in emit_fetch()");
622 return bld->base.undef;
623 }
624
625 if (reg->Register.Indirect) {
626 indirect_index = get_indirect_index(bld,
627 reg->Register.File,
628 reg->Register.Index,
629 &reg->Indirect);
630 } else {
631 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
632 }
633
634 switch (reg->Register.File) {
635 case TGSI_FILE_CONSTANT:
636 if (reg->Register.Indirect) {
637 LLVMValueRef swizzle_vec =
638 lp_build_const_int_vec(uint_bld->type, swizzle);
639 LLVMValueRef index_vec; /* index into the const buffer */
640
641 /* index_vec = indirect_index * 4 + swizzle */
642 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
643 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
644
645 /* Gather values from the constant buffer */
646 res = build_gather(bld, bld->consts_ptr, index_vec);
647 }
648 else {
649 LLVMValueRef index; /* index into the const buffer */
650 LLVMValueRef scalar, scalar_ptr;
651
652 index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
653
654 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
655 &index, 1, "");
656 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
657
658 res = lp_build_broadcast_scalar(&bld->base, scalar);
659 }
660 break;
661
662 case TGSI_FILE_IMMEDIATE:
663 res = bld->immediates[reg->Register.Index][swizzle];
664 assert(res);
665 break;
666
667 case TGSI_FILE_INPUT:
668 if (reg->Register.Indirect) {
669 LLVMValueRef swizzle_vec =
670 lp_build_const_int_vec(uint_bld->type, swizzle);
671 LLVMValueRef length_vec =
672 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
673          LLVMValueRef index_vec;  /* index into the input array */
674 LLVMValueRef inputs_array;
675 LLVMTypeRef float4_ptr_type;
676
677 /* index_vec = (indirect_index * 4 + swizzle) * length */
678 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
679 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
680 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
681
682 /* cast inputs_array pointer to float* */
683 float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
684 inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array,
685 float4_ptr_type, "");
686
687          /* Gather values from the input register array */
688 res = build_gather(bld, inputs_array, index_vec);
689 } else {
690 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
691 LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle);
692 LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder,
693 bld->inputs_array, &lindex, 1, "");
694 res = LLVMBuildLoad(bld->base.builder, input_ptr, "");
695 }
696 else {
697 res = bld->inputs[reg->Register.Index][swizzle];
698 }
699 }
700 assert(res);
701 break;
702
703 case TGSI_FILE_TEMPORARY:
704 if (reg->Register.Indirect) {
705 LLVMValueRef swizzle_vec =
706 lp_build_const_int_vec(uint_bld->type, swizzle);
707 LLVMValueRef length_vec =
708 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
709          LLVMValueRef index_vec;  /* index into the temp register array */
710 LLVMValueRef temps_array;
711 LLVMTypeRef float4_ptr_type;
712
713 /* index_vec = (indirect_index * 4 + swizzle) * length */
714 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
715 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
716 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
717
718 /* cast temps_array pointer to float* */
719 float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
720 temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
721 float4_ptr_type, "");
722
723 /* Gather values from the temporary register array */
724 res = build_gather(bld, temps_array, index_vec);
725 }
726 else {
727 LLVMValueRef temp_ptr;
728 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
729 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
730 if (!res)
731 return bld->base.undef;
732 }
733 break;
734
735 default:
736 assert(0 && "invalid src register in emit_fetch()");
737 return bld->base.undef;
738 }
739
740 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
741 case TGSI_UTIL_SIGN_CLEAR:
742 res = lp_build_abs( &bld->base, res );
743 break;
744
745 case TGSI_UTIL_SIGN_SET:
746 res = lp_build_abs( &bld->base, res );
747 /* fall through */
748 case TGSI_UTIL_SIGN_TOGGLE:
749 res = lp_build_negate( &bld->base, res );
750 break;
751
752 case TGSI_UTIL_SIGN_KEEP:
753 break;
754 }
755
756 return res;
757 }
758
759
760 /**
761 * Register fetch with derivatives.
762 */
763 static void
764 emit_fetch_deriv(
765 struct lp_build_tgsi_soa_context *bld,
766 const struct tgsi_full_instruction *inst,
767 unsigned index,
768 const unsigned chan_index,
769 LLVMValueRef *res,
770 LLVMValueRef *ddx,
771 LLVMValueRef *ddy)
772 {
773 LLVMValueRef src;
774
775 src = emit_fetch(bld, inst, index, chan_index);
776
777 if(res)
778 *res = src;
779
780 /* TODO: use interpolation coeffs for inputs */
781
782 if(ddx)
783 *ddx = lp_build_ddx(&bld->base, src);
784
785 if(ddy)
786 *ddy = lp_build_ddy(&bld->base, src);
787 }
788
789
790 /**
791  * Fetch the per-channel predicate masks for a predicated instruction.
792 */
793 static void
794 emit_fetch_predicate(
795 struct lp_build_tgsi_soa_context *bld,
796 const struct tgsi_full_instruction *inst,
797 LLVMValueRef *pred)
798 {
799 unsigned index;
800 unsigned char swizzles[4];
801 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
802 LLVMValueRef value;
803 unsigned chan;
804
805 if (!inst->Instruction.Predicate) {
806 FOR_EACH_CHANNEL( chan ) {
807 pred[chan] = NULL;
808 }
809 return;
810 }
811
812 swizzles[0] = inst->Predicate.SwizzleX;
813 swizzles[1] = inst->Predicate.SwizzleY;
814 swizzles[2] = inst->Predicate.SwizzleZ;
815 swizzles[3] = inst->Predicate.SwizzleW;
816
817 index = inst->Predicate.Index;
818 assert(index < LP_MAX_TGSI_PREDS);
819
820 FOR_EACH_CHANNEL( chan ) {
821 unsigned swizzle = swizzles[chan];
822
823 /*
824 * Only fetch the predicate register channels that are actually listed
825 * in the swizzles
826 */
827 if (!unswizzled[swizzle]) {
828 value = LLVMBuildLoad(bld->base.builder,
829 bld->preds[index][swizzle], "");
830
831 /*
832 * Convert the value to an integer mask.
833 *
834           * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
835           * needlessly causes two comparisons because the intermediate
836           * result is stored as a float vector instead of an integer mask vector.
837 */
838 value = lp_build_compare(bld->base.builder,
839 bld->base.type,
840 PIPE_FUNC_NOTEQUAL,
841 value,
842 bld->base.zero);
843 if (inst->Predicate.Negate) {
844 value = LLVMBuildNot(bld->base.builder, value, "");
845 }
846
847 unswizzled[swizzle] = value;
848 } else {
849 value = unswizzled[swizzle];
850 }
851
852 pred[chan] = value;
853 }
854 }
855
856
857 /**
858 * Register store.
859 */
860 static void
861 emit_store(
862 struct lp_build_tgsi_soa_context *bld,
863 const struct tgsi_full_instruction *inst,
864 unsigned index,
865 unsigned chan_index,
866 LLVMValueRef pred,
867 LLVMValueRef value)
868 {
869 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
870 struct lp_build_context *uint_bld = &bld->uint_bld;
871 LLVMValueRef indirect_index = NULL;
872
873 switch( inst->Instruction.Saturate ) {
874 case TGSI_SAT_NONE:
875 break;
876
877 case TGSI_SAT_ZERO_ONE:
878 value = lp_build_max(&bld->base, value, bld->base.zero);
879 value = lp_build_min(&bld->base, value, bld->base.one);
880 break;
881
882 case TGSI_SAT_MINUS_PLUS_ONE:
883 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
884 value = lp_build_min(&bld->base, value, bld->base.one);
885 break;
886
887 default:
888 assert(0);
889 }
890
891 if (reg->Register.Indirect) {
892 indirect_index = get_indirect_index(bld,
893 reg->Register.File,
894 reg->Register.Index,
895 &reg->Indirect);
896 } else {
897 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
898 }
899
900 switch( reg->Register.File ) {
901 case TGSI_FILE_OUTPUT:
902 if (reg->Register.Indirect) {
903 LLVMBuilderRef builder = bld->base.builder;
904 LLVMValueRef chan_vec =
905 lp_build_const_int_vec(uint_bld->type, chan_index);
906 LLVMValueRef length_vec =
907 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
908          LLVMValueRef index_vec;  /* indexes into the output registers */
909 LLVMValueRef outputs_array;
910 LLVMValueRef pixel_offsets;
911 LLVMTypeRef float_ptr_type;
912 int i;
913
914 /* build pixel offset vector: {0, 1, 2, 3, ...} */
915 pixel_offsets = uint_bld->undef;
916 for (i = 0; i < bld->base.type.length; i++) {
917 LLVMValueRef ii = lp_build_const_int32(i);
918 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
919 ii, ii, "");
920 }
921
922 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
923 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
924 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
925 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
926 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
927
928 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
929 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
930 float_ptr_type, "");
931
932          /* Scatter store values into the output registers */
933 emit_mask_scatter(bld, outputs_array, index_vec, value,
934 &bld->exec_mask, pred);
935 }
936 else {
937 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
938 chan_index);
939 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
940 }
941 break;
942
943 case TGSI_FILE_TEMPORARY:
944 if (reg->Register.Indirect) {
945 LLVMBuilderRef builder = bld->base.builder;
946 LLVMValueRef chan_vec =
947 lp_build_const_int_vec(uint_bld->type, chan_index);
948 LLVMValueRef length_vec =
949 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
950 LLVMValueRef index_vec; /* indexes into the temp registers */
951 LLVMValueRef temps_array;
952 LLVMValueRef pixel_offsets;
953 LLVMTypeRef float_ptr_type;
954 int i;
955
956 /* build pixel offset vector: {0, 1, 2, 3, ...} */
957 pixel_offsets = uint_bld->undef;
958 for (i = 0; i < bld->base.type.length; i++) {
959 LLVMValueRef ii = lp_build_const_int32(i);
960 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
961 ii, ii, "");
962 }
963
964 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
965 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
966 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
967 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
968 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
969
970 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
971 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
972 float_ptr_type, "");
973
974 /* Scatter store values into temp registers */
975 emit_mask_scatter(bld, temps_array, index_vec, value,
976 &bld->exec_mask, pred);
977 }
978 else {
979 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
980 chan_index);
981 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
982 }
983 break;
984
985 case TGSI_FILE_ADDRESS:
986 lp_exec_mask_store(&bld->exec_mask, pred, value,
987 bld->addr[reg->Indirect.Index][chan_index]);
988 break;
989
990 case TGSI_FILE_PREDICATE:
991 lp_exec_mask_store(&bld->exec_mask, pred, value,
992 bld->preds[reg->Register.Index][chan_index]);
993 break;
994
995 default:
996 assert( 0 );
997 }
998 }
999
1000
1001 /**
1002 * High-level instruction translators.
1003 */
1004
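/**
 * Emit a texture-sampling instruction: fetch the coordinates (plus
 * derivatives, lod bias or explicit lod as requested by the modifier)
 * and hand them to the sampler code generator.
 */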
1005 static void
1006 emit_tex( struct lp_build_tgsi_soa_context *bld,
1007 const struct tgsi_full_instruction *inst,
1008 enum lp_build_tex_modifier modifier,
1009 LLVMValueRef *texel)
1010 {
1011 unsigned unit;
1012 LLVMValueRef lod_bias, explicit_lod;
1013 LLVMValueRef oow = NULL;
1014 LLVMValueRef coords[3];
1015 LLVMValueRef ddx[3];
1016 LLVMValueRef ddy[3];
1017 unsigned num_coords;
1018 unsigned i;
1019
1020 if (!bld->sampler) {
1021 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1022 for (i = 0; i < 4; i++) {
1023 texel[i] = bld->base.undef;
1024 }
1025 return;
1026 }
1027
1028 switch (inst->Texture.Texture) {
1029 case TGSI_TEXTURE_1D:
1030 num_coords = 1;
1031 break;
1032 case TGSI_TEXTURE_2D:
1033 case TGSI_TEXTURE_RECT:
1034 num_coords = 2;
1035 break;
1036 case TGSI_TEXTURE_SHADOW1D:
1037 case TGSI_TEXTURE_SHADOW2D:
1038 case TGSI_TEXTURE_SHADOWRECT:
1039 case TGSI_TEXTURE_3D:
1040 case TGSI_TEXTURE_CUBE:
1041 num_coords = 3;
1042 break;
1043 default:
1044 assert(0);
1045 return;
1046 }
1047
1048 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1049 lod_bias = emit_fetch( bld, inst, 0, 3 );
1050 explicit_lod = NULL;
1051 }
1052 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1053 lod_bias = NULL;
1054 explicit_lod = emit_fetch( bld, inst, 0, 3 );
1055 }
1056 else {
1057 lod_bias = NULL;
1058 explicit_lod = NULL;
1059 }
1060
1061 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1062 oow = emit_fetch( bld, inst, 0, 3 );
1063 oow = lp_build_rcp(&bld->base, oow);
1064 }
1065
1066 for (i = 0; i < num_coords; i++) {
1067 coords[i] = emit_fetch( bld, inst, 0, i );
1068 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1069 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1070 }
1071 for (i = num_coords; i < 3; i++) {
1072 coords[i] = bld->base.undef;
1073 }
1074
1075 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1076 LLVMTypeRef i32t = LLVMInt32Type();
1077 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
1078 for (i = 0; i < num_coords; i++) {
1079 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1080 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1081 ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
1082 ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
1083 }
1084 unit = inst->Src[3].Register.Index;
1085 } else {
1086 for (i = 0; i < num_coords; i++) {
1087 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1088 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1089 }
1090 unit = inst->Src[1].Register.Index;
1091 }
1092 for (i = num_coords; i < 3; i++) {
1093 ddx[i] = LLVMGetUndef(bld->base.elem_type);
1094 ddy[i] = LLVMGetUndef(bld->base.elem_type);
1095 }
1096
1097 bld->sampler->emit_fetch_texel(bld->sampler,
1098 bld->base.builder,
1099 bld->base.type,
1100 unit, num_coords, coords,
1101 ddx, ddy,
1102 lod_bias, explicit_lod,
1103 texel);
1104 }
1105
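/**
 * Return TRUE if the shader ends within the next few instructions and none
 * of them are texture fetches, calls or control flow; used to decide whether
 * the mask check after a KIL/KILP is worth emitting.
 */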
1106 static boolean
1107 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1108 int pc)
1109 {
1110 int i;
1111
1112 for (i = 0; i < 5; i++) {
1113 unsigned opcode;
1114
1115 if (pc + i >= bld->info->num_instructions)
1116 return TRUE;
1117
1118 opcode = bld->instructions[pc + i].Instruction.Opcode;
1119
1120 if (opcode == TGSI_OPCODE_END)
1121 return TRUE;
1122
1123 if (opcode == TGSI_OPCODE_TEX ||
1124 opcode == TGSI_OPCODE_TXP ||
1125 opcode == TGSI_OPCODE_TXD ||
1126 opcode == TGSI_OPCODE_TXB ||
1127 opcode == TGSI_OPCODE_TXL ||
1128 opcode == TGSI_OPCODE_TXF ||
1129 opcode == TGSI_OPCODE_TXQ ||
1130 opcode == TGSI_OPCODE_CAL ||
1131 opcode == TGSI_OPCODE_CALLNZ ||
1132 opcode == TGSI_OPCODE_IF ||
1133 opcode == TGSI_OPCODE_IFC ||
1134 opcode == TGSI_OPCODE_BGNLOOP ||
1135 opcode == TGSI_OPCODE_SWITCH)
1136 return FALSE;
1137 }
1138
1139 return TRUE;
1140 }
1141
1142
1143
1144 /**
1145 * Kill fragment if any of the src register values are negative.
1146 */
1147 static void
1148 emit_kil(
1149 struct lp_build_tgsi_soa_context *bld,
1150 const struct tgsi_full_instruction *inst,
1151 int pc)
1152 {
1153 const struct tgsi_full_src_register *reg = &inst->Src[0];
1154 LLVMValueRef terms[NUM_CHANNELS];
1155 LLVMValueRef mask;
1156 unsigned chan_index;
1157
1158 memset(&terms, 0, sizeof terms);
1159
1160 FOR_EACH_CHANNEL( chan_index ) {
1161 unsigned swizzle;
1162
1163 /* Unswizzle channel */
1164 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1165
1166       /* Check if the component has not already been tested. */
1167 assert(swizzle < NUM_CHANNELS);
1168 if( !terms[swizzle] )
1169 /* TODO: change the comparison operator instead of setting the sign */
1170 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
1171 }
1172
1173 mask = NULL;
1174 FOR_EACH_CHANNEL( chan_index ) {
1175 if(terms[chan_index]) {
1176 LLVMValueRef chan_mask;
1177
1178 /*
1179 * If term < 0 then mask = 0 else mask = ~0.
1180 */
1181 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1182
1183 if(mask)
1184 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
1185 else
1186 mask = chan_mask;
1187 }
1188 }
1189
1190 if(mask) {
1191 lp_build_mask_update(bld->mask, mask);
1192
1193 if (!near_end_of_shader(bld, pc))
1194 lp_build_mask_check(bld->mask);
1195 }
1196 }
1197
1198
1199 /**
1200 * Predicated fragment kill.
1201 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1202 * The only predication is the execution mask which will apply if
1203 * we're inside a loop or conditional.
1204 */
1205 static void
1206 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1207 const struct tgsi_full_instruction *inst,
1208 int pc)
1209 {
1210 LLVMValueRef mask;
1211
1212 /* For those channels which are "alive", disable fragment shader
1213 * execution.
1214 */
1215 if (bld->exec_mask.has_mask) {
1216 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1217 }
1218 else {
1219 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1220 mask = zero;
1221 }
1222
1223 lp_build_mask_update(bld->mask, mask);
1224
1225 if (!near_end_of_shader(bld, pc))
1226 lp_build_mask_check(bld->mask);
1227 }
1228
1229
1230 /**
1231 * Emit code which will dump the value of all the temporary registers
1232 * to stdout.
1233 */
1234 static void
1235 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1236 {
1237 LLVMBuilderRef builder = bld->base.builder;
1238 LLVMValueRef temp_ptr;
1239 LLVMValueRef i0 = lp_build_const_int32(0);
1240 LLVMValueRef i1 = lp_build_const_int32(1);
1241 LLVMValueRef i2 = lp_build_const_int32(2);
1242 LLVMValueRef i3 = lp_build_const_int32(3);
1243 int index;
1244 int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1245
1246 for (index = 0; index < n; index++) {
1247 LLVMValueRef idx = lp_build_const_int32(index);
1248 LLVMValueRef v[4][4], res;
1249 int chan;
1250
1251 lp_build_printf(builder, "TEMP[%d]:\n", idx);
1252
1253 for (chan = 0; chan < 4; chan++) {
1254 temp_ptr = get_temp_ptr(bld, index, chan);
1255 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
1256 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1257 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1258 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1259 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1260 }
1261
1262 lp_build_printf(builder, " X: %f %f %f %f\n",
1263 v[0][0], v[0][1], v[0][2], v[0][3]);
1264 lp_build_printf(builder, " Y: %f %f %f %f\n",
1265 v[1][0], v[1][1], v[1][2], v[1][3]);
1266 lp_build_printf(builder, " Z: %f %f %f %f\n",
1267 v[2][0], v[2][1], v[2][2], v[2][3]);
1268 lp_build_printf(builder, " W: %f %f %f %f\n",
1269 v[3][0], v[3][1], v[3][2], v[3][3]);
1270 }
1271 }
1272
1273
1274
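/**
 * Emit allocas for the registers covered by a TGSI declaration
 * (temporaries, outputs, address and predicate registers).
 */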
1275 static void
1276 emit_declaration(
1277 struct lp_build_tgsi_soa_context *bld,
1278 const struct tgsi_full_declaration *decl)
1279 {
1280 LLVMTypeRef vec_type = bld->base.vec_type;
1281 const unsigned first = decl->Range.First;
1282 const unsigned last = decl->Range.Last;
1283 unsigned idx, i;
1284
1285 for (idx = first; idx <= last; ++idx) {
1286 assert(last <= bld->info->file_max[decl->Declaration.File]);
1287 switch (decl->Declaration.File) {
1288 case TGSI_FILE_TEMPORARY:
1289 assert(idx < LP_MAX_TGSI_TEMPS);
1290 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1291 for (i = 0; i < NUM_CHANNELS; i++)
1292 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1293 vec_type, "temp");
1294 }
1295 break;
1296
1297 case TGSI_FILE_OUTPUT:
1298 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1299 for (i = 0; i < NUM_CHANNELS; i++)
1300 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1301 vec_type, "output");
1302 }
1303 break;
1304
1305 case TGSI_FILE_ADDRESS:
1306 assert(idx < LP_MAX_TGSI_ADDRS);
1307 for (i = 0; i < NUM_CHANNELS; i++)
1308 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1309 vec_type, "addr");
1310 break;
1311
1312 case TGSI_FILE_PREDICATE:
1313 assert(idx < LP_MAX_TGSI_PREDS);
1314 for (i = 0; i < NUM_CHANNELS; i++)
1315 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1316 vec_type, "predicate");
1317 break;
1318
1319 default:
1320 /* don't need to declare other vars */
1321 break;
1322 }
1323 }
1324 }
1325
1326
1327 /**
1328 * Emit LLVM for one TGSI instruction.
1329 * \param return TRUE for success, FALSE otherwise
1330 */
1331 static boolean
1332 emit_instruction(
1333 struct lp_build_tgsi_soa_context *bld,
1334 const struct tgsi_full_instruction *inst,
1335 const struct tgsi_opcode_info *info,
1336 int *pc)
1337 {
1338 unsigned chan_index;
1339 LLVMValueRef src0, src1, src2;
1340 LLVMValueRef tmp0, tmp1, tmp2;
1341 LLVMValueRef tmp3 = NULL;
1342 LLVMValueRef tmp4 = NULL;
1343 LLVMValueRef tmp5 = NULL;
1344 LLVMValueRef tmp6 = NULL;
1345 LLVMValueRef tmp7 = NULL;
1346 LLVMValueRef res;
1347 LLVMValueRef dst0[NUM_CHANNELS];
1348
1349 /*
1350 * Stores and write masks are handled in a general fashion after the long
1351 * instruction opcode switch statement.
1352 *
1353     * Although not strictly necessary, we avoid generating instructions for
1354     * channels which won't be stored, in cases where that's easy. For some
1355 * complex instructions, like texture sampling, it is more convenient to
1356 * assume a full writemask and then let LLVM optimization passes eliminate
1357 * redundant code.
1358 */
1359
1360 (*pc)++;
1361
1362 assert(info->num_dst <= 1);
1363 if (info->num_dst) {
1364 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1365 dst0[chan_index] = bld->base.undef;
1366 }
1367 }
1368
1369 switch (inst->Instruction.Opcode) {
1370 case TGSI_OPCODE_ARL:
1371 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1372 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1373 tmp0 = lp_build_floor(&bld->base, tmp0);
1374 dst0[chan_index] = tmp0;
1375 }
1376 break;
1377
1378 case TGSI_OPCODE_MOV:
1379 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1380 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1381 }
1382 break;
1383
1384 case TGSI_OPCODE_LIT:
1385 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1386 dst0[CHAN_X] = bld->base.one;
1387 }
1388 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1389 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1390 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1391 }
1392 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1393 /* XMM[1] = SrcReg[0].yyyy */
1394 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1395 /* XMM[1] = max(XMM[1], 0) */
1396 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1397 /* XMM[2] = SrcReg[0].wwww */
1398 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1399 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1400 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1401 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1402 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1403 }
1404 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1405 dst0[CHAN_W] = bld->base.one;
1406 }
1407 break;
1408
1409 case TGSI_OPCODE_RCP:
1410 /* TGSI_OPCODE_RECIP */
1411 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1412 res = lp_build_rcp(&bld->base, src0);
1413 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1414 dst0[chan_index] = res;
1415 }
1416 break;
1417
1418 case TGSI_OPCODE_RSQ:
1419 /* TGSI_OPCODE_RECIPSQRT */
1420 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1421 src0 = lp_build_abs(&bld->base, src0);
1422 res = lp_build_rsqrt(&bld->base, src0);
1423 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1424 dst0[chan_index] = res;
1425 }
1426 break;
1427
1428 case TGSI_OPCODE_EXP:
1429 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1430 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1431 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1432 LLVMValueRef *p_exp2_int_part = NULL;
1433 LLVMValueRef *p_frac_part = NULL;
1434 LLVMValueRef *p_exp2 = NULL;
1435
1436 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1437
1438 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1439 p_exp2_int_part = &tmp0;
1440 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1441 p_frac_part = &tmp1;
1442 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1443 p_exp2 = &tmp2;
1444
1445 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1446
1447 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1448 dst0[CHAN_X] = tmp0;
1449 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1450 dst0[CHAN_Y] = tmp1;
1451 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1452 dst0[CHAN_Z] = tmp2;
1453 }
1454 /* dst.w = 1.0 */
1455 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1456 dst0[CHAN_W] = bld->base.one;
1457 }
1458 break;
1459
1460 case TGSI_OPCODE_LOG:
1461 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1462 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1463 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1464 LLVMValueRef *p_floor_log2 = NULL;
1465 LLVMValueRef *p_exp = NULL;
1466 LLVMValueRef *p_log2 = NULL;
1467
1468 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1469 src0 = lp_build_abs( &bld->base, src0 );
1470
1471 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1472 p_floor_log2 = &tmp0;
1473 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1474 p_exp = &tmp1;
1475 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1476 p_log2 = &tmp2;
1477
1478 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1479
1480 /* dst.x = floor(lg2(abs(src.x))) */
1481 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1482 dst0[CHAN_X] = tmp0;
1483          /* dst.y = abs(src.x)/ex2(floor(lg2(abs(src.x)))) */
1484 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1485 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1486 }
1487 /* dst.z = lg2(abs(src.x)) */
1488 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1489 dst0[CHAN_Z] = tmp2;
1490 }
1491 /* dst.w = 1.0 */
1492 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1493 dst0[CHAN_W] = bld->base.one;
1494 }
1495 break;
1496
1497 case TGSI_OPCODE_MUL:
1498 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1499 src0 = emit_fetch( bld, inst, 0, chan_index );
1500 src1 = emit_fetch( bld, inst, 1, chan_index );
1501 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1502 }
1503 break;
1504
1505 case TGSI_OPCODE_ADD:
1506 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1507 src0 = emit_fetch( bld, inst, 0, chan_index );
1508 src1 = emit_fetch( bld, inst, 1, chan_index );
1509 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1510 }
1511 break;
1512
1513 case TGSI_OPCODE_DP3:
1514 /* TGSI_OPCODE_DOT3 */
1515 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1516 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1517 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1518 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1519 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1520 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1521 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1522 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1523 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1524 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1525 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1526 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1527 dst0[chan_index] = tmp0;
1528 }
1529 break;
1530
1531 case TGSI_OPCODE_DP4:
1532 /* TGSI_OPCODE_DOT4 */
1533 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1534 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1535 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1536 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1537 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1538 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1539 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1540 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1541 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1542 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1543 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1544 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1545 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1546 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1547 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1548 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1549 dst0[chan_index] = tmp0;
1550 }
1551 break;
1552
1553 case TGSI_OPCODE_DST:
1554 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1555 dst0[CHAN_X] = bld->base.one;
1556 }
1557 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1558 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1559 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1560 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1561 }
1562 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1563 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1564 }
1565 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1566 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1567 }
1568 break;
1569
1570 case TGSI_OPCODE_MIN:
1571 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1572 src0 = emit_fetch( bld, inst, 0, chan_index );
1573 src1 = emit_fetch( bld, inst, 1, chan_index );
1574 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1575 }
1576 break;
1577
1578 case TGSI_OPCODE_MAX:
1579 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1580 src0 = emit_fetch( bld, inst, 0, chan_index );
1581 src1 = emit_fetch( bld, inst, 1, chan_index );
1582 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1583 }
1584 break;
1585
1586 case TGSI_OPCODE_SLT:
1587 /* TGSI_OPCODE_SETLT */
1588 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1589 src0 = emit_fetch( bld, inst, 0, chan_index );
1590 src1 = emit_fetch( bld, inst, 1, chan_index );
1591 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1592 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1593 }
1594 break;
1595
1596 case TGSI_OPCODE_SGE:
1597 /* TGSI_OPCODE_SETGE */
1598 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1599 src0 = emit_fetch( bld, inst, 0, chan_index );
1600 src1 = emit_fetch( bld, inst, 1, chan_index );
1601 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1602 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1603 }
1604 break;
1605
1606 case TGSI_OPCODE_MAD:
1607 /* TGSI_OPCODE_MADD */
1608 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1609 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1610 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1611 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1612 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1613 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1614 dst0[chan_index] = tmp0;
1615 }
1616 break;
1617
1618 case TGSI_OPCODE_SUB:
1619 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1620 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1621 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1622 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1623 }
1624 break;
1625
1626 case TGSI_OPCODE_LRP:
1627 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1628 src0 = emit_fetch( bld, inst, 0, chan_index );
1629 src1 = emit_fetch( bld, inst, 1, chan_index );
1630 src2 = emit_fetch( bld, inst, 2, chan_index );
1631 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1632 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1633 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1634 }
1635 break;
1636
1637 case TGSI_OPCODE_CND:
1638 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1639 src0 = emit_fetch( bld, inst, 0, chan_index );
1640 src1 = emit_fetch( bld, inst, 1, chan_index );
1641 src2 = emit_fetch( bld, inst, 2, chan_index );
1642 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1643 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1644 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1645 }
1646 break;
1647
1648 case TGSI_OPCODE_DP2A:
1649 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1650 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1651 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1652 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1653 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1654 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1655 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1656 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1657 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1658 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1659 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1660 }
1661 break;
1662
1663 case TGSI_OPCODE_FRC:
1664 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1665 src0 = emit_fetch( bld, inst, 0, chan_index );
1666 tmp0 = lp_build_floor(&bld->base, src0);
1667 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1668 dst0[chan_index] = tmp0;
1669 }
1670 break;
1671
1672 case TGSI_OPCODE_CLAMP:
1673 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1674 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1675 src1 = emit_fetch( bld, inst, 1, chan_index );
1676 src2 = emit_fetch( bld, inst, 2, chan_index );
1677 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1678 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1679 dst0[chan_index] = tmp0;
1680 }
1681 break;
1682
1683 case TGSI_OPCODE_FLR:
1684 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1685 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1686 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1687 }
1688 break;
1689
1690 case TGSI_OPCODE_ROUND:
1691 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1692 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1693 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1694 }
1695 break;
1696
1697 case TGSI_OPCODE_EX2: {
1698 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1699 tmp0 = lp_build_exp2( &bld->base, tmp0);
1700 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1701 dst0[chan_index] = tmp0;
1702 }
1703 break;
1704 }
1705
1706 case TGSI_OPCODE_LG2:
1707 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1708 tmp0 = lp_build_log2( &bld->base, tmp0);
1709 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1710 dst0[chan_index] = tmp0;
1711 }
1712 break;
1713
1714 case TGSI_OPCODE_POW:
1715 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1716 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1717 res = lp_build_pow( &bld->base, src0, src1 );
1718 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1719 dst0[chan_index] = res;
1720 }
1721 break;
1722
1723 case TGSI_OPCODE_XPD:
1724 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1725 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1726 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1727 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1728 }
1729 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1730 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1731 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1732 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1733 }
1734 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1735 tmp2 = tmp0;
1736 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1737 tmp5 = tmp3;
1738 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1739 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1740 dst0[CHAN_X] = tmp2;
1741 }
1742 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1743 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1744 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1745 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1746 }
1747 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1748 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1749 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1750 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1751 dst0[CHAN_Y] = tmp3;
1752 }
1753 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1754 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1755 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1756 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1757 dst0[CHAN_Z] = tmp5;
1758 }
1759 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1760 dst0[CHAN_W] = bld->base.one;
1761 }
1762 break;
1763
1764 case TGSI_OPCODE_ABS:
1765 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1766 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1767 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1768 }
1769 break;
1770
1771 case TGSI_OPCODE_RCC:
1772 /* deprecated? */
1773 assert(0);
1774 return FALSE;
1775
1776 case TGSI_OPCODE_DPH:
1777 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1778 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1779 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1780 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1781 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1782 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1783 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1784 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1785 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1786 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1787 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1788 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1789 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1790 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1791 dst0[chan_index] = tmp0;
1792 }
1793 break;
1794
1795 case TGSI_OPCODE_COS:
1796 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1797 tmp0 = lp_build_cos( &bld->base, tmp0 );
1798 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1799 dst0[chan_index] = tmp0;
1800 }
1801 break;
1802
1803 case TGSI_OPCODE_DDX:
1804 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1805 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1806 }
1807 break;
1808
1809 case TGSI_OPCODE_DDY:
1810 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1811 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1812 }
1813 break;
1814
1815 case TGSI_OPCODE_KILP:
1816 /* predicated kill */
1817 emit_kilp( bld, inst, (*pc)-1 );
1818 break;
1819
1820 case TGSI_OPCODE_KIL:
1821 /* conditional kill */
1822 emit_kil( bld, inst, (*pc)-1 );
1823 break;
1824
1825 case TGSI_OPCODE_PK2H:
1826 return FALSE;
1827 break;
1828
1829 case TGSI_OPCODE_PK2US:
1830 return FALSE;
1831 break;
1832
1833 case TGSI_OPCODE_PK4B:
1834 return FALSE;
1835 break;
1836
1837 case TGSI_OPCODE_PK4UB:
1838 return FALSE;
1839 break;
1840
1841 case TGSI_OPCODE_RFL:
1842 return FALSE;
1843 break;
1844
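   /* Set-on-condition opcodes (SEQ/SGT/SLE/SNE): compare the operands per
    * channel and select 1.0 where the condition holds, 0.0 otherwise.
    * SFL/STR are the degenerate always-false/always-true variants.
    */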
1845 case TGSI_OPCODE_SEQ:
1846 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1847 src0 = emit_fetch( bld, inst, 0, chan_index );
1848 src1 = emit_fetch( bld, inst, 1, chan_index );
1849 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1850 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1851 }
1852 break;
1853
1854 case TGSI_OPCODE_SFL:
1855 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1856 dst0[chan_index] = bld->base.zero;
1857 }
1858 break;
1859
1860 case TGSI_OPCODE_SGT:
1861 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1862 src0 = emit_fetch( bld, inst, 0, chan_index );
1863 src1 = emit_fetch( bld, inst, 1, chan_index );
1864 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1865 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1866 }
1867 break;
1868
1869 case TGSI_OPCODE_SIN:
1870 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1871 tmp0 = lp_build_sin( &bld->base, tmp0 );
1872 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1873 dst0[chan_index] = tmp0;
1874 }
1875 break;
1876
1877 case TGSI_OPCODE_SLE:
1878 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1879 src0 = emit_fetch( bld, inst, 0, chan_index );
1880 src1 = emit_fetch( bld, inst, 1, chan_index );
1881 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1882 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1883 }
1884 break;
1885
1886 case TGSI_OPCODE_SNE:
1887 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1888 src0 = emit_fetch( bld, inst, 0, chan_index );
1889 src1 = emit_fetch( bld, inst, 1, chan_index );
1890 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1891 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1892 }
1893 break;
1894
1895 case TGSI_OPCODE_STR:
1896 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1897 dst0[chan_index] = bld->base.one;
1898 }
1899 break;
1900
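   /* Texture sampling: emit_tex dispatches to the SoA sampler with the
    * requested modifier (none, explicit derivatives, LOD bias, explicit
    * LOD, or projection for the cases below).
    */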
1901 case TGSI_OPCODE_TEX:
1902 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1903 break;
1904
1905 case TGSI_OPCODE_TXD:
1906 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1907 break;
1908
1909 case TGSI_OPCODE_UP2H:
1910 /* deprecated */
1911       assert(0);

1912 return FALSE;
1913 break;
1914
1915 case TGSI_OPCODE_UP2US:
1916 /* deprecated */
1917 assert(0);
1918 return FALSE;
1919 break;
1920
1921 case TGSI_OPCODE_UP4B:
1922 /* deprecated */
1923 assert(0);
1924 return FALSE;
1925 break;
1926
1927 case TGSI_OPCODE_UP4UB:
1928 /* deprecated */
1929 assert(0);
1930 return FALSE;
1931 break;
1932
1933 case TGSI_OPCODE_X2D:
1934 /* deprecated? */
1935 assert(0);
1936 return FALSE;
1937 break;
1938
1939 case TGSI_OPCODE_ARA:
1940 /* deprecated */
1941 assert(0);
1942 return FALSE;
1943 break;
1944
1945 case TGSI_OPCODE_ARR:
1946 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1947 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1948 tmp0 = lp_build_round(&bld->base, tmp0);
1949 dst0[chan_index] = tmp0;
1950 }
1951 break;
1952
1953 case TGSI_OPCODE_BRA:
1954 /* deprecated */
1955 assert(0);
1956 return FALSE;
1957 break;
1958
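   /* Subroutines: CAL pushes the return address onto the exec-mask call
    * stack and redirects *pc to the subroutine label; RET pops it again.
    */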
1959 case TGSI_OPCODE_CAL:
1960 lp_exec_mask_call(&bld->exec_mask,
1961 inst->Label.Label,
1962 pc);
1963
1964 break;
1965
1966 case TGSI_OPCODE_RET:
1967 lp_exec_mask_ret(&bld->exec_mask, pc);
1968 break;
1969
1970 case TGSI_OPCODE_END:
1971 if (0) {
1972 /* for debugging */
1973 emit_dump_temps(bld);
1974 }
1975 *pc = -1;
1976 break;
1977
1978 case TGSI_OPCODE_SSG:
1979 /* TGSI_OPCODE_SGN */
1980 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1981 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1982 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1983 }
1984 break;
1985
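   /* CMP: per channel, dst = (src0 < 0.0) ? src1 : src2. */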
1986 case TGSI_OPCODE_CMP:
1987 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1988 src0 = emit_fetch( bld, inst, 0, chan_index );
1989 src1 = emit_fetch( bld, inst, 1, chan_index );
1990 src2 = emit_fetch( bld, inst, 2, chan_index );
1991 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1992 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1993 }
1994 break;
1995
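   /* SCS: dst = (cos(src.x), sin(src.x), 0.0, 1.0), per write mask. */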
1996 case TGSI_OPCODE_SCS:
1997 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1998 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1999 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2000 }
2001 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
2002 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2003 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2004 }
2005 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
2006 dst0[CHAN_Z] = bld->base.zero;
2007 }
2008 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
2009 dst0[CHAN_W] = bld->base.one;
2010 }
2011 break;
2012
2013 case TGSI_OPCODE_TXB:
2014 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2015 break;
2016
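   /* NRM/NRM4: normalize a 3- or 4-component vector by multiplying each
    * component with 1/sqrt(sum of squares).  The xmmN names in the comments
    * below are historical register annotations; the values here are
    * ordinary SoA vectors, not xmm registers.
    */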
2017 case TGSI_OPCODE_NRM:
2018 /* fall-through */
2019 case TGSI_OPCODE_NRM4:
2020     /* 3- or 4-component normalization */
2021 {
2022 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2023
2024 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
2025 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
2026 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
2027 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
2028
2029 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2030
2031 /* xmm4 = src.x */
2032 /* xmm0 = src.x * src.x */
2033 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2034 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2035 tmp4 = tmp0;
2036 }
2037 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2038
2039 /* xmm5 = src.y */
2040 /* xmm0 = xmm0 + src.y * src.y */
2041 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2042 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2043 tmp5 = tmp1;
2044 }
2045 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2046 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2047
2048 /* xmm6 = src.z */
2049 /* xmm0 = xmm0 + src.z * src.z */
2050 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2051 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2052 tmp6 = tmp1;
2053 }
2054 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2055 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2056
2057 if (dims == 4) {
2058 /* xmm7 = src.w */
2059 /* xmm0 = xmm0 + src.w * src.w */
2060 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2061 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2062 tmp7 = tmp1;
2063 }
2064 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2065 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2066 }
2067
2068 /* xmm1 = 1 / sqrt(xmm0) */
2069 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2070
2071 /* dst.x = xmm1 * src.x */
2072 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2073 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2074 }
2075
2076 /* dst.y = xmm1 * src.y */
2077 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2078 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2079 }
2080
2081 /* dst.z = xmm1 * src.z */
2082 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2083 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2084 }
2085
2086 /* dst.w = xmm1 * src.w */
2087          if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4) {
2088 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2089 }
2090 }
2091
2092 /* dst.w = 1.0 */
2093 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2094 dst0[CHAN_W] = bld->base.one;
2095 }
2096 }
2097 break;
2098
2099 case TGSI_OPCODE_DIV:
2100 /* deprecated */
2101 assert( 0 );
2102 return FALSE;
2103 break;
2104
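   /* Two-component dot product: src0.x*src1.x + src0.y*src1.y, replicated
    * into every enabled destination channel.
    */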
2105 case TGSI_OPCODE_DP2:
2106 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
2107 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
2108 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2109 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
2110 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
2111 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2112 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2113 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2114 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2115 }
2116 break;
2117
2118 case TGSI_OPCODE_TXL:
2119 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2120 break;
2121
2122 case TGSI_OPCODE_TXP:
2123 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2124 break;
2125
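   /* Structured control flow: since the code runs in SoA over a whole
    * vector of elements, IF/ELSE/ENDIF and loops are executed with
    * per-channel execution masks instead of scalar branches: IF pushes a
    * condition mask, ELSE inverts it, ENDIF pops it, and BRK/CONT update
    * the enclosing loop's break/continue masks.
    */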
2126 case TGSI_OPCODE_BRK:
2127 lp_exec_break(&bld->exec_mask);
2128 break;
2129
2130 case TGSI_OPCODE_IF:
2131 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2132 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2133 tmp0, bld->base.zero);
2134 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2135 break;
2136
2137 case TGSI_OPCODE_BGNLOOP:
2138 lp_exec_bgnloop(&bld->exec_mask);
2139 break;
2140
2141 case TGSI_OPCODE_BGNSUB:
2142 lp_exec_mask_bgnsub(&bld->exec_mask);
2143 break;
2144
2145 case TGSI_OPCODE_ELSE:
2146 lp_exec_mask_cond_invert(&bld->exec_mask);
2147 break;
2148
2149 case TGSI_OPCODE_ENDIF:
2150 lp_exec_mask_cond_pop(&bld->exec_mask);
2151 break;
2152
2153 case TGSI_OPCODE_ENDLOOP:
2154 lp_exec_endloop(&bld->exec_mask);
2155 break;
2156
2157 case TGSI_OPCODE_ENDSUB:
2158 lp_exec_mask_endsub(&bld->exec_mask, pc);
2159 break;
2160
2161 case TGSI_OPCODE_PUSHA:
2162 /* deprecated? */
2163 assert(0);
2164 return FALSE;
2165 break;
2166
2167 case TGSI_OPCODE_POPA:
2168 /* deprecated? */
2169 assert(0);
2170 return FALSE;
2171 break;
2172
2173 case TGSI_OPCODE_CEIL:
2174 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2175 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2176 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2177 }
2178 break;
2179
2180 case TGSI_OPCODE_I2F:
2181 /* deprecated? */
2182 assert(0);
2183 return FALSE;
2184 break;
2185
2186 case TGSI_OPCODE_NOT:
2187 /* deprecated? */
2188 assert(0);
2189 return FALSE;
2190 break;
2191
2192 case TGSI_OPCODE_TRUNC:
2193 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2194 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2195 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2196 }
2197 break;
2198
2199 case TGSI_OPCODE_SHL:
2200 /* deprecated? */
2201 assert(0);
2202 return FALSE;
2203 break;
2204
2205 case TGSI_OPCODE_ISHR:
2206 /* deprecated? */
2207 assert(0);
2208 return FALSE;
2209 break;
2210
2211 case TGSI_OPCODE_AND:
2212 /* deprecated? */
2213 assert(0);
2214 return FALSE;
2215 break;
2216
2217 case TGSI_OPCODE_OR:
2218 /* deprecated? */
2219 assert(0);
2220 return FALSE;
2221 break;
2222
2223 case TGSI_OPCODE_MOD:
2224 /* deprecated? */
2225 assert(0);
2226 return FALSE;
2227 break;
2228
2229 case TGSI_OPCODE_XOR:
2230 /* deprecated? */
2231 assert(0);
2232 return FALSE;
2233 break;
2234
2235 case TGSI_OPCODE_SAD:
2236 /* deprecated? */
2237 assert(0);
2238 return FALSE;
2239 break;
2240
2241 case TGSI_OPCODE_TXF:
2242 /* deprecated? */
2243 assert(0);
2244 return FALSE;
2245 break;
2246
2247 case TGSI_OPCODE_TXQ:
2248 /* deprecated? */
2249 assert(0);
2250 return FALSE;
2251 break;
2252
2253 case TGSI_OPCODE_CONT:
2254 lp_exec_continue(&bld->exec_mask);
2255 break;
2256
2257 case TGSI_OPCODE_EMIT:
2258 return FALSE;
2259 break;
2260
2261 case TGSI_OPCODE_ENDPRIM:
2262 return FALSE;
2263 break;
2264
2265 case TGSI_OPCODE_NOP:
2266 break;
2267
2268 default:
2269 return FALSE;
2270 }
2271
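   /* Finally store the results: fetch the per-channel predicate registers
    * (if the instruction uses them) and write each enabled destination
    * channel through emit_store, which applies that predicate.
    */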
2272 if(info->num_dst) {
2273 LLVMValueRef pred[NUM_CHANNELS];
2274
2275 emit_fetch_predicate( bld, inst, pred );
2276
2277 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2278 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2279 }
2280 }
2281
2282 return TRUE;
2283 }
2284
2285
2286 void
2287 lp_build_tgsi_soa(LLVMBuilderRef builder,
2288 const struct tgsi_token *tokens,
2289 struct lp_type type,
2290 struct lp_build_mask_context *mask,
2291 LLVMValueRef consts_ptr,
2292 const LLVMValueRef *pos,
2293 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2294 LLVMValueRef (*outputs)[NUM_CHANNELS],
2295 struct lp_build_sampler_soa *sampler,
2296 const struct tgsi_shader_info *info)
2297 {
2298 struct lp_build_tgsi_soa_context bld;
2299 struct tgsi_parse_context parse;
2300 uint num_immediates = 0;
2301 uint num_instructions = 0;
2302 unsigned i;
2303 int pc = 0;
2304
2305 struct lp_type res_type;
2306
2307 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2308 memset(&res_type, 0, sizeof res_type);
2309 res_type.width = type.width;
2310 res_type.length = type.length;
2311 res_type.sign = 1;
2312
2313 /* Setup build context */
2314 memset(&bld, 0, sizeof bld);
2315 lp_build_context_init(&bld.base, builder, type);
2316 lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
2317 lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
2318 bld.mask = mask;
2319 bld.pos = pos;
2320 bld.inputs = inputs;
2321 bld.outputs = outputs;
2322 bld.consts_ptr = consts_ptr;
2323 bld.sampler = sampler;
2324 bld.info = info;
2325 bld.indirect_files = info->indirect_files;
2326 bld.instructions = (struct tgsi_full_instruction *)
2327 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2328 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2329
2330 if (!bld.instructions) {
2331 return;
2332 }
2333
2334 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2335
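   /* Registers accessed with indirect (relative) addressing are backed by
    * alloca'd arrays so they can be indexed with a computed offset;
    * array_size covers (file_max + 1) registers of 4 channels each.
    */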
2336 if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2337 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2338 info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0);
2339 bld.temps_array = lp_build_array_alloca(bld.base.builder,
2340 bld.base.vec_type, array_size,
2341 "temp_array");
2342 }
2343
2344 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2345 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2346 info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0);
2347 bld.outputs_array = lp_build_array_alloca(bld.base.builder,
2348 bld.base.vec_type, array_size,
2349 "output_array");
2350 }
2351
2352    /* If we have indirect addressing in inputs we need to copy them into
2353     * our alloca array so they can be indexed with a computed offset. */
2354 if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
2355 unsigned index, chan;
2356 LLVMTypeRef vec_type = bld.base.vec_type;
2357 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2358 info->file_max[TGSI_FILE_INPUT]*4 + 4, 0);
2359 bld.inputs_array = lp_build_array_alloca(bld.base.builder,
2360 vec_type, array_size,
2361 "input_array");
2362
2363 assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
2364
2365 for (index = 0; index < info->num_inputs; ++index) {
2366 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2367 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
2368 LLVMValueRef input_ptr =
2369 LLVMBuildGEP(bld.base.builder, bld.inputs_array,
2370 &lindex, 1, "");
2371 LLVMValueRef value = bld.inputs[index][chan];
2372 if (value)
2373 LLVMBuildStore(bld.base.builder, value, input_ptr);
2374 }
2375 }
2376 }
2377
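   /* First pass over the token stream: emit declarations, turn immediates
    * into constant vectors and collect the instructions into
    * bld.instructions; they are executed afterwards under a program counter
    * so that opcodes such as CAL/RET can redirect it.
    */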
2378 tgsi_parse_init( &parse, tokens );
2379
2380 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2381 tgsi_parse_token( &parse );
2382
2383 switch( parse.FullToken.Token.Type ) {
2384 case TGSI_TOKEN_TYPE_DECLARATION:
2385 /* Inputs already interpolated */
2386 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2387 break;
2388
2389 case TGSI_TOKEN_TYPE_INSTRUCTION:
2390 {
2391 /* save expanded instruction */
2392 if (num_instructions == bld.max_instructions) {
2393 struct tgsi_full_instruction *instructions;
2394 instructions = REALLOC(bld.instructions,
2395 bld.max_instructions
2396 * sizeof(struct tgsi_full_instruction),
2397 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2398 * sizeof(struct tgsi_full_instruction));
2399 if (!instructions) {
2400 break;
2401 }
2402 bld.instructions = instructions;
2403 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2404 }
2405
2406 memcpy(bld.instructions + num_instructions,
2407 &parse.FullToken.FullInstruction,
2408 sizeof(bld.instructions[0]));
2409
2410 num_instructions++;
2411 }
2412
2413 break;
2414
2415 case TGSI_TOKEN_TYPE_IMMEDIATE:
2416 /* simply copy the immediate values into the next immediates[] slot */
2417 {
2418 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2419 assert(size <= 4);
2420 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2421 for( i = 0; i < size; ++i )
2422 bld.immediates[num_immediates][i] =
2423 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2424 for( i = size; i < 4; ++i )
2425 bld.immediates[num_immediates][i] = bld.base.undef;
2426 num_immediates++;
2427 }
2428 break;
2429
2430 case TGSI_TOKEN_TYPE_PROPERTY:
2431 break;
2432
2433 default:
2434 assert( 0 );
2435 }
2436 }
2437
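   /* Second pass: execute the collected instructions under the program
    * counter until TGSI_OPCODE_END sets it to -1; opcodes that
    * emit_instruction cannot translate only produce a warning.
    */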
2438 while (pc != -1) {
2439 struct tgsi_full_instruction *instr = bld.instructions + pc;
2440 const struct tgsi_opcode_info *opcode_info =
2441 tgsi_get_opcode_info(instr->Instruction.Opcode);
2442 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2443 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2444 opcode_info->mnemonic);
2445 }
2446
2447    /* If we have indirect addressing in outputs we need to copy our alloca
2448     * array back to the output slots specified by the caller. */
2449 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2450 unsigned index, chan;
2451 assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2452 for (index = 0; index < info->num_outputs; ++index) {
2453 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2454 bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2455 }
2456 }
2457 }
2458
2459 if (0) {
2460 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2461 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2462       debug_printf("--- TGSI program and generated LLVM IR ---\n");
2463 tgsi_dump(tokens, 0);
2464 lp_debug_dump_value(function);
2465       debug_printf("--- end of dump ---\n");
2466 }
2467 tgsi_parse_free( &parse );
2468
2469 if (0) {
2470 LLVMModuleRef module = LLVMGetGlobalParent(
2471 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2472 LLVMDumpModule(module);
2473
2474 }
2475
2476 FREE( bld.instructions );
2477 }
2478