gallivm/llvmpipe: implement system values and instanceID
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61 #include "lp_bld_printf.h"
62
63
64 #define FOR_EACH_CHANNEL( CHAN )\
65 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
66
67 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
69
70 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
71 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
72
73 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
74 FOR_EACH_CHANNEL( CHAN )\
75 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
76
77 #define CHAN_X 0
78 #define CHAN_Y 1
79 #define CHAN_Z 2
80 #define CHAN_W 3
81 #define NUM_CHANNELS 4
82
83 #define LP_MAX_INSTRUCTIONS 256
84
85
/**
 * Execution-mask state used while translating TGSI control flow
 * (IF/ELSE, loops, CAL/RET) into straight-line LLVM IR with per-channel
 * masking.  All masks are integer vectors where all-ones = channel live.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any of the cond/loop/call stacks is non-empty, i.e. stores
    * must be masked. */
   boolean has_mask;

   /* integer vector type of all the mask values below */
   LLVMTypeRef int_vec_type;

   /* saved cond masks for nested IF/ELSE/ENDIF */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* current loop header block plus per-loop masks; break_var is an
    * alloca so the break mask survives across loop iterations */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* mask of channels that have executed RET in the current subroutine,
    * plus the saved return pc/mask per active CAL */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* combined cond & cont & break & ret mask, kept current by
    * lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
118
/**
 * Complete context for the TGSI -> LLVM IR SoA translation:
 * build helpers, register-file storage, sampler hooks and the
 * execution-mask state.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   /* pointer to the flat constant buffer, indexed as const_idx*4+chan */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   /* per-register, per-channel storage for the small register files;
    * temps/addr/preds hold pointers (allocas), immediates hold values */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   /* flat array of system values, indexed as sv_index*4 + channel
    * (see the TGSI_FILE_SYSTEM_VALUE case in emit_fetch) */
   LLVMValueRef system_values_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* instruction buffer filled while parsing, replayed for subroutines */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
171
172 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
173 {
174 mask->bld = bld;
175 mask->has_mask = FALSE;
176 mask->cond_stack_size = 0;
177 mask->loop_stack_size = 0;
178 mask->call_stack_size = 0;
179
180 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
181 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
182 LLVMConstAllOnes(mask->int_vec_type);
183 }
184
185 static void lp_exec_mask_update(struct lp_exec_mask *mask)
186 {
187 if (mask->loop_stack_size) {
188 /*for loops we need to update the entire mask at runtime */
189 LLVMValueRef tmp;
190 assert(mask->break_mask);
191 tmp = LLVMBuildAnd(mask->bld->builder,
192 mask->cont_mask,
193 mask->break_mask,
194 "maskcb");
195 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
196 mask->cond_mask,
197 tmp,
198 "maskfull");
199 } else
200 mask->exec_mask = mask->cond_mask;
201
202 if (mask->call_stack_size) {
203 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
204 mask->exec_mask,
205 mask->ret_mask,
206 "callmask");
207 }
208
209 mask->has_mask = (mask->cond_stack_size > 0 ||
210 mask->loop_stack_size > 0 ||
211 mask->call_stack_size > 0);
212 }
213
214 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
215 LLVMValueRef val)
216 {
217 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
218 if (mask->cond_stack_size == 0) {
219 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
220 }
221 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
222 assert(LLVMTypeOf(val) == mask->int_vec_type);
223 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
224 mask->cond_mask,
225 val,
226 "");
227 lp_exec_mask_update(mask);
228 }
229
230 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
231 {
232 LLVMValueRef prev_mask;
233 LLVMValueRef inv_mask;
234
235 assert(mask->cond_stack_size);
236 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
237 if (mask->cond_stack_size == 1) {
238 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
239 }
240
241 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
242
243 mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
244 inv_mask,
245 prev_mask, "");
246 lp_exec_mask_update(mask);
247 }
248
249 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
250 {
251 assert(mask->cond_stack_size);
252 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
253 lp_exec_mask_update(mask);
254 }
255
/**
 * Begin a TGSI loop: push the enclosing loop state, allocate a stack
 * slot for the break mask (so it survives across iterations), start a
 * new "bgnloop" basic block and position the builder inside it.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* outermost loop: all loop state must be at its initial values */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break mask lives in memory so it can be carried around the back edge */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* emit the loop header block and branch into it */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   /* reload the break mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
284
285 static void lp_exec_break(struct lp_exec_mask *mask)
286 {
287 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
288 mask->exec_mask,
289 "break");
290
291 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
292 mask->break_mask,
293 exec_mask, "break_full");
294
295 lp_exec_mask_update(mask);
296 }
297
298 static void lp_exec_continue(struct lp_exec_mask *mask)
299 {
300 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
301 mask->exec_mask,
302 "");
303
304 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
305 mask->cont_mask,
306 exec_mask, "");
307
308 lp_exec_mask_update(mask);
309 }
310
311
/**
 * Close a TGSI loop: restore the continue mask, persist the break mask,
 * branch back to the loop header while any channel is still active,
 * then pop the saved loop state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* one wide integer covering the whole mask vector, for the != 0 test */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (mask != 0), i.e. loop again while any channel is active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the saved loop state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
357
358 /* stores val into an address pointed to by dst.
359 * mask->exec_mask is used to figure out which bits of val
360 * should be stored into the address
361 * (0 means don't store this bit, 1 means do store).
362 */
363 static void lp_exec_mask_store(struct lp_exec_mask *mask,
364 LLVMValueRef pred,
365 LLVMValueRef val,
366 LLVMValueRef dst)
367 {
368 /* Mix the predicate and execution mask */
369 if (mask->has_mask) {
370 if (pred) {
371 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
372 } else {
373 pred = mask->exec_mask;
374 }
375 }
376
377 if (pred) {
378 LLVMValueRef real_val, dst_val;
379
380 dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
381 real_val = lp_build_select(mask->bld,
382 pred,
383 val, dst_val);
384
385 LLVMBuildStore(mask->bld->builder, real_val, dst);
386 } else
387 LLVMBuildStore(mask->bld->builder, val, dst);
388 }
389
390 static void lp_exec_mask_call(struct lp_exec_mask *mask,
391 int func,
392 int *pc)
393 {
394 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
395 mask->call_stack[mask->call_stack_size].pc = *pc;
396 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
397 mask->call_stack_size++;
398 *pc = func;
399 }
400
401 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
402 {
403 LLVMValueRef exec_mask;
404
405 if (mask->call_stack_size == 0) {
406 /* returning from main() */
407 *pc = -1;
408 return;
409 }
410 exec_mask = LLVMBuildNot(mask->bld->builder,
411 mask->exec_mask,
412 "ret");
413
414 mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
415 mask->ret_mask,
416 exec_mask, "ret_full");
417
418 lp_exec_mask_update(mask);
419 }
420
/**
 * Handle BGNSUB: nothing to do here — the caller's state was already
 * saved by lp_exec_mask_call() and is restored in lp_exec_mask_endsub().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
424
425 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
426 {
427 assert(mask->call_stack_size);
428 mask->call_stack_size--;
429 *pc = mask->call_stack[mask->call_stack_size].pc;
430 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
431 lp_exec_mask_update(mask);
432 }
433
434
435 /**
436 * Return pointer to a temporary register channel (src or dest).
437 * Note that indirect addressing cannot be handled here.
438 * \param index which temporary register
439 * \param chan which channel of the temp register.
440 */
441 static LLVMValueRef
442 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
443 unsigned index,
444 unsigned chan)
445 {
446 assert(chan < 4);
447 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
448 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
449 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
450 }
451 else {
452 return bld->temps[index][chan];
453 }
454 }
455
456 /**
457 * Return pointer to a output register channel (src or dest).
458 * Note that indirect addressing cannot be handled here.
459 * \param index which output register
460 * \param chan which channel of the output register.
461 */
462 static LLVMValueRef
463 get_output_ptr(struct lp_build_tgsi_soa_context *bld,
464 unsigned index,
465 unsigned chan)
466 {
467 assert(chan < 4);
468 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
469 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
470 return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
471 }
472 else {
473 return bld->outputs[index][chan];
474 }
475 }
476
477 /**
478 * Gather vector.
479 * XXX the lp_build_gather() function should be capable of doing this
480 * with a little work.
481 */
482 static LLVMValueRef
483 build_gather(struct lp_build_tgsi_soa_context *bld,
484 LLVMValueRef base_ptr,
485 LLVMValueRef indexes)
486 {
487 LLVMValueRef res = bld->base.undef;
488 unsigned i;
489
490 /*
491 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
492 */
493 for (i = 0; i < bld->base.type.length; i++) {
494 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
495 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
496 indexes, ii, "");
497 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
498 &index, 1, "gather_ptr");
499 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
500
501 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
502 }
503
504 return res;
505 }
506
507
508 /**
509 * Scatter/store vector.
510 */
511 static void
512 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
513 LLVMValueRef base_ptr,
514 LLVMValueRef indexes,
515 LLVMValueRef values,
516 struct lp_exec_mask *mask,
517 LLVMValueRef pred)
518 {
519 LLVMBuilderRef builder = bld->base.builder;
520 unsigned i;
521
522 /* Mix the predicate and execution mask */
523 if (mask->has_mask) {
524 if (pred) {
525 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
526 }
527 else {
528 pred = mask->exec_mask;
529 }
530 }
531
532 /*
533 * Loop over elements of index_vec, store scalar value.
534 */
535 for (i = 0; i < bld->base.type.length; i++) {
536 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
537 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
538 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
539 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
540 LLVMValueRef scalar_pred = pred ?
541 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
542
543 if (0)
544 lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
545 ii, val, index, scalar_ptr);
546
547 if (scalar_pred) {
548 LLVMValueRef real_val, dst_val;
549 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
550 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
551 LLVMBuildStore(builder, real_val, scalar_ptr);
552 }
553 else {
554 LLVMBuildStore(builder, val, scalar_ptr);
555 }
556 }
557 }
558
559
560 /**
561 * Read the current value of the ADDR register, convert the floats to
562 * ints, add the base index and return the vector of offsets.
563 * The offsets will be used to index into the constant buffer or
564 * temporary register file.
565 */
566 static LLVMValueRef
567 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
568 unsigned reg_file, unsigned reg_index,
569 const struct tgsi_src_register *indirect_reg)
570 {
571 struct lp_build_context *uint_bld = &bld->uint_bld;
572 /* always use X component of address register */
573 unsigned swizzle = indirect_reg->SwizzleX;
574 LLVMValueRef base;
575 LLVMValueRef rel;
576 LLVMValueRef max_index;
577 LLVMValueRef index;
578
579 assert(bld->indirect_files & (1 << reg_file));
580
581 base = lp_build_const_int_vec(uint_bld->type, reg_index);
582
583 assert(swizzle < 4);
584 rel = LLVMBuildLoad(bld->base.builder,
585 bld->addr[indirect_reg->Index][swizzle],
586 "load addr reg");
587
588 /* for indexing we want integers */
589 rel = LLVMBuildFPToSI(bld->base.builder,
590 rel,
591 uint_bld->vec_type, "");
592
593 index = lp_build_add(uint_bld, base, rel);
594
595 max_index = lp_build_const_int_vec(uint_bld->type,
596 bld->info->file_max[reg_file]);
597
598 assert(!uint_bld->type.sign);
599 index = lp_build_min(uint_bld, index, max_index);
600
601 return index;
602 }
603
604
/**
 * Register fetch.
 *
 * Fetch one channel of source operand 'src_op' of 'inst' as an SoA
 * vector, applying the operand's swizzle and sign mode.  Handles the
 * CONSTANT, IMMEDIATE, INPUT, TEMPORARY and SYSTEM_VALUE files, with
 * optional indirect (ADDR-relative) addressing where supported.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      /* per-channel offsets computed from the ADDR register */
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         /* constants are scalar; load once and broadcast to all lanes */
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      /* immediates are already SoA vectors */
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the input array */
         LLVMValueRef inputs_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast inputs_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array,
                                         float4_ptr_type, "");

         /* Gather values from the input register array */
         res = build_gather(bld, inputs_array, index_vec);
      } else {
         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
            /* inputs were spilled to a flat array; direct load */
            LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle);
            LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder,
                                                  bld->inputs_array, &lindex, 1, "");
            res = LLVMBuildLoad(bld->base.builder, input_ptr, "");
         }
         else {
            res = bld->inputs[reg->Register.Index][swizzle];
         }
      }
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   case TGSI_FILE_SYSTEM_VALUE:
      assert(!reg->Register.Indirect);
      {
         LLVMValueRef index;  /* index into the system value array */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index * 4 + swizzle);

         /* system values are scalar; load once and broadcast */
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->system_values_array,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the operand's sign mode (abs / negate / force-negative) */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
776
777
778 /**
779 * Register fetch with derivatives.
780 */
781 static void
782 emit_fetch_deriv(
783 struct lp_build_tgsi_soa_context *bld,
784 const struct tgsi_full_instruction *inst,
785 unsigned index,
786 const unsigned chan_index,
787 LLVMValueRef *res,
788 LLVMValueRef *ddx,
789 LLVMValueRef *ddy)
790 {
791 LLVMValueRef src;
792
793 src = emit_fetch(bld, inst, index, chan_index);
794
795 if(res)
796 *res = src;
797
798 /* TODO: use interpolation coeffs for inputs */
799
800 if(ddx)
801 *ddx = lp_build_ddx(&bld->base, src);
802
803 if(ddy)
804 *ddy = lp_build_ddy(&bld->base, src);
805 }
806
807
808 /**
809 * Predicate.
810 */
811 static void
812 emit_fetch_predicate(
813 struct lp_build_tgsi_soa_context *bld,
814 const struct tgsi_full_instruction *inst,
815 LLVMValueRef *pred)
816 {
817 unsigned index;
818 unsigned char swizzles[4];
819 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
820 LLVMValueRef value;
821 unsigned chan;
822
823 if (!inst->Instruction.Predicate) {
824 FOR_EACH_CHANNEL( chan ) {
825 pred[chan] = NULL;
826 }
827 return;
828 }
829
830 swizzles[0] = inst->Predicate.SwizzleX;
831 swizzles[1] = inst->Predicate.SwizzleY;
832 swizzles[2] = inst->Predicate.SwizzleZ;
833 swizzles[3] = inst->Predicate.SwizzleW;
834
835 index = inst->Predicate.Index;
836 assert(index < LP_MAX_TGSI_PREDS);
837
838 FOR_EACH_CHANNEL( chan ) {
839 unsigned swizzle = swizzles[chan];
840
841 /*
842 * Only fetch the predicate register channels that are actually listed
843 * in the swizzles
844 */
845 if (!unswizzled[swizzle]) {
846 value = LLVMBuildLoad(bld->base.builder,
847 bld->preds[index][swizzle], "");
848
849 /*
850 * Convert the value to an integer mask.
851 *
852 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
853 * is needlessly causing two comparisons due to storing the intermediate
854 * result as float vector instead of an integer mask vector.
855 */
856 value = lp_build_compare(bld->base.builder,
857 bld->base.type,
858 PIPE_FUNC_NOTEQUAL,
859 value,
860 bld->base.zero);
861 if (inst->Predicate.Negate) {
862 value = LLVMBuildNot(bld->base.builder, value, "");
863 }
864
865 unswizzled[swizzle] = value;
866 } else {
867 value = unswizzled[swizzle];
868 }
869
870 pred[chan] = value;
871 }
872 }
873
874
875 /**
876 * Register store.
877 */
878 static void
879 emit_store(
880 struct lp_build_tgsi_soa_context *bld,
881 const struct tgsi_full_instruction *inst,
882 unsigned index,
883 unsigned chan_index,
884 LLVMValueRef pred,
885 LLVMValueRef value)
886 {
887 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
888 struct lp_build_context *uint_bld = &bld->uint_bld;
889 LLVMValueRef indirect_index = NULL;
890
891 switch( inst->Instruction.Saturate ) {
892 case TGSI_SAT_NONE:
893 break;
894
895 case TGSI_SAT_ZERO_ONE:
896 value = lp_build_max(&bld->base, value, bld->base.zero);
897 value = lp_build_min(&bld->base, value, bld->base.one);
898 break;
899
900 case TGSI_SAT_MINUS_PLUS_ONE:
901 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
902 value = lp_build_min(&bld->base, value, bld->base.one);
903 break;
904
905 default:
906 assert(0);
907 }
908
909 if (reg->Register.Indirect) {
910 indirect_index = get_indirect_index(bld,
911 reg->Register.File,
912 reg->Register.Index,
913 &reg->Indirect);
914 } else {
915 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
916 }
917
918 switch( reg->Register.File ) {
919 case TGSI_FILE_OUTPUT:
920 if (reg->Register.Indirect) {
921 LLVMBuilderRef builder = bld->base.builder;
922 LLVMValueRef chan_vec =
923 lp_build_const_int_vec(uint_bld->type, chan_index);
924 LLVMValueRef length_vec =
925 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
926 LLVMValueRef index_vec; /* indexes into the temp registers */
927 LLVMValueRef outputs_array;
928 LLVMValueRef pixel_offsets;
929 LLVMTypeRef float_ptr_type;
930 int i;
931
932 /* build pixel offset vector: {0, 1, 2, 3, ...} */
933 pixel_offsets = uint_bld->undef;
934 for (i = 0; i < bld->base.type.length; i++) {
935 LLVMValueRef ii = lp_build_const_int32(i);
936 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
937 ii, ii, "");
938 }
939
940 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
941 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
942 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
943 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
944 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
945
946 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
947 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
948 float_ptr_type, "");
949
950 /* Scatter store values into temp registers */
951 emit_mask_scatter(bld, outputs_array, index_vec, value,
952 &bld->exec_mask, pred);
953 }
954 else {
955 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
956 chan_index);
957 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
958 }
959 break;
960
961 case TGSI_FILE_TEMPORARY:
962 if (reg->Register.Indirect) {
963 LLVMBuilderRef builder = bld->base.builder;
964 LLVMValueRef chan_vec =
965 lp_build_const_int_vec(uint_bld->type, chan_index);
966 LLVMValueRef length_vec =
967 lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
968 LLVMValueRef index_vec; /* indexes into the temp registers */
969 LLVMValueRef temps_array;
970 LLVMValueRef pixel_offsets;
971 LLVMTypeRef float_ptr_type;
972 int i;
973
974 /* build pixel offset vector: {0, 1, 2, 3, ...} */
975 pixel_offsets = uint_bld->undef;
976 for (i = 0; i < bld->base.type.length; i++) {
977 LLVMValueRef ii = lp_build_const_int32(i);
978 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
979 ii, ii, "");
980 }
981
982 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
983 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
984 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
985 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
986 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
987
988 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
989 temps_array = LLVMBuildBitCast(builder, bld->temps_array,
990 float_ptr_type, "");
991
992 /* Scatter store values into temp registers */
993 emit_mask_scatter(bld, temps_array, index_vec, value,
994 &bld->exec_mask, pred);
995 }
996 else {
997 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
998 chan_index);
999 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
1000 }
1001 break;
1002
1003 case TGSI_FILE_ADDRESS:
1004 lp_exec_mask_store(&bld->exec_mask, pred, value,
1005 bld->addr[reg->Indirect.Index][chan_index]);
1006 break;
1007
1008 case TGSI_FILE_PREDICATE:
1009 lp_exec_mask_store(&bld->exec_mask, pred, value,
1010 bld->preds[reg->Register.Index][chan_index]);
1011 break;
1012
1013 default:
1014 assert( 0 );
1015 }
1016 }
1017
1018
1019 /**
1020 * High-level instruction translators.
1021 */
1022
/**
 * Emit a texture-sampling instruction (TEX/TXB/TXL/TXP/TXD) by fetching
 * coordinates and LOD/derivative arguments, then delegating to the
 * sampler's emit_fetch_texel() hook.  Writes 4 result vectors to texel[].
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator: return undefs rather than crash */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinates needed by the target (shadow targets
    * include the comparison value as the third coordinate) */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias / explicit LOD both come from the W channel of src 0 */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projective texturing: divide coords by W */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives come from src 1 and 2; only the first element
       * of each vector is used */
      LLVMTypeRef i32t = LLVMInt32Type();
      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
      for (i = 0; i < num_coords; i++) {
         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives from neighbouring quad pixels */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = LLVMGetUndef(bld->base.elem_type);
      ddy[i] = LLVMGetUndef(bld->base.elem_type);
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
1123
1124 static boolean
1125 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1126 int pc)
1127 {
1128 int i;
1129
1130 for (i = 0; i < 5; i++) {
1131 unsigned opcode;
1132
1133 if (pc + i >= bld->info->num_instructions)
1134 return TRUE;
1135
1136 opcode = bld->instructions[pc + i].Instruction.Opcode;
1137
1138 if (opcode == TGSI_OPCODE_END)
1139 return TRUE;
1140
1141 if (opcode == TGSI_OPCODE_TEX ||
1142 opcode == TGSI_OPCODE_TXP ||
1143 opcode == TGSI_OPCODE_TXD ||
1144 opcode == TGSI_OPCODE_TXB ||
1145 opcode == TGSI_OPCODE_TXL ||
1146 opcode == TGSI_OPCODE_TXF ||
1147 opcode == TGSI_OPCODE_TXQ ||
1148 opcode == TGSI_OPCODE_CAL ||
1149 opcode == TGSI_OPCODE_CALLNZ ||
1150 opcode == TGSI_OPCODE_IF ||
1151 opcode == TGSI_OPCODE_IFC ||
1152 opcode == TGSI_OPCODE_BGNLOOP ||
1153 opcode == TGSI_OPCODE_SWITCH)
1154 return FALSE;
1155 }
1156
1157 return TRUE;
1158 }
1159
1160
1161
1162 /**
1163 * Kill fragment if any of the src register values are negative.
1164 */
1165 static void
1166 emit_kil(
1167 struct lp_build_tgsi_soa_context *bld,
1168 const struct tgsi_full_instruction *inst,
1169 int pc)
1170 {
1171 const struct tgsi_full_src_register *reg = &inst->Src[0];
1172 LLVMValueRef terms[NUM_CHANNELS];
1173 LLVMValueRef mask;
1174 unsigned chan_index;
1175
1176 memset(&terms, 0, sizeof terms);
1177
1178 FOR_EACH_CHANNEL( chan_index ) {
1179 unsigned swizzle;
1180
1181 /* Unswizzle channel */
1182 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1183
1184 /* Check if the component has not been already tested. */
1185 assert(swizzle < NUM_CHANNELS);
1186 if( !terms[swizzle] )
1187 /* TODO: change the comparison operator instead of setting the sign */
1188 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
1189 }
1190
1191 mask = NULL;
1192 FOR_EACH_CHANNEL( chan_index ) {
1193 if(terms[chan_index]) {
1194 LLVMValueRef chan_mask;
1195
1196 /*
1197 * If term < 0 then mask = 0 else mask = ~0.
1198 */
1199 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1200
1201 if(mask)
1202 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
1203 else
1204 mask = chan_mask;
1205 }
1206 }
1207
1208 if(mask) {
1209 lp_build_mask_update(bld->mask, mask);
1210
1211 if (!near_end_of_shader(bld, pc))
1212 lp_build_mask_check(bld->mask);
1213 }
1214 }
1215
1216
1217 /**
1218 * Predicated fragment kill.
1219 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1220 * The only predication is the execution mask which will apply if
1221 * we're inside a loop or conditional.
1222 */
1223 static void
1224 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1225 const struct tgsi_full_instruction *inst,
1226 int pc)
1227 {
1228 LLVMValueRef mask;
1229
1230 /* For those channels which are "alive", disable fragment shader
1231 * execution.
1232 */
1233 if (bld->exec_mask.has_mask) {
1234 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1235 }
1236 else {
1237 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1238 mask = zero;
1239 }
1240
1241 lp_build_mask_update(bld->mask, mask);
1242
1243 if (!near_end_of_shader(bld, pc))
1244 lp_build_mask_check(bld->mask);
1245 }
1246
1247
1248 /**
1249 * Emit code which will dump the value of all the temporary registers
1250 * to stdout.
1251 */
1252 static void
1253 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1254 {
1255 LLVMBuilderRef builder = bld->base.builder;
1256 LLVMValueRef temp_ptr;
1257 LLVMValueRef i0 = lp_build_const_int32(0);
1258 LLVMValueRef i1 = lp_build_const_int32(1);
1259 LLVMValueRef i2 = lp_build_const_int32(2);
1260 LLVMValueRef i3 = lp_build_const_int32(3);
1261 int index;
1262 int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1263
1264 for (index = 0; index < n; index++) {
1265 LLVMValueRef idx = lp_build_const_int32(index);
1266 LLVMValueRef v[4][4], res;
1267 int chan;
1268
1269 lp_build_printf(builder, "TEMP[%d]:\n", idx);
1270
1271 for (chan = 0; chan < 4; chan++) {
1272 temp_ptr = get_temp_ptr(bld, index, chan);
1273 res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
1274 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1275 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1276 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1277 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1278 }
1279
1280 lp_build_printf(builder, " X: %f %f %f %f\n",
1281 v[0][0], v[0][1], v[0][2], v[0][3]);
1282 lp_build_printf(builder, " Y: %f %f %f %f\n",
1283 v[1][0], v[1][1], v[1][2], v[1][3]);
1284 lp_build_printf(builder, " Z: %f %f %f %f\n",
1285 v[2][0], v[2][1], v[2][2], v[2][3]);
1286 lp_build_printf(builder, " W: %f %f %f %f\n",
1287 v[3][0], v[3][1], v[3][2], v[3][3]);
1288 }
1289 }
1290
1291
1292
1293 static void
1294 emit_declaration(
1295 struct lp_build_tgsi_soa_context *bld,
1296 const struct tgsi_full_declaration *decl)
1297 {
1298 LLVMTypeRef vec_type = bld->base.vec_type;
1299 const unsigned first = decl->Range.First;
1300 const unsigned last = decl->Range.Last;
1301 unsigned idx, i;
1302
1303 for (idx = first; idx <= last; ++idx) {
1304 assert(last <= bld->info->file_max[decl->Declaration.File]);
1305 switch (decl->Declaration.File) {
1306 case TGSI_FILE_TEMPORARY:
1307 assert(idx < LP_MAX_TGSI_TEMPS);
1308 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1309 for (i = 0; i < NUM_CHANNELS; i++)
1310 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1311 vec_type, "temp");
1312 }
1313 break;
1314
1315 case TGSI_FILE_OUTPUT:
1316 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1317 for (i = 0; i < NUM_CHANNELS; i++)
1318 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1319 vec_type, "output");
1320 }
1321 break;
1322
1323 case TGSI_FILE_ADDRESS:
1324 assert(idx < LP_MAX_TGSI_ADDRS);
1325 for (i = 0; i < NUM_CHANNELS; i++)
1326 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1327 vec_type, "addr");
1328 break;
1329
1330 case TGSI_FILE_PREDICATE:
1331 assert(idx < LP_MAX_TGSI_PREDS);
1332 for (i = 0; i < NUM_CHANNELS; i++)
1333 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1334 vec_type, "predicate");
1335 break;
1336
1337 default:
1338 /* don't need to declare other vars */
1339 break;
1340 }
1341 }
1342 }
1343
1344
1345 /**
1346 * Emit LLVM for one TGSI instruction.
1347 * \param return TRUE for success, FALSE otherwise
1348 */
1349 static boolean
1350 emit_instruction(
1351 struct lp_build_tgsi_soa_context *bld,
1352 const struct tgsi_full_instruction *inst,
1353 const struct tgsi_opcode_info *info,
1354 int *pc)
1355 {
1356 unsigned chan_index;
1357 LLVMValueRef src0, src1, src2;
1358 LLVMValueRef tmp0, tmp1, tmp2;
1359 LLVMValueRef tmp3 = NULL;
1360 LLVMValueRef tmp4 = NULL;
1361 LLVMValueRef tmp5 = NULL;
1362 LLVMValueRef tmp6 = NULL;
1363 LLVMValueRef tmp7 = NULL;
1364 LLVMValueRef res;
1365 LLVMValueRef dst0[NUM_CHANNELS];
1366
1367 /*
1368 * Stores and write masks are handled in a general fashion after the long
1369 * instruction opcode switch statement.
1370 *
    * Although not strictly necessary, we avoid generating instructions for
    * channels which won't be stored, in cases where that's easy. For some
1373 * complex instructions, like texture sampling, it is more convenient to
1374 * assume a full writemask and then let LLVM optimization passes eliminate
1375 * redundant code.
1376 */
1377
1378 (*pc)++;
1379
1380 assert(info->num_dst <= 1);
1381 if (info->num_dst) {
1382 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1383 dst0[chan_index] = bld->base.undef;
1384 }
1385 }
1386
1387 switch (inst->Instruction.Opcode) {
1388 case TGSI_OPCODE_ARL:
1389 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1390 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1391 tmp0 = lp_build_floor(&bld->base, tmp0);
1392 dst0[chan_index] = tmp0;
1393 }
1394 break;
1395
1396 case TGSI_OPCODE_MOV:
1397 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1398 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1399 }
1400 break;
1401
1402 case TGSI_OPCODE_LIT:
1403 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1404 dst0[CHAN_X] = bld->base.one;
1405 }
1406 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1407 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1408 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1409 }
1410 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1411 /* XMM[1] = SrcReg[0].yyyy */
1412 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1413 /* XMM[1] = max(XMM[1], 0) */
1414 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1415 /* XMM[2] = SrcReg[0].wwww */
1416 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1417 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1418 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1419 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1420 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1421 }
1422 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1423 dst0[CHAN_W] = bld->base.one;
1424 }
1425 break;
1426
1427 case TGSI_OPCODE_RCP:
1428 /* TGSI_OPCODE_RECIP */
1429 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1430 res = lp_build_rcp(&bld->base, src0);
1431 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1432 dst0[chan_index] = res;
1433 }
1434 break;
1435
1436 case TGSI_OPCODE_RSQ:
1437 /* TGSI_OPCODE_RECIPSQRT */
1438 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1439 src0 = lp_build_abs(&bld->base, src0);
1440 res = lp_build_rsqrt(&bld->base, src0);
1441 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1442 dst0[chan_index] = res;
1443 }
1444 break;
1445
1446 case TGSI_OPCODE_EXP:
1447 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1448 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1449 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1450 LLVMValueRef *p_exp2_int_part = NULL;
1451 LLVMValueRef *p_frac_part = NULL;
1452 LLVMValueRef *p_exp2 = NULL;
1453
1454 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1455
1456 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1457 p_exp2_int_part = &tmp0;
1458 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1459 p_frac_part = &tmp1;
1460 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1461 p_exp2 = &tmp2;
1462
1463 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1464
1465 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1466 dst0[CHAN_X] = tmp0;
1467 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1468 dst0[CHAN_Y] = tmp1;
1469 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1470 dst0[CHAN_Z] = tmp2;
1471 }
1472 /* dst.w = 1.0 */
1473 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1474 dst0[CHAN_W] = bld->base.one;
1475 }
1476 break;
1477
1478 case TGSI_OPCODE_LOG:
1479 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1480 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1481 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1482 LLVMValueRef *p_floor_log2 = NULL;
1483 LLVMValueRef *p_exp = NULL;
1484 LLVMValueRef *p_log2 = NULL;
1485
1486 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1487 src0 = lp_build_abs( &bld->base, src0 );
1488
1489 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1490 p_floor_log2 = &tmp0;
1491 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1492 p_exp = &tmp1;
1493 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1494 p_log2 = &tmp2;
1495
1496 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1497
1498 /* dst.x = floor(lg2(abs(src.x))) */
1499 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1500 dst0[CHAN_X] = tmp0;
1501 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1502 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1503 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1504 }
1505 /* dst.z = lg2(abs(src.x)) */
1506 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1507 dst0[CHAN_Z] = tmp2;
1508 }
1509 /* dst.w = 1.0 */
1510 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1511 dst0[CHAN_W] = bld->base.one;
1512 }
1513 break;
1514
1515 case TGSI_OPCODE_MUL:
1516 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1517 src0 = emit_fetch( bld, inst, 0, chan_index );
1518 src1 = emit_fetch( bld, inst, 1, chan_index );
1519 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1520 }
1521 break;
1522
1523 case TGSI_OPCODE_ADD:
1524 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1525 src0 = emit_fetch( bld, inst, 0, chan_index );
1526 src1 = emit_fetch( bld, inst, 1, chan_index );
1527 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1528 }
1529 break;
1530
1531 case TGSI_OPCODE_DP3:
1532 /* TGSI_OPCODE_DOT3 */
1533 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1534 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1535 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1536 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1537 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1538 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1539 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1540 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1541 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1542 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1543 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1545 dst0[chan_index] = tmp0;
1546 }
1547 break;
1548
1549 case TGSI_OPCODE_DP4:
1550 /* TGSI_OPCODE_DOT4 */
1551 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1552 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1553 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1554 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1555 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1556 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1557 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1558 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1559 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1560 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1561 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1562 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1563 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1564 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1565 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1566 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1567 dst0[chan_index] = tmp0;
1568 }
1569 break;
1570
1571 case TGSI_OPCODE_DST:
1572 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1573 dst0[CHAN_X] = bld->base.one;
1574 }
1575 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1576 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1577 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1578 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1579 }
1580 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1581 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1582 }
1583 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1584 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1585 }
1586 break;
1587
1588 case TGSI_OPCODE_MIN:
1589 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1590 src0 = emit_fetch( bld, inst, 0, chan_index );
1591 src1 = emit_fetch( bld, inst, 1, chan_index );
1592 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1593 }
1594 break;
1595
1596 case TGSI_OPCODE_MAX:
1597 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1598 src0 = emit_fetch( bld, inst, 0, chan_index );
1599 src1 = emit_fetch( bld, inst, 1, chan_index );
1600 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1601 }
1602 break;
1603
1604 case TGSI_OPCODE_SLT:
1605 /* TGSI_OPCODE_SETLT */
1606 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1607 src0 = emit_fetch( bld, inst, 0, chan_index );
1608 src1 = emit_fetch( bld, inst, 1, chan_index );
1609 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1610 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1611 }
1612 break;
1613
1614 case TGSI_OPCODE_SGE:
1615 /* TGSI_OPCODE_SETGE */
1616 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1617 src0 = emit_fetch( bld, inst, 0, chan_index );
1618 src1 = emit_fetch( bld, inst, 1, chan_index );
1619 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1620 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1621 }
1622 break;
1623
1624 case TGSI_OPCODE_MAD:
1625 /* TGSI_OPCODE_MADD */
1626 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1627 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1628 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1629 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1630 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1631 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1632 dst0[chan_index] = tmp0;
1633 }
1634 break;
1635
1636 case TGSI_OPCODE_SUB:
1637 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1638 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1639 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1640 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1641 }
1642 break;
1643
1644 case TGSI_OPCODE_LRP:
1645 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1646 src0 = emit_fetch( bld, inst, 0, chan_index );
1647 src1 = emit_fetch( bld, inst, 1, chan_index );
1648 src2 = emit_fetch( bld, inst, 2, chan_index );
1649 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1650 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1651 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1652 }
1653 break;
1654
1655 case TGSI_OPCODE_CND:
1656 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1657 src0 = emit_fetch( bld, inst, 0, chan_index );
1658 src1 = emit_fetch( bld, inst, 1, chan_index );
1659 src2 = emit_fetch( bld, inst, 2, chan_index );
1660 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1661 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1662 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1663 }
1664 break;
1665
1666 case TGSI_OPCODE_DP2A:
1667 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1668 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1669 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1670 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1671 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1672 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1673 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1674 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1675 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1676 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1677 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1678 }
1679 break;
1680
1681 case TGSI_OPCODE_FRC:
1682 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1683 src0 = emit_fetch( bld, inst, 0, chan_index );
1684 tmp0 = lp_build_floor(&bld->base, src0);
1685 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1686 dst0[chan_index] = tmp0;
1687 }
1688 break;
1689
1690 case TGSI_OPCODE_CLAMP:
1691 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1692 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1693 src1 = emit_fetch( bld, inst, 1, chan_index );
1694 src2 = emit_fetch( bld, inst, 2, chan_index );
1695 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1696 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1697 dst0[chan_index] = tmp0;
1698 }
1699 break;
1700
1701 case TGSI_OPCODE_FLR:
1702 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1703 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1704 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1705 }
1706 break;
1707
1708 case TGSI_OPCODE_ROUND:
1709 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1710 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1711 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1712 }
1713 break;
1714
1715 case TGSI_OPCODE_EX2: {
1716 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1717 tmp0 = lp_build_exp2( &bld->base, tmp0);
1718 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1719 dst0[chan_index] = tmp0;
1720 }
1721 break;
1722 }
1723
1724 case TGSI_OPCODE_LG2:
1725 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1726 tmp0 = lp_build_log2( &bld->base, tmp0);
1727 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1728 dst0[chan_index] = tmp0;
1729 }
1730 break;
1731
1732 case TGSI_OPCODE_POW:
1733 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1734 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1735 res = lp_build_pow( &bld->base, src0, src1 );
1736 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1737 dst0[chan_index] = res;
1738 }
1739 break;
1740
1741 case TGSI_OPCODE_XPD:
1742 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1743 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1744 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1745 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1746 }
1747 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1748 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1749 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1750 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1751 }
1752 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1753 tmp2 = tmp0;
1754 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1755 tmp5 = tmp3;
1756 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1757 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1758 dst0[CHAN_X] = tmp2;
1759 }
1760 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1761 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1762 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1763 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1764 }
1765 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1766 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1767 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1768 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1769 dst0[CHAN_Y] = tmp3;
1770 }
1771 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1772 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1773 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1774 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1775 dst0[CHAN_Z] = tmp5;
1776 }
1777 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1778 dst0[CHAN_W] = bld->base.one;
1779 }
1780 break;
1781
1782 case TGSI_OPCODE_ABS:
1783 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1784 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1785 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1786 }
1787 break;
1788
1789 case TGSI_OPCODE_RCC:
1790 /* deprecated? */
1791 assert(0);
1792 return FALSE;
1793
1794 case TGSI_OPCODE_DPH:
1795 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1796 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1797 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1798 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1799 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1800 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1801 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1802 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1803 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1804 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1805 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1806 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1807 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1808 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1809 dst0[chan_index] = tmp0;
1810 }
1811 break;
1812
1813 case TGSI_OPCODE_COS:
1814 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1815 tmp0 = lp_build_cos( &bld->base, tmp0 );
1816 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1817 dst0[chan_index] = tmp0;
1818 }
1819 break;
1820
1821 case TGSI_OPCODE_DDX:
1822 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1823 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1824 }
1825 break;
1826
1827 case TGSI_OPCODE_DDY:
1828 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1829 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1830 }
1831 break;
1832
1833 case TGSI_OPCODE_KILP:
1834 /* predicated kill */
1835 emit_kilp( bld, inst, (*pc)-1 );
1836 break;
1837
1838 case TGSI_OPCODE_KIL:
1839 /* conditional kill */
1840 emit_kil( bld, inst, (*pc)-1 );
1841 break;
1842
1843 case TGSI_OPCODE_PK2H:
1844 return FALSE;
1845 break;
1846
1847 case TGSI_OPCODE_PK2US:
1848 return FALSE;
1849 break;
1850
1851 case TGSI_OPCODE_PK4B:
1852 return FALSE;
1853 break;
1854
1855 case TGSI_OPCODE_PK4UB:
1856 return FALSE;
1857 break;
1858
1859 case TGSI_OPCODE_RFL:
1860 return FALSE;
1861 break;
1862
1863 case TGSI_OPCODE_SEQ:
1864 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1865 src0 = emit_fetch( bld, inst, 0, chan_index );
1866 src1 = emit_fetch( bld, inst, 1, chan_index );
1867 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1868 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1869 }
1870 break;
1871
1872 case TGSI_OPCODE_SFL:
1873 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1874 dst0[chan_index] = bld->base.zero;
1875 }
1876 break;
1877
1878 case TGSI_OPCODE_SGT:
1879 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1880 src0 = emit_fetch( bld, inst, 0, chan_index );
1881 src1 = emit_fetch( bld, inst, 1, chan_index );
1882 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1883 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1884 }
1885 break;
1886
1887 case TGSI_OPCODE_SIN:
1888 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1889 tmp0 = lp_build_sin( &bld->base, tmp0 );
1890 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1891 dst0[chan_index] = tmp0;
1892 }
1893 break;
1894
1895 case TGSI_OPCODE_SLE:
1896 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1897 src0 = emit_fetch( bld, inst, 0, chan_index );
1898 src1 = emit_fetch( bld, inst, 1, chan_index );
1899 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1900 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1901 }
1902 break;
1903
1904 case TGSI_OPCODE_SNE:
1905 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1906 src0 = emit_fetch( bld, inst, 0, chan_index );
1907 src1 = emit_fetch( bld, inst, 1, chan_index );
1908 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1909 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1910 }
1911 break;
1912
1913 case TGSI_OPCODE_STR:
1914 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1915 dst0[chan_index] = bld->base.one;
1916 }
1917 break;
1918
1919 case TGSI_OPCODE_TEX:
1920 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1921 break;
1922
1923 case TGSI_OPCODE_TXD:
1924 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1925 break;
1926
1927 case TGSI_OPCODE_UP2H:
1928 /* deprecated */
1929 assert (0);
1930 return FALSE;
1931 break;
1932
1933 case TGSI_OPCODE_UP2US:
1934 /* deprecated */
1935 assert(0);
1936 return FALSE;
1937 break;
1938
1939 case TGSI_OPCODE_UP4B:
1940 /* deprecated */
1941 assert(0);
1942 return FALSE;
1943 break;
1944
1945 case TGSI_OPCODE_UP4UB:
1946 /* deprecated */
1947 assert(0);
1948 return FALSE;
1949 break;
1950
1951 case TGSI_OPCODE_X2D:
1952 /* deprecated? */
1953 assert(0);
1954 return FALSE;
1955 break;
1956
1957 case TGSI_OPCODE_ARA:
1958 /* deprecated */
1959 assert(0);
1960 return FALSE;
1961 break;
1962
1963 case TGSI_OPCODE_ARR:
1964 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1965 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1966 tmp0 = lp_build_round(&bld->base, tmp0);
1967 dst0[chan_index] = tmp0;
1968 }
1969 break;
1970
1971 case TGSI_OPCODE_BRA:
1972 /* deprecated */
1973 assert(0);
1974 return FALSE;
1975 break;
1976
1977 case TGSI_OPCODE_CAL:
1978 lp_exec_mask_call(&bld->exec_mask,
1979 inst->Label.Label,
1980 pc);
1981
1982 break;
1983
1984 case TGSI_OPCODE_RET:
1985 lp_exec_mask_ret(&bld->exec_mask, pc);
1986 break;
1987
1988 case TGSI_OPCODE_END:
1989 if (0) {
1990 /* for debugging */
1991 emit_dump_temps(bld);
1992 }
1993 *pc = -1;
1994 break;
1995
1996 case TGSI_OPCODE_SSG:
1997 /* TGSI_OPCODE_SGN */
1998 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1999 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2000 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
2001 }
2002 break;
2003
2004 case TGSI_OPCODE_CMP:
2005 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2006 src0 = emit_fetch( bld, inst, 0, chan_index );
2007 src1 = emit_fetch( bld, inst, 1, chan_index );
2008 src2 = emit_fetch( bld, inst, 2, chan_index );
2009 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
2010 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
2011 }
2012 break;
2013
2014 case TGSI_OPCODE_SCS:
2015 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
2016 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2017 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2018 }
2019 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
2020 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2021 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2022 }
2023 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
2024 dst0[CHAN_Z] = bld->base.zero;
2025 }
2026 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
2027 dst0[CHAN_W] = bld->base.one;
2028 }
2029 break;
2030
2031 case TGSI_OPCODE_TXB:
2032 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2033 break;
2034
2035 case TGSI_OPCODE_NRM:
2036 /* fall-through */
2037 case TGSI_OPCODE_NRM4:
2038 /* 3 or 4-component normalization */
2039 {
2040 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2041
2042 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
2043 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
2044 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
2045 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
2046
2047 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2048
2049 /* xmm4 = src.x */
2050 /* xmm0 = src.x * src.x */
2051 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2052 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2053 tmp4 = tmp0;
2054 }
2055 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2056
2057 /* xmm5 = src.y */
2058 /* xmm0 = xmm0 + src.y * src.y */
2059 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2060 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2061 tmp5 = tmp1;
2062 }
2063 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2064 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2065
2066 /* xmm6 = src.z */
2067 /* xmm0 = xmm0 + src.z * src.z */
2068 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2069 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2070 tmp6 = tmp1;
2071 }
2072 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2073 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2074
2075 if (dims == 4) {
2076 /* xmm7 = src.w */
2077 /* xmm0 = xmm0 + src.w * src.w */
2078 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2079 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2080 tmp7 = tmp1;
2081 }
2082 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2083 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2084 }
2085
2086 /* xmm1 = 1 / sqrt(xmm0) */
2087 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2088
2089 /* dst.x = xmm1 * src.x */
2090 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2091 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2092 }
2093
2094 /* dst.y = xmm1 * src.y */
2095 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2096 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2097 }
2098
2099 /* dst.z = xmm1 * src.z */
2100 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2101 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2102 }
2103
2104 /* dst.w = xmm1 * src.w */
2105 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2106 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2107 }
2108 }
2109
2110 /* dst.w = 1.0 */
2111 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2112 dst0[CHAN_W] = bld->base.one;
2113 }
2114 }
2115 break;
2116
2117 case TGSI_OPCODE_DIV:
2118 /* deprecated */
2119 assert( 0 );
2120 return FALSE;
2121 break;
2122
2123 case TGSI_OPCODE_DP2:
2124 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
2125 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
2126 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2127 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
2128 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
2129 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2130 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2131 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2132 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2133 }
2134 break;
2135
2136 case TGSI_OPCODE_TXL:
2137 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2138 break;
2139
2140 case TGSI_OPCODE_TXP:
2141 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2142 break;
2143
2144 case TGSI_OPCODE_BRK:
2145 lp_exec_break(&bld->exec_mask);
2146 break;
2147
2148 case TGSI_OPCODE_IF:
2149 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2150 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2151 tmp0, bld->base.zero);
2152 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2153 break;
2154
2155 case TGSI_OPCODE_BGNLOOP:
2156 lp_exec_bgnloop(&bld->exec_mask);
2157 break;
2158
2159 case TGSI_OPCODE_BGNSUB:
2160 lp_exec_mask_bgnsub(&bld->exec_mask);
2161 break;
2162
2163 case TGSI_OPCODE_ELSE:
2164 lp_exec_mask_cond_invert(&bld->exec_mask);
2165 break;
2166
2167 case TGSI_OPCODE_ENDIF:
2168 lp_exec_mask_cond_pop(&bld->exec_mask);
2169 break;
2170
2171 case TGSI_OPCODE_ENDLOOP:
2172 lp_exec_endloop(&bld->exec_mask);
2173 break;
2174
2175 case TGSI_OPCODE_ENDSUB:
2176 lp_exec_mask_endsub(&bld->exec_mask, pc);
2177 break;
2178
2179 case TGSI_OPCODE_PUSHA:
2180 /* deprecated? */
2181 assert(0);
2182 return FALSE;
2183 break;
2184
2185 case TGSI_OPCODE_POPA:
2186 /* deprecated? */
2187 assert(0);
2188 return FALSE;
2189 break;
2190
2191 case TGSI_OPCODE_CEIL:
2192 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2193 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2194 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2195 }
2196 break;
2197
2198 case TGSI_OPCODE_I2F:
2199 /* deprecated? */
2200 assert(0);
2201 return FALSE;
2202 break;
2203
2204 case TGSI_OPCODE_NOT:
2205 /* deprecated? */
2206 assert(0);
2207 return FALSE;
2208 break;
2209
2210 case TGSI_OPCODE_TRUNC:
2211 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2212 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2213 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2214 }
2215 break;
2216
2217 case TGSI_OPCODE_SHL:
2218 /* deprecated? */
2219 assert(0);
2220 return FALSE;
2221 break;
2222
2223 case TGSI_OPCODE_ISHR:
2224 /* deprecated? */
2225 assert(0);
2226 return FALSE;
2227 break;
2228
2229 case TGSI_OPCODE_AND:
2230 /* deprecated? */
2231 assert(0);
2232 return FALSE;
2233 break;
2234
2235 case TGSI_OPCODE_OR:
2236 /* deprecated? */
2237 assert(0);
2238 return FALSE;
2239 break;
2240
2241 case TGSI_OPCODE_MOD:
2242 /* deprecated? */
2243 assert(0);
2244 return FALSE;
2245 break;
2246
2247 case TGSI_OPCODE_XOR:
2248 /* deprecated? */
2249 assert(0);
2250 return FALSE;
2251 break;
2252
2253 case TGSI_OPCODE_SAD:
2254 /* deprecated? */
2255 assert(0);
2256 return FALSE;
2257 break;
2258
2259 case TGSI_OPCODE_TXF:
2260 /* deprecated? */
2261 assert(0);
2262 return FALSE;
2263 break;
2264
2265 case TGSI_OPCODE_TXQ:
2266 /* deprecated? */
2267 assert(0);
2268 return FALSE;
2269 break;
2270
2271 case TGSI_OPCODE_CONT:
2272 lp_exec_continue(&bld->exec_mask);
2273 break;
2274
2275 case TGSI_OPCODE_EMIT:
2276 return FALSE;
2277 break;
2278
2279 case TGSI_OPCODE_ENDPRIM:
2280 return FALSE;
2281 break;
2282
2283 case TGSI_OPCODE_NOP:
2284 break;
2285
2286 default:
2287 return FALSE;
2288 }
2289
2290 if(info->num_dst) {
2291 LLVMValueRef pred[NUM_CHANNELS];
2292
2293 emit_fetch_predicate( bld, inst, pred );
2294
2295 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2296 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2297 }
2298 }
2299
2300 return TRUE;
2301 }
2302
2303
/**
 * Translate a TGSI shader into LLVM IR in SoA (structure-of-arrays) form:
 * each TGSI register channel becomes a vector of 'type.length' elements, so
 * every emitted IR instruction operates on that many fragments/vertices at
 * once.
 *
 * Works in two passes over the token stream: the first pass handles
 * declarations/immediates and buffers all instructions into bld.instructions;
 * the second pass walks that buffer by program counter (so subroutine
 * call/return can move 'pc' non-linearly) and emits IR for each instruction.
 *
 * \param builder   LLVM builder positioned where code is to be emitted
 * \param tokens    the TGSI shader tokens to translate
 * \param type      per-channel vector type (width/length)
 * \param mask      execution mask context (stored into the build context)
 * \param consts_ptr  pointer to the constant buffer
 * \param system_values_array  array built by lp_build_system_values_array()
 * \param pos       position register values (stored into the build context)
 * \param inputs    shader input values, indexed [attrib][channel]
 * \param outputs   shader output slots, indexed [attrib][channel]
 * \param sampler   texture sampling code generator
 * \param info      scanned shader info (register usage, indirect files, ...)
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef system_values_array,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;           /* program counter for the second (emit) pass */

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* signed integer type with the same width/length as 'type' */
   /* NOTE(review): res_type is set up but never read in this function --
    * looks like leftover/dead code; confirm before removing. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
   lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.info = info;
   bld.indirect_files = info->indirect_files;
   /* instruction buffer for the second pass; grown with REALLOC as needed */
   bld.instructions = (struct tgsi_full_instruction *)
                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   /* Registers addressed indirectly must live in memory (alloca arrays)
    * instead of LLVM SSA values, so GEPs with computed indices can reach
    * them. */
   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                             info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0);
      bld.temps_array = lp_build_array_alloca(bld.base.builder,
                                              bld.base.vec_type, array_size,
                                              "temp_array");
   }

   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                             info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0);
      bld.outputs_array = lp_build_array_alloca(bld.base.builder,
                                                bld.base.vec_type, array_size,
                                                "output_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld.base.vec_type;
      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                             info->file_max[TGSI_FILE_INPUT]*4 + 4, 0);
      bld.inputs_array = lp_build_array_alloca(bld.base.builder,
                                               vec_type, array_size,
                                               "input_array");

      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < info->num_inputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            /* flat layout: 4 channels per input attribute */
            LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(bld.base.builder, bld.inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld.inputs[index][chan];
            if (value)
               LLVMBuildStore(bld.base.builder, value, input_ptr);
         }
      }
   }

   bld.system_values_array = system_values_array;

   /* First pass: collect declarations, immediates and instructions. */
   tgsi_parse_init( &parse, tokens );

   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               /* buffer full -- grow it by another LP_MAX_INSTRUCTIONS */
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               if (!instructions) {
                  /* out of memory: silently drop this instruction */
                  break;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            /* each immediate component is splatted to a full vector */
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
            /* pad unspecified components with undef */
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
      }
   }

   /* Second pass: emit IR.  emit_instruction() advances 'pc' (and may jump
    * it for subroutines); pc == -1 signals the end of the program. */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* If we have indirect addressing in outputs we need to copy our alloca
    * array to the output slots specified by the caller */
   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      unsigned index, chan;
      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < info->num_outputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
         }
      }
   }

   /* debug dump of the shader and generated function (disabled) */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   /* debug dump of the whole module (disabled) */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}
2499
2500
2501 /**
2502 * Build up the system values array out of individual values such as
2503 * the instance ID, front-face, primitive ID, etc. The shader info is
2504 * used to determine which system values are needed and where to put
2505 * them in the system values array.
2506 *
2507 * XXX only instance ID is implemented at this time.
2508 *
2509 * The system values register file is similar to the constants buffer.
2510 * Example declaration:
2511 * DCL SV[0], INSTANCEID
2512 * Example instruction:
2513 * MOVE foo, SV[0].xxxx;
2514 *
2515 * \return LLVM float array (interpreted as float [][4])
2516 */
2517 LLVMValueRef
2518 lp_build_system_values_array(LLVMBuilderRef builder,
2519 const struct tgsi_shader_info *info,
2520 LLVMValueRef instance_id,
2521 LLVMValueRef facing)
2522 {
2523 LLVMValueRef size = lp_build_const_int32(4 * info->num_system_values);
2524 LLVMValueRef array = lp_build_array_alloca(builder, LLVMFloatType(),
2525 size, "sysvals_array");
2526 unsigned i;
2527
2528 for (i = 0; i < info->num_system_values; i++) {
2529 LLVMValueRef index = lp_build_const_int32(i * 4);
2530 LLVMValueRef ptr, value;
2531
2532 switch (info->system_value_semantic_name[i]) {
2533 case TGSI_SEMANTIC_INSTANCEID:
2534 /* convert instance ID from int to float */
2535 value = LLVMBuildSIToFP(builder, instance_id, LLVMFloatType(),
2536 "sysval_instanceid");
2537 break;
2538 case TGSI_SEMANTIC_FACE:
2539 /* fall-through */
2540 default:
2541 assert(0 && "unexpected semantic in build_system_values_array()");
2542 }
2543
2544 ptr = LLVMBuildGEP(builder, array, &index, 1, "");
2545 LLVMBuildStore(builder, value, ptr);
2546 }
2547
2548 return array;
2549 }