gallium: Unify defines of CHAN_[XYZW] in tgsi_exec.h
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_bitarit.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_init.h"
56 #include "lp_bld_logic.h"
57 #include "lp_bld_swizzle.h"
58 #include "lp_bld_flow.h"
59 #include "lp_bld_quad.h"
60 #include "lp_bld_tgsi.h"
61 #include "lp_bld_limits.h"
62 #include "lp_bld_debug.h"
63 #include "lp_bld_printf.h"
64
65
/* Iterate CHAN over all four register channels (x, y, z, w) */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Test whether channel CHAN is enabled in instruction INST's dst[0] writemask */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst[0]'s writemask */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* number of channels per TGSI register (x, y, z, w) */
#define NUM_CHANNELS 4

/* NOTE(review): appears to size the instructions buffer (see the
 * max_instructions field below); usage is not visible in this chunk.
 */
#define LP_MAX_INSTRUCTIONS 256
82
83
/**
 * Execution-mask state used to emulate TGSI control flow (IF/ELSE,
 * loops, CAL/RET) over SoA vectors: instead of branching, inactive
 * lanes are masked out when register stores are emitted.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any cond/loop/call nesting is active, i.e. exec_mask
    * may disable some lanes (see lp_exec_mask_update) */
   boolean has_mask;

   /* integer vector type matching bld->type; all masks use this type */
   LLVMTypeRef int_vec_type;

   /* saved condition masks for nested IF blocks */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* current loop header block (branch target of ENDLOOP) */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   /* alloca carrying break_mask across loop iterations */
   LLVMValueRef break_var;
   /* saved state of the enclosing loops, for nesting */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   LLVMValueRef ret_mask;
   /* saved return pc and ret_mask for nested subroutine calls */
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* combination of cond/cont/break/ret masks -- the effective
    * per-lane write-enable mask */
   LLVMValueRef exec_mask;
};
116
/**
 * Context for translating one TGSI shader to LLVM IR in SoA form:
 * each register channel is a vector holding that channel's value for
 * several pixels/vertices at once.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   /* pointer to the constant buffer contents */
   LLVMValueRef consts_ptr;
   /* NOTE(review): not referenced in this chunk -- presumably the
    * fragment position; confirm against callers */
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* texture sampling code generator (checked/used by emit_tex) */
   const struct lp_build_sampler_soa *sampler;

   /* immediates[] holds values used directly; temps/addr/preds hold
    * pointers that are read/written with load/store instructions */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   /* flat array holding system values (see emit_fetch) */
   LLVMValueRef system_values_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* parsed instruction buffer */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
169
170 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
171 {
172 mask->bld = bld;
173 mask->has_mask = FALSE;
174 mask->cond_stack_size = 0;
175 mask->loop_stack_size = 0;
176 mask->call_stack_size = 0;
177
178 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
179 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
180 LLVMConstAllOnes(mask->int_vec_type);
181 }
182
183 static void lp_exec_mask_update(struct lp_exec_mask *mask)
184 {
185 LLVMBuilderRef builder = mask->bld->gallivm->builder;
186
187 if (mask->loop_stack_size) {
188 /*for loops we need to update the entire mask at runtime */
189 LLVMValueRef tmp;
190 assert(mask->break_mask);
191 tmp = LLVMBuildAnd(builder,
192 mask->cont_mask,
193 mask->break_mask,
194 "maskcb");
195 mask->exec_mask = LLVMBuildAnd(builder,
196 mask->cond_mask,
197 tmp,
198 "maskfull");
199 } else
200 mask->exec_mask = mask->cond_mask;
201
202 if (mask->call_stack_size) {
203 mask->exec_mask = LLVMBuildAnd(builder,
204 mask->exec_mask,
205 mask->ret_mask,
206 "callmask");
207 }
208
209 mask->has_mask = (mask->cond_stack_size > 0 ||
210 mask->loop_stack_size > 0 ||
211 mask->call_stack_size > 0);
212 }
213
214 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
215 LLVMValueRef val)
216 {
217 LLVMBuilderRef builder = mask->bld->gallivm->builder;
218
219 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
220 if (mask->cond_stack_size == 0) {
221 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
222 }
223 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
224 assert(LLVMTypeOf(val) == mask->int_vec_type);
225 mask->cond_mask = LLVMBuildAnd(builder,
226 mask->cond_mask,
227 val,
228 "");
229 lp_exec_mask_update(mask);
230 }
231
232 static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
233 {
234 LLVMBuilderRef builder = mask->bld->gallivm->builder;
235 LLVMValueRef prev_mask;
236 LLVMValueRef inv_mask;
237
238 assert(mask->cond_stack_size);
239 prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
240 if (mask->cond_stack_size == 1) {
241 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
242 }
243
244 inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
245
246 mask->cond_mask = LLVMBuildAnd(builder,
247 inv_mask,
248 prev_mask, "");
249 lp_exec_mask_update(mask);
250 }
251
252 static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
253 {
254 assert(mask->cond_stack_size);
255 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
256 lp_exec_mask_update(mask);
257 }
258
/**
 * BGNLOOP: open a mask-controlled loop.
 * Saves the enclosing loop's state, allocates a variable that carries
 * the break mask across iterations, and starts the loop header block
 * that lp_exec_endloop() will branch back to.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size == 0) {
      /* outermost loop: all loop state must be in its reset state */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* push the enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* the break mask must survive the back-edge, so keep it in memory */
   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* start the loop header block; ENDLOOP branches back here */
   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
   LLVMBuildBr(builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(builder, mask->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
289
290 static void lp_exec_break(struct lp_exec_mask *mask)
291 {
292 LLVMBuilderRef builder = mask->bld->gallivm->builder;
293 LLVMValueRef exec_mask = LLVMBuildNot(builder,
294 mask->exec_mask,
295 "break");
296
297 mask->break_mask = LLVMBuildAnd(builder,
298 mask->break_mask,
299 exec_mask, "break_full");
300
301 lp_exec_mask_update(mask);
302 }
303
304 static void lp_exec_continue(struct lp_exec_mask *mask)
305 {
306 LLVMBuilderRef builder = mask->bld->gallivm->builder;
307 LLVMValueRef exec_mask = LLVMBuildNot(builder,
308 mask->exec_mask,
309 "");
310
311 mask->cont_mask = LLVMBuildAnd(builder,
312 mask->cont_mask,
313 exec_mask, "");
314
315 lp_exec_mask_update(mask);
316 }
317
318
/**
 * ENDLOOP: close the current loop.
 * Emits the conditional back-edge (loop again while any lane is still
 * active) and pops the saved loop state of the enclosing loop.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMBasicBlockRef endloop;
   /* one integer wide enough to hold the whole exec mask, so it can be
    * compared against zero in a single scalar test */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. TRUE while any lane still wants
    * another iteration */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* loop back while any lane is active, otherwise fall out */
   LLVMBuildCondBr(builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* pop the enclosing loop's state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
367
368 /* stores val into an address pointed to by dst.
369 * mask->exec_mask is used to figure out which bits of val
370 * should be stored into the address
371 * (0 means don't store this bit, 1 means do store).
372 */
373 static void lp_exec_mask_store(struct lp_exec_mask *mask,
374 LLVMValueRef pred,
375 LLVMValueRef val,
376 LLVMValueRef dst)
377 {
378 LLVMBuilderRef builder = mask->bld->gallivm->builder;
379
380 /* Mix the predicate and execution mask */
381 if (mask->has_mask) {
382 if (pred) {
383 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
384 } else {
385 pred = mask->exec_mask;
386 }
387 }
388
389 if (pred) {
390 LLVMValueRef real_val, dst_val;
391
392 dst_val = LLVMBuildLoad(builder, dst, "");
393 real_val = lp_build_select(mask->bld,
394 pred,
395 val, dst_val);
396
397 LLVMBuildStore(builder, real_val, dst);
398 } else
399 LLVMBuildStore(builder, val, dst);
400 }
401
402 static void lp_exec_mask_call(struct lp_exec_mask *mask,
403 int func,
404 int *pc)
405 {
406 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
407 mask->call_stack[mask->call_stack_size].pc = *pc;
408 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
409 mask->call_stack_size++;
410 *pc = func;
411 }
412
413 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
414 {
415 LLVMBuilderRef builder = mask->bld->gallivm->builder;
416 LLVMValueRef exec_mask;
417
418 if (mask->call_stack_size == 0) {
419 /* returning from main() */
420 *pc = -1;
421 return;
422 }
423 exec_mask = LLVMBuildNot(builder,
424 mask->exec_mask,
425 "ret");
426
427 mask->ret_mask = LLVMBuildAnd(builder,
428 mask->ret_mask,
429 exec_mask, "ret_full");
430
431 lp_exec_mask_update(mask);
432 }
433
/* BGNSUB marks a subroutine entry point.  No mask bookkeeping is needed
 * here -- CAL/RET (lp_exec_mask_call / lp_exec_mask_ret) do all the
 * work -- so this is intentionally a no-op.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
437
438 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
439 {
440 assert(mask->call_stack_size);
441 mask->call_stack_size--;
442 *pc = mask->call_stack[mask->call_stack_size].pc;
443 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
444 lp_exec_mask_update(mask);
445 }
446
447
448 /**
449 * Return pointer to a temporary register channel (src or dest).
450 * Note that indirect addressing cannot be handled here.
451 * \param index which temporary register
452 * \param chan which channel of the temp register.
453 */
454 static LLVMValueRef
455 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
456 unsigned index,
457 unsigned chan)
458 {
459 LLVMBuilderRef builder = bld->base.gallivm->builder;
460 assert(chan < 4);
461 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
462 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
463 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
464 }
465 else {
466 return bld->temps[index][chan];
467 }
468 }
469
470 /**
471 * Return pointer to a output register channel (src or dest).
472 * Note that indirect addressing cannot be handled here.
473 * \param index which output register
474 * \param chan which channel of the output register.
475 */
476 static LLVMValueRef
477 get_output_ptr(struct lp_build_tgsi_soa_context *bld,
478 unsigned index,
479 unsigned chan)
480 {
481 LLVMBuilderRef builder = bld->base.gallivm->builder;
482 assert(chan < 4);
483 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
484 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
485 index * 4 + chan);
486 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
487 }
488 else {
489 return bld->outputs[index][chan];
490 }
491 }
492
493 /**
494 * Gather vector.
495 * XXX the lp_build_gather() function should be capable of doing this
496 * with a little work.
497 */
498 static LLVMValueRef
499 build_gather(struct lp_build_tgsi_soa_context *bld,
500 LLVMValueRef base_ptr,
501 LLVMValueRef indexes)
502 {
503 LLVMBuilderRef builder = bld->base.gallivm->builder;
504 LLVMValueRef res = bld->base.undef;
505 unsigned i;
506
507 /*
508 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
509 */
510 for (i = 0; i < bld->base.type.length; i++) {
511 LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
512 LLVMValueRef index = LLVMBuildExtractElement(builder,
513 indexes, ii, "");
514 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
515 &index, 1, "gather_ptr");
516 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
517
518 res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
519 }
520
521 return res;
522 }
523
524
/**
 * Scatter/store vector: base_ptr[indexes[i]] = values[i], lane by lane.
 * The execution mask and the optional per-lane predicate select which
 * lanes actually store (1 = store, 0 = keep the old value).
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* debug aid, normally disabled */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* masked lane: read-modify-write through a select */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
576
577
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_src_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->SwizzleX;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   rel = LLVMBuildLoad(builder,
                       bld->addr[indirect_reg->Index][swizzle],
                       "load addr reg");

   /* for indexing we want integers */
   rel = LLVMBuildFPToSI(builder,
                         rel,
                         uint_bld->vec_type, "");

   index = lp_build_add(uint_bld, base, rel);

   /* clamp against the highest register declared in this file, so an
    * out-of-range address value cannot index past the register file */
   max_index = lp_build_const_int_vec(bld->base.gallivm,
                                      uint_bld->type,
                                      bld->info->file_max[reg_file]);

   /* the min below relies on an unsigned comparison */
   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}
623
624
/**
 * Register fetch.
 * Load one channel of source operand 'src_op' as an SoA vector,
 * applying the register's swizzle, optional indirect addressing, and
 * the absolute-value / negate source modifiers.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);

         /* a constant is uniform across lanes: load once, broadcast */
         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the input array */
         LLVMValueRef inputs_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast inputs_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
                                         float4_ptr_type, "");

         /* Gather values from the input register array */
         res = build_gather(bld, inputs_array, index_vec);
      } else {
         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
            /* inputs were spilled to the flat array -- load from it */
            LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle);
            LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                                  bld->inputs_array, &lindex, 1, "");
            res = LLVMBuildLoad(builder, input_ptr, "");
         }
         else {
            res = bld->inputs[reg->Register.Index][swizzle];
         }
      }
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   case TGSI_FILE_SYSTEM_VALUE:
      assert(!reg->Register.Indirect);
      {
         LLVMValueRef index;  /* index into the system value array */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(gallivm,
                                      reg->Register.Index * 4 + swizzle);

         /* system values are uniform: load once, broadcast */
         scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply source modifiers */
   if (reg->Register.Absolute) {
      res = lp_build_abs( &bld->base, res );
   }

   if (reg->Register.Negate) {
      res = lp_build_negate( &bld->base, res );
   }

   return res;
}
793
794
795 /**
796 * Register fetch with derivatives.
797 */
798 static void
799 emit_fetch_deriv(
800 struct lp_build_tgsi_soa_context *bld,
801 const struct tgsi_full_instruction *inst,
802 unsigned index,
803 const unsigned chan_index,
804 LLVMValueRef *res,
805 LLVMValueRef *ddx,
806 LLVMValueRef *ddy)
807 {
808 LLVMValueRef src;
809
810 src = emit_fetch(bld, inst, index, chan_index);
811
812 if(res)
813 *res = src;
814
815 /* TODO: use interpolation coeffs for inputs */
816
817 if(ddx)
818 *ddx = lp_build_ddx(&bld->base, src);
819
820 if(ddy)
821 *ddy = lp_build_ddy(&bld->base, src);
822 }
823
824
/**
 * Predicate.
 * Fetch the instruction's predicate register as one integer mask vector
 * per destination channel, applying the predicate swizzle and negate.
 * pred[chan] is set to NULL for all channels when the instruction is
 * not predicated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* unpredicated instruction: every channel stores unconditionally */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.gallivm,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         /* cache so repeated swizzle components are computed only once */
         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
891
892
/**
 * Register store.
 * Store one channel vector 'value' into destination register 'index'
 * of the instruction, honoring the saturate mode, optional indirect
 * addressing, the per-lane predicate and the current execution mask.
 */
static void
emit_store(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *uint_bld = &bld->uint_bld;
   LLVMValueRef indirect_index = NULL;

   /* apply the instruction's saturate mode first */
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      /* clamp to [0, 1] */
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      /* clamp to [-1, 1] */
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      if (reg->Register.Indirect) {
         /* NOTE(review): this path mirrors the TGSI_FILE_TEMPORARY case
          * below almost line for line; keep the two in sync. */
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
                                          float_ptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
                                               chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }
}
1038
1039
1040 /**
1041 * High-level instruction translators.
1042 */
1043
1044 static void
1045 emit_tex( struct lp_build_tgsi_soa_context *bld,
1046 const struct tgsi_full_instruction *inst,
1047 enum lp_build_tex_modifier modifier,
1048 LLVMValueRef *texel)
1049 {
1050 LLVMBuilderRef builder = bld->base.gallivm->builder;
1051 unsigned unit;
1052 LLVMValueRef lod_bias, explicit_lod;
1053 LLVMValueRef oow = NULL;
1054 LLVMValueRef coords[3];
1055 LLVMValueRef ddx[3];
1056 LLVMValueRef ddy[3];
1057 unsigned num_coords;
1058 unsigned i;
1059
1060 if (!bld->sampler) {
1061 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1062 for (i = 0; i < 4; i++) {
1063 texel[i] = bld->base.undef;
1064 }
1065 return;
1066 }
1067
1068 switch (inst->Texture.Texture) {
1069 case TGSI_TEXTURE_1D:
1070 num_coords = 1;
1071 break;
1072 case TGSI_TEXTURE_1D_ARRAY:
1073 case TGSI_TEXTURE_2D:
1074 case TGSI_TEXTURE_RECT:
1075 num_coords = 2;
1076 break;
1077 case TGSI_TEXTURE_SHADOW1D:
1078 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1079 case TGSI_TEXTURE_SHADOW2D:
1080 case TGSI_TEXTURE_SHADOWRECT:
1081 case TGSI_TEXTURE_2D_ARRAY:
1082 case TGSI_TEXTURE_3D:
1083 case TGSI_TEXTURE_CUBE:
1084 num_coords = 3;
1085 break;
1086 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1087 num_coords = 4;
1088 break;
1089 default:
1090 assert(0);
1091 return;
1092 }
1093
1094 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1095 lod_bias = emit_fetch( bld, inst, 0, 3 );
1096 explicit_lod = NULL;
1097 }
1098 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1099 lod_bias = NULL;
1100 explicit_lod = emit_fetch( bld, inst, 0, 3 );
1101 }
1102 else {
1103 lod_bias = NULL;
1104 explicit_lod = NULL;
1105 }
1106
1107 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1108 oow = emit_fetch( bld, inst, 0, 3 );
1109 oow = lp_build_rcp(&bld->base, oow);
1110 }
1111
1112 for (i = 0; i < num_coords; i++) {
1113 coords[i] = emit_fetch( bld, inst, 0, i );
1114 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1115 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1116 }
1117 for (i = num_coords; i < 3; i++) {
1118 coords[i] = bld->base.undef;
1119 }
1120
1121 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1122 LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
1123 for (i = 0; i < num_coords; i++) {
1124 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1125 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1126 ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
1127 ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
1128 }
1129 unit = inst->Src[3].Register.Index;
1130 } else {
1131 for (i = 0; i < num_coords; i++) {
1132 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1133 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1134 }
1135 unit = inst->Src[1].Register.Index;
1136 }
1137 for (i = num_coords; i < 3; i++) {
1138 ddx[i] = LLVMGetUndef(bld->base.elem_type);
1139 ddy[i] = LLVMGetUndef(bld->base.elem_type);
1140 }
1141
1142 bld->sampler->emit_fetch_texel(bld->sampler,
1143 bld->base.gallivm,
1144 bld->base.type,
1145 unit, num_coords, coords,
1146 ddx, ddy,
1147 lod_bias, explicit_lod,
1148 texel);
1149 }
1150
1151 static boolean
1152 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1153 int pc)
1154 {
1155 int i;
1156
1157 for (i = 0; i < 5; i++) {
1158 unsigned opcode;
1159
1160 if (pc + i >= bld->info->num_instructions)
1161 return TRUE;
1162
1163 opcode = bld->instructions[pc + i].Instruction.Opcode;
1164
1165 if (opcode == TGSI_OPCODE_END)
1166 return TRUE;
1167
1168 if (opcode == TGSI_OPCODE_TEX ||
1169 opcode == TGSI_OPCODE_TXP ||
1170 opcode == TGSI_OPCODE_TXD ||
1171 opcode == TGSI_OPCODE_TXB ||
1172 opcode == TGSI_OPCODE_TXL ||
1173 opcode == TGSI_OPCODE_TXF ||
1174 opcode == TGSI_OPCODE_TXQ ||
1175 opcode == TGSI_OPCODE_CAL ||
1176 opcode == TGSI_OPCODE_CALLNZ ||
1177 opcode == TGSI_OPCODE_IF ||
1178 opcode == TGSI_OPCODE_IFC ||
1179 opcode == TGSI_OPCODE_BGNLOOP ||
1180 opcode == TGSI_OPCODE_SWITCH)
1181 return FALSE;
1182 }
1183
1184 return TRUE;
1185 }
1186
1187
1188
1189 /**
1190 * Kill fragment if any of the src register values are negative.
1191 */
1192 static void
1193 emit_kil(
1194 struct lp_build_tgsi_soa_context *bld,
1195 const struct tgsi_full_instruction *inst,
1196 int pc)
1197 {
1198 LLVMBuilderRef builder = bld->base.gallivm->builder;
1199 const struct tgsi_full_src_register *reg = &inst->Src[0];
1200 LLVMValueRef terms[NUM_CHANNELS];
1201 LLVMValueRef mask;
1202 unsigned chan_index;
1203
1204 memset(&terms, 0, sizeof terms);
1205
1206 FOR_EACH_CHANNEL( chan_index ) {
1207 unsigned swizzle;
1208
1209 /* Unswizzle channel */
1210 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1211
1212 /* Check if the component has not been already tested. */
1213 assert(swizzle < NUM_CHANNELS);
1214 if( !terms[swizzle] )
1215 /* TODO: change the comparison operator instead of setting the sign */
1216 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
1217 }
1218
1219 mask = NULL;
1220 FOR_EACH_CHANNEL( chan_index ) {
1221 if(terms[chan_index]) {
1222 LLVMValueRef chan_mask;
1223
1224 /*
1225 * If term < 0 then mask = 0 else mask = ~0.
1226 */
1227 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1228
1229 if(mask)
1230 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1231 else
1232 mask = chan_mask;
1233 }
1234 }
1235
1236 if(mask) {
1237 lp_build_mask_update(bld->mask, mask);
1238
1239 if (!near_end_of_shader(bld, pc))
1240 lp_build_mask_check(bld->mask);
1241 }
1242 }
1243
1244
1245 /**
1246 * Predicated fragment kill.
1247 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1248 * The only predication is the execution mask which will apply if
1249 * we're inside a loop or conditional.
1250 */
1251 static void
1252 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1253 const struct tgsi_full_instruction *inst,
1254 int pc)
1255 {
1256 LLVMBuilderRef builder = bld->base.gallivm->builder;
1257 LLVMValueRef mask;
1258
1259 /* For those channels which are "alive", disable fragment shader
1260 * execution.
1261 */
1262 if (bld->exec_mask.has_mask) {
1263 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1264 }
1265 else {
1266 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1267 mask = zero;
1268 }
1269
1270 lp_build_mask_update(bld->mask, mask);
1271
1272 if (!near_end_of_shader(bld, pc))
1273 lp_build_mask_check(bld->mask);
1274 }
1275
1276
1277 /**
1278 * Emit code which will dump the value of all the temporary registers
1279 * to stdout.
1280 */
1281 static void
1282 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1283 {
1284 struct gallivm_state *gallivm = bld->base.gallivm;
1285 LLVMBuilderRef builder = gallivm->builder;
1286 LLVMValueRef temp_ptr;
1287 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1288 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1289 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1290 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1291 int index;
1292 int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1293
1294 for (index = 0; index < n; index++) {
1295 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1296 LLVMValueRef v[4][4], res;
1297 int chan;
1298
1299 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1300
1301 for (chan = 0; chan < 4; chan++) {
1302 temp_ptr = get_temp_ptr(bld, index, chan);
1303 res = LLVMBuildLoad(builder, temp_ptr, "");
1304 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1305 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1306 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1307 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1308 }
1309
1310 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1311 v[0][0], v[0][1], v[0][2], v[0][3]);
1312 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1313 v[1][0], v[1][1], v[1][2], v[1][3]);
1314 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1315 v[2][0], v[2][1], v[2][2], v[2][3]);
1316 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1317 v[3][0], v[3][1], v[3][2], v[3][3]);
1318 }
1319 }
1320
1321
1322
1323 static void
1324 emit_declaration(
1325 struct lp_build_tgsi_soa_context *bld,
1326 const struct tgsi_full_declaration *decl)
1327 {
1328 struct gallivm_state *gallivm = bld->base.gallivm;
1329 LLVMTypeRef vec_type = bld->base.vec_type;
1330 const unsigned first = decl->Range.First;
1331 const unsigned last = decl->Range.Last;
1332 unsigned idx, i;
1333
1334 for (idx = first; idx <= last; ++idx) {
1335 assert(last <= bld->info->file_max[decl->Declaration.File]);
1336 switch (decl->Declaration.File) {
1337 case TGSI_FILE_TEMPORARY:
1338 assert(idx < LP_MAX_TGSI_TEMPS);
1339 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1340 for (i = 0; i < NUM_CHANNELS; i++)
1341 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1342 }
1343 break;
1344
1345 case TGSI_FILE_OUTPUT:
1346 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1347 for (i = 0; i < NUM_CHANNELS; i++)
1348 bld->outputs[idx][i] = lp_build_alloca(gallivm,
1349 vec_type, "output");
1350 }
1351 break;
1352
1353 case TGSI_FILE_ADDRESS:
1354 assert(idx < LP_MAX_TGSI_ADDRS);
1355 for (i = 0; i < NUM_CHANNELS; i++)
1356 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
1357 break;
1358
1359 case TGSI_FILE_PREDICATE:
1360 assert(idx < LP_MAX_TGSI_PREDS);
1361 for (i = 0; i < NUM_CHANNELS; i++)
1362 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1363 "predicate");
1364 break;
1365
1366 default:
1367 /* don't need to declare other vars */
1368 break;
1369 }
1370 }
1371 }
1372
1373
1374 /**
1375 * Emit LLVM for one TGSI instruction.
1376 * \param return TRUE for success, FALSE otherwise
1377 */
1378 static boolean
1379 emit_instruction(
1380 struct lp_build_tgsi_soa_context *bld,
1381 const struct tgsi_full_instruction *inst,
1382 const struct tgsi_opcode_info *info,
1383 int *pc)
1384 {
1385 unsigned chan_index;
1386 LLVMValueRef src0, src1, src2;
1387 LLVMValueRef tmp0, tmp1, tmp2;
1388 LLVMValueRef tmp3 = NULL;
1389 LLVMValueRef tmp4 = NULL;
1390 LLVMValueRef tmp5 = NULL;
1391 LLVMValueRef tmp6 = NULL;
1392 LLVMValueRef tmp7 = NULL;
1393 LLVMValueRef res;
1394 LLVMValueRef dst0[NUM_CHANNELS];
1395
1396 /*
1397 * Stores and write masks are handled in a general fashion after the long
1398 * instruction opcode switch statement.
1399 *
1400 * Although not stricitly necessary, we avoid generating instructions for
1401 * channels which won't be stored, in cases where's that easy. For some
1402 * complex instructions, like texture sampling, it is more convenient to
1403 * assume a full writemask and then let LLVM optimization passes eliminate
1404 * redundant code.
1405 */
1406
1407 (*pc)++;
1408
1409 assert(info->num_dst <= 1);
1410 if (info->num_dst) {
1411 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1412 dst0[chan_index] = bld->base.undef;
1413 }
1414 }
1415
1416 switch (inst->Instruction.Opcode) {
1417 case TGSI_OPCODE_ARL:
1418 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1419 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1420 tmp0 = lp_build_floor(&bld->base, tmp0);
1421 dst0[chan_index] = tmp0;
1422 }
1423 break;
1424
1425 case TGSI_OPCODE_MOV:
1426 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1427 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1428 }
1429 break;
1430
1431 case TGSI_OPCODE_LIT:
1432 if( IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) {
1433 dst0[TGSI_CHAN_X] = bld->base.one;
1434 }
1435 if( IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
1436 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1437 dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1438 }
1439 if( IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
1440 /* XMM[1] = SrcReg[0].yyyy */
1441 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1442 /* XMM[1] = max(XMM[1], 0) */
1443 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1444 /* XMM[2] = SrcReg[0].wwww */
1445 tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
1446 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1447 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1448 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1449 dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1450 }
1451 if( IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) {
1452 dst0[TGSI_CHAN_W] = bld->base.one;
1453 }
1454 break;
1455
1456 case TGSI_OPCODE_RCP:
1457 /* TGSI_OPCODE_RECIP */
1458 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1459 res = lp_build_rcp(&bld->base, src0);
1460 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1461 dst0[chan_index] = res;
1462 }
1463 break;
1464
1465 case TGSI_OPCODE_RSQ:
1466 /* TGSI_OPCODE_RECIPSQRT */
1467 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1468 src0 = lp_build_abs(&bld->base, src0);
1469 res = lp_build_rsqrt(&bld->base, src0);
1470 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1471 dst0[chan_index] = res;
1472 }
1473 break;
1474
1475 case TGSI_OPCODE_EXP:
1476 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1477 IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
1478 IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
1479 LLVMValueRef *p_exp2_int_part = NULL;
1480 LLVMValueRef *p_frac_part = NULL;
1481 LLVMValueRef *p_exp2 = NULL;
1482
1483 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1484
1485 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1486 p_exp2_int_part = &tmp0;
1487 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
1488 p_frac_part = &tmp1;
1489 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1490 p_exp2 = &tmp2;
1491
1492 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1493
1494 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1495 dst0[TGSI_CHAN_X] = tmp0;
1496 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
1497 dst0[TGSI_CHAN_Y] = tmp1;
1498 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1499 dst0[TGSI_CHAN_Z] = tmp2;
1500 }
1501 /* dst.w = 1.0 */
1502 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
1503 dst0[TGSI_CHAN_W] = bld->base.one;
1504 }
1505 break;
1506
1507 case TGSI_OPCODE_LOG:
1508 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1509 IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
1510 IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
1511 LLVMValueRef *p_floor_log2 = NULL;
1512 LLVMValueRef *p_exp = NULL;
1513 LLVMValueRef *p_log2 = NULL;
1514
1515 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1516 src0 = lp_build_abs( &bld->base, src0 );
1517
1518 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1519 p_floor_log2 = &tmp0;
1520 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
1521 p_exp = &tmp1;
1522 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1523 p_log2 = &tmp2;
1524
1525 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1526
1527 /* dst.x = floor(lg2(abs(src.x))) */
1528 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1529 dst0[TGSI_CHAN_X] = tmp0;
1530 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1531 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) {
1532 dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1533 }
1534 /* dst.z = lg2(abs(src.x)) */
1535 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1536 dst0[TGSI_CHAN_Z] = tmp2;
1537 }
1538 /* dst.w = 1.0 */
1539 if (IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
1540 dst0[TGSI_CHAN_W] = bld->base.one;
1541 }
1542 break;
1543
1544 case TGSI_OPCODE_MUL:
1545 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1546 src0 = emit_fetch( bld, inst, 0, chan_index );
1547 src1 = emit_fetch( bld, inst, 1, chan_index );
1548 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1549 }
1550 break;
1551
1552 case TGSI_OPCODE_ADD:
1553 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1554 src0 = emit_fetch( bld, inst, 0, chan_index );
1555 src1 = emit_fetch( bld, inst, 1, chan_index );
1556 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1557 }
1558 break;
1559
1560 case TGSI_OPCODE_DP3:
1561 /* TGSI_OPCODE_DOT3 */
1562 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1563 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1564 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1565 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1566 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1567 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1568 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1569 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1570 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1571 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1572 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1573 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1574 dst0[chan_index] = tmp0;
1575 }
1576 break;
1577
1578 case TGSI_OPCODE_DP4:
1579 /* TGSI_OPCODE_DOT4 */
1580 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1581 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1582 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1583 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1584 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1585 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1586 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1587 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1588 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1589 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1590 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1591 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
1592 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
1593 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1594 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1595 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1596 dst0[chan_index] = tmp0;
1597 }
1598 break;
1599
1600 case TGSI_OPCODE_DST:
1601 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
1602 dst0[TGSI_CHAN_X] = bld->base.one;
1603 }
1604 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
1605 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1606 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1607 dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1608 }
1609 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
1610 dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1611 }
1612 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
1613 dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
1614 }
1615 break;
1616
1617 case TGSI_OPCODE_MIN:
1618 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1619 src0 = emit_fetch( bld, inst, 0, chan_index );
1620 src1 = emit_fetch( bld, inst, 1, chan_index );
1621 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1622 }
1623 break;
1624
1625 case TGSI_OPCODE_MAX:
1626 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1627 src0 = emit_fetch( bld, inst, 0, chan_index );
1628 src1 = emit_fetch( bld, inst, 1, chan_index );
1629 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1630 }
1631 break;
1632
1633 case TGSI_OPCODE_SLT:
1634 /* TGSI_OPCODE_SETLT */
1635 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1636 src0 = emit_fetch( bld, inst, 0, chan_index );
1637 src1 = emit_fetch( bld, inst, 1, chan_index );
1638 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1639 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1640 }
1641 break;
1642
1643 case TGSI_OPCODE_SGE:
1644 /* TGSI_OPCODE_SETGE */
1645 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1646 src0 = emit_fetch( bld, inst, 0, chan_index );
1647 src1 = emit_fetch( bld, inst, 1, chan_index );
1648 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1649 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1650 }
1651 break;
1652
1653 case TGSI_OPCODE_MAD:
1654 /* TGSI_OPCODE_MADD */
1655 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1656 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1657 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1658 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1659 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1660 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1661 dst0[chan_index] = tmp0;
1662 }
1663 break;
1664
1665 case TGSI_OPCODE_SUB:
1666 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1667 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1668 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1669 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1670 }
1671 break;
1672
1673 case TGSI_OPCODE_LRP:
1674 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1675 src0 = emit_fetch( bld, inst, 0, chan_index );
1676 src1 = emit_fetch( bld, inst, 1, chan_index );
1677 src2 = emit_fetch( bld, inst, 2, chan_index );
1678 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1679 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1680 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1681 }
1682 break;
1683
1684 case TGSI_OPCODE_CND:
1685 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1686 src0 = emit_fetch( bld, inst, 0, chan_index );
1687 src1 = emit_fetch( bld, inst, 1, chan_index );
1688 src2 = emit_fetch( bld, inst, 2, chan_index );
1689 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
1690 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1691 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1692 }
1693 break;
1694
1695 case TGSI_OPCODE_DP2A:
1696 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */
1697 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */
1698 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1699 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */
1700 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */
1701 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1702 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1703 tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X ); /* xmm1 = src[2].x */
1704 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1705 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1706 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1707 }
1708 break;
1709
1710 case TGSI_OPCODE_FRC:
1711 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1712 src0 = emit_fetch( bld, inst, 0, chan_index );
1713 tmp0 = lp_build_floor(&bld->base, src0);
1714 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1715 dst0[chan_index] = tmp0;
1716 }
1717 break;
1718
1719 case TGSI_OPCODE_CLAMP:
1720 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1721 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1722 src1 = emit_fetch( bld, inst, 1, chan_index );
1723 src2 = emit_fetch( bld, inst, 2, chan_index );
1724 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1725 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1726 dst0[chan_index] = tmp0;
1727 }
1728 break;
1729
1730 case TGSI_OPCODE_FLR:
1731 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1732 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1733 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1734 }
1735 break;
1736
1737 case TGSI_OPCODE_ROUND:
1738 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1739 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1740 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1741 }
1742 break;
1743
1744 case TGSI_OPCODE_EX2: {
1745 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1746 tmp0 = lp_build_exp2( &bld->base, tmp0);
1747 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1748 dst0[chan_index] = tmp0;
1749 }
1750 break;
1751 }
1752
1753 case TGSI_OPCODE_LG2:
1754 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1755 tmp0 = lp_build_log2( &bld->base, tmp0);
1756 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1757 dst0[chan_index] = tmp0;
1758 }
1759 break;
1760
1761 case TGSI_OPCODE_POW:
1762 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1763 src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1764 res = lp_build_pow( &bld->base, src0, src1 );
1765 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1766 dst0[chan_index] = res;
1767 }
1768 break;
1769
1770 case TGSI_OPCODE_XPD:
1771 if( IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1772 IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
1773 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1774 tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1775 }
1776 if( IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1777 IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
1778 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1779 tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1780 }
1781 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
1782 tmp2 = tmp0;
1783 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1784 tmp5 = tmp3;
1785 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1786 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1787 dst0[TGSI_CHAN_X] = tmp2;
1788 }
1789 if( IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
1790 IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
1791 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1792 tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1793 }
1794 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
1795 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1796 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1797 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1798 dst0[TGSI_CHAN_Y] = tmp3;
1799 }
1800 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
1801 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1802 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1803 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1804 dst0[TGSI_CHAN_Z] = tmp5;
1805 }
1806 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
1807 dst0[TGSI_CHAN_W] = bld->base.one;
1808 }
1809 break;
1810
1811 case TGSI_OPCODE_ABS:
1812 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1813 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1814 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1815 }
1816 break;
1817
1818 case TGSI_OPCODE_RCC:
1819 /* deprecated? */
1820 assert(0);
1821 return FALSE;
1822
1823 case TGSI_OPCODE_DPH:
1824 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1825 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1826 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1827 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1828 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1829 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1830 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1831 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1832 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1833 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1834 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1835 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
1836 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1837 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1838 dst0[chan_index] = tmp0;
1839 }
1840 break;
1841
1842 case TGSI_OPCODE_COS:
1843 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1844 tmp0 = lp_build_cos( &bld->base, tmp0 );
1845 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1846 dst0[chan_index] = tmp0;
1847 }
1848 break;
1849
1850 case TGSI_OPCODE_DDX:
1851 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1852 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1853 }
1854 break;
1855
1856 case TGSI_OPCODE_DDY:
1857 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1858 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1859 }
1860 break;
1861
1862 case TGSI_OPCODE_KILP:
1863 /* predicated kill */
1864 emit_kilp( bld, inst, (*pc)-1 );
1865 break;
1866
1867 case TGSI_OPCODE_KIL:
1868 /* conditional kill */
1869 emit_kil( bld, inst, (*pc)-1 );
1870 break;
1871
1872 case TGSI_OPCODE_PK2H:
1873 return FALSE;
1874 break;
1875
1876 case TGSI_OPCODE_PK2US:
1877 return FALSE;
1878 break;
1879
1880 case TGSI_OPCODE_PK4B:
1881 return FALSE;
1882 break;
1883
1884 case TGSI_OPCODE_PK4UB:
1885 return FALSE;
1886 break;
1887
1888 case TGSI_OPCODE_RFL:
1889 return FALSE;
1890 break;
1891
1892 case TGSI_OPCODE_SEQ:
1893 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1894 src0 = emit_fetch( bld, inst, 0, chan_index );
1895 src1 = emit_fetch( bld, inst, 1, chan_index );
1896 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1897 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1898 }
1899 break;
1900
1901 case TGSI_OPCODE_SFL:
1902 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1903 dst0[chan_index] = bld->base.zero;
1904 }
1905 break;
1906
1907 case TGSI_OPCODE_SGT:
1908 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1909 src0 = emit_fetch( bld, inst, 0, chan_index );
1910 src1 = emit_fetch( bld, inst, 1, chan_index );
1911 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1912 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1913 }
1914 break;
1915
1916 case TGSI_OPCODE_SIN:
1917 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1918 tmp0 = lp_build_sin( &bld->base, tmp0 );
1919 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1920 dst0[chan_index] = tmp0;
1921 }
1922 break;
1923
1924 case TGSI_OPCODE_SLE:
1925 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1926 src0 = emit_fetch( bld, inst, 0, chan_index );
1927 src1 = emit_fetch( bld, inst, 1, chan_index );
1928 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1929 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1930 }
1931 break;
1932
1933 case TGSI_OPCODE_SNE:
1934 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1935 src0 = emit_fetch( bld, inst, 0, chan_index );
1936 src1 = emit_fetch( bld, inst, 1, chan_index );
1937 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1938 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1939 }
1940 break;
1941
1942 case TGSI_OPCODE_STR:
1943 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1944 dst0[chan_index] = bld->base.one;
1945 }
1946 break;
1947
1948 case TGSI_OPCODE_TEX:
1949 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1950 break;
1951
1952 case TGSI_OPCODE_TXD:
1953 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1954 break;
1955
1956 case TGSI_OPCODE_UP2H:
1957 /* deprecated */
1958 assert (0);
1959 return FALSE;
1960 break;
1961
1962 case TGSI_OPCODE_UP2US:
1963 /* deprecated */
1964 assert(0);
1965 return FALSE;
1966 break;
1967
1968 case TGSI_OPCODE_UP4B:
1969 /* deprecated */
1970 assert(0);
1971 return FALSE;
1972 break;
1973
1974 case TGSI_OPCODE_UP4UB:
1975 /* deprecated */
1976 assert(0);
1977 return FALSE;
1978 break;
1979
1980 case TGSI_OPCODE_X2D:
1981 /* deprecated? */
1982 assert(0);
1983 return FALSE;
1984 break;
1985
1986 case TGSI_OPCODE_ARA:
1987 /* deprecated */
1988 assert(0);
1989 return FALSE;
1990 break;
1991
1992 case TGSI_OPCODE_ARR:
1993 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1994 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1995 tmp0 = lp_build_round(&bld->base, tmp0);
1996 dst0[chan_index] = tmp0;
1997 }
1998 break;
1999
2000 case TGSI_OPCODE_BRA:
2001 /* deprecated */
2002 assert(0);
2003 return FALSE;
2004 break;
2005
2006 case TGSI_OPCODE_CAL:
2007 lp_exec_mask_call(&bld->exec_mask,
2008 inst->Label.Label,
2009 pc);
2010
2011 break;
2012
2013 case TGSI_OPCODE_RET:
2014 lp_exec_mask_ret(&bld->exec_mask, pc);
2015 break;
2016
2017 case TGSI_OPCODE_END:
2018 if (0) {
2019 /* for debugging */
2020 emit_dump_temps(bld);
2021 }
2022 *pc = -1;
2023 break;
2024
2025 case TGSI_OPCODE_SSG:
2026 /* TGSI_OPCODE_SGN */
2027 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2028 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2029 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
2030 }
2031 break;
2032
2033 case TGSI_OPCODE_CMP:
2034 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2035 src0 = emit_fetch( bld, inst, 0, chan_index );
2036 src1 = emit_fetch( bld, inst, 1, chan_index );
2037 src2 = emit_fetch( bld, inst, 2, chan_index );
2038 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
2039 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
2040 }
2041 break;
2042
2043 case TGSI_OPCODE_SCS:
2044 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
2045 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
2046 dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2047 }
2048 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
2049 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
2050 dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2051 }
2052 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
2053 dst0[TGSI_CHAN_Z] = bld->base.zero;
2054 }
2055 IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
2056 dst0[TGSI_CHAN_W] = bld->base.one;
2057 }
2058 break;
2059
2060 case TGSI_OPCODE_TXB:
2061 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2062 break;
2063
2064 case TGSI_OPCODE_NRM:
2065 /* fall-through */
2066 case TGSI_OPCODE_NRM4:
2067 /* 3 or 4-component normalization */
2068 {
2069 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2070
2071 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) ||
2072 IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) ||
2073 IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) ||
2074 (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 4)) {
2075
2076 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2077
2078 /* xmm4 = src.x */
2079 /* xmm0 = src.x * src.x */
2080 tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
2081 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
2082 tmp4 = tmp0;
2083 }
2084 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2085
2086 /* xmm5 = src.y */
2087 /* xmm0 = xmm0 + src.y * src.y */
2088 tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y);
2089 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
2090 tmp5 = tmp1;
2091 }
2092 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2093 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2094
2095 /* xmm6 = src.z */
2096 /* xmm0 = xmm0 + src.z * src.z */
2097 tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z);
2098 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
2099 tmp6 = tmp1;
2100 }
2101 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2102 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2103
2104 if (dims == 4) {
2105 /* xmm7 = src.w */
2106 /* xmm0 = xmm0 + src.w * src.w */
2107 tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W);
2108 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) {
2109 tmp7 = tmp1;
2110 }
2111 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2112 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2113 }
2114
2115 /* xmm1 = 1 / sqrt(xmm0) */
2116 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2117
2118 /* dst.x = xmm1 * src.x */
2119 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
2120 dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2121 }
2122
2123 /* dst.y = xmm1 * src.y */
2124 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
2125 dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2126 }
2127
2128 /* dst.z = xmm1 * src.z */
2129 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
2130 dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2131 }
2132
2133 /* dst.w = xmm1 * src.w */
2134 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) && dims == 4) {
2135 dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2136 }
2137 }
2138
2139 /* dst.w = 1.0 */
2140 if (IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 3) {
2141 dst0[TGSI_CHAN_W] = bld->base.one;
2142 }
2143 }
2144 break;
2145
2146 case TGSI_OPCODE_DIV:
2147 /* deprecated */
2148 assert( 0 );
2149 return FALSE;
2150 break;
2151
2152 case TGSI_OPCODE_DP2:
2153 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */
2154 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */
2155 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
2156 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */
2157 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */
2158 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
2159 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
2160 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2161 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
2162 }
2163 break;
2164
2165 case TGSI_OPCODE_TXL:
2166 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2167 break;
2168
2169 case TGSI_OPCODE_TXP:
2170 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2171 break;
2172
2173 case TGSI_OPCODE_BRK:
2174 lp_exec_break(&bld->exec_mask);
2175 break;
2176
2177 case TGSI_OPCODE_IF:
2178 tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
2179 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2180 tmp0, bld->base.zero);
2181 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2182 break;
2183
2184 case TGSI_OPCODE_BGNLOOP:
2185 lp_exec_bgnloop(&bld->exec_mask);
2186 break;
2187
2188 case TGSI_OPCODE_BGNSUB:
2189 lp_exec_mask_bgnsub(&bld->exec_mask);
2190 break;
2191
2192 case TGSI_OPCODE_ELSE:
2193 lp_exec_mask_cond_invert(&bld->exec_mask);
2194 break;
2195
2196 case TGSI_OPCODE_ENDIF:
2197 lp_exec_mask_cond_pop(&bld->exec_mask);
2198 break;
2199
2200 case TGSI_OPCODE_ENDLOOP:
2201 lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
2202 break;
2203
2204 case TGSI_OPCODE_ENDSUB:
2205 lp_exec_mask_endsub(&bld->exec_mask, pc);
2206 break;
2207
2208 case TGSI_OPCODE_PUSHA:
2209 /* deprecated? */
2210 assert(0);
2211 return FALSE;
2212 break;
2213
2214 case TGSI_OPCODE_POPA:
2215 /* deprecated? */
2216 assert(0);
2217 return FALSE;
2218 break;
2219
2220 case TGSI_OPCODE_CEIL:
2221 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2222 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2223 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2224 }
2225 break;
2226
2227 case TGSI_OPCODE_I2F:
2228 /* deprecated? */
2229 assert(0);
2230 return FALSE;
2231 break;
2232
2233 case TGSI_OPCODE_NOT:
2234 /* deprecated? */
2235 assert(0);
2236 return FALSE;
2237 break;
2238
2239 case TGSI_OPCODE_TRUNC:
2240 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2241 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2242 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2243 }
2244 break;
2245
2246 case TGSI_OPCODE_SHL:
2247 /* deprecated? */
2248 assert(0);
2249 return FALSE;
2250 break;
2251
2252 case TGSI_OPCODE_ISHR:
2253 /* deprecated? */
2254 assert(0);
2255 return FALSE;
2256 break;
2257
2258 case TGSI_OPCODE_AND:
2259 /* deprecated? */
2260 assert(0);
2261 return FALSE;
2262 break;
2263
2264 case TGSI_OPCODE_OR:
2265 /* deprecated? */
2266 assert(0);
2267 return FALSE;
2268 break;
2269
2270 case TGSI_OPCODE_MOD:
2271 /* deprecated? */
2272 assert(0);
2273 return FALSE;
2274 break;
2275
2276 case TGSI_OPCODE_XOR:
2277 /* deprecated? */
2278 assert(0);
2279 return FALSE;
2280 break;
2281
2282 case TGSI_OPCODE_SAD:
2283 /* deprecated? */
2284 assert(0);
2285 return FALSE;
2286 break;
2287
2288 case TGSI_OPCODE_TXF:
2289 /* deprecated? */
2290 assert(0);
2291 return FALSE;
2292 break;
2293
2294 case TGSI_OPCODE_TXQ:
2295 /* deprecated? */
2296 assert(0);
2297 return FALSE;
2298 break;
2299
2300 case TGSI_OPCODE_CONT:
2301 lp_exec_continue(&bld->exec_mask);
2302 break;
2303
2304 case TGSI_OPCODE_EMIT:
2305 return FALSE;
2306 break;
2307
2308 case TGSI_OPCODE_ENDPRIM:
2309 return FALSE;
2310 break;
2311
2312 case TGSI_OPCODE_NOP:
2313 break;
2314
2315 default:
2316 return FALSE;
2317 }
2318
2319 if(info->num_dst) {
2320 LLVMValueRef pred[NUM_CHANNELS];
2321
2322 emit_fetch_predicate( bld, inst, pred );
2323
2324 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2325 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2326 }
2327 }
2328
2329 return TRUE;
2330 }
2331
2332
2333 void
2334 lp_build_tgsi_soa(struct gallivm_state *gallivm,
2335 const struct tgsi_token *tokens,
2336 struct lp_type type,
2337 struct lp_build_mask_context *mask,
2338 LLVMValueRef consts_ptr,
2339 LLVMValueRef system_values_array,
2340 const LLVMValueRef *pos,
2341 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2342 LLVMValueRef (*outputs)[NUM_CHANNELS],
2343 struct lp_build_sampler_soa *sampler,
2344 const struct tgsi_shader_info *info)
2345 {
2346 struct lp_build_tgsi_soa_context bld;
2347 struct tgsi_parse_context parse;
2348 uint num_immediates = 0;
2349 uint num_instructions = 0;
2350 unsigned i;
2351 int pc = 0;
2352
2353 struct lp_type res_type;
2354
2355 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2356 memset(&res_type, 0, sizeof res_type);
2357 res_type.width = type.width;
2358 res_type.length = type.length;
2359 res_type.sign = 1;
2360
2361 /* Setup build context */
2362 memset(&bld, 0, sizeof bld);
2363 lp_build_context_init(&bld.base, gallivm, type);
2364 lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
2365 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2366 bld.mask = mask;
2367 bld.pos = pos;
2368 bld.inputs = inputs;
2369 bld.outputs = outputs;
2370 bld.consts_ptr = consts_ptr;
2371 bld.sampler = sampler;
2372 bld.info = info;
2373 bld.indirect_files = info->indirect_files;
2374 bld.instructions = (struct tgsi_full_instruction *)
2375 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2376 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2377
2378 if (!bld.instructions) {
2379 return;
2380 }
2381
2382 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2383
2384 if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2385 LLVMValueRef array_size =
2386 lp_build_const_int32(gallivm,
2387 info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2388 bld.temps_array = lp_build_array_alloca(gallivm,
2389 bld.base.vec_type, array_size,
2390 "temp_array");
2391 }
2392
2393 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2394 LLVMValueRef array_size =
2395 lp_build_const_int32(gallivm,
2396 info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2397 bld.outputs_array = lp_build_array_alloca(gallivm,
2398 bld.base.vec_type, array_size,
2399 "output_array");
2400 }
2401
2402 /* If we have indirect addressing in inputs we need to copy them into
2403 * our alloca array to be able to iterate over them */
2404 if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
2405 unsigned index, chan;
2406 LLVMTypeRef vec_type = bld.base.vec_type;
2407 LLVMValueRef array_size =
2408 lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
2409 bld.inputs_array = lp_build_array_alloca(gallivm,
2410 vec_type, array_size,
2411 "input_array");
2412
2413 assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
2414
2415 for (index = 0; index < info->num_inputs; ++index) {
2416 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2417 LLVMValueRef lindex =
2418 lp_build_const_int32(gallivm, index * 4 + chan);
2419 LLVMValueRef input_ptr =
2420 LLVMBuildGEP(gallivm->builder, bld.inputs_array,
2421 &lindex, 1, "");
2422 LLVMValueRef value = bld.inputs[index][chan];
2423 if (value)
2424 LLVMBuildStore(gallivm->builder, value, input_ptr);
2425 }
2426 }
2427 }
2428
2429 bld.system_values_array = system_values_array;
2430
2431 tgsi_parse_init( &parse, tokens );
2432
2433 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2434 tgsi_parse_token( &parse );
2435
2436 switch( parse.FullToken.Token.Type ) {
2437 case TGSI_TOKEN_TYPE_DECLARATION:
2438 /* Inputs already interpolated */
2439 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2440 break;
2441
2442 case TGSI_TOKEN_TYPE_INSTRUCTION:
2443 {
2444 /* save expanded instruction */
2445 if (num_instructions == bld.max_instructions) {
2446 struct tgsi_full_instruction *instructions;
2447 instructions = REALLOC(bld.instructions,
2448 bld.max_instructions
2449 * sizeof(struct tgsi_full_instruction),
2450 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2451 * sizeof(struct tgsi_full_instruction));
2452 if (!instructions) {
2453 break;
2454 }
2455 bld.instructions = instructions;
2456 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2457 }
2458
2459 memcpy(bld.instructions + num_instructions,
2460 &parse.FullToken.FullInstruction,
2461 sizeof(bld.instructions[0]));
2462
2463 num_instructions++;
2464 }
2465
2466 break;
2467
2468 case TGSI_TOKEN_TYPE_IMMEDIATE:
2469 /* simply copy the immediate values into the next immediates[] slot */
2470 {
2471 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2472 assert(size <= 4);
2473 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2474 for( i = 0; i < size; ++i )
2475 bld.immediates[num_immediates][i] =
2476 lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
2477 for( i = size; i < 4; ++i )
2478 bld.immediates[num_immediates][i] = bld.base.undef;
2479 num_immediates++;
2480 }
2481 break;
2482
2483 case TGSI_TOKEN_TYPE_PROPERTY:
2484 break;
2485
2486 default:
2487 assert( 0 );
2488 }
2489 }
2490
2491 while (pc != -1) {
2492 struct tgsi_full_instruction *instr = bld.instructions + pc;
2493 const struct tgsi_opcode_info *opcode_info =
2494 tgsi_get_opcode_info(instr->Instruction.Opcode);
2495 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2496 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2497 opcode_info->mnemonic);
2498 }
2499
2500 /* If we have indirect addressing in outputs we need to copy our alloca array
2501 * to the outputs slots specified by the called */
2502 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2503 unsigned index, chan;
2504 assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2505 for (index = 0; index < info->num_outputs; ++index) {
2506 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2507 bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2508 }
2509 }
2510 }
2511
2512 if (0) {
2513 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2514 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2515 debug_printf("11111111111111111111111111111 \n");
2516 tgsi_dump(tokens, 0);
2517 lp_debug_dump_value(function);
2518 debug_printf("2222222222222222222222222222 \n");
2519 }
2520 tgsi_parse_free( &parse );
2521
2522 if (0) {
2523 LLVMModuleRef module = LLVMGetGlobalParent(
2524 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2525 LLVMDumpModule(module);
2526
2527 }
2528
2529 FREE( bld.instructions );
2530 }
2531
2532
2533 /**
2534 * Build up the system values array out of individual values such as
2535 * the instance ID, front-face, primitive ID, etc. The shader info is
2536 * used to determine which system values are needed and where to put
2537 * them in the system values array.
2538 *
2539 * XXX only instance ID is implemented at this time.
2540 *
2541 * The system values register file is similar to the constants buffer.
2542 * Example declaration:
2543 * DCL SV[0], INSTANCEID
2544 * Example instruction:
2545 * MOVE foo, SV[0].xxxx;
2546 *
2547 * \return LLVM float array (interpreted as float [][4])
2548 */
2549 LLVMValueRef
2550 lp_build_system_values_array(struct gallivm_state *gallivm,
2551 const struct tgsi_shader_info *info,
2552 LLVMValueRef instance_id,
2553 LLVMValueRef facing)
2554 {
2555 LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values);
2556 LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context);
2557 LLVMValueRef array = lp_build_array_alloca(gallivm, float_t,
2558 size, "sysvals_array");
2559 unsigned i;
2560
2561 for (i = 0; i < info->num_system_values; i++) {
2562 LLVMValueRef index = lp_build_const_int32(gallivm, i * 4);
2563 LLVMValueRef ptr, value = 0;
2564
2565 switch (info->system_value_semantic_name[i]) {
2566 case TGSI_SEMANTIC_INSTANCEID:
2567 /* convert instance ID from int to float */
2568 value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t,
2569 "sysval_instanceid");
2570 break;
2571 case TGSI_SEMANTIC_FACE:
2572 /* fall-through */
2573 default:
2574 assert(0 && "unexpected semantic in build_system_values_array()");
2575 }
2576
2577 ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, "");
2578 LLVMBuildStore(gallivm->builder, value, ptr);
2579 }
2580
2581 return array;
2582 }