gallivm: add pixel offsets in scatter stores
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_info.h"
46 #include "tgsi/tgsi_parse.h"
47 #include "tgsi/tgsi_util.h"
48 #include "tgsi/tgsi_scan.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_quad.h"
58 #include "lp_bld_tgsi.h"
59 #include "lp_bld_limits.h"
60 #include "lp_bld_debug.h"
61 #include "lp_bld_printf.h"
62
63
/* Iterate CHAN over all four vector components (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Test whether CHAN is enabled in the write mask of instruction dst 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst 0's write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* TGSI channel indices */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial size of the instruction array; grown on demand. */
#define LP_MAX_INSTRUCTIONS 256
84
85
/**
 * Runtime execution mask for TGSI control flow (SoA).
 *
 * Each sub-mask is an integer vector holding ~0 (active) or 0 (inactive)
 * per channel; exec_mask combines them (see lp_exec_mask_update).
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any control-flow construct is active, i.e. exec_mask
    * may differ from all-ones and stores must be predicated. */
   boolean has_mask;

   LLVMTypeRef int_vec_type;

   /* Saved cond_mask values for nested IF/ELSE/ENDIF */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Current loop state, plus a stack of it for nested BGNLOOP/ENDLOOP */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   /* alloca that carries break_mask across loop iterations */
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Return mask and per-call saved state for CAL/RET subroutines */
   LLVMValueRef ret_mask;
   struct {
      int pc;                 /* instruction index to resume at after ENDSUB */
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* Combination of cond/cont/break/ret masks, applied to stores */
   LLVMValueRef exec_mask;
};
118
/**
 * Per-shader translation context for TGSI -> LLVM IR (SoA layout).
 */
struct lp_build_tgsi_soa_context
{
   /* Builder/context for floating-point vectors */
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context uint_bld;

   /* Pointer to the constant buffer (array of scalars) */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Texture sampling code generator supplied by the caller (may be NULL) */
   const struct lp_build_sampler_soa *sampler;

   /* Register files: immediates are plain values; temps/addr/preds are
    * allocas holding one vector per channel. */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   /* Fragment live-mask (KIL/KILP) and control-flow execution mask */
   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* Copy of the parsed instructions, needed for CAL/RET pc jumps */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
154
155 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
156 {
157 mask->bld = bld;
158 mask->has_mask = FALSE;
159 mask->cond_stack_size = 0;
160 mask->loop_stack_size = 0;
161 mask->call_stack_size = 0;
162
163 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
164 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
165 LLVMConstAllOnes(mask->int_vec_type);
166 }
167
168 static void lp_exec_mask_update(struct lp_exec_mask *mask)
169 {
170 if (mask->loop_stack_size) {
171 /*for loops we need to update the entire mask at runtime */
172 LLVMValueRef tmp;
173 assert(mask->break_mask);
174 tmp = LLVMBuildAnd(mask->bld->builder,
175 mask->cont_mask,
176 mask->break_mask,
177 "maskcb");
178 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
179 mask->cond_mask,
180 tmp,
181 "maskfull");
182 } else
183 mask->exec_mask = mask->cond_mask;
184
185 if (mask->call_stack_size) {
186 mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
187 mask->exec_mask,
188 mask->ret_mask,
189 "callmask");
190 }
191
192 mask->has_mask = (mask->cond_stack_size > 0 ||
193 mask->loop_stack_size > 0 ||
194 mask->call_stack_size > 0);
195 }
196
/**
 * Begin an IF: save the current cond_mask on the stack and AND in the
 * new condition.
 * \param val  condition mask (integer vector, ~0/0 per channel)
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* outermost IF: cond_mask must still be pristine */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
212
/**
 * ELSE: invert the current condition relative to the enclosing mask,
 * i.e. cond_mask = ~cond_mask & saved_parent_mask.
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      /* outermost IF: the saved parent mask must be all-ones */
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
231
/**
 * ENDIF: restore the cond_mask that was active before the matching IF.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}
238
/**
 * BGNLOOP: push the enclosing loop state, allocate a break_var that
 * carries break_mask across iterations, and start the loop basic block.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      /* outermost loop: state must be pristine */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* save enclosing loop's state */
   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask must live in memory: the loop body may be re-entered
    * with a value computed on a previous iteration */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* fall through into the new loop header block */
   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
267
268 static void lp_exec_break(struct lp_exec_mask *mask)
269 {
270 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
271 mask->exec_mask,
272 "break");
273
274 mask->break_mask = LLVMBuildAnd(mask->bld->builder,
275 mask->break_mask,
276 exec_mask, "break_full");
277
278 lp_exec_mask_update(mask);
279 }
280
281 static void lp_exec_continue(struct lp_exec_mask *mask)
282 {
283 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
284 mask->exec_mask,
285 "");
286
287 mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
288 mask->cont_mask,
289 exec_mask, "");
290
291 lp_exec_mask_update(mask);
292 }
293
294
/**
 * ENDLOOP: branch back to the loop header while any channel is still
 * active, then pop the enclosing loop state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* integer wide enough to hold the whole mask vector as one scalar */
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (mask != 0), i.e. any channel still active? */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   /* loop again while any channel is active, otherwise exit */
   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   /* pop the enclosing loop's state */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}
340
/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 *
 * \param pred  optional per-channel predicate mask (may be NULL),
 *              ANDed with exec_mask when control flow is active.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      /* read-modify-write: keep old values in disabled channels */
      LLVMValueRef real_val, dst_val;

      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* unpredicated: plain store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}
372
/**
 * CAL: save the return pc and current ret_mask, then jump to the
 * subroutine by rewriting *pc.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}
383
/**
 * RET: disable the currently-executing channels for the remainder of the
 * subroutine.  A return from main() terminates translation (*pc = -1).
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
403
/* BGNSUB: nothing to do — masks are saved/restored by CAL/ENDSUB. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
407
/**
 * ENDSUB: pop the call stack — resume at the saved pc and restore the
 * caller's ret_mask.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}
416
417
418 /**
419 * Return pointer to a temporary register channel (src or dest).
420 * Note that indirect addressing cannot be handled here.
421 * \param index which temporary register
422 * \param chan which channel of the temp register.
423 */
424 static LLVMValueRef
425 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
426 unsigned index,
427 unsigned chan)
428 {
429 assert(chan < 4);
430 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
431 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
432 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
433 }
434 else {
435 return bld->temps[index][chan];
436 }
437 }
438
439
/**
 * Gather vector.
 * Load one scalar per channel from base_ptr[indexes[i]] and assemble the
 * results into a single vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "gather_ptr");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}
469
470
/**
 * Scatter/store vector.
 * Store each element of 'values' to base_ptr[indexes[i]].
 */
static void
build_scatter(struct lp_build_tgsi_soa_context *bld,
              LLVMValueRef base_ptr,
              LLVMValueRef indexes,
              LLVMValueRef values)
{
   LLVMBuilderRef builder = bld->base.builder;
   unsigned i;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");

      /* disabled debug output */
      if (0)
         lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      LLVMBuildStore(builder, val, scalar_ptr);
   }
}
499
500
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 * \param reg_file      TGSI register file being indexed (for clamping)
 * \param reg_index     base register index to add the ADDR value to
 * \param indirect_reg  the ADDR register reference (only SwizzleX is used)
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_src_register *indirect_reg)
{
   struct lp_build_context *uint_bld = &bld->uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->SwizzleX;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(uint_bld->type, reg_index);

   assert(swizzle < 4);
   rel = LLVMBuildLoad(bld->base.builder,
                       bld->addr[indirect_reg->Index][swizzle],
                       "load addr reg");

   /* for indexing we want integers */
   rel = LLVMBuildFPToSI(bld->base.builder,
                         rel,
                         uint_bld->vec_type, "");

   index = lp_build_add(uint_bld, base, rel);

   /* clamp to the declared size of the register file to avoid
    * out-of-bounds access */
   max_index = lp_build_const_int_vec(uint_bld->type,
                                      bld->info->file_max[reg_file]);

   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}
544
545
/**
 * Register fetch.
 * Fetch one channel of a source operand as a vector, applying swizzle,
 * indirect addressing and the absolute/negate modifiers.
 * \param src_op      which source operand of the instruction
 * \param chan_index  which destination channel is being computed
 * \return the fetched vector, or base.undef on error
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      /* per-channel register index vector, clamped to the file size */
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         /* direct access: load one scalar and broadcast it */
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply absolute-value / negation source modifiers */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}
670
671
/**
 * Register fetch with derivatives.
 * Fetch a source channel and optionally compute its screen-space
 * x/y derivatives (quad-based approximation).
 * \param res  if non-NULL, receives the fetched value
 * \param ddx  if non-NULL, receives d/dx of the value
 * \param ddy  if non-NULL, receives d/dy of the value
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   LLVMValueRef src;

   src = emit_fetch(bld, inst, index, chan_index);

   if(res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if(ddx)
      *ddx = lp_build_ddx(&bld->base, src);

   if(ddy)
      *ddy = lp_build_ddy(&bld->base, src);
}
700
701
/**
 * Predicate.
 * Build the per-channel predicate masks for a predicated instruction:
 * load the predicate register, compare != 0 and apply optional negation.
 * \param pred  receives one mask per channel (NULL if not predicated)
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* not predicated: no masks */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
767
768
/**
 * Register store.
 * Store one channel of a destination operand, applying the saturate
 * modifier, indirect addressing and predicate/execution masking.
 * \param index       which destination operand of the instruction
 * \param chan_index  which channel to store
 * \param pred        per-channel predicate mask, or NULL
 */
static void
emit_store(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *uint_bld = &bld->uint_bld;
   LLVMValueRef indirect_index = NULL;

   /* apply saturation modifier before storing */
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->outputs[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         /* each channel may go to a different temp register; use a
          * scatter store into the flat temps array */
         LLVMBuilderRef builder = bld->base.builder;
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float_ptr_type, "");

         /* Scatter store values into temp registers */
         build_scatter(bld, temps_array, index_vec, value);
      }
      else {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* NOTE(review): uses reg->Indirect.Index rather than
       * reg->Register.Index like the other cases — looks suspicious;
       * verify which index identifies the ADDR register being written. */
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->addr[reg->Indirect.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }
}
873
874
875 /**
876 * High-level instruction translators.
877 */
878
/**
 * Emit a texture-sampling instruction (TEX/TXP/TXB/TXL/TXD).
 * Fetches coordinates (and derivatives or lod as required by the
 * modifier) and delegates sampling to bld->sampler.
 * \param texel  receives the four resulting color channels
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      /* no sampler generator: return undefined texels rather than crash */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* number of coordinates consumed by the texture target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias / explicit lod come from the W channel of src 0 */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projective texturing: divide coords by W (multiply by 1/W) */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives come from src 1 / src 2; only the first element
       * of each vector is used */
      LLVMTypeRef i32t = LLVMInt32Type();
      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
      for (i = 0; i < num_coords; i++) {
         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives computed from the coords across the quad */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = LLVMGetUndef(bld->base.elem_type);
      ddy[i] = LLVMGetUndef(bld->base.elem_type);
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}
979
/**
 * Return TRUE if the shader is about to end (within the next 5
 * instructions) with no intervening control flow or texturing, in which
 * case an early mask-check branch after KIL is not worthwhile.
 * \param pc  index of the next instruction to be translated
 */
static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   int i;

   for (i = 0; i < 5; i++) {
      unsigned opcode;

      if (pc + i >= bld->info->num_instructions)
         return TRUE;

      opcode = bld->instructions[pc + i].Instruction.Opcode;

      if (opcode == TGSI_OPCODE_END)
         return TRUE;

      /* texture fetches and control flow make skipping worthwhile */
      if (opcode == TGSI_OPCODE_TEX ||
         opcode == TGSI_OPCODE_TXP ||
         opcode == TGSI_OPCODE_TXD ||
         opcode == TGSI_OPCODE_TXB ||
         opcode == TGSI_OPCODE_TXL ||
         opcode == TGSI_OPCODE_TXF ||
         opcode == TGSI_OPCODE_TXQ ||
         opcode == TGSI_OPCODE_CAL ||
         opcode == TGSI_OPCODE_CALLNZ ||
         opcode == TGSI_OPCODE_IF ||
         opcode == TGSI_OPCODE_IFC ||
         opcode == TGSI_OPCODE_BGNLOOP ||
         opcode == TGSI_OPCODE_SWITCH)
         return FALSE;
   }

   return TRUE;
}
1015
1016
1017
/**
 * Kill fragment if any of the src register values are negative.
 * Builds a live-mask update (channel stays alive iff all tested
 * components are >= 0) and optionally emits an early-out branch.
 * \param pc  index of the next instruction, for near_end_of_shader()
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-component "keep this fragment" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask) {
      lp_build_mask_update(bld->mask, mask);

      /* skip the rest of the shader if nothing is left alive, unless
       * we're about to end anyway */
      if (!near_end_of_shader(bld, pc))
         lp_build_mask_check(bld->mask);
   }
}
1071
1072
/**
 * Predicated fragment kill.
 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 * \param pc  index of the next instruction, for near_end_of_shader()
 */
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          int pc)
{
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      /* no control flow active: kill everything */
      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
      mask = zero;
   }

   lp_build_mask_update(bld->mask, mask);

   /* skip the rest of the shader unless it ends soon anyway */
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}
1102
1103
/**
 * Emit code which will dump the value of all the temporary registers
 * to stdout.
 * Debug aid: generates lp_build_printf calls, so the output appears at
 * shader run time, not at compile time.
 */
static void
emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
{
   LLVMBuilderRef builder = bld->base.builder;
   LLVMValueRef temp_ptr;
   LLVMValueRef i0 = lp_build_const_int32(0);
   LLVMValueRef i1 = lp_build_const_int32(1);
   LLVMValueRef i2 = lp_build_const_int32(2);
   LLVMValueRef i3 = lp_build_const_int32(3);
   int index;
   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];

   for (index = 0; index < n; index++) {
      LLVMValueRef idx = lp_build_const_int32(index);
      LLVMValueRef v[4][4], res;
      int chan;

      lp_build_printf(builder, "TEMP[%d]:\n", idx);

      /* extract the first four lanes of each channel vector */
      for (chan = 0; chan < 4; chan++) {
         temp_ptr = get_temp_ptr(bld, index, chan);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
      }

      lp_build_printf(builder, "  X: %f %f %f %f\n",
                      v[0][0], v[0][1], v[0][2], v[0][3]);
      lp_build_printf(builder, "  Y: %f %f %f %f\n",
                      v[1][0], v[1][1], v[1][2], v[1][3]);
      lp_build_printf(builder, "  Z: %f %f %f %f\n",
                      v[2][0], v[2][1], v[2][2], v[2][3]);
      lp_build_printf(builder, "  W: %f %f %f %f\n",
                      v[3][0], v[3][1], v[3][2], v[3][3]);
   }
}
1146
1147
1148
1149 static void
1150 emit_declaration(
1151 struct lp_build_tgsi_soa_context *bld,
1152 const struct tgsi_full_declaration *decl)
1153 {
1154 LLVMTypeRef vec_type = bld->base.vec_type;
1155
1156 unsigned first = decl->Range.First;
1157 unsigned last = decl->Range.Last;
1158 unsigned idx, i;
1159
1160 for (idx = first; idx <= last; ++idx) {
1161 assert(last <= bld->info->file_max[decl->Declaration.File]);
1162 switch (decl->Declaration.File) {
1163 case TGSI_FILE_TEMPORARY:
1164 assert(idx < LP_MAX_TGSI_TEMPS);
1165 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1166 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
1167 last*4 + 4, 0);
1168 bld->temps_array = lp_build_array_alloca(bld->base.builder,
1169 vec_type, array_size, "temparray");
1170 } else {
1171 for (i = 0; i < NUM_CHANNELS; i++)
1172 bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1173 vec_type, "temp");
1174 }
1175 break;
1176
1177 case TGSI_FILE_OUTPUT:
1178 for (i = 0; i < NUM_CHANNELS; i++)
1179 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1180 vec_type, "output");
1181 break;
1182
1183 case TGSI_FILE_ADDRESS:
1184 assert(idx < LP_MAX_TGSI_ADDRS);
1185 for (i = 0; i < NUM_CHANNELS; i++)
1186 bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1187 vec_type, "addr");
1188 break;
1189
1190 case TGSI_FILE_PREDICATE:
1191 assert(idx < LP_MAX_TGSI_PREDS);
1192 for (i = 0; i < NUM_CHANNELS; i++)
1193 bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1194 vec_type, "predicate");
1195 break;
1196
1197 default:
1198 /* don't need to declare other vars */
1199 break;
1200 }
1201 }
1202 }
1203
1204
1205 /**
1206 * Emit LLVM for one TGSI instruction.
1207 * \param return TRUE for success, FALSE otherwise
1208 */
1209 static boolean
1210 emit_instruction(
1211 struct lp_build_tgsi_soa_context *bld,
1212 const struct tgsi_full_instruction *inst,
1213 const struct tgsi_opcode_info *info,
1214 int *pc)
1215 {
1216 unsigned chan_index;
1217 LLVMValueRef src0, src1, src2;
1218 LLVMValueRef tmp0, tmp1, tmp2;
1219 LLVMValueRef tmp3 = NULL;
1220 LLVMValueRef tmp4 = NULL;
1221 LLVMValueRef tmp5 = NULL;
1222 LLVMValueRef tmp6 = NULL;
1223 LLVMValueRef tmp7 = NULL;
1224 LLVMValueRef res;
1225 LLVMValueRef dst0[NUM_CHANNELS];
1226
1227 /*
1228 * Stores and write masks are handled in a general fashion after the long
1229 * instruction opcode switch statement.
1230 *
1231 * Although not stricitly necessary, we avoid generating instructions for
1232 * channels which won't be stored, in cases where's that easy. For some
1233 * complex instructions, like texture sampling, it is more convenient to
1234 * assume a full writemask and then let LLVM optimization passes eliminate
1235 * redundant code.
1236 */
1237
1238 (*pc)++;
1239
1240 assert(info->num_dst <= 1);
1241 if (info->num_dst) {
1242 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1243 dst0[chan_index] = bld->base.undef;
1244 }
1245 }
1246
1247 switch (inst->Instruction.Opcode) {
1248 case TGSI_OPCODE_ARL:
1249 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1250 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1251 tmp0 = lp_build_floor(&bld->base, tmp0);
1252 dst0[chan_index] = tmp0;
1253 }
1254 break;
1255
1256 case TGSI_OPCODE_MOV:
1257 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1258 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1259 }
1260 break;
1261
1262 case TGSI_OPCODE_LIT:
1263 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1264 dst0[CHAN_X] = bld->base.one;
1265 }
1266 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1267 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1268 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1269 }
1270 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1271 /* XMM[1] = SrcReg[0].yyyy */
1272 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1273 /* XMM[1] = max(XMM[1], 0) */
1274 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1275 /* XMM[2] = SrcReg[0].wwww */
1276 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1277 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1278 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1279 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1280 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1281 }
1282 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1283 dst0[CHAN_W] = bld->base.one;
1284 }
1285 break;
1286
1287 case TGSI_OPCODE_RCP:
1288 /* TGSI_OPCODE_RECIP */
1289 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1290 res = lp_build_rcp(&bld->base, src0);
1291 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1292 dst0[chan_index] = res;
1293 }
1294 break;
1295
1296 case TGSI_OPCODE_RSQ:
1297 /* TGSI_OPCODE_RECIPSQRT */
1298 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1299 src0 = lp_build_abs(&bld->base, src0);
1300 res = lp_build_rsqrt(&bld->base, src0);
1301 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1302 dst0[chan_index] = res;
1303 }
1304 break;
1305
1306 case TGSI_OPCODE_EXP:
1307 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1308 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1309 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1310 LLVMValueRef *p_exp2_int_part = NULL;
1311 LLVMValueRef *p_frac_part = NULL;
1312 LLVMValueRef *p_exp2 = NULL;
1313
1314 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1315
1316 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1317 p_exp2_int_part = &tmp0;
1318 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1319 p_frac_part = &tmp1;
1320 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1321 p_exp2 = &tmp2;
1322
1323 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1324
1325 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1326 dst0[CHAN_X] = tmp0;
1327 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1328 dst0[CHAN_Y] = tmp1;
1329 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1330 dst0[CHAN_Z] = tmp2;
1331 }
1332 /* dst.w = 1.0 */
1333 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1334 dst0[CHAN_W] = bld->base.one;
1335 }
1336 break;
1337
1338 case TGSI_OPCODE_LOG:
1339 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1340 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1341 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1342 LLVMValueRef *p_floor_log2 = NULL;
1343 LLVMValueRef *p_exp = NULL;
1344 LLVMValueRef *p_log2 = NULL;
1345
1346 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1347 src0 = lp_build_abs( &bld->base, src0 );
1348
1349 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1350 p_floor_log2 = &tmp0;
1351 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1352 p_exp = &tmp1;
1353 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1354 p_log2 = &tmp2;
1355
1356 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1357
1358 /* dst.x = floor(lg2(abs(src.x))) */
1359 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1360 dst0[CHAN_X] = tmp0;
1361 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1362 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1363 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1364 }
1365 /* dst.z = lg2(abs(src.x)) */
1366 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1367 dst0[CHAN_Z] = tmp2;
1368 }
1369 /* dst.w = 1.0 */
1370 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1371 dst0[CHAN_W] = bld->base.one;
1372 }
1373 break;
1374
1375 case TGSI_OPCODE_MUL:
1376 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1377 src0 = emit_fetch( bld, inst, 0, chan_index );
1378 src1 = emit_fetch( bld, inst, 1, chan_index );
1379 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1380 }
1381 break;
1382
1383 case TGSI_OPCODE_ADD:
1384 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1385 src0 = emit_fetch( bld, inst, 0, chan_index );
1386 src1 = emit_fetch( bld, inst, 1, chan_index );
1387 dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1388 }
1389 break;
1390
1391 case TGSI_OPCODE_DP3:
1392 /* TGSI_OPCODE_DOT3 */
1393 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1394 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1395 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1396 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1397 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1398 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1399 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1400 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1401 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1402 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1403 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1405 dst0[chan_index] = tmp0;
1406 }
1407 break;
1408
1409 case TGSI_OPCODE_DP4:
1410 /* TGSI_OPCODE_DOT4 */
1411 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1412 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1413 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1414 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1415 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1416 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1417 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1418 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1419 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1420 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1421 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1422 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1423 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1424 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1425 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1426 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1427 dst0[chan_index] = tmp0;
1428 }
1429 break;
1430
1431 case TGSI_OPCODE_DST:
1432 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1433 dst0[CHAN_X] = bld->base.one;
1434 }
1435 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1436 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1437 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1438 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1439 }
1440 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1441 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1442 }
1443 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1444 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1445 }
1446 break;
1447
1448 case TGSI_OPCODE_MIN:
1449 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1450 src0 = emit_fetch( bld, inst, 0, chan_index );
1451 src1 = emit_fetch( bld, inst, 1, chan_index );
1452 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1453 }
1454 break;
1455
1456 case TGSI_OPCODE_MAX:
1457 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1458 src0 = emit_fetch( bld, inst, 0, chan_index );
1459 src1 = emit_fetch( bld, inst, 1, chan_index );
1460 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1461 }
1462 break;
1463
1464 case TGSI_OPCODE_SLT:
1465 /* TGSI_OPCODE_SETLT */
1466 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1467 src0 = emit_fetch( bld, inst, 0, chan_index );
1468 src1 = emit_fetch( bld, inst, 1, chan_index );
1469 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1470 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1471 }
1472 break;
1473
1474 case TGSI_OPCODE_SGE:
1475 /* TGSI_OPCODE_SETGE */
1476 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1477 src0 = emit_fetch( bld, inst, 0, chan_index );
1478 src1 = emit_fetch( bld, inst, 1, chan_index );
1479 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1480 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1481 }
1482 break;
1483
1484 case TGSI_OPCODE_MAD:
1485 /* TGSI_OPCODE_MADD */
1486 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1487 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1488 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1489 tmp2 = emit_fetch( bld, inst, 2, chan_index );
1490 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1491 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1492 dst0[chan_index] = tmp0;
1493 }
1494 break;
1495
1496 case TGSI_OPCODE_SUB:
1497 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1498 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1499 tmp1 = emit_fetch( bld, inst, 1, chan_index );
1500 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1501 }
1502 break;
1503
1504 case TGSI_OPCODE_LRP:
1505 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1506 src0 = emit_fetch( bld, inst, 0, chan_index );
1507 src1 = emit_fetch( bld, inst, 1, chan_index );
1508 src2 = emit_fetch( bld, inst, 2, chan_index );
1509 tmp0 = lp_build_sub( &bld->base, src1, src2 );
1510 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1511 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1512 }
1513 break;
1514
1515 case TGSI_OPCODE_CND:
1516 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1517 src0 = emit_fetch( bld, inst, 0, chan_index );
1518 src1 = emit_fetch( bld, inst, 1, chan_index );
1519 src2 = emit_fetch( bld, inst, 2, chan_index );
1520 tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1521 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1522 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1523 }
1524 break;
1525
1526 case TGSI_OPCODE_DP2A:
1527 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1528 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1529 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1530 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1531 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1532 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1533 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1534 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
1535 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1536 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1537 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1538 }
1539 break;
1540
1541 case TGSI_OPCODE_FRC:
1542 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1543 src0 = emit_fetch( bld, inst, 0, chan_index );
1544 tmp0 = lp_build_floor(&bld->base, src0);
1545 tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1546 dst0[chan_index] = tmp0;
1547 }
1548 break;
1549
1550 case TGSI_OPCODE_CLAMP:
1551 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1552 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1553 src1 = emit_fetch( bld, inst, 1, chan_index );
1554 src2 = emit_fetch( bld, inst, 2, chan_index );
1555 tmp0 = lp_build_max(&bld->base, tmp0, src1);
1556 tmp0 = lp_build_min(&bld->base, tmp0, src2);
1557 dst0[chan_index] = tmp0;
1558 }
1559 break;
1560
1561 case TGSI_OPCODE_FLR:
1562 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1563 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1564 dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1565 }
1566 break;
1567
1568 case TGSI_OPCODE_ROUND:
1569 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1570 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1571 dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1572 }
1573 break;
1574
1575 case TGSI_OPCODE_EX2: {
1576 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1577 tmp0 = lp_build_exp2( &bld->base, tmp0);
1578 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1579 dst0[chan_index] = tmp0;
1580 }
1581 break;
1582 }
1583
1584 case TGSI_OPCODE_LG2:
1585 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1586 tmp0 = lp_build_log2( &bld->base, tmp0);
1587 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1588 dst0[chan_index] = tmp0;
1589 }
1590 break;
1591
1592 case TGSI_OPCODE_POW:
1593 src0 = emit_fetch( bld, inst, 0, CHAN_X );
1594 src1 = emit_fetch( bld, inst, 1, CHAN_X );
1595 res = lp_build_pow( &bld->base, src0, src1 );
1596 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1597 dst0[chan_index] = res;
1598 }
1599 break;
1600
1601 case TGSI_OPCODE_XPD:
1602 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1603 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1604 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1605 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1606 }
1607 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1608 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1609 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1610 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1611 }
1612 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1613 tmp2 = tmp0;
1614 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1615 tmp5 = tmp3;
1616 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1617 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1618 dst0[CHAN_X] = tmp2;
1619 }
1620 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1621 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1622 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1623 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1624 }
1625 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1626 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1627 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1628 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1629 dst0[CHAN_Y] = tmp3;
1630 }
1631 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1632 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1633 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1634 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1635 dst0[CHAN_Z] = tmp5;
1636 }
1637 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1638 dst0[CHAN_W] = bld->base.one;
1639 }
1640 break;
1641
1642 case TGSI_OPCODE_ABS:
1643 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1644 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1645 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1646 }
1647 break;
1648
1649 case TGSI_OPCODE_RCC:
1650 /* deprecated? */
1651 assert(0);
1652 return FALSE;
1653
1654 case TGSI_OPCODE_DPH:
1655 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1656 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1657 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1658 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1659 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1660 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1661 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1662 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1663 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1664 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1665 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1666 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1667 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1668 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1669 dst0[chan_index] = tmp0;
1670 }
1671 break;
1672
1673 case TGSI_OPCODE_COS:
1674 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1675 tmp0 = lp_build_cos( &bld->base, tmp0 );
1676 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1677 dst0[chan_index] = tmp0;
1678 }
1679 break;
1680
1681 case TGSI_OPCODE_DDX:
1682 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1683 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1684 }
1685 break;
1686
1687 case TGSI_OPCODE_DDY:
1688 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1689 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1690 }
1691 break;
1692
1693 case TGSI_OPCODE_KILP:
1694 /* predicated kill */
1695 emit_kilp( bld, inst, (*pc)-1 );
1696 break;
1697
1698 case TGSI_OPCODE_KIL:
1699 /* conditional kill */
1700 emit_kil( bld, inst, (*pc)-1 );
1701 break;
1702
1703 case TGSI_OPCODE_PK2H:
1704 return FALSE;
1705 break;
1706
1707 case TGSI_OPCODE_PK2US:
1708 return FALSE;
1709 break;
1710
1711 case TGSI_OPCODE_PK4B:
1712 return FALSE;
1713 break;
1714
1715 case TGSI_OPCODE_PK4UB:
1716 return FALSE;
1717 break;
1718
1719 case TGSI_OPCODE_RFL:
1720 return FALSE;
1721 break;
1722
1723 case TGSI_OPCODE_SEQ:
1724 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1725 src0 = emit_fetch( bld, inst, 0, chan_index );
1726 src1 = emit_fetch( bld, inst, 1, chan_index );
1727 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1728 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1729 }
1730 break;
1731
1732 case TGSI_OPCODE_SFL:
1733 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1734 dst0[chan_index] = bld->base.zero;
1735 }
1736 break;
1737
1738 case TGSI_OPCODE_SGT:
1739 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1740 src0 = emit_fetch( bld, inst, 0, chan_index );
1741 src1 = emit_fetch( bld, inst, 1, chan_index );
1742 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1743 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1744 }
1745 break;
1746
1747 case TGSI_OPCODE_SIN:
1748 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1749 tmp0 = lp_build_sin( &bld->base, tmp0 );
1750 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1751 dst0[chan_index] = tmp0;
1752 }
1753 break;
1754
1755 case TGSI_OPCODE_SLE:
1756 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1757 src0 = emit_fetch( bld, inst, 0, chan_index );
1758 src1 = emit_fetch( bld, inst, 1, chan_index );
1759 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1760 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1761 }
1762 break;
1763
1764 case TGSI_OPCODE_SNE:
1765 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1766 src0 = emit_fetch( bld, inst, 0, chan_index );
1767 src1 = emit_fetch( bld, inst, 1, chan_index );
1768 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1769 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1770 }
1771 break;
1772
1773 case TGSI_OPCODE_STR:
1774 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1775 dst0[chan_index] = bld->base.one;
1776 }
1777 break;
1778
1779 case TGSI_OPCODE_TEX:
1780 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1781 break;
1782
1783 case TGSI_OPCODE_TXD:
1784 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1785 break;
1786
1787 case TGSI_OPCODE_UP2H:
1788 /* deprecated */
1789 assert (0);
1790 return FALSE;
1791 break;
1792
1793 case TGSI_OPCODE_UP2US:
1794 /* deprecated */
1795 assert(0);
1796 return FALSE;
1797 break;
1798
1799 case TGSI_OPCODE_UP4B:
1800 /* deprecated */
1801 assert(0);
1802 return FALSE;
1803 break;
1804
1805 case TGSI_OPCODE_UP4UB:
1806 /* deprecated */
1807 assert(0);
1808 return FALSE;
1809 break;
1810
1811 case TGSI_OPCODE_X2D:
1812 /* deprecated? */
1813 assert(0);
1814 return FALSE;
1815 break;
1816
1817 case TGSI_OPCODE_ARA:
1818 /* deprecated */
1819 assert(0);
1820 return FALSE;
1821 break;
1822
1823 case TGSI_OPCODE_ARR:
1824 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1825 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1826 tmp0 = lp_build_round(&bld->base, tmp0);
1827 dst0[chan_index] = tmp0;
1828 }
1829 break;
1830
1831 case TGSI_OPCODE_BRA:
1832 /* deprecated */
1833 assert(0);
1834 return FALSE;
1835 break;
1836
1837 case TGSI_OPCODE_CAL:
1838 lp_exec_mask_call(&bld->exec_mask,
1839 inst->Label.Label,
1840 pc);
1841
1842 break;
1843
1844 case TGSI_OPCODE_RET:
1845 lp_exec_mask_ret(&bld->exec_mask, pc);
1846 break;
1847
1848 case TGSI_OPCODE_END:
1849 if (0) {
1850 /* for debugging */
1851 emit_dump_temps(bld);
1852 }
1853 *pc = -1;
1854 break;
1855
1856 case TGSI_OPCODE_SSG:
1857 /* TGSI_OPCODE_SGN */
1858 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1859 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1860 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1861 }
1862 break;
1863
1864 case TGSI_OPCODE_CMP:
1865 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1866 src0 = emit_fetch( bld, inst, 0, chan_index );
1867 src1 = emit_fetch( bld, inst, 1, chan_index );
1868 src2 = emit_fetch( bld, inst, 2, chan_index );
1869 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1870 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1871 }
1872 break;
1873
1874 case TGSI_OPCODE_SCS:
1875 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1876 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1877 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1878 }
1879 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1880 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1881 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1882 }
1883 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1884 dst0[CHAN_Z] = bld->base.zero;
1885 }
1886 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1887 dst0[CHAN_W] = bld->base.one;
1888 }
1889 break;
1890
1891 case TGSI_OPCODE_TXB:
1892 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
1893 break;
1894
1895 case TGSI_OPCODE_NRM:
1896 /* fall-through */
1897 case TGSI_OPCODE_NRM4:
1898 /* 3 or 4-component normalization */
1899 {
1900 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1901
1902 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1903 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1904 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1905 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1906
1907 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1908
1909 /* xmm4 = src.x */
1910 /* xmm0 = src.x * src.x */
1911 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1912 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1913 tmp4 = tmp0;
1914 }
1915 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1916
1917 /* xmm5 = src.y */
1918 /* xmm0 = xmm0 + src.y * src.y */
1919 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1920 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1921 tmp5 = tmp1;
1922 }
1923 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1924 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1925
1926 /* xmm6 = src.z */
1927 /* xmm0 = xmm0 + src.z * src.z */
1928 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1929 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1930 tmp6 = tmp1;
1931 }
1932 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1933 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1934
1935 if (dims == 4) {
1936 /* xmm7 = src.w */
1937 /* xmm0 = xmm0 + src.w * src.w */
1938 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1939 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1940 tmp7 = tmp1;
1941 }
1942 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1943 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1944 }
1945
1946 /* xmm1 = 1 / sqrt(xmm0) */
1947 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1948
1949 /* dst.x = xmm1 * src.x */
1950 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1951 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1952 }
1953
1954 /* dst.y = xmm1 * src.y */
1955 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1956 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1957 }
1958
1959 /* dst.z = xmm1 * src.z */
1960 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1961 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1962 }
1963
1964 /* dst.w = xmm1 * src.w */
1965 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1966 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1967 }
1968 }
1969
1970 /* dst.w = 1.0 */
1971 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1972 dst0[CHAN_W] = bld->base.one;
1973 }
1974 }
1975 break;
1976
1977 case TGSI_OPCODE_DIV:
1978 /* deprecated */
1979 assert( 0 );
1980 return FALSE;
1981 break;
1982
1983 case TGSI_OPCODE_DP2:
1984 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1985 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1986 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1987 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1988 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1989 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1990 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1991 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1992 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
1993 }
1994 break;
1995
1996 case TGSI_OPCODE_TXL:
1997 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1998 break;
1999
2000 case TGSI_OPCODE_TXP:
2001 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2002 break;
2003
2004 case TGSI_OPCODE_BRK:
2005 lp_exec_break(&bld->exec_mask);
2006 break;
2007
2008 case TGSI_OPCODE_IF:
2009 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2010 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2011 tmp0, bld->base.zero);
2012 lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2013 break;
2014
2015 case TGSI_OPCODE_BGNLOOP:
2016 lp_exec_bgnloop(&bld->exec_mask);
2017 break;
2018
2019 case TGSI_OPCODE_BGNSUB:
2020 lp_exec_mask_bgnsub(&bld->exec_mask);
2021 break;
2022
2023 case TGSI_OPCODE_ELSE:
2024 lp_exec_mask_cond_invert(&bld->exec_mask);
2025 break;
2026
2027 case TGSI_OPCODE_ENDIF:
2028 lp_exec_mask_cond_pop(&bld->exec_mask);
2029 break;
2030
2031 case TGSI_OPCODE_ENDLOOP:
2032 lp_exec_endloop(&bld->exec_mask);
2033 break;
2034
2035 case TGSI_OPCODE_ENDSUB:
2036 lp_exec_mask_endsub(&bld->exec_mask, pc);
2037 break;
2038
2039 case TGSI_OPCODE_PUSHA:
2040 /* deprecated? */
2041 assert(0);
2042 return FALSE;
2043 break;
2044
2045 case TGSI_OPCODE_POPA:
2046 /* deprecated? */
2047 assert(0);
2048 return FALSE;
2049 break;
2050
2051 case TGSI_OPCODE_CEIL:
2052 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2053 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2054 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2055 }
2056 break;
2057
2058 case TGSI_OPCODE_I2F:
2059 /* deprecated? */
2060 assert(0);
2061 return FALSE;
2062 break;
2063
2064 case TGSI_OPCODE_NOT:
2065 /* deprecated? */
2066 assert(0);
2067 return FALSE;
2068 break;
2069
2070 case TGSI_OPCODE_TRUNC:
2071 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2072 tmp0 = emit_fetch( bld, inst, 0, chan_index );
2073 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2074 }
2075 break;
2076
2077 case TGSI_OPCODE_SHL:
2078 /* deprecated? */
2079 assert(0);
2080 return FALSE;
2081 break;
2082
2083 case TGSI_OPCODE_ISHR:
2084 /* deprecated? */
2085 assert(0);
2086 return FALSE;
2087 break;
2088
2089 case TGSI_OPCODE_AND:
2090 /* deprecated? */
2091 assert(0);
2092 return FALSE;
2093 break;
2094
2095 case TGSI_OPCODE_OR:
2096 /* deprecated? */
2097 assert(0);
2098 return FALSE;
2099 break;
2100
2101 case TGSI_OPCODE_MOD:
2102 /* deprecated? */
2103 assert(0);
2104 return FALSE;
2105 break;
2106
2107 case TGSI_OPCODE_XOR:
2108 /* deprecated? */
2109 assert(0);
2110 return FALSE;
2111 break;
2112
2113 case TGSI_OPCODE_SAD:
2114 /* deprecated? */
2115 assert(0);
2116 return FALSE;
2117 break;
2118
2119 case TGSI_OPCODE_TXF:
2120 /* deprecated? */
2121 assert(0);
2122 return FALSE;
2123 break;
2124
2125 case TGSI_OPCODE_TXQ:
2126 /* deprecated? */
2127 assert(0);
2128 return FALSE;
2129 break;
2130
2131 case TGSI_OPCODE_CONT:
2132 lp_exec_continue(&bld->exec_mask);
2133 break;
2134
2135 case TGSI_OPCODE_EMIT:
2136 return FALSE;
2137 break;
2138
2139 case TGSI_OPCODE_ENDPRIM:
2140 return FALSE;
2141 break;
2142
2143 case TGSI_OPCODE_NOP:
2144 break;
2145
2146 default:
2147 return FALSE;
2148 }
2149
2150 if(info->num_dst) {
2151 LLVMValueRef pred[NUM_CHANNELS];
2152
2153 emit_fetch_predicate( bld, inst, pred );
2154
2155 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2156 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2157 }
2158 }
2159
2160 return TRUE;
2161 }
2162
2163
2164 void
2165 lp_build_tgsi_soa(LLVMBuilderRef builder,
2166 const struct tgsi_token *tokens,
2167 struct lp_type type,
2168 struct lp_build_mask_context *mask,
2169 LLVMValueRef consts_ptr,
2170 const LLVMValueRef *pos,
2171 const LLVMValueRef (*inputs)[NUM_CHANNELS],
2172 LLVMValueRef (*outputs)[NUM_CHANNELS],
2173 struct lp_build_sampler_soa *sampler,
2174 const struct tgsi_shader_info *info)
2175 {
2176 struct lp_build_tgsi_soa_context bld;
2177 struct tgsi_parse_context parse;
2178 uint num_immediates = 0;
2179 uint num_instructions = 0;
2180 unsigned i;
2181 int pc = 0;
2182
2183 struct lp_type res_type;
2184
2185 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2186 memset(&res_type, 0, sizeof res_type);
2187 res_type.width = type.width;
2188 res_type.length = type.length;
2189 res_type.sign = 1;
2190
2191 /* Setup build context */
2192 memset(&bld, 0, sizeof bld);
2193 lp_build_context_init(&bld.base, builder, type);
2194 lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
2195 bld.mask = mask;
2196 bld.pos = pos;
2197 bld.inputs = inputs;
2198 bld.outputs = outputs;
2199 bld.consts_ptr = consts_ptr;
2200 bld.sampler = sampler;
2201 bld.info = info;
2202 bld.indirect_files = info->indirect_files;
2203 bld.instructions = (struct tgsi_full_instruction *)
2204 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2205 bld.max_instructions = LP_MAX_INSTRUCTIONS;
2206
2207 if (!bld.instructions) {
2208 return;
2209 }
2210
2211 lp_exec_mask_init(&bld.exec_mask, &bld.base);
2212
2213 tgsi_parse_init( &parse, tokens );
2214
2215 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2216 tgsi_parse_token( &parse );
2217
2218 switch( parse.FullToken.Token.Type ) {
2219 case TGSI_TOKEN_TYPE_DECLARATION:
2220 /* Inputs already interpolated */
2221 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2222 break;
2223
2224 case TGSI_TOKEN_TYPE_INSTRUCTION:
2225 {
2226 /* save expanded instruction */
2227 if (num_instructions == bld.max_instructions) {
2228 struct tgsi_full_instruction *instructions;
2229 instructions = REALLOC(bld.instructions,
2230 bld.max_instructions
2231 * sizeof(struct tgsi_full_instruction),
2232 (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2233 * sizeof(struct tgsi_full_instruction));
2234 if (!instructions) {
2235 break;
2236 }
2237 bld.instructions = instructions;
2238 bld.max_instructions += LP_MAX_INSTRUCTIONS;
2239 }
2240
2241 memcpy(bld.instructions + num_instructions,
2242 &parse.FullToken.FullInstruction,
2243 sizeof(bld.instructions[0]));
2244
2245 num_instructions++;
2246 }
2247
2248 break;
2249
2250 case TGSI_TOKEN_TYPE_IMMEDIATE:
2251 /* simply copy the immediate values into the next immediates[] slot */
2252 {
2253 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2254 assert(size <= 4);
2255 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2256 for( i = 0; i < size; ++i )
2257 bld.immediates[num_immediates][i] =
2258 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2259 for( i = size; i < 4; ++i )
2260 bld.immediates[num_immediates][i] = bld.base.undef;
2261 num_immediates++;
2262 }
2263 break;
2264
2265 case TGSI_TOKEN_TYPE_PROPERTY:
2266 break;
2267
2268 default:
2269 assert( 0 );
2270 }
2271 }
2272
2273 while (pc != -1) {
2274 struct tgsi_full_instruction *instr = bld.instructions + pc;
2275 const struct tgsi_opcode_info *opcode_info =
2276 tgsi_get_opcode_info(instr->Instruction.Opcode);
2277 if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2278 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2279 opcode_info->mnemonic);
2280 }
2281
2282 if (0) {
2283 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2284 LLVMValueRef function = LLVMGetBasicBlockParent(block);
2285 debug_printf("11111111111111111111111111111 \n");
2286 tgsi_dump(tokens, 0);
2287 lp_debug_dump_value(function);
2288 debug_printf("2222222222222222222222222222 \n");
2289 }
2290 tgsi_parse_free( &parse );
2291
2292 if (0) {
2293 LLVMModuleRef module = LLVMGetGlobalParent(
2294 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2295 LLVMDumpModule(module);
2296
2297 }
2298
2299 FREE( bld.instructions );
2300 }
2301