mesa/gallium: Move u_bit_scan{,64} from gallium to util.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60
61
62 /**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66 static LLVMValueRef
67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68 LLVMValueRef a,
69 unsigned swizzle_x,
70 unsigned swizzle_y,
71 unsigned swizzle_z,
72 unsigned swizzle_w)
73 {
74 unsigned char swizzles[4];
75 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77 assert(swizzle_x < 4);
78 assert(swizzle_y < 4);
79 assert(swizzle_z < 4);
80 assert(swizzle_w < 4);
81
82 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89
90
91 static LLVMValueRef
92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93 LLVMValueRef a,
94 unsigned chan)
95 {
96 chan = bld->swizzles[chan];
97 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98 }
99
100
101 static LLVMValueRef
102 emit_fetch_constant(
103 struct lp_build_tgsi_context * bld_base,
104 const struct tgsi_full_src_register * reg,
105 enum tgsi_opcode_type stype,
106 unsigned swizzle)
107 {
108 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110 struct lp_type type = bld_base->base.type;
111 LLVMValueRef res;
112 unsigned chan;
113
114 assert(!reg->Register.Indirect);
115
116 /*
117 * Get the constants components
118 */
119
120 res = bld->bld_base.base.undef;
121 for (chan = 0; chan < 4; ++chan) {
122 LLVMValueRef index;
123 LLVMValueRef scalar_ptr;
124 LLVMValueRef scalar;
125 LLVMValueRef swizzle;
126
127 index = lp_build_const_int32(bld->bld_base.base.gallivm,
128 reg->Register.Index * 4 + chan);
129
130 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136 /*
137 * NOTE: constants array is always assumed to be RGBA
138 */
139
140 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141 bld->swizzles[chan]);
142
143 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144 }
145
146 /*
147 * Broadcast the first quaternion to all others.
148 *
149 * XXX: could be factored into a reusable function.
150 */
151
152 if (type.length > 4) {
153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154 unsigned i;
155
156 for (chan = 0; chan < 4; ++chan) {
157 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158 }
159
160 for (i = 4; i < type.length; ++i) {
161 shuffles[i] = shuffles[i % 4];
162 }
163
164 res = LLVMBuildShuffleVector(builder,
165 res, bld->bld_base.base.undef,
166 LLVMConstVector(shuffles, type.length),
167 "");
168 }
169 return res;
170 }
171
172 static LLVMValueRef
173 emit_fetch_immediate(
174 struct lp_build_tgsi_context * bld_base,
175 const struct tgsi_full_src_register * reg,
176 enum tgsi_opcode_type stype,
177 unsigned swizzle)
178 {
179 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180 LLVMValueRef res = bld->immediates[reg->Register.Index];
181 assert(res);
182 return res;
183 }
184
185 static LLVMValueRef
186 emit_fetch_input(
187 struct lp_build_tgsi_context * bld_base,
188 const struct tgsi_full_src_register * reg,
189 enum tgsi_opcode_type stype,
190 unsigned swizzle)
191 {
192 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193 LLVMValueRef res = bld->inputs[reg->Register.Index];
194 assert(!reg->Register.Indirect);
195 assert(res);
196 return res;
197 }
198
199 static LLVMValueRef
200 emit_fetch_temporary(
201 struct lp_build_tgsi_context * bld_base,
202 const struct tgsi_full_src_register * reg,
203 enum tgsi_opcode_type stype,
204 unsigned swizzle)
205 {
206 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210 assert(!reg->Register.Indirect);
211 if (!res)
212 return bld->bld_base.base.undef;
213
214 return res;
215 }
216
217 /**
218 * Register store.
219 */
220 void
221 lp_emit_store_aos(
222 struct lp_build_tgsi_aos_context *bld,
223 const struct tgsi_full_instruction *inst,
224 unsigned index,
225 LLVMValueRef value)
226 {
227 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229 LLVMValueRef mask = NULL;
230 LLVMValueRef ptr;
231
232 /*
233 * Saturate the value
234 */
235 if (inst->Instruction.Saturate) {
236 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
237 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
238 }
239
240 /*
241 * Translate the register file
242 */
243
244 assert(!reg->Register.Indirect);
245
246 switch (reg->Register.File) {
247 case TGSI_FILE_OUTPUT:
248 ptr = bld->outputs[reg->Register.Index];
249 break;
250
251 case TGSI_FILE_TEMPORARY:
252 ptr = bld->temps[reg->Register.Index];
253 break;
254
255 case TGSI_FILE_ADDRESS:
256 ptr = bld->addr[reg->Indirect.Index];
257 break;
258
259 case TGSI_FILE_PREDICATE:
260 ptr = bld->preds[reg->Register.Index];
261 break;
262
263 default:
264 assert(0);
265 return;
266 }
267
268 if (!ptr)
269 return;
270 /*
271 * Predicate
272 */
273
274 if (inst->Instruction.Predicate) {
275 LLVMValueRef pred;
276
277 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
278
279 pred = LLVMBuildLoad(builder,
280 bld->preds[inst->Predicate.Index], "");
281
282 /*
283 * Convert the value to an integer mask.
284 */
285 pred = lp_build_compare(bld->bld_base.base.gallivm,
286 bld->bld_base.base.type,
287 PIPE_FUNC_NOTEQUAL,
288 pred,
289 bld->bld_base.base.zero);
290
291 if (inst->Predicate.Negate) {
292 pred = LLVMBuildNot(builder, pred, "");
293 }
294
295 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
296 inst->Predicate.SwizzleX,
297 inst->Predicate.SwizzleY,
298 inst->Predicate.SwizzleZ,
299 inst->Predicate.SwizzleW);
300
301 if (mask) {
302 mask = LLVMBuildAnd(builder, mask, pred, "");
303 } else {
304 mask = pred;
305 }
306 }
307
308 /*
309 * Writemask
310 */
311
312 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
313 LLVMValueRef writemask;
314
315 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
316 bld->bld_base.base.type,
317 reg->Register.WriteMask,
318 TGSI_NUM_CHANNELS,
319 bld->swizzles);
320
321 if (mask) {
322 mask = LLVMBuildAnd(builder, mask, writemask, "");
323 } else {
324 mask = writemask;
325 }
326 }
327
328 if (mask) {
329 LLVMValueRef orig_value;
330
331 orig_value = LLVMBuildLoad(builder, ptr, "");
332 value = lp_build_select(&bld->bld_base.base,
333 mask, value, orig_value);
334 }
335
336 LLVMBuildStore(builder, value, ptr);
337 }
338
339
340 /**
341 * High-level instruction translators.
342 */
343
344 static LLVMValueRef
345 emit_tex(struct lp_build_tgsi_aos_context *bld,
346 const struct tgsi_full_instruction *inst,
347 enum lp_build_tex_modifier modifier)
348 {
349 unsigned target;
350 unsigned unit;
351 LLVMValueRef coords;
352 struct lp_derivatives derivs = { {NULL}, {NULL} };
353
354 if (!bld->sampler) {
355 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
356 return bld->bld_base.base.undef;
357 }
358
359 target = inst->Texture.Texture;
360
361 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
362
363 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
364 /* probably not going to work */
365 derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
366 derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
367 unit = inst->Src[3].Register.Index;
368 }
369 else {
370 unit = inst->Src[1].Register.Index;
371 }
372 return bld->sampler->emit_fetch_texel(bld->sampler,
373 &bld->bld_base.base,
374 target, unit,
375 coords, derivs,
376 modifier);
377 }
378
379
380 static LLVMValueRef
381 emit_sample(struct lp_build_tgsi_aos_context *bld,
382 const struct tgsi_full_instruction *inst,
383 enum lp_build_tex_modifier modifier)
384 {
385 unsigned target;
386 unsigned unit;
387 LLVMValueRef coords;
388 struct lp_derivatives derivs = { {NULL}, {NULL} };
389
390 if (!bld->sampler) {
391 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
392 return bld->bld_base.base.undef;
393 }
394
395 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
396
397 /* ignore modifiers, can't handle different sampler / sampler view, etc... */
398 unit = inst->Src[1].Register.Index;
399 assert(inst->Src[2].Register.Index == unit);
400
401 target = bld->sv[unit].Resource;
402
403 return bld->sampler->emit_fetch_texel(bld->sampler,
404 &bld->bld_base.base,
405 target, unit,
406 coords, derivs,
407 modifier);
408 }
409
410
411 void
412 lp_emit_declaration_aos(
413 struct lp_build_tgsi_aos_context *bld,
414 const struct tgsi_full_declaration *decl)
415 {
416 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
417 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
418
419 unsigned first = decl->Range.First;
420 unsigned last = decl->Range.Last;
421 unsigned idx;
422
423 for (idx = first; idx <= last; ++idx) {
424 switch (decl->Declaration.File) {
425 case TGSI_FILE_TEMPORARY:
426 assert(idx < LP_MAX_INLINED_TEMPS);
427 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
428 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
429 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
430 vec_type, array_size, "");
431 } else {
432 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
433 }
434 break;
435
436 case TGSI_FILE_OUTPUT:
437 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
438 break;
439
440 case TGSI_FILE_ADDRESS:
441 assert(idx < LP_MAX_TGSI_ADDRS);
442 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
443 break;
444
445 case TGSI_FILE_PREDICATE:
446 assert(idx < LP_MAX_TGSI_PREDS);
447 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
448 break;
449
450 case TGSI_FILE_SAMPLER_VIEW:
451 /*
452 * The target stored here MUST match whatever there actually
453 * is in the set sampler views (what about return type?).
454 */
455 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
456 for (idx = first; idx <= last; ++idx) {
457 bld->sv[idx] = decl->SamplerView;
458 }
459 break;
460
461 default:
462 /* don't need to declare other vars */
463 break;
464 }
465 }
466 }
467
468
469 /**
470 * Emit LLVM for one TGSI instruction.
471 * \param return TRUE for success, FALSE otherwise
472 */
473 boolean
474 lp_emit_instruction_aos(
475 struct lp_build_tgsi_aos_context *bld,
476 const struct tgsi_full_instruction *inst,
477 const struct tgsi_opcode_info *info,
478 int *pc)
479 {
480 LLVMValueRef src0, src1, src2;
481 LLVMValueRef tmp0;
482 LLVMValueRef dst0 = NULL;
483
484 /*
485 * Stores and write masks are handled in a general fashion after the long
486 * instruction opcode switch statement.
487 *
488 * Although not stricitly necessary, we avoid generating instructions for
489 * channels which won't be stored, in cases where's that easy. For some
490 * complex instructions, like texture sampling, it is more convenient to
491 * assume a full writemask and then let LLVM optimization passes eliminate
492 * redundant code.
493 */
494
495 (*pc)++;
496
497 assert(info->num_dst <= 1);
498 if (info->num_dst) {
499 dst0 = bld->bld_base.base.undef;
500 }
501
502 switch (inst->Instruction.Opcode) {
503 case TGSI_OPCODE_ARL:
504 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
505 dst0 = lp_build_floor(&bld->bld_base.base, src0);
506 break;
507
508 case TGSI_OPCODE_MOV:
509 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
510 break;
511
512 case TGSI_OPCODE_LIT:
513 return FALSE;
514
515 case TGSI_OPCODE_RCP:
516 /* TGSI_OPCODE_RECIP */
517 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
518 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
519 break;
520
521 case TGSI_OPCODE_RSQ:
522 /* TGSI_OPCODE_RECIPSQRT */
523 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
524 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
525 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
526 break;
527
528 case TGSI_OPCODE_EXP:
529 return FALSE;
530
531 case TGSI_OPCODE_LOG:
532 return FALSE;
533
534 case TGSI_OPCODE_MUL:
535 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
536 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
537 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
538 break;
539
540 case TGSI_OPCODE_ADD:
541 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
542 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
543 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
544 break;
545
546 case TGSI_OPCODE_DP3:
547 /* TGSI_OPCODE_DOT3 */
548 return FALSE;
549
550 case TGSI_OPCODE_DP4:
551 /* TGSI_OPCODE_DOT4 */
552 return FALSE;
553
554 case TGSI_OPCODE_DST:
555 return FALSE;
556
557 case TGSI_OPCODE_MIN:
558 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
559 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
560 dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
561 break;
562
563 case TGSI_OPCODE_MAX:
564 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
565 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
566 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
567 break;
568
569 case TGSI_OPCODE_SLT:
570 /* TGSI_OPCODE_SETLT */
571 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
572 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
573 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
574 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
575 break;
576
577 case TGSI_OPCODE_SGE:
578 /* TGSI_OPCODE_SETGE */
579 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
580 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
581 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
582 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
583 break;
584
585 case TGSI_OPCODE_MAD:
586 /* TGSI_OPCODE_MADD */
587 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
588 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
589 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
590 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
591 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
592 break;
593
594 case TGSI_OPCODE_SUB:
595 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
596 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
597 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
598 break;
599
600 case TGSI_OPCODE_LRP:
601 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
602 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
603 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
604 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
605 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
606 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
607 break;
608
609 case TGSI_OPCODE_DP2A:
610 return FALSE;
611
612 case TGSI_OPCODE_FRC:
613 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
614 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
615 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
616 break;
617
618 case TGSI_OPCODE_CLAMP:
619 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
620 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
621 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
622 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
623 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
624 break;
625
626 case TGSI_OPCODE_FLR:
627 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
628 dst0 = lp_build_floor(&bld->bld_base.base, src0);
629 break;
630
631 case TGSI_OPCODE_ROUND:
632 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
633 dst0 = lp_build_round(&bld->bld_base.base, src0);
634 break;
635
636 case TGSI_OPCODE_EX2:
637 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
638 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
639 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
640 break;
641
642 case TGSI_OPCODE_LG2:
643 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
644 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
645 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
646 break;
647
648 case TGSI_OPCODE_POW:
649 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
650 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
651 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
652 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
653 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
654 break;
655
656 case TGSI_OPCODE_XPD:
657 return FALSE;
658
659 case TGSI_OPCODE_DPH:
660 return FALSE;
661
662 case TGSI_OPCODE_COS:
663 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
664 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
665 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
666 break;
667
668 case TGSI_OPCODE_DDX:
669 return FALSE;
670
671 case TGSI_OPCODE_DDY:
672 return FALSE;
673
674 case TGSI_OPCODE_KILL:
675 return FALSE;
676
677 case TGSI_OPCODE_KILL_IF:
678 return FALSE;
679
680 case TGSI_OPCODE_PK2H:
681 return FALSE;
682 break;
683
684 case TGSI_OPCODE_PK2US:
685 return FALSE;
686 break;
687
688 case TGSI_OPCODE_PK4B:
689 return FALSE;
690 break;
691
692 case TGSI_OPCODE_PK4UB:
693 return FALSE;
694
695 case TGSI_OPCODE_SEQ:
696 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
697 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
698 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
699 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
700 break;
701
702 case TGSI_OPCODE_SGT:
703 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
704 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
705 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
706 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
707 break;
708
709 case TGSI_OPCODE_SIN:
710 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
711 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
712 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
713 break;
714
715 case TGSI_OPCODE_SLE:
716 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
717 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
718 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
719 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
720 break;
721
722 case TGSI_OPCODE_SNE:
723 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
724 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
725 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
726 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
727 break;
728
729 case TGSI_OPCODE_TEX:
730 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
731 break;
732
733 case TGSI_OPCODE_TXD:
734 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
735 break;
736
737 case TGSI_OPCODE_UP2H:
738 /* deprecated */
739 assert (0);
740 return FALSE;
741 break;
742
743 case TGSI_OPCODE_UP2US:
744 /* deprecated */
745 assert(0);
746 return FALSE;
747 break;
748
749 case TGSI_OPCODE_UP4B:
750 /* deprecated */
751 assert(0);
752 return FALSE;
753 break;
754
755 case TGSI_OPCODE_UP4UB:
756 /* deprecated */
757 assert(0);
758 return FALSE;
759 break;
760
761 case TGSI_OPCODE_ARR:
762 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
763 dst0 = lp_build_round(&bld->bld_base.base, src0);
764 break;
765
766 case TGSI_OPCODE_CAL:
767 return FALSE;
768
769 case TGSI_OPCODE_RET:
770 /* safe to ignore at end */
771 break;
772
773 case TGSI_OPCODE_END:
774 *pc = -1;
775 break;
776
777 case TGSI_OPCODE_SSG:
778 /* TGSI_OPCODE_SGN */
779 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
780 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
781 break;
782
783 case TGSI_OPCODE_CMP:
784 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
785 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
786 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
787 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
788 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
789 break;
790
791 case TGSI_OPCODE_SCS:
792 return FALSE;
793
794 case TGSI_OPCODE_TXB:
795 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
796 break;
797
798 case TGSI_OPCODE_DIV:
799 assert(0);
800 return FALSE;
801 break;
802
803 case TGSI_OPCODE_DP2:
804 return FALSE;
805
806 case TGSI_OPCODE_TXL:
807 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
808 break;
809
810 case TGSI_OPCODE_TXP:
811 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
812 break;
813
814 case TGSI_OPCODE_BRK:
815 return FALSE;
816
817 case TGSI_OPCODE_IF:
818 case TGSI_OPCODE_UIF:
819 return FALSE;
820
821 case TGSI_OPCODE_BGNLOOP:
822 return FALSE;
823
824 case TGSI_OPCODE_BGNSUB:
825 return FALSE;
826
827 case TGSI_OPCODE_ELSE:
828 return FALSE;
829
830 case TGSI_OPCODE_ENDIF:
831 return FALSE;
832
833 case TGSI_OPCODE_ENDLOOP:
834 return FALSE;
835
836 case TGSI_OPCODE_ENDSUB:
837 return FALSE;
838
839 case TGSI_OPCODE_PUSHA:
840 /* deprecated? */
841 assert(0);
842 return FALSE;
843 break;
844
845 case TGSI_OPCODE_POPA:
846 /* deprecated? */
847 assert(0);
848 return FALSE;
849 break;
850
851 case TGSI_OPCODE_CEIL:
852 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
853 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
854 break;
855
856 case TGSI_OPCODE_I2F:
857 assert(0);
858 return FALSE;
859 break;
860
861 case TGSI_OPCODE_NOT:
862 assert(0);
863 return FALSE;
864 break;
865
866 case TGSI_OPCODE_TRUNC:
867 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
868 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
869 break;
870
871 case TGSI_OPCODE_SHL:
872 assert(0);
873 return FALSE;
874 break;
875
876 case TGSI_OPCODE_ISHR:
877 assert(0);
878 return FALSE;
879 break;
880
881 case TGSI_OPCODE_AND:
882 assert(0);
883 return FALSE;
884 break;
885
886 case TGSI_OPCODE_OR:
887 assert(0);
888 return FALSE;
889 break;
890
891 case TGSI_OPCODE_MOD:
892 assert(0);
893 return FALSE;
894 break;
895
896 case TGSI_OPCODE_XOR:
897 assert(0);
898 return FALSE;
899 break;
900
901 case TGSI_OPCODE_SAD:
902 assert(0);
903 return FALSE;
904 break;
905
906 case TGSI_OPCODE_TXF:
907 assert(0);
908 return FALSE;
909 break;
910
911 case TGSI_OPCODE_TXQ:
912 assert(0);
913 return FALSE;
914 break;
915
916 case TGSI_OPCODE_CONT:
917 return FALSE;
918
919 case TGSI_OPCODE_EMIT:
920 return FALSE;
921 break;
922
923 case TGSI_OPCODE_ENDPRIM:
924 return FALSE;
925 break;
926
927 case TGSI_OPCODE_NOP:
928 break;
929
930 case TGSI_OPCODE_SAMPLE:
931 dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
932 break;
933
934 default:
935 return FALSE;
936 }
937
938 if (info->num_dst) {
939 lp_emit_store_aos(bld, inst, 0, dst0);
940 }
941
942 return TRUE;
943 }
944
945
946 void
947 lp_build_tgsi_aos(struct gallivm_state *gallivm,
948 const struct tgsi_token *tokens,
949 struct lp_type type,
950 const unsigned char swizzles[4],
951 LLVMValueRef consts_ptr,
952 const LLVMValueRef *inputs,
953 LLVMValueRef *outputs,
954 struct lp_build_sampler_aos *sampler,
955 const struct tgsi_shader_info *info)
956 {
957 struct lp_build_tgsi_aos_context bld;
958 struct tgsi_parse_context parse;
959 uint num_immediates = 0;
960 unsigned chan;
961 int pc = 0;
962
963 /* Setup build context */
964 memset(&bld, 0, sizeof bld);
965 lp_build_context_init(&bld.bld_base.base, gallivm, type);
966 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
967 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
968 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
969
970 for (chan = 0; chan < 4; ++chan) {
971 bld.swizzles[chan] = swizzles[chan];
972 bld.inv_swizzles[swizzles[chan]] = chan;
973 }
974
975 bld.inputs = inputs;
976 bld.outputs = outputs;
977 bld.consts_ptr = consts_ptr;
978 bld.sampler = sampler;
979 bld.indirect_files = info->indirect_files;
980 bld.bld_base.emit_swizzle = swizzle_aos;
981 bld.bld_base.info = info;
982
983 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
984 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
985 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
986 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
987
988 /* Set opcode actions */
989 lp_set_default_actions_cpu(&bld.bld_base);
990
991 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
992 return;
993 }
994
995 tgsi_parse_init(&parse, tokens);
996
997 while (!tgsi_parse_end_of_tokens(&parse)) {
998 tgsi_parse_token(&parse);
999
1000 switch(parse.FullToken.Token.Type) {
1001 case TGSI_TOKEN_TYPE_DECLARATION:
1002 /* Inputs already interpolated */
1003 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1004 break;
1005
1006 case TGSI_TOKEN_TYPE_INSTRUCTION:
1007 /* save expanded instruction */
1008 lp_bld_tgsi_add_instruction(&bld.bld_base,
1009 &parse.FullToken.FullInstruction);
1010 break;
1011
1012 case TGSI_TOKEN_TYPE_IMMEDIATE:
1013 /* simply copy the immediate values into the next immediates[] slot */
1014 {
1015 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1016 float imm[4];
1017 assert(size <= 4);
1018 assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
1019 for (chan = 0; chan < 4; ++chan) {
1020 imm[chan] = 0.0f;
1021 }
1022 for (chan = 0; chan < size; ++chan) {
1023 unsigned swizzle = bld.swizzles[chan];
1024 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1025 }
1026 bld.immediates[num_immediates] =
1027 lp_build_const_aos(gallivm, type,
1028 imm[0], imm[1], imm[2], imm[3],
1029 NULL);
1030 num_immediates++;
1031 }
1032 break;
1033
1034 case TGSI_TOKEN_TYPE_PROPERTY:
1035 break;
1036
1037 default:
1038 assert(0);
1039 }
1040 }
1041
1042 while (pc != -1) {
1043 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1044 const struct tgsi_opcode_info *opcode_info =
1045 tgsi_get_opcode_info(instr->Instruction.Opcode);
1046 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1047 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1048 opcode_info->mnemonic);
1049 }
1050
1051 if (0) {
1052 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1053 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1054 debug_printf("11111111111111111111111111111 \n");
1055 tgsi_dump(tokens, 0);
1056 lp_debug_dump_value(function);
1057 debug_printf("2222222222222222222222222222 \n");
1058 }
1059 tgsi_parse_free(&parse);
1060 FREE(bld.bld_base.instructions);
1061
1062 if (0) {
1063 LLVMModuleRef module = LLVMGetGlobalParent(
1064 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1065 LLVMDumpModule(module);
1066 }
1067
1068 }
1069