gallivm: Add a new interface for doing TGSI->LLVM conversions
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59
60
61 /**
62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
63 * ordering.
64 */
65 static LLVMValueRef
66 swizzle_aos(struct lp_build_tgsi_context *bld_base,
67 LLVMValueRef a,
68 unsigned swizzle_x,
69 unsigned swizzle_y,
70 unsigned swizzle_z,
71 unsigned swizzle_w)
72 {
73 unsigned char swizzles[4];
74 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
75
76 assert(swizzle_x < 4);
77 assert(swizzle_y < 4);
78 assert(swizzle_z < 4);
79 assert(swizzle_w < 4);
80
81 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
82 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
83 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
84 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
85
86 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
87 }
88
89
90 static LLVMValueRef
91 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
92 LLVMValueRef a,
93 unsigned chan)
94 {
95 chan = bld->swizzles[chan];
96 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
97 }
98
99
100 static LLVMValueRef
101 emit_fetch_constant(
102 struct lp_build_tgsi_context * bld_base,
103 const struct tgsi_full_src_register * reg,
104 const unsigned swizzle)
105 {
106 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
107 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
108 struct lp_type type = bld_base->base.type;
109 LLVMValueRef res;
110 unsigned chan;
111
112 assert(!reg->Register.Indirect);
113
114 /*
115 * Get the constants components
116 */
117
118 res = bld->bld_base.base.undef;
119 for (chan = 0; chan < 4; ++chan) {
120 LLVMValueRef index;
121 LLVMValueRef scalar_ptr;
122 LLVMValueRef scalar;
123 LLVMValueRef swizzle;
124
125 index = lp_build_const_int32(bld->bld_base.base.gallivm,
126 reg->Register.Index * 4 + chan);
127
128 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
129
130 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
131
132 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
133
134 /*
135 * NOTE: constants array is always assumed to be RGBA
136 */
137
138 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
139 bld->swizzles[chan]);
140
141 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
142 }
143
144 /*
145 * Broadcast the first quaternion to all others.
146 *
147 * XXX: could be factored into a reusable function.
148 */
149
150 if (type.length > 4) {
151 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
152 unsigned i;
153
154 for (chan = 0; chan < 4; ++chan) {
155 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
156 }
157
158 for (i = 4; i < type.length; ++i) {
159 shuffles[i] = shuffles[i % 4];
160 }
161
162 res = LLVMBuildShuffleVector(builder,
163 res, bld->bld_base.base.undef,
164 LLVMConstVector(shuffles, type.length),
165 "");
166 }
167 return res;
168 }
169
170 static LLVMValueRef
171 emit_fetch_immediate(
172 struct lp_build_tgsi_context * bld_base,
173 const struct tgsi_full_src_register * reg,
174 const unsigned swizzle)
175 {
176 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
177 LLVMValueRef res = bld->immediates[reg->Register.Index];
178 assert(res);
179 return res;
180 }
181
182 static LLVMValueRef
183 emit_fetch_input(
184 struct lp_build_tgsi_context * bld_base,
185 const struct tgsi_full_src_register * reg,
186 const unsigned swizzle)
187 {
188 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
189 LLVMValueRef res = bld->inputs[reg->Register.Index];
190 assert(!reg->Register.Indirect);
191 assert(res);
192 return res;
193 }
194
195 static LLVMValueRef
196 emit_fetch_temporary(
197 struct lp_build_tgsi_context * bld_base,
198 const struct tgsi_full_src_register * reg,
199 const unsigned swizzle)
200 {
201 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
202 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
203 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
204 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
205 assert(!reg->Register.Indirect);
206 if (!res)
207 return bld->bld_base.base.undef;
208
209 return res;
210 }
211
212 /**
213 * Register store.
214 */
215 void
216 lp_emit_store_aos(
217 struct lp_build_tgsi_aos_context *bld,
218 const struct tgsi_full_instruction *inst,
219 unsigned index,
220 LLVMValueRef value)
221 {
222 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
223 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
224 LLVMValueRef mask = NULL;
225 LLVMValueRef ptr;
226
227 /*
228 * Saturate the value
229 */
230
231 switch (inst->Instruction.Saturate) {
232 case TGSI_SAT_NONE:
233 break;
234
235 case TGSI_SAT_ZERO_ONE:
236 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
237 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
238 break;
239
240 case TGSI_SAT_MINUS_PLUS_ONE:
241 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
242 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
243 break;
244
245 default:
246 assert(0);
247 }
248
249 /*
250 * Translate the register file
251 */
252
253 assert(!reg->Register.Indirect);
254
255 switch (reg->Register.File) {
256 case TGSI_FILE_OUTPUT:
257 ptr = bld->outputs[reg->Register.Index];
258 break;
259
260 case TGSI_FILE_TEMPORARY:
261 ptr = bld->temps[reg->Register.Index];
262 break;
263
264 case TGSI_FILE_ADDRESS:
265 ptr = bld->addr[reg->Indirect.Index];
266 break;
267
268 case TGSI_FILE_PREDICATE:
269 ptr = bld->preds[reg->Register.Index];
270 break;
271
272 default:
273 assert(0);
274 return;
275 }
276
277 if (!ptr)
278 return;
279 /*
280 * Predicate
281 */
282
283 if (inst->Instruction.Predicate) {
284 LLVMValueRef pred;
285
286 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
287
288 pred = LLVMBuildLoad(builder,
289 bld->preds[inst->Predicate.Index], "");
290
291 /*
292 * Convert the value to an integer mask.
293 */
294 pred = lp_build_compare(bld->bld_base.base.gallivm,
295 bld->bld_base.base.type,
296 PIPE_FUNC_NOTEQUAL,
297 pred,
298 bld->bld_base.base.zero);
299
300 if (inst->Predicate.Negate) {
301 pred = LLVMBuildNot(builder, pred, "");
302 }
303
304 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
305 inst->Predicate.SwizzleX,
306 inst->Predicate.SwizzleY,
307 inst->Predicate.SwizzleZ,
308 inst->Predicate.SwizzleW);
309
310 if (mask) {
311 mask = LLVMBuildAnd(builder, mask, pred, "");
312 } else {
313 mask = pred;
314 }
315 }
316
317 /*
318 * Writemask
319 */
320
321 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
322 LLVMValueRef writemask;
323
324 writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type,
325 reg->Register.WriteMask);
326
327 if (mask) {
328 mask = LLVMBuildAnd(builder, mask, writemask, "");
329 } else {
330 mask = writemask;
331 }
332 }
333
334 if (mask) {
335 LLVMValueRef orig_value;
336
337 orig_value = LLVMBuildLoad(builder, ptr, "");
338 value = lp_build_select(&bld->bld_base.base,
339 mask, value, orig_value);
340 }
341
342 LLVMBuildStore(builder, value, ptr);
343 }
344
345
346 /**
347 * High-level instruction translators.
348 */
349
350 static LLVMValueRef
351 emit_tex(struct lp_build_tgsi_aos_context *bld,
352 const struct tgsi_full_instruction *inst,
353 enum lp_build_tex_modifier modifier)
354 {
355 unsigned target;
356 unsigned unit;
357 LLVMValueRef coords;
358 LLVMValueRef ddx;
359 LLVMValueRef ddy;
360
361 if (!bld->sampler) {
362 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
363 return bld->bld_base.base.undef;
364 }
365
366 target = inst->Texture.Texture;
367
368 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
369
370 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
371 ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
372 ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
373 unit = inst->Src[3].Register.Index;
374 } else {
375 #if 0
376 ddx = lp_build_ddx( &bld->bld_base.base, coords );
377 ddy = lp_build_ddy( &bld->bld_base.base, coords );
378 #else
379 /* TODO */
380 ddx = bld->bld_base.base.one;
381 ddy = bld->bld_base.base.one;
382 #endif
383 unit = inst->Src[1].Register.Index;
384 }
385
386 return bld->sampler->emit_fetch_texel(bld->sampler,
387 &bld->bld_base.base,
388 target, unit,
389 coords, ddx, ddy,
390 modifier);
391 }
392
393
394 void
395 lp_emit_declaration_aos(
396 struct lp_build_tgsi_aos_context *bld,
397 const struct tgsi_full_declaration *decl)
398 {
399 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
400 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
401
402 unsigned first = decl->Range.First;
403 unsigned last = decl->Range.Last;
404 unsigned idx;
405
406 for (idx = first; idx <= last; ++idx) {
407 switch (decl->Declaration.File) {
408 case TGSI_FILE_TEMPORARY:
409 assert(idx < LP_MAX_TGSI_TEMPS);
410 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
411 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
412 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
413 vec_type, array_size, "");
414 } else {
415 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
416 }
417 break;
418
419 case TGSI_FILE_OUTPUT:
420 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
421 break;
422
423 case TGSI_FILE_ADDRESS:
424 assert(idx < LP_MAX_TGSI_ADDRS);
425 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
426 break;
427
428 case TGSI_FILE_PREDICATE:
429 assert(idx < LP_MAX_TGSI_PREDS);
430 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
431 break;
432
433 default:
434 /* don't need to declare other vars */
435 break;
436 }
437 }
438 }
439
440
441 /**
442 * Emit LLVM for one TGSI instruction.
443 * \param return TRUE for success, FALSE otherwise
444 */
445 boolean
446 lp_emit_instruction_aos(
447 struct lp_build_tgsi_aos_context *bld,
448 const struct tgsi_full_instruction *inst,
449 const struct tgsi_opcode_info *info,
450 int *pc)
451 {
452 LLVMValueRef src0, src1, src2;
453 LLVMValueRef tmp0, tmp1;
454 LLVMValueRef dst0 = NULL;
455
456 /*
457 * Stores and write masks are handled in a general fashion after the long
458 * instruction opcode switch statement.
459 *
460 * Although not stricitly necessary, we avoid generating instructions for
461 * channels which won't be stored, in cases where's that easy. For some
462 * complex instructions, like texture sampling, it is more convenient to
463 * assume a full writemask and then let LLVM optimization passes eliminate
464 * redundant code.
465 */
466
467 (*pc)++;
468
469 assert(info->num_dst <= 1);
470 if (info->num_dst) {
471 dst0 = bld->bld_base.base.undef;
472 }
473
474 switch (inst->Instruction.Opcode) {
475 case TGSI_OPCODE_ARL:
476 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
477 dst0 = lp_build_floor(&bld->bld_base.base, src0);
478 break;
479
480 case TGSI_OPCODE_MOV:
481 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
482 break;
483
484 case TGSI_OPCODE_LIT:
485 return FALSE;
486
487 case TGSI_OPCODE_RCP:
488 /* TGSI_OPCODE_RECIP */
489 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
491 break;
492
493 case TGSI_OPCODE_RSQ:
494 /* TGSI_OPCODE_RECIPSQRT */
495 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
496 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
497 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
498 break;
499
500 case TGSI_OPCODE_EXP:
501 return FALSE;
502
503 case TGSI_OPCODE_LOG:
504 return FALSE;
505
506 case TGSI_OPCODE_MUL:
507 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
508 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
509 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
510 break;
511
512 case TGSI_OPCODE_ADD:
513 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
514 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
515 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
516 break;
517
518 case TGSI_OPCODE_DP3:
519 /* TGSI_OPCODE_DOT3 */
520 return FALSE;
521
522 case TGSI_OPCODE_DP4:
523 /* TGSI_OPCODE_DOT4 */
524 return FALSE;
525
526 case TGSI_OPCODE_DST:
527 return FALSE;
528
529 case TGSI_OPCODE_MIN:
530 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
531 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
532 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
533 break;
534
535 case TGSI_OPCODE_MAX:
536 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
537 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
538 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
539 break;
540
541 case TGSI_OPCODE_SLT:
542 /* TGSI_OPCODE_SETLT */
543 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
544 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
545 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
546 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
547 break;
548
549 case TGSI_OPCODE_SGE:
550 /* TGSI_OPCODE_SETGE */
551 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
552 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
553 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
554 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
555 break;
556
557 case TGSI_OPCODE_MAD:
558 /* TGSI_OPCODE_MADD */
559 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
560 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
561 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
562 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
563 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
564 break;
565
566 case TGSI_OPCODE_SUB:
567 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
568 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
569 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
570 break;
571
572 case TGSI_OPCODE_LRP:
573 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
574 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
575 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
576 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
577 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
578 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
579 break;
580
581 case TGSI_OPCODE_CND:
582 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
583 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
584 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
585 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
586 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
587 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
588 break;
589
590 case TGSI_OPCODE_DP2A:
591 return FALSE;
592
593 case TGSI_OPCODE_FRC:
594 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
595 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
596 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
597 break;
598
599 case TGSI_OPCODE_CLAMP:
600 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
601 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
602 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
603 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
604 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
605 break;
606
607 case TGSI_OPCODE_FLR:
608 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
609 dst0 = lp_build_floor(&bld->bld_base.base, src0);
610 break;
611
612 case TGSI_OPCODE_ROUND:
613 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
614 dst0 = lp_build_round(&bld->bld_base.base, src0);
615 break;
616
617 case TGSI_OPCODE_EX2:
618 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
619 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
620 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
621 break;
622
623 case TGSI_OPCODE_LG2:
624 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
625 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
626 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
627 break;
628
629 case TGSI_OPCODE_POW:
630 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
631 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
632 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
633 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
634 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
635 break;
636
637 case TGSI_OPCODE_XPD:
638 return FALSE;
639
640 case TGSI_OPCODE_RCC:
641 /* deprecated? */
642 assert(0);
643 return FALSE;
644
645 case TGSI_OPCODE_DPH:
646 return FALSE;
647
648 case TGSI_OPCODE_COS:
649 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
650 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
651 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
652 break;
653
654 case TGSI_OPCODE_DDX:
655 return FALSE;
656
657 case TGSI_OPCODE_DDY:
658 return FALSE;
659
660 case TGSI_OPCODE_KILP:
661 /* predicated kill */
662 return FALSE;
663
664 case TGSI_OPCODE_KIL:
665 /* conditional kill */
666 return FALSE;
667
668 case TGSI_OPCODE_PK2H:
669 return FALSE;
670 break;
671
672 case TGSI_OPCODE_PK2US:
673 return FALSE;
674 break;
675
676 case TGSI_OPCODE_PK4B:
677 return FALSE;
678 break;
679
680 case TGSI_OPCODE_PK4UB:
681 return FALSE;
682
683 case TGSI_OPCODE_RFL:
684 return FALSE;
685
686 case TGSI_OPCODE_SEQ:
687 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
688 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
689 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
690 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
691 break;
692
693 case TGSI_OPCODE_SFL:
694 dst0 = bld->bld_base.base.zero;
695 break;
696
697 case TGSI_OPCODE_SGT:
698 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
699 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
700 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
701 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
702 break;
703
704 case TGSI_OPCODE_SIN:
705 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
706 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
707 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
708 break;
709
710 case TGSI_OPCODE_SLE:
711 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
712 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
713 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
714 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
715 break;
716
717 case TGSI_OPCODE_SNE:
718 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
719 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
720 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
721 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
722 break;
723
724 case TGSI_OPCODE_STR:
725 dst0 = bld->bld_base.base.one;
726 break;
727
728 case TGSI_OPCODE_TEX:
729 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
730 break;
731
732 case TGSI_OPCODE_TXD:
733 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
734 break;
735
736 case TGSI_OPCODE_UP2H:
737 /* deprecated */
738 assert (0);
739 return FALSE;
740 break;
741
742 case TGSI_OPCODE_UP2US:
743 /* deprecated */
744 assert(0);
745 return FALSE;
746 break;
747
748 case TGSI_OPCODE_UP4B:
749 /* deprecated */
750 assert(0);
751 return FALSE;
752 break;
753
754 case TGSI_OPCODE_UP4UB:
755 /* deprecated */
756 assert(0);
757 return FALSE;
758 break;
759
760 case TGSI_OPCODE_X2D:
761 /* deprecated? */
762 assert(0);
763 return FALSE;
764 break;
765
766 case TGSI_OPCODE_ARA:
767 /* deprecated */
768 assert(0);
769 return FALSE;
770 break;
771
772 case TGSI_OPCODE_ARR:
773 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
774 dst0 = lp_build_round(&bld->bld_base.base, src0);
775 break;
776
777 case TGSI_OPCODE_BRA:
778 /* deprecated */
779 assert(0);
780 return FALSE;
781 break;
782
783 case TGSI_OPCODE_CAL:
784 return FALSE;
785
786 case TGSI_OPCODE_RET:
787 return FALSE;
788
789 case TGSI_OPCODE_END:
790 *pc = -1;
791 break;
792
793 case TGSI_OPCODE_SSG:
794 /* TGSI_OPCODE_SGN */
795 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
796 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
797 break;
798
799 case TGSI_OPCODE_CMP:
800 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
801 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
802 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
803 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
804 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
805 break;
806
807 case TGSI_OPCODE_SCS:
808 return FALSE;
809
810 case TGSI_OPCODE_TXB:
811 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
812 break;
813
814 case TGSI_OPCODE_NRM:
815 /* fall-through */
816 case TGSI_OPCODE_NRM4:
817 return FALSE;
818
819 case TGSI_OPCODE_DIV:
820 /* deprecated */
821 assert(0);
822 return FALSE;
823 break;
824
825 case TGSI_OPCODE_DP2:
826 return FALSE;
827
828 case TGSI_OPCODE_TXL:
829 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
830 break;
831
832 case TGSI_OPCODE_TXP:
833 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
834 break;
835
836 case TGSI_OPCODE_BRK:
837 return FALSE;
838
839 case TGSI_OPCODE_IF:
840 return FALSE;
841
842 case TGSI_OPCODE_BGNLOOP:
843 return FALSE;
844
845 case TGSI_OPCODE_BGNSUB:
846 return FALSE;
847
848 case TGSI_OPCODE_ELSE:
849 return FALSE;
850
851 case TGSI_OPCODE_ENDIF:
852 return FALSE;
853
854 case TGSI_OPCODE_ENDLOOP:
855 return FALSE;
856
857 case TGSI_OPCODE_ENDSUB:
858 return FALSE;
859
860 case TGSI_OPCODE_PUSHA:
861 /* deprecated? */
862 assert(0);
863 return FALSE;
864 break;
865
866 case TGSI_OPCODE_POPA:
867 /* deprecated? */
868 assert(0);
869 return FALSE;
870 break;
871
872 case TGSI_OPCODE_CEIL:
873 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
874 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
875 break;
876
877 case TGSI_OPCODE_I2F:
878 /* deprecated? */
879 assert(0);
880 return FALSE;
881 break;
882
883 case TGSI_OPCODE_NOT:
884 /* deprecated? */
885 assert(0);
886 return FALSE;
887 break;
888
889 case TGSI_OPCODE_TRUNC:
890 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
891 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
892 break;
893
894 case TGSI_OPCODE_SHL:
895 /* deprecated? */
896 assert(0);
897 return FALSE;
898 break;
899
900 case TGSI_OPCODE_ISHR:
901 /* deprecated? */
902 assert(0);
903 return FALSE;
904 break;
905
906 case TGSI_OPCODE_AND:
907 /* deprecated? */
908 assert(0);
909 return FALSE;
910 break;
911
912 case TGSI_OPCODE_OR:
913 /* deprecated? */
914 assert(0);
915 return FALSE;
916 break;
917
918 case TGSI_OPCODE_MOD:
919 /* deprecated? */
920 assert(0);
921 return FALSE;
922 break;
923
924 case TGSI_OPCODE_XOR:
925 /* deprecated? */
926 assert(0);
927 return FALSE;
928 break;
929
930 case TGSI_OPCODE_SAD:
931 /* deprecated? */
932 assert(0);
933 return FALSE;
934 break;
935
936 case TGSI_OPCODE_TXF:
937 /* deprecated? */
938 assert(0);
939 return FALSE;
940 break;
941
942 case TGSI_OPCODE_TXQ:
943 /* deprecated? */
944 assert(0);
945 return FALSE;
946 break;
947
948 case TGSI_OPCODE_CONT:
949 return FALSE;
950
951 case TGSI_OPCODE_EMIT:
952 return FALSE;
953 break;
954
955 case TGSI_OPCODE_ENDPRIM:
956 return FALSE;
957 break;
958
959 case TGSI_OPCODE_NOP:
960 break;
961
962 default:
963 return FALSE;
964 }
965
966 if (info->num_dst) {
967 lp_emit_store_aos(bld, inst, 0, dst0);
968 }
969
970 return TRUE;
971 }
972
973
974 void
975 lp_build_tgsi_aos(struct gallivm_state *gallivm,
976 const struct tgsi_token *tokens,
977 struct lp_type type,
978 const unsigned char swizzles[4],
979 LLVMValueRef consts_ptr,
980 const LLVMValueRef *inputs,
981 LLVMValueRef *outputs,
982 struct lp_build_sampler_aos *sampler,
983 const struct tgsi_shader_info *info)
984 {
985 struct lp_build_tgsi_aos_context bld;
986 struct tgsi_parse_context parse;
987 uint num_immediates = 0;
988 unsigned chan;
989 int pc = 0;
990
991 /* Setup build context */
992 memset(&bld, 0, sizeof bld);
993 lp_build_context_init(&bld.bld_base.base, gallivm, type);
994 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
995
996 for (chan = 0; chan < 4; ++chan) {
997 bld.swizzles[chan] = swizzles[chan];
998 bld.inv_swizzles[swizzles[chan]] = chan;
999 }
1000
1001 bld.inputs = inputs;
1002 bld.outputs = outputs;
1003 bld.consts_ptr = consts_ptr;
1004 bld.sampler = sampler;
1005 bld.indirect_files = info->indirect_files;
1006 bld.bld_base.emit_swizzle = swizzle_aos;
1007 bld.bld_base.info = info;
1008
1009 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1010 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1011 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1012 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1013
1014 /* Set opcode actions */
1015 lp_set_default_actions_cpu(&bld.bld_base);
1016
1017 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1018 return;
1019 }
1020
1021 tgsi_parse_init(&parse, tokens);
1022
1023 while (!tgsi_parse_end_of_tokens(&parse)) {
1024 tgsi_parse_token(&parse);
1025
1026 switch(parse.FullToken.Token.Type) {
1027 case TGSI_TOKEN_TYPE_DECLARATION:
1028 /* Inputs already interpolated */
1029 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1030 break;
1031
1032 case TGSI_TOKEN_TYPE_INSTRUCTION:
1033 /* save expanded instruction */
1034 lp_bld_tgsi_add_instruction(&bld.bld_base,
1035 &parse.FullToken.FullInstruction);
1036 break;
1037
1038 case TGSI_TOKEN_TYPE_IMMEDIATE:
1039 /* simply copy the immediate values into the next immediates[] slot */
1040 {
1041 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1042 float imm[4];
1043 assert(size <= 4);
1044 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1045 for (chan = 0; chan < 4; ++chan) {
1046 imm[chan] = 0.0f;
1047 }
1048 for (chan = 0; chan < size; ++chan) {
1049 unsigned swizzle = bld.swizzles[chan];
1050 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1051 }
1052 bld.immediates[num_immediates] =
1053 lp_build_const_aos(gallivm, type,
1054 imm[0], imm[1], imm[2], imm[3],
1055 NULL);
1056 num_immediates++;
1057 }
1058 break;
1059
1060 case TGSI_TOKEN_TYPE_PROPERTY:
1061 break;
1062
1063 default:
1064 assert(0);
1065 }
1066 }
1067
1068 while (pc != -1) {
1069 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1070 const struct tgsi_opcode_info *opcode_info =
1071 tgsi_get_opcode_info(instr->Instruction.Opcode);
1072 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1073 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1074 opcode_info->mnemonic);
1075 }
1076
1077 if (0) {
1078 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1079 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1080 debug_printf("11111111111111111111111111111 \n");
1081 tgsi_dump(tokens, 0);
1082 lp_debug_dump_value(function);
1083 debug_printf("2222222222222222222222222222 \n");
1084 }
1085 tgsi_parse_free(&parse);
1086
1087 if (0) {
1088 LLVMModuleRef module = LLVMGetGlobalParent(
1089 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1090 LLVMDumpModule(module);
1091 }
1092
1093 }
1094