Revert "gallivm: Change getExtent and readByte to non-const with llvm-3.1."
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59
60
61 /**
62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
63 * ordering.
64 */
65 static LLVMValueRef
66 swizzle_aos(struct lp_build_tgsi_context *bld_base,
67 LLVMValueRef a,
68 unsigned swizzle_x,
69 unsigned swizzle_y,
70 unsigned swizzle_z,
71 unsigned swizzle_w)
72 {
73 unsigned char swizzles[4];
74 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
75
76 assert(swizzle_x < 4);
77 assert(swizzle_y < 4);
78 assert(swizzle_z < 4);
79 assert(swizzle_w < 4);
80
81 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
82 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
83 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
84 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
85
86 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
87 }
88
89
90 static LLVMValueRef
91 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
92 LLVMValueRef a,
93 unsigned chan)
94 {
95 chan = bld->swizzles[chan];
96 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
97 }
98
99
100 static LLVMValueRef
101 emit_fetch_constant(
102 struct lp_build_tgsi_context * bld_base,
103 const struct tgsi_full_src_register * reg,
104 enum tgsi_opcode_type stype,
105 unsigned swizzle)
106 {
107 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
108 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
109 struct lp_type type = bld_base->base.type;
110 LLVMValueRef res;
111 unsigned chan;
112
113 assert(!reg->Register.Indirect);
114
115 /*
116 * Get the constants components
117 */
118
119 res = bld->bld_base.base.undef;
120 for (chan = 0; chan < 4; ++chan) {
121 LLVMValueRef index;
122 LLVMValueRef scalar_ptr;
123 LLVMValueRef scalar;
124 LLVMValueRef swizzle;
125
126 index = lp_build_const_int32(bld->bld_base.base.gallivm,
127 reg->Register.Index * 4 + chan);
128
129 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
130
131 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
132
133 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
134
135 /*
136 * NOTE: constants array is always assumed to be RGBA
137 */
138
139 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
140 bld->swizzles[chan]);
141
142 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
143 }
144
145 /*
146 * Broadcast the first quaternion to all others.
147 *
148 * XXX: could be factored into a reusable function.
149 */
150
151 if (type.length > 4) {
152 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
153 unsigned i;
154
155 for (chan = 0; chan < 4; ++chan) {
156 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
157 }
158
159 for (i = 4; i < type.length; ++i) {
160 shuffles[i] = shuffles[i % 4];
161 }
162
163 res = LLVMBuildShuffleVector(builder,
164 res, bld->bld_base.base.undef,
165 LLVMConstVector(shuffles, type.length),
166 "");
167 }
168 return res;
169 }
170
171 static LLVMValueRef
172 emit_fetch_immediate(
173 struct lp_build_tgsi_context * bld_base,
174 const struct tgsi_full_src_register * reg,
175 enum tgsi_opcode_type stype,
176 unsigned swizzle)
177 {
178 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
179 LLVMValueRef res = bld->immediates[reg->Register.Index];
180 assert(res);
181 return res;
182 }
183
184 static LLVMValueRef
185 emit_fetch_input(
186 struct lp_build_tgsi_context * bld_base,
187 const struct tgsi_full_src_register * reg,
188 enum tgsi_opcode_type stype,
189 unsigned swizzle)
190 {
191 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
192 LLVMValueRef res = bld->inputs[reg->Register.Index];
193 assert(!reg->Register.Indirect);
194 assert(res);
195 return res;
196 }
197
198 static LLVMValueRef
199 emit_fetch_temporary(
200 struct lp_build_tgsi_context * bld_base,
201 const struct tgsi_full_src_register * reg,
202 enum tgsi_opcode_type stype,
203 unsigned swizzle)
204 {
205 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
206 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
207 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
208 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
209 assert(!reg->Register.Indirect);
210 if (!res)
211 return bld->bld_base.base.undef;
212
213 return res;
214 }
215
216 /**
217 * Register store.
218 */
219 void
220 lp_emit_store_aos(
221 struct lp_build_tgsi_aos_context *bld,
222 const struct tgsi_full_instruction *inst,
223 unsigned index,
224 LLVMValueRef value)
225 {
226 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
227 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
228 LLVMValueRef mask = NULL;
229 LLVMValueRef ptr;
230
231 /*
232 * Saturate the value
233 */
234
235 switch (inst->Instruction.Saturate) {
236 case TGSI_SAT_NONE:
237 break;
238
239 case TGSI_SAT_ZERO_ONE:
240 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
241 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
242 break;
243
244 case TGSI_SAT_MINUS_PLUS_ONE:
245 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
246 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
247 break;
248
249 default:
250 assert(0);
251 }
252
253 /*
254 * Translate the register file
255 */
256
257 assert(!reg->Register.Indirect);
258
259 switch (reg->Register.File) {
260 case TGSI_FILE_OUTPUT:
261 ptr = bld->outputs[reg->Register.Index];
262 break;
263
264 case TGSI_FILE_TEMPORARY:
265 ptr = bld->temps[reg->Register.Index];
266 break;
267
268 case TGSI_FILE_ADDRESS:
269 ptr = bld->addr[reg->Indirect.Index];
270 break;
271
272 case TGSI_FILE_PREDICATE:
273 ptr = bld->preds[reg->Register.Index];
274 break;
275
276 default:
277 assert(0);
278 return;
279 }
280
281 if (!ptr)
282 return;
283 /*
284 * Predicate
285 */
286
287 if (inst->Instruction.Predicate) {
288 LLVMValueRef pred;
289
290 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
291
292 pred = LLVMBuildLoad(builder,
293 bld->preds[inst->Predicate.Index], "");
294
295 /*
296 * Convert the value to an integer mask.
297 */
298 pred = lp_build_compare(bld->bld_base.base.gallivm,
299 bld->bld_base.base.type,
300 PIPE_FUNC_NOTEQUAL,
301 pred,
302 bld->bld_base.base.zero);
303
304 if (inst->Predicate.Negate) {
305 pred = LLVMBuildNot(builder, pred, "");
306 }
307
308 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
309 inst->Predicate.SwizzleX,
310 inst->Predicate.SwizzleY,
311 inst->Predicate.SwizzleZ,
312 inst->Predicate.SwizzleW);
313
314 if (mask) {
315 mask = LLVMBuildAnd(builder, mask, pred, "");
316 } else {
317 mask = pred;
318 }
319 }
320
321 /*
322 * Writemask
323 */
324
325 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
326 LLVMValueRef writemask;
327
328 writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type,
329 reg->Register.WriteMask);
330
331 if (mask) {
332 mask = LLVMBuildAnd(builder, mask, writemask, "");
333 } else {
334 mask = writemask;
335 }
336 }
337
338 if (mask) {
339 LLVMValueRef orig_value;
340
341 orig_value = LLVMBuildLoad(builder, ptr, "");
342 value = lp_build_select(&bld->bld_base.base,
343 mask, value, orig_value);
344 }
345
346 LLVMBuildStore(builder, value, ptr);
347 }
348
349
350 /**
351 * High-level instruction translators.
352 */
353
354 static LLVMValueRef
355 emit_tex(struct lp_build_tgsi_aos_context *bld,
356 const struct tgsi_full_instruction *inst,
357 enum lp_build_tex_modifier modifier)
358 {
359 unsigned target;
360 unsigned unit;
361 LLVMValueRef coords;
362 LLVMValueRef ddx;
363 LLVMValueRef ddy;
364
365 if (!bld->sampler) {
366 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
367 return bld->bld_base.base.undef;
368 }
369
370 target = inst->Texture.Texture;
371
372 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
373
374 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
375 ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
376 ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
377 unit = inst->Src[3].Register.Index;
378 } else {
379 #if 0
380 ddx = lp_build_ddx( &bld->bld_base.base, coords );
381 ddy = lp_build_ddy( &bld->bld_base.base, coords );
382 #else
383 /* TODO */
384 ddx = bld->bld_base.base.one;
385 ddy = bld->bld_base.base.one;
386 #endif
387 unit = inst->Src[1].Register.Index;
388 }
389
390 return bld->sampler->emit_fetch_texel(bld->sampler,
391 &bld->bld_base.base,
392 target, unit,
393 coords, ddx, ddy,
394 modifier);
395 }
396
397
398 void
399 lp_emit_declaration_aos(
400 struct lp_build_tgsi_aos_context *bld,
401 const struct tgsi_full_declaration *decl)
402 {
403 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
404 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
405
406 unsigned first = decl->Range.First;
407 unsigned last = decl->Range.Last;
408 unsigned idx;
409
410 for (idx = first; idx <= last; ++idx) {
411 switch (decl->Declaration.File) {
412 case TGSI_FILE_TEMPORARY:
413 assert(idx < LP_MAX_TGSI_TEMPS);
414 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
415 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
416 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
417 vec_type, array_size, "");
418 } else {
419 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
420 }
421 break;
422
423 case TGSI_FILE_OUTPUT:
424 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
425 break;
426
427 case TGSI_FILE_ADDRESS:
428 assert(idx < LP_MAX_TGSI_ADDRS);
429 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
430 break;
431
432 case TGSI_FILE_PREDICATE:
433 assert(idx < LP_MAX_TGSI_PREDS);
434 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
435 break;
436
437 default:
438 /* don't need to declare other vars */
439 break;
440 }
441 }
442 }
443
444
445 /**
446 * Emit LLVM for one TGSI instruction.
447 * \param return TRUE for success, FALSE otherwise
448 */
449 boolean
450 lp_emit_instruction_aos(
451 struct lp_build_tgsi_aos_context *bld,
452 const struct tgsi_full_instruction *inst,
453 const struct tgsi_opcode_info *info,
454 int *pc)
455 {
456 LLVMValueRef src0, src1, src2;
457 LLVMValueRef tmp0, tmp1;
458 LLVMValueRef dst0 = NULL;
459
460 /*
461 * Stores and write masks are handled in a general fashion after the long
462 * instruction opcode switch statement.
463 *
464 * Although not stricitly necessary, we avoid generating instructions for
465 * channels which won't be stored, in cases where's that easy. For some
466 * complex instructions, like texture sampling, it is more convenient to
467 * assume a full writemask and then let LLVM optimization passes eliminate
468 * redundant code.
469 */
470
471 (*pc)++;
472
473 assert(info->num_dst <= 1);
474 if (info->num_dst) {
475 dst0 = bld->bld_base.base.undef;
476 }
477
478 switch (inst->Instruction.Opcode) {
479 case TGSI_OPCODE_ARL:
480 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
481 dst0 = lp_build_floor(&bld->bld_base.base, src0);
482 break;
483
484 case TGSI_OPCODE_MOV:
485 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
486 break;
487
488 case TGSI_OPCODE_LIT:
489 return FALSE;
490
491 case TGSI_OPCODE_RCP:
492 /* TGSI_OPCODE_RECIP */
493 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
494 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
495 break;
496
497 case TGSI_OPCODE_RSQ:
498 /* TGSI_OPCODE_RECIPSQRT */
499 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
500 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
501 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
502 break;
503
504 case TGSI_OPCODE_EXP:
505 return FALSE;
506
507 case TGSI_OPCODE_LOG:
508 return FALSE;
509
510 case TGSI_OPCODE_MUL:
511 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
512 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
513 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
514 break;
515
516 case TGSI_OPCODE_ADD:
517 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
518 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
519 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
520 break;
521
522 case TGSI_OPCODE_DP3:
523 /* TGSI_OPCODE_DOT3 */
524 return FALSE;
525
526 case TGSI_OPCODE_DP4:
527 /* TGSI_OPCODE_DOT4 */
528 return FALSE;
529
530 case TGSI_OPCODE_DST:
531 return FALSE;
532
533 case TGSI_OPCODE_MIN:
534 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
535 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
536 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
537 break;
538
539 case TGSI_OPCODE_MAX:
540 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
541 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
542 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
543 break;
544
545 case TGSI_OPCODE_SLT:
546 /* TGSI_OPCODE_SETLT */
547 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
548 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
549 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
550 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
551 break;
552
553 case TGSI_OPCODE_SGE:
554 /* TGSI_OPCODE_SETGE */
555 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
556 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
557 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
558 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
559 break;
560
561 case TGSI_OPCODE_MAD:
562 /* TGSI_OPCODE_MADD */
563 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
564 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
565 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
566 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
567 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
568 break;
569
570 case TGSI_OPCODE_SUB:
571 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
572 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
573 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
574 break;
575
576 case TGSI_OPCODE_LRP:
577 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
578 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
579 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
580 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
581 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
582 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
583 break;
584
585 case TGSI_OPCODE_CND:
586 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
587 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
588 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
589 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
590 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
591 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
592 break;
593
594 case TGSI_OPCODE_DP2A:
595 return FALSE;
596
597 case TGSI_OPCODE_FRC:
598 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
599 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
600 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
601 break;
602
603 case TGSI_OPCODE_CLAMP:
604 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
605 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
606 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
607 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
608 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
609 break;
610
611 case TGSI_OPCODE_FLR:
612 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
613 dst0 = lp_build_floor(&bld->bld_base.base, src0);
614 break;
615
616 case TGSI_OPCODE_ROUND:
617 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
618 dst0 = lp_build_round(&bld->bld_base.base, src0);
619 break;
620
621 case TGSI_OPCODE_EX2:
622 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
623 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
624 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
625 break;
626
627 case TGSI_OPCODE_LG2:
628 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
629 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
630 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
631 break;
632
633 case TGSI_OPCODE_POW:
634 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
635 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
636 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
637 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
638 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
639 break;
640
641 case TGSI_OPCODE_XPD:
642 return FALSE;
643
644 case TGSI_OPCODE_RCC:
645 /* deprecated? */
646 assert(0);
647 return FALSE;
648
649 case TGSI_OPCODE_DPH:
650 return FALSE;
651
652 case TGSI_OPCODE_COS:
653 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
654 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
655 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
656 break;
657
658 case TGSI_OPCODE_DDX:
659 return FALSE;
660
661 case TGSI_OPCODE_DDY:
662 return FALSE;
663
664 case TGSI_OPCODE_KILP:
665 /* predicated kill */
666 return FALSE;
667
668 case TGSI_OPCODE_KIL:
669 /* conditional kill */
670 return FALSE;
671
672 case TGSI_OPCODE_PK2H:
673 return FALSE;
674 break;
675
676 case TGSI_OPCODE_PK2US:
677 return FALSE;
678 break;
679
680 case TGSI_OPCODE_PK4B:
681 return FALSE;
682 break;
683
684 case TGSI_OPCODE_PK4UB:
685 return FALSE;
686
687 case TGSI_OPCODE_RFL:
688 return FALSE;
689
690 case TGSI_OPCODE_SEQ:
691 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
692 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
693 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
694 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
695 break;
696
697 case TGSI_OPCODE_SFL:
698 dst0 = bld->bld_base.base.zero;
699 break;
700
701 case TGSI_OPCODE_SGT:
702 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
703 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
704 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
705 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
706 break;
707
708 case TGSI_OPCODE_SIN:
709 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
710 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
711 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
712 break;
713
714 case TGSI_OPCODE_SLE:
715 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
716 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
717 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
718 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
719 break;
720
721 case TGSI_OPCODE_SNE:
722 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
723 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
724 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
725 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
726 break;
727
728 case TGSI_OPCODE_STR:
729 dst0 = bld->bld_base.base.one;
730 break;
731
732 case TGSI_OPCODE_TEX:
733 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
734 break;
735
736 case TGSI_OPCODE_TXD:
737 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
738 break;
739
740 case TGSI_OPCODE_UP2H:
741 /* deprecated */
742 assert (0);
743 return FALSE;
744 break;
745
746 case TGSI_OPCODE_UP2US:
747 /* deprecated */
748 assert(0);
749 return FALSE;
750 break;
751
752 case TGSI_OPCODE_UP4B:
753 /* deprecated */
754 assert(0);
755 return FALSE;
756 break;
757
758 case TGSI_OPCODE_UP4UB:
759 /* deprecated */
760 assert(0);
761 return FALSE;
762 break;
763
764 case TGSI_OPCODE_X2D:
765 /* deprecated? */
766 assert(0);
767 return FALSE;
768 break;
769
770 case TGSI_OPCODE_ARA:
771 /* deprecated */
772 assert(0);
773 return FALSE;
774 break;
775
776 case TGSI_OPCODE_ARR:
777 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
778 dst0 = lp_build_round(&bld->bld_base.base, src0);
779 break;
780
781 case TGSI_OPCODE_BRA:
782 /* deprecated */
783 assert(0);
784 return FALSE;
785 break;
786
787 case TGSI_OPCODE_CAL:
788 return FALSE;
789
790 case TGSI_OPCODE_RET:
791 return FALSE;
792
793 case TGSI_OPCODE_END:
794 *pc = -1;
795 break;
796
797 case TGSI_OPCODE_SSG:
798 /* TGSI_OPCODE_SGN */
799 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
800 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
801 break;
802
803 case TGSI_OPCODE_CMP:
804 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
805 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
806 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
807 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
808 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
809 break;
810
811 case TGSI_OPCODE_SCS:
812 return FALSE;
813
814 case TGSI_OPCODE_TXB:
815 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
816 break;
817
818 case TGSI_OPCODE_NRM:
819 /* fall-through */
820 case TGSI_OPCODE_NRM4:
821 return FALSE;
822
823 case TGSI_OPCODE_DIV:
824 /* deprecated */
825 assert(0);
826 return FALSE;
827 break;
828
829 case TGSI_OPCODE_DP2:
830 return FALSE;
831
832 case TGSI_OPCODE_TXL:
833 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
834 break;
835
836 case TGSI_OPCODE_TXP:
837 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
838 break;
839
840 case TGSI_OPCODE_BRK:
841 return FALSE;
842
843 case TGSI_OPCODE_IF:
844 return FALSE;
845
846 case TGSI_OPCODE_BGNLOOP:
847 return FALSE;
848
849 case TGSI_OPCODE_BGNSUB:
850 return FALSE;
851
852 case TGSI_OPCODE_ELSE:
853 return FALSE;
854
855 case TGSI_OPCODE_ENDIF:
856 return FALSE;
857
858 case TGSI_OPCODE_ENDLOOP:
859 return FALSE;
860
861 case TGSI_OPCODE_ENDSUB:
862 return FALSE;
863
864 case TGSI_OPCODE_PUSHA:
865 /* deprecated? */
866 assert(0);
867 return FALSE;
868 break;
869
870 case TGSI_OPCODE_POPA:
871 /* deprecated? */
872 assert(0);
873 return FALSE;
874 break;
875
876 case TGSI_OPCODE_CEIL:
877 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
878 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
879 break;
880
881 case TGSI_OPCODE_I2F:
882 /* deprecated? */
883 assert(0);
884 return FALSE;
885 break;
886
887 case TGSI_OPCODE_NOT:
888 /* deprecated? */
889 assert(0);
890 return FALSE;
891 break;
892
893 case TGSI_OPCODE_TRUNC:
894 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
895 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
896 break;
897
898 case TGSI_OPCODE_SHL:
899 /* deprecated? */
900 assert(0);
901 return FALSE;
902 break;
903
904 case TGSI_OPCODE_ISHR:
905 /* deprecated? */
906 assert(0);
907 return FALSE;
908 break;
909
910 case TGSI_OPCODE_AND:
911 /* deprecated? */
912 assert(0);
913 return FALSE;
914 break;
915
916 case TGSI_OPCODE_OR:
917 /* deprecated? */
918 assert(0);
919 return FALSE;
920 break;
921
922 case TGSI_OPCODE_MOD:
923 /* deprecated? */
924 assert(0);
925 return FALSE;
926 break;
927
928 case TGSI_OPCODE_XOR:
929 /* deprecated? */
930 assert(0);
931 return FALSE;
932 break;
933
934 case TGSI_OPCODE_SAD:
935 /* deprecated? */
936 assert(0);
937 return FALSE;
938 break;
939
940 case TGSI_OPCODE_TXF:
941 /* deprecated? */
942 assert(0);
943 return FALSE;
944 break;
945
946 case TGSI_OPCODE_TXQ:
947 /* deprecated? */
948 assert(0);
949 return FALSE;
950 break;
951
952 case TGSI_OPCODE_CONT:
953 return FALSE;
954
955 case TGSI_OPCODE_EMIT:
956 return FALSE;
957 break;
958
959 case TGSI_OPCODE_ENDPRIM:
960 return FALSE;
961 break;
962
963 case TGSI_OPCODE_NOP:
964 break;
965
966 default:
967 return FALSE;
968 }
969
970 if (info->num_dst) {
971 lp_emit_store_aos(bld, inst, 0, dst0);
972 }
973
974 return TRUE;
975 }
976
977
978 void
979 lp_build_tgsi_aos(struct gallivm_state *gallivm,
980 const struct tgsi_token *tokens,
981 struct lp_type type,
982 const unsigned char swizzles[4],
983 LLVMValueRef consts_ptr,
984 const LLVMValueRef *inputs,
985 LLVMValueRef *outputs,
986 struct lp_build_sampler_aos *sampler,
987 const struct tgsi_shader_info *info)
988 {
989 struct lp_build_tgsi_aos_context bld;
990 struct tgsi_parse_context parse;
991 uint num_immediates = 0;
992 unsigned chan;
993 int pc = 0;
994
995 /* Setup build context */
996 memset(&bld, 0, sizeof bld);
997 lp_build_context_init(&bld.bld_base.base, gallivm, type);
998 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
999 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1000 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1001
1002 for (chan = 0; chan < 4; ++chan) {
1003 bld.swizzles[chan] = swizzles[chan];
1004 bld.inv_swizzles[swizzles[chan]] = chan;
1005 }
1006
1007 bld.inputs = inputs;
1008 bld.outputs = outputs;
1009 bld.consts_ptr = consts_ptr;
1010 bld.sampler = sampler;
1011 bld.indirect_files = info->indirect_files;
1012 bld.bld_base.emit_swizzle = swizzle_aos;
1013 bld.bld_base.info = info;
1014
1015 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1016 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1017 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1018 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1019
1020 /* Set opcode actions */
1021 lp_set_default_actions_cpu(&bld.bld_base);
1022
1023 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1024 return;
1025 }
1026
1027 tgsi_parse_init(&parse, tokens);
1028
1029 while (!tgsi_parse_end_of_tokens(&parse)) {
1030 tgsi_parse_token(&parse);
1031
1032 switch(parse.FullToken.Token.Type) {
1033 case TGSI_TOKEN_TYPE_DECLARATION:
1034 /* Inputs already interpolated */
1035 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1036 break;
1037
1038 case TGSI_TOKEN_TYPE_INSTRUCTION:
1039 /* save expanded instruction */
1040 lp_bld_tgsi_add_instruction(&bld.bld_base,
1041 &parse.FullToken.FullInstruction);
1042 break;
1043
1044 case TGSI_TOKEN_TYPE_IMMEDIATE:
1045 /* simply copy the immediate values into the next immediates[] slot */
1046 {
1047 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1048 float imm[4];
1049 assert(size <= 4);
1050 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1051 for (chan = 0; chan < 4; ++chan) {
1052 imm[chan] = 0.0f;
1053 }
1054 for (chan = 0; chan < size; ++chan) {
1055 unsigned swizzle = bld.swizzles[chan];
1056 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1057 }
1058 bld.immediates[num_immediates] =
1059 lp_build_const_aos(gallivm, type,
1060 imm[0], imm[1], imm[2], imm[3],
1061 NULL);
1062 num_immediates++;
1063 }
1064 break;
1065
1066 case TGSI_TOKEN_TYPE_PROPERTY:
1067 break;
1068
1069 default:
1070 assert(0);
1071 }
1072 }
1073
1074 while (pc != -1) {
1075 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1076 const struct tgsi_opcode_info *opcode_info =
1077 tgsi_get_opcode_info(instr->Instruction.Opcode);
1078 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1079 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1080 opcode_info->mnemonic);
1081 }
1082
1083 if (0) {
1084 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1085 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1086 debug_printf("11111111111111111111111111111 \n");
1087 tgsi_dump(tokens, 0);
1088 lp_debug_dump_value(function);
1089 debug_printf("2222222222222222222222222222 \n");
1090 }
1091 tgsi_parse_free(&parse);
1092
1093 if (0) {
1094 LLVMModuleRef module = LLVMGetGlobalParent(
1095 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1096 LLVMDumpModule(module);
1097 }
1098
1099 }
1100