Merge branch 'gallium-userbuf'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59
60
61 /**
62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
63 * ordering.
64 */
65 static LLVMValueRef
66 swizzle_aos(struct lp_build_tgsi_context *bld_base,
67 LLVMValueRef a,
68 unsigned swizzle_x,
69 unsigned swizzle_y,
70 unsigned swizzle_z,
71 unsigned swizzle_w)
72 {
73 unsigned char swizzles[4];
74 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
75
76 assert(swizzle_x < 4);
77 assert(swizzle_y < 4);
78 assert(swizzle_z < 4);
79 assert(swizzle_w < 4);
80
81 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
82 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
83 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
84 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
85
86 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
87 }
88
89
90 static LLVMValueRef
91 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
92 LLVMValueRef a,
93 unsigned chan)
94 {
95 chan = bld->swizzles[chan];
96 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
97 }
98
99
100 static LLVMValueRef
101 emit_fetch_constant(
102 struct lp_build_tgsi_context * bld_base,
103 const struct tgsi_full_src_register * reg,
104 enum tgsi_opcode_type stype,
105 unsigned swizzle)
106 {
107 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
108 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
109 struct lp_type type = bld_base->base.type;
110 LLVMValueRef res;
111 unsigned chan;
112
113 assert(!reg->Register.Indirect);
114
115 /*
116 * Get the constants components
117 */
118
119 res = bld->bld_base.base.undef;
120 for (chan = 0; chan < 4; ++chan) {
121 LLVMValueRef index;
122 LLVMValueRef scalar_ptr;
123 LLVMValueRef scalar;
124 LLVMValueRef swizzle;
125
126 index = lp_build_const_int32(bld->bld_base.base.gallivm,
127 reg->Register.Index * 4 + chan);
128
129 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
130
131 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
132
133 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
134
135 /*
136 * NOTE: constants array is always assumed to be RGBA
137 */
138
139 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
140 bld->swizzles[chan]);
141
142 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
143 }
144
145 /*
146 * Broadcast the first quaternion to all others.
147 *
148 * XXX: could be factored into a reusable function.
149 */
150
151 if (type.length > 4) {
152 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
153 unsigned i;
154
155 for (chan = 0; chan < 4; ++chan) {
156 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
157 }
158
159 for (i = 4; i < type.length; ++i) {
160 shuffles[i] = shuffles[i % 4];
161 }
162
163 res = LLVMBuildShuffleVector(builder,
164 res, bld->bld_base.base.undef,
165 LLVMConstVector(shuffles, type.length),
166 "");
167 }
168 return res;
169 }
170
171 static LLVMValueRef
172 emit_fetch_immediate(
173 struct lp_build_tgsi_context * bld_base,
174 const struct tgsi_full_src_register * reg,
175 enum tgsi_opcode_type stype,
176 unsigned swizzle)
177 {
178 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
179 LLVMValueRef res = bld->immediates[reg->Register.Index];
180 assert(res);
181 return res;
182 }
183
184 static LLVMValueRef
185 emit_fetch_input(
186 struct lp_build_tgsi_context * bld_base,
187 const struct tgsi_full_src_register * reg,
188 enum tgsi_opcode_type stype,
189 unsigned swizzle)
190 {
191 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
192 LLVMValueRef res = bld->inputs[reg->Register.Index];
193 assert(!reg->Register.Indirect);
194 assert(res);
195 return res;
196 }
197
198 static LLVMValueRef
199 emit_fetch_temporary(
200 struct lp_build_tgsi_context * bld_base,
201 const struct tgsi_full_src_register * reg,
202 enum tgsi_opcode_type stype,
203 unsigned swizzle)
204 {
205 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
206 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
207 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
208 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
209 assert(!reg->Register.Indirect);
210 if (!res)
211 return bld->bld_base.base.undef;
212
213 return res;
214 }
215
216 /**
217 * Register store.
218 */
219 void
220 lp_emit_store_aos(
221 struct lp_build_tgsi_aos_context *bld,
222 const struct tgsi_full_instruction *inst,
223 unsigned index,
224 LLVMValueRef value)
225 {
226 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
227 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
228 LLVMValueRef mask = NULL;
229 LLVMValueRef ptr;
230
231 /*
232 * Saturate the value
233 */
234
235 switch (inst->Instruction.Saturate) {
236 case TGSI_SAT_NONE:
237 break;
238
239 case TGSI_SAT_ZERO_ONE:
240 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
241 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
242 break;
243
244 case TGSI_SAT_MINUS_PLUS_ONE:
245 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
246 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
247 break;
248
249 default:
250 assert(0);
251 }
252
253 /*
254 * Translate the register file
255 */
256
257 assert(!reg->Register.Indirect);
258
259 switch (reg->Register.File) {
260 case TGSI_FILE_OUTPUT:
261 ptr = bld->outputs[reg->Register.Index];
262 break;
263
264 case TGSI_FILE_TEMPORARY:
265 ptr = bld->temps[reg->Register.Index];
266 break;
267
268 case TGSI_FILE_ADDRESS:
269 ptr = bld->addr[reg->Indirect.Index];
270 break;
271
272 case TGSI_FILE_PREDICATE:
273 ptr = bld->preds[reg->Register.Index];
274 break;
275
276 default:
277 assert(0);
278 return;
279 }
280
281 if (!ptr)
282 return;
283 /*
284 * Predicate
285 */
286
287 if (inst->Instruction.Predicate) {
288 LLVMValueRef pred;
289
290 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
291
292 pred = LLVMBuildLoad(builder,
293 bld->preds[inst->Predicate.Index], "");
294
295 /*
296 * Convert the value to an integer mask.
297 */
298 pred = lp_build_compare(bld->bld_base.base.gallivm,
299 bld->bld_base.base.type,
300 PIPE_FUNC_NOTEQUAL,
301 pred,
302 bld->bld_base.base.zero);
303
304 if (inst->Predicate.Negate) {
305 pred = LLVMBuildNot(builder, pred, "");
306 }
307
308 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
309 inst->Predicate.SwizzleX,
310 inst->Predicate.SwizzleY,
311 inst->Predicate.SwizzleZ,
312 inst->Predicate.SwizzleW);
313
314 if (mask) {
315 mask = LLVMBuildAnd(builder, mask, pred, "");
316 } else {
317 mask = pred;
318 }
319 }
320
321 /*
322 * Writemask
323 */
324
325 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
326 LLVMValueRef writemask;
327
328 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
329 bld->bld_base.base.type,
330 reg->Register.WriteMask,
331 bld->swizzles);
332
333 if (mask) {
334 mask = LLVMBuildAnd(builder, mask, writemask, "");
335 } else {
336 mask = writemask;
337 }
338 }
339
340 if (mask) {
341 LLVMValueRef orig_value;
342
343 orig_value = LLVMBuildLoad(builder, ptr, "");
344 value = lp_build_select(&bld->bld_base.base,
345 mask, value, orig_value);
346 }
347
348 LLVMBuildStore(builder, value, ptr);
349 }
350
351
352 /**
353 * High-level instruction translators.
354 */
355
356 static LLVMValueRef
357 emit_tex(struct lp_build_tgsi_aos_context *bld,
358 const struct tgsi_full_instruction *inst,
359 enum lp_build_tex_modifier modifier)
360 {
361 unsigned target;
362 unsigned unit;
363 LLVMValueRef coords;
364 LLVMValueRef ddx;
365 LLVMValueRef ddy;
366
367 if (!bld->sampler) {
368 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
369 return bld->bld_base.base.undef;
370 }
371
372 target = inst->Texture.Texture;
373
374 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
375
376 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
377 ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
378 ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
379 unit = inst->Src[3].Register.Index;
380 } else {
381 #if 0
382 ddx = lp_build_ddx( &bld->bld_base.base, coords );
383 ddy = lp_build_ddy( &bld->bld_base.base, coords );
384 #else
385 /* TODO */
386 ddx = bld->bld_base.base.one;
387 ddy = bld->bld_base.base.one;
388 #endif
389 unit = inst->Src[1].Register.Index;
390 }
391
392 return bld->sampler->emit_fetch_texel(bld->sampler,
393 &bld->bld_base.base,
394 target, unit,
395 coords, ddx, ddy,
396 modifier);
397 }
398
399
400 void
401 lp_emit_declaration_aos(
402 struct lp_build_tgsi_aos_context *bld,
403 const struct tgsi_full_declaration *decl)
404 {
405 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
406 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
407
408 unsigned first = decl->Range.First;
409 unsigned last = decl->Range.Last;
410 unsigned idx;
411
412 for (idx = first; idx <= last; ++idx) {
413 switch (decl->Declaration.File) {
414 case TGSI_FILE_TEMPORARY:
415 assert(idx < LP_MAX_TGSI_TEMPS);
416 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
417 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
418 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
419 vec_type, array_size, "");
420 } else {
421 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
422 }
423 break;
424
425 case TGSI_FILE_OUTPUT:
426 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
427 break;
428
429 case TGSI_FILE_ADDRESS:
430 assert(idx < LP_MAX_TGSI_ADDRS);
431 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
432 break;
433
434 case TGSI_FILE_PREDICATE:
435 assert(idx < LP_MAX_TGSI_PREDS);
436 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
437 break;
438
439 default:
440 /* don't need to declare other vars */
441 break;
442 }
443 }
444 }
445
446
447 /**
448 * Emit LLVM for one TGSI instruction.
449 * \param return TRUE for success, FALSE otherwise
450 */
451 boolean
452 lp_emit_instruction_aos(
453 struct lp_build_tgsi_aos_context *bld,
454 const struct tgsi_full_instruction *inst,
455 const struct tgsi_opcode_info *info,
456 int *pc)
457 {
458 LLVMValueRef src0, src1, src2;
459 LLVMValueRef tmp0, tmp1;
460 LLVMValueRef dst0 = NULL;
461
462 /*
463 * Stores and write masks are handled in a general fashion after the long
464 * instruction opcode switch statement.
465 *
466 * Although not stricitly necessary, we avoid generating instructions for
467 * channels which won't be stored, in cases where's that easy. For some
468 * complex instructions, like texture sampling, it is more convenient to
469 * assume a full writemask and then let LLVM optimization passes eliminate
470 * redundant code.
471 */
472
473 (*pc)++;
474
475 assert(info->num_dst <= 1);
476 if (info->num_dst) {
477 dst0 = bld->bld_base.base.undef;
478 }
479
480 switch (inst->Instruction.Opcode) {
481 case TGSI_OPCODE_ARL:
482 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
483 dst0 = lp_build_floor(&bld->bld_base.base, src0);
484 break;
485
486 case TGSI_OPCODE_MOV:
487 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
488 break;
489
490 case TGSI_OPCODE_LIT:
491 return FALSE;
492
493 case TGSI_OPCODE_RCP:
494 /* TGSI_OPCODE_RECIP */
495 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
496 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
497 break;
498
499 case TGSI_OPCODE_RSQ:
500 /* TGSI_OPCODE_RECIPSQRT */
501 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
502 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
503 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
504 break;
505
506 case TGSI_OPCODE_EXP:
507 return FALSE;
508
509 case TGSI_OPCODE_LOG:
510 return FALSE;
511
512 case TGSI_OPCODE_MUL:
513 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
514 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
515 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
516 break;
517
518 case TGSI_OPCODE_ADD:
519 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
520 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
521 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
522 break;
523
524 case TGSI_OPCODE_DP3:
525 /* TGSI_OPCODE_DOT3 */
526 return FALSE;
527
528 case TGSI_OPCODE_DP4:
529 /* TGSI_OPCODE_DOT4 */
530 return FALSE;
531
532 case TGSI_OPCODE_DST:
533 return FALSE;
534
535 case TGSI_OPCODE_MIN:
536 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
537 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
538 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
539 break;
540
541 case TGSI_OPCODE_MAX:
542 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
543 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
544 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
545 break;
546
547 case TGSI_OPCODE_SLT:
548 /* TGSI_OPCODE_SETLT */
549 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
550 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
551 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
552 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
553 break;
554
555 case TGSI_OPCODE_SGE:
556 /* TGSI_OPCODE_SETGE */
557 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
558 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
559 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
560 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
561 break;
562
563 case TGSI_OPCODE_MAD:
564 /* TGSI_OPCODE_MADD */
565 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
566 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
567 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
568 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
569 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
570 break;
571
572 case TGSI_OPCODE_SUB:
573 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
574 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
575 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
576 break;
577
578 case TGSI_OPCODE_LRP:
579 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
580 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
581 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
582 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
583 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
584 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
585 break;
586
587 case TGSI_OPCODE_CND:
588 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
589 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
590 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
591 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
592 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
593 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
594 break;
595
596 case TGSI_OPCODE_DP2A:
597 return FALSE;
598
599 case TGSI_OPCODE_FRC:
600 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
601 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
602 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
603 break;
604
605 case TGSI_OPCODE_CLAMP:
606 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
607 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
608 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
609 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
610 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
611 break;
612
613 case TGSI_OPCODE_FLR:
614 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
615 dst0 = lp_build_floor(&bld->bld_base.base, src0);
616 break;
617
618 case TGSI_OPCODE_ROUND:
619 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
620 dst0 = lp_build_round(&bld->bld_base.base, src0);
621 break;
622
623 case TGSI_OPCODE_EX2:
624 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
625 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
626 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
627 break;
628
629 case TGSI_OPCODE_LG2:
630 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
631 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
632 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
633 break;
634
635 case TGSI_OPCODE_POW:
636 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
637 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
638 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
639 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
640 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
641 break;
642
643 case TGSI_OPCODE_XPD:
644 return FALSE;
645
646 case TGSI_OPCODE_RCC:
647 /* deprecated? */
648 assert(0);
649 return FALSE;
650
651 case TGSI_OPCODE_DPH:
652 return FALSE;
653
654 case TGSI_OPCODE_COS:
655 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
656 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
657 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
658 break;
659
660 case TGSI_OPCODE_DDX:
661 return FALSE;
662
663 case TGSI_OPCODE_DDY:
664 return FALSE;
665
666 case TGSI_OPCODE_KILP:
667 /* predicated kill */
668 return FALSE;
669
670 case TGSI_OPCODE_KIL:
671 /* conditional kill */
672 return FALSE;
673
674 case TGSI_OPCODE_PK2H:
675 return FALSE;
676 break;
677
678 case TGSI_OPCODE_PK2US:
679 return FALSE;
680 break;
681
682 case TGSI_OPCODE_PK4B:
683 return FALSE;
684 break;
685
686 case TGSI_OPCODE_PK4UB:
687 return FALSE;
688
689 case TGSI_OPCODE_RFL:
690 return FALSE;
691
692 case TGSI_OPCODE_SEQ:
693 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
694 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
695 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
696 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
697 break;
698
699 case TGSI_OPCODE_SFL:
700 dst0 = bld->bld_base.base.zero;
701 break;
702
703 case TGSI_OPCODE_SGT:
704 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
705 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
706 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
707 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
708 break;
709
710 case TGSI_OPCODE_SIN:
711 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
712 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
713 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
714 break;
715
716 case TGSI_OPCODE_SLE:
717 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
718 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
719 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
720 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
721 break;
722
723 case TGSI_OPCODE_SNE:
724 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
725 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
726 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
727 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
728 break;
729
730 case TGSI_OPCODE_STR:
731 dst0 = bld->bld_base.base.one;
732 break;
733
734 case TGSI_OPCODE_TEX:
735 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
736 break;
737
738 case TGSI_OPCODE_TXD:
739 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
740 break;
741
742 case TGSI_OPCODE_UP2H:
743 /* deprecated */
744 assert (0);
745 return FALSE;
746 break;
747
748 case TGSI_OPCODE_UP2US:
749 /* deprecated */
750 assert(0);
751 return FALSE;
752 break;
753
754 case TGSI_OPCODE_UP4B:
755 /* deprecated */
756 assert(0);
757 return FALSE;
758 break;
759
760 case TGSI_OPCODE_UP4UB:
761 /* deprecated */
762 assert(0);
763 return FALSE;
764 break;
765
766 case TGSI_OPCODE_X2D:
767 /* deprecated? */
768 assert(0);
769 return FALSE;
770 break;
771
772 case TGSI_OPCODE_ARA:
773 /* deprecated */
774 assert(0);
775 return FALSE;
776 break;
777
778 case TGSI_OPCODE_ARR:
779 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
780 dst0 = lp_build_round(&bld->bld_base.base, src0);
781 break;
782
783 case TGSI_OPCODE_BRA:
784 /* deprecated */
785 assert(0);
786 return FALSE;
787 break;
788
789 case TGSI_OPCODE_CAL:
790 return FALSE;
791
792 case TGSI_OPCODE_RET:
793 return FALSE;
794
795 case TGSI_OPCODE_END:
796 *pc = -1;
797 break;
798
799 case TGSI_OPCODE_SSG:
800 /* TGSI_OPCODE_SGN */
801 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
802 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
803 break;
804
805 case TGSI_OPCODE_CMP:
806 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
807 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
808 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
809 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
810 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
811 break;
812
813 case TGSI_OPCODE_SCS:
814 return FALSE;
815
816 case TGSI_OPCODE_TXB:
817 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
818 break;
819
820 case TGSI_OPCODE_NRM:
821 /* fall-through */
822 case TGSI_OPCODE_NRM4:
823 return FALSE;
824
825 case TGSI_OPCODE_DIV:
826 /* deprecated */
827 assert(0);
828 return FALSE;
829 break;
830
831 case TGSI_OPCODE_DP2:
832 return FALSE;
833
834 case TGSI_OPCODE_TXL:
835 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
836 break;
837
838 case TGSI_OPCODE_TXP:
839 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
840 break;
841
842 case TGSI_OPCODE_BRK:
843 return FALSE;
844
845 case TGSI_OPCODE_IF:
846 return FALSE;
847
848 case TGSI_OPCODE_BGNLOOP:
849 return FALSE;
850
851 case TGSI_OPCODE_BGNSUB:
852 return FALSE;
853
854 case TGSI_OPCODE_ELSE:
855 return FALSE;
856
857 case TGSI_OPCODE_ENDIF:
858 return FALSE;
859
860 case TGSI_OPCODE_ENDLOOP:
861 return FALSE;
862
863 case TGSI_OPCODE_ENDSUB:
864 return FALSE;
865
866 case TGSI_OPCODE_PUSHA:
867 /* deprecated? */
868 assert(0);
869 return FALSE;
870 break;
871
872 case TGSI_OPCODE_POPA:
873 /* deprecated? */
874 assert(0);
875 return FALSE;
876 break;
877
878 case TGSI_OPCODE_CEIL:
879 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
880 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
881 break;
882
883 case TGSI_OPCODE_I2F:
884 /* deprecated? */
885 assert(0);
886 return FALSE;
887 break;
888
889 case TGSI_OPCODE_NOT:
890 /* deprecated? */
891 assert(0);
892 return FALSE;
893 break;
894
895 case TGSI_OPCODE_TRUNC:
896 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
897 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
898 break;
899
900 case TGSI_OPCODE_SHL:
901 /* deprecated? */
902 assert(0);
903 return FALSE;
904 break;
905
906 case TGSI_OPCODE_ISHR:
907 /* deprecated? */
908 assert(0);
909 return FALSE;
910 break;
911
912 case TGSI_OPCODE_AND:
913 /* deprecated? */
914 assert(0);
915 return FALSE;
916 break;
917
918 case TGSI_OPCODE_OR:
919 /* deprecated? */
920 assert(0);
921 return FALSE;
922 break;
923
924 case TGSI_OPCODE_MOD:
925 /* deprecated? */
926 assert(0);
927 return FALSE;
928 break;
929
930 case TGSI_OPCODE_XOR:
931 /* deprecated? */
932 assert(0);
933 return FALSE;
934 break;
935
936 case TGSI_OPCODE_SAD:
937 /* deprecated? */
938 assert(0);
939 return FALSE;
940 break;
941
942 case TGSI_OPCODE_TXF:
943 /* deprecated? */
944 assert(0);
945 return FALSE;
946 break;
947
948 case TGSI_OPCODE_TXQ:
949 /* deprecated? */
950 assert(0);
951 return FALSE;
952 break;
953
954 case TGSI_OPCODE_CONT:
955 return FALSE;
956
957 case TGSI_OPCODE_EMIT:
958 return FALSE;
959 break;
960
961 case TGSI_OPCODE_ENDPRIM:
962 return FALSE;
963 break;
964
965 case TGSI_OPCODE_NOP:
966 break;
967
968 default:
969 return FALSE;
970 }
971
972 if (info->num_dst) {
973 lp_emit_store_aos(bld, inst, 0, dst0);
974 }
975
976 return TRUE;
977 }
978
979
980 void
981 lp_build_tgsi_aos(struct gallivm_state *gallivm,
982 const struct tgsi_token *tokens,
983 struct lp_type type,
984 const unsigned char swizzles[4],
985 LLVMValueRef consts_ptr,
986 const LLVMValueRef *inputs,
987 LLVMValueRef *outputs,
988 struct lp_build_sampler_aos *sampler,
989 const struct tgsi_shader_info *info)
990 {
991 struct lp_build_tgsi_aos_context bld;
992 struct tgsi_parse_context parse;
993 uint num_immediates = 0;
994 unsigned chan;
995 int pc = 0;
996
997 /* Setup build context */
998 memset(&bld, 0, sizeof bld);
999 lp_build_context_init(&bld.bld_base.base, gallivm, type);
1000 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1001 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1002 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1003
1004 for (chan = 0; chan < 4; ++chan) {
1005 bld.swizzles[chan] = swizzles[chan];
1006 bld.inv_swizzles[swizzles[chan]] = chan;
1007 }
1008
1009 bld.inputs = inputs;
1010 bld.outputs = outputs;
1011 bld.consts_ptr = consts_ptr;
1012 bld.sampler = sampler;
1013 bld.indirect_files = info->indirect_files;
1014 bld.bld_base.emit_swizzle = swizzle_aos;
1015 bld.bld_base.info = info;
1016
1017 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1018 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1019 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1020 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1021
1022 /* Set opcode actions */
1023 lp_set_default_actions_cpu(&bld.bld_base);
1024
1025 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1026 return;
1027 }
1028
1029 tgsi_parse_init(&parse, tokens);
1030
1031 while (!tgsi_parse_end_of_tokens(&parse)) {
1032 tgsi_parse_token(&parse);
1033
1034 switch(parse.FullToken.Token.Type) {
1035 case TGSI_TOKEN_TYPE_DECLARATION:
1036 /* Inputs already interpolated */
1037 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1038 break;
1039
1040 case TGSI_TOKEN_TYPE_INSTRUCTION:
1041 /* save expanded instruction */
1042 lp_bld_tgsi_add_instruction(&bld.bld_base,
1043 &parse.FullToken.FullInstruction);
1044 break;
1045
1046 case TGSI_TOKEN_TYPE_IMMEDIATE:
1047 /* simply copy the immediate values into the next immediates[] slot */
1048 {
1049 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1050 float imm[4];
1051 assert(size <= 4);
1052 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1053 for (chan = 0; chan < 4; ++chan) {
1054 imm[chan] = 0.0f;
1055 }
1056 for (chan = 0; chan < size; ++chan) {
1057 unsigned swizzle = bld.swizzles[chan];
1058 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1059 }
1060 bld.immediates[num_immediates] =
1061 lp_build_const_aos(gallivm, type,
1062 imm[0], imm[1], imm[2], imm[3],
1063 NULL);
1064 num_immediates++;
1065 }
1066 break;
1067
1068 case TGSI_TOKEN_TYPE_PROPERTY:
1069 break;
1070
1071 default:
1072 assert(0);
1073 }
1074 }
1075
1076 while (pc != -1) {
1077 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1078 const struct tgsi_opcode_info *opcode_info =
1079 tgsi_get_opcode_info(instr->Instruction.Opcode);
1080 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1081 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1082 opcode_info->mnemonic);
1083 }
1084
1085 if (0) {
1086 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1087 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1088 debug_printf("11111111111111111111111111111 \n");
1089 tgsi_dump(tokens, 0);
1090 lp_debug_dump_value(function);
1091 debug_printf("2222222222222222222222222222 \n");
1092 }
1093 tgsi_parse_free(&parse);
1094 FREE(bld.bld_base.instructions);
1095
1096 if (0) {
1097 LLVMModuleRef module = LLVMGetGlobalParent(
1098 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1099 LLVMDumpModule(module);
1100 }
1101
1102 }
1103