gallivm: handle SAMPLE opcode in aos sampling
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40 #include "pipe/p_config.h"
41 #include "pipe/p_shader_tokens.h"
42 #include "util/u_debug.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "lp_bld_type.h"
51 #include "lp_bld_const.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_quad.h"
57 #include "lp_bld_tgsi.h"
58 #include "lp_bld_debug.h"
59 #include "lp_bld_sample.h"
60
61
62 /**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66 static LLVMValueRef
67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
68 LLVMValueRef a,
69 unsigned swizzle_x,
70 unsigned swizzle_y,
71 unsigned swizzle_z,
72 unsigned swizzle_w)
73 {
74 unsigned char swizzles[4];
75 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77 assert(swizzle_x < 4);
78 assert(swizzle_y < 4);
79 assert(swizzle_z < 4);
80 assert(swizzle_w < 4);
81
82 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88 }
89
90
91 static LLVMValueRef
92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93 LLVMValueRef a,
94 unsigned chan)
95 {
96 chan = bld->swizzles[chan];
97 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98 }
99
100
101 static LLVMValueRef
102 emit_fetch_constant(
103 struct lp_build_tgsi_context * bld_base,
104 const struct tgsi_full_src_register * reg,
105 enum tgsi_opcode_type stype,
106 unsigned swizzle)
107 {
108 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110 struct lp_type type = bld_base->base.type;
111 LLVMValueRef res;
112 unsigned chan;
113
114 assert(!reg->Register.Indirect);
115
116 /*
117 * Get the constants components
118 */
119
120 res = bld->bld_base.base.undef;
121 for (chan = 0; chan < 4; ++chan) {
122 LLVMValueRef index;
123 LLVMValueRef scalar_ptr;
124 LLVMValueRef scalar;
125 LLVMValueRef swizzle;
126
127 index = lp_build_const_int32(bld->bld_base.base.gallivm,
128 reg->Register.Index * 4 + chan);
129
130 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136 /*
137 * NOTE: constants array is always assumed to be RGBA
138 */
139
140 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141 bld->swizzles[chan]);
142
143 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144 }
145
146 /*
147 * Broadcast the first quaternion to all others.
148 *
149 * XXX: could be factored into a reusable function.
150 */
151
152 if (type.length > 4) {
153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154 unsigned i;
155
156 for (chan = 0; chan < 4; ++chan) {
157 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158 }
159
160 for (i = 4; i < type.length; ++i) {
161 shuffles[i] = shuffles[i % 4];
162 }
163
164 res = LLVMBuildShuffleVector(builder,
165 res, bld->bld_base.base.undef,
166 LLVMConstVector(shuffles, type.length),
167 "");
168 }
169 return res;
170 }
171
172 static LLVMValueRef
173 emit_fetch_immediate(
174 struct lp_build_tgsi_context * bld_base,
175 const struct tgsi_full_src_register * reg,
176 enum tgsi_opcode_type stype,
177 unsigned swizzle)
178 {
179 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180 LLVMValueRef res = bld->immediates[reg->Register.Index];
181 assert(res);
182 return res;
183 }
184
185 static LLVMValueRef
186 emit_fetch_input(
187 struct lp_build_tgsi_context * bld_base,
188 const struct tgsi_full_src_register * reg,
189 enum tgsi_opcode_type stype,
190 unsigned swizzle)
191 {
192 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193 LLVMValueRef res = bld->inputs[reg->Register.Index];
194 assert(!reg->Register.Indirect);
195 assert(res);
196 return res;
197 }
198
199 static LLVMValueRef
200 emit_fetch_temporary(
201 struct lp_build_tgsi_context * bld_base,
202 const struct tgsi_full_src_register * reg,
203 enum tgsi_opcode_type stype,
204 unsigned swizzle)
205 {
206 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210 assert(!reg->Register.Indirect);
211 if (!res)
212 return bld->bld_base.base.undef;
213
214 return res;
215 }
216
217 /**
218 * Register store.
219 */
220 void
221 lp_emit_store_aos(
222 struct lp_build_tgsi_aos_context *bld,
223 const struct tgsi_full_instruction *inst,
224 unsigned index,
225 LLVMValueRef value)
226 {
227 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229 LLVMValueRef mask = NULL;
230 LLVMValueRef ptr;
231
232 /*
233 * Saturate the value
234 */
235
236 switch (inst->Instruction.Saturate) {
237 case TGSI_SAT_NONE:
238 break;
239
240 case TGSI_SAT_ZERO_ONE:
241 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
242 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
243 break;
244
245 case TGSI_SAT_MINUS_PLUS_ONE:
246 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
247 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
248 break;
249
250 default:
251 assert(0);
252 }
253
254 /*
255 * Translate the register file
256 */
257
258 assert(!reg->Register.Indirect);
259
260 switch (reg->Register.File) {
261 case TGSI_FILE_OUTPUT:
262 ptr = bld->outputs[reg->Register.Index];
263 break;
264
265 case TGSI_FILE_TEMPORARY:
266 ptr = bld->temps[reg->Register.Index];
267 break;
268
269 case TGSI_FILE_ADDRESS:
270 ptr = bld->addr[reg->Indirect.Index];
271 break;
272
273 case TGSI_FILE_PREDICATE:
274 ptr = bld->preds[reg->Register.Index];
275 break;
276
277 default:
278 assert(0);
279 return;
280 }
281
282 if (!ptr)
283 return;
284 /*
285 * Predicate
286 */
287
288 if (inst->Instruction.Predicate) {
289 LLVMValueRef pred;
290
291 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
292
293 pred = LLVMBuildLoad(builder,
294 bld->preds[inst->Predicate.Index], "");
295
296 /*
297 * Convert the value to an integer mask.
298 */
299 pred = lp_build_compare(bld->bld_base.base.gallivm,
300 bld->bld_base.base.type,
301 PIPE_FUNC_NOTEQUAL,
302 pred,
303 bld->bld_base.base.zero);
304
305 if (inst->Predicate.Negate) {
306 pred = LLVMBuildNot(builder, pred, "");
307 }
308
309 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
310 inst->Predicate.SwizzleX,
311 inst->Predicate.SwizzleY,
312 inst->Predicate.SwizzleZ,
313 inst->Predicate.SwizzleW);
314
315 if (mask) {
316 mask = LLVMBuildAnd(builder, mask, pred, "");
317 } else {
318 mask = pred;
319 }
320 }
321
322 /*
323 * Writemask
324 */
325
326 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
327 LLVMValueRef writemask;
328
329 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
330 bld->bld_base.base.type,
331 reg->Register.WriteMask,
332 TGSI_NUM_CHANNELS,
333 bld->swizzles);
334
335 if (mask) {
336 mask = LLVMBuildAnd(builder, mask, writemask, "");
337 } else {
338 mask = writemask;
339 }
340 }
341
342 if (mask) {
343 LLVMValueRef orig_value;
344
345 orig_value = LLVMBuildLoad(builder, ptr, "");
346 value = lp_build_select(&bld->bld_base.base,
347 mask, value, orig_value);
348 }
349
350 LLVMBuildStore(builder, value, ptr);
351 }
352
353
354 /**
355 * High-level instruction translators.
356 */
357
358 static LLVMValueRef
359 emit_tex(struct lp_build_tgsi_aos_context *bld,
360 const struct tgsi_full_instruction *inst,
361 enum lp_build_tex_modifier modifier)
362 {
363 unsigned target;
364 unsigned unit;
365 LLVMValueRef coords;
366 struct lp_derivatives derivs = { {NULL}, {NULL} };
367
368 if (!bld->sampler) {
369 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
370 return bld->bld_base.base.undef;
371 }
372
373 target = inst->Texture.Texture;
374
375 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
376
377 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
378 /* probably not going to work */
379 derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
380 derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
381 unit = inst->Src[3].Register.Index;
382 }
383 else {
384 unit = inst->Src[1].Register.Index;
385 }
386 return bld->sampler->emit_fetch_texel(bld->sampler,
387 &bld->bld_base.base,
388 target, unit,
389 coords, derivs,
390 modifier);
391 }
392
393
394 static LLVMValueRef
395 emit_sample(struct lp_build_tgsi_aos_context *bld,
396 const struct tgsi_full_instruction *inst,
397 enum lp_build_tex_modifier modifier)
398 {
399 unsigned target;
400 unsigned unit;
401 LLVMValueRef coords;
402 struct lp_derivatives derivs = { {NULL}, {NULL} };
403
404 if (!bld->sampler) {
405 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
406 return bld->bld_base.base.undef;
407 }
408
409 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
410
411 /* ignore modifiers, can't handle different sampler / sampler view, etc... */
412 unit = inst->Src[1].Register.Index;
413 assert(inst->Src[2].Register.Index == unit);
414
415 target = bld->sv[unit].Resource;
416
417 return bld->sampler->emit_fetch_texel(bld->sampler,
418 &bld->bld_base.base,
419 target, unit,
420 coords, derivs,
421 modifier);
422 }
423
424
425 void
426 lp_emit_declaration_aos(
427 struct lp_build_tgsi_aos_context *bld,
428 const struct tgsi_full_declaration *decl)
429 {
430 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
431 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
432
433 unsigned first = decl->Range.First;
434 unsigned last = decl->Range.Last;
435 unsigned idx;
436
437 for (idx = first; idx <= last; ++idx) {
438 switch (decl->Declaration.File) {
439 case TGSI_FILE_TEMPORARY:
440 assert(idx < LP_MAX_INLINED_TEMPS);
441 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
442 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
443 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
444 vec_type, array_size, "");
445 } else {
446 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
447 }
448 break;
449
450 case TGSI_FILE_OUTPUT:
451 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
452 break;
453
454 case TGSI_FILE_ADDRESS:
455 assert(idx < LP_MAX_TGSI_ADDRS);
456 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
457 break;
458
459 case TGSI_FILE_PREDICATE:
460 assert(idx < LP_MAX_TGSI_PREDS);
461 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
462 break;
463
464 case TGSI_FILE_SAMPLER_VIEW:
465 /*
466 * The target stored here MUST match whatever there actually
467 * is in the set sampler views (what about return type?).
468 */
469 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
470 for (idx = first; idx <= last; ++idx) {
471 bld->sv[idx] = decl->SamplerView;
472 }
473 break;
474
475 default:
476 /* don't need to declare other vars */
477 break;
478 }
479 }
480 }
481
482
483 /**
484 * Emit LLVM for one TGSI instruction.
485 * \param return TRUE for success, FALSE otherwise
486 */
487 boolean
488 lp_emit_instruction_aos(
489 struct lp_build_tgsi_aos_context *bld,
490 const struct tgsi_full_instruction *inst,
491 const struct tgsi_opcode_info *info,
492 int *pc)
493 {
494 LLVMValueRef src0, src1, src2;
495 LLVMValueRef tmp0, tmp1;
496 LLVMValueRef dst0 = NULL;
497
498 /*
499 * Stores and write masks are handled in a general fashion after the long
500 * instruction opcode switch statement.
501 *
502 * Although not stricitly necessary, we avoid generating instructions for
503 * channels which won't be stored, in cases where's that easy. For some
504 * complex instructions, like texture sampling, it is more convenient to
505 * assume a full writemask and then let LLVM optimization passes eliminate
506 * redundant code.
507 */
508
509 (*pc)++;
510
511 assert(info->num_dst <= 1);
512 if (info->num_dst) {
513 dst0 = bld->bld_base.base.undef;
514 }
515
516 switch (inst->Instruction.Opcode) {
517 case TGSI_OPCODE_ARL:
518 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
519 dst0 = lp_build_floor(&bld->bld_base.base, src0);
520 break;
521
522 case TGSI_OPCODE_MOV:
523 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
524 break;
525
526 case TGSI_OPCODE_LIT:
527 return FALSE;
528
529 case TGSI_OPCODE_RCP:
530 /* TGSI_OPCODE_RECIP */
531 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
532 dst0 = lp_build_rcp(&bld->bld_base.base, src0);
533 break;
534
535 case TGSI_OPCODE_RSQ:
536 /* TGSI_OPCODE_RECIPSQRT */
537 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
538 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
539 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
540 break;
541
542 case TGSI_OPCODE_EXP:
543 return FALSE;
544
545 case TGSI_OPCODE_LOG:
546 return FALSE;
547
548 case TGSI_OPCODE_MUL:
549 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
550 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
551 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
552 break;
553
554 case TGSI_OPCODE_ADD:
555 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
556 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
557 dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
558 break;
559
560 case TGSI_OPCODE_DP3:
561 /* TGSI_OPCODE_DOT3 */
562 return FALSE;
563
564 case TGSI_OPCODE_DP4:
565 /* TGSI_OPCODE_DOT4 */
566 return FALSE;
567
568 case TGSI_OPCODE_DST:
569 return FALSE;
570
571 case TGSI_OPCODE_MIN:
572 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
573 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
574 dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
575 break;
576
577 case TGSI_OPCODE_MAX:
578 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
579 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
580 dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
581 break;
582
583 case TGSI_OPCODE_SLT:
584 /* TGSI_OPCODE_SETLT */
585 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
586 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
587 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
588 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
589 break;
590
591 case TGSI_OPCODE_SGE:
592 /* TGSI_OPCODE_SETGE */
593 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
594 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
595 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
596 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
597 break;
598
599 case TGSI_OPCODE_MAD:
600 /* TGSI_OPCODE_MADD */
601 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
602 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
603 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
604 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
605 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
606 break;
607
608 case TGSI_OPCODE_SUB:
609 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
610 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
611 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
612 break;
613
614 case TGSI_OPCODE_LRP:
615 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
616 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
617 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
618 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
619 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
620 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
621 break;
622
623 case TGSI_OPCODE_CND:
624 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
625 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
626 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
627 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
628 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
629 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
630 break;
631
632 case TGSI_OPCODE_DP2A:
633 return FALSE;
634
635 case TGSI_OPCODE_FRC:
636 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
637 tmp0 = lp_build_floor(&bld->bld_base.base, src0);
638 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
639 break;
640
641 case TGSI_OPCODE_CLAMP:
642 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
643 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
644 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
645 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
646 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
647 break;
648
649 case TGSI_OPCODE_FLR:
650 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
651 dst0 = lp_build_floor(&bld->bld_base.base, src0);
652 break;
653
654 case TGSI_OPCODE_ROUND:
655 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
656 dst0 = lp_build_round(&bld->bld_base.base, src0);
657 break;
658
659 case TGSI_OPCODE_EX2:
660 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
661 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
662 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
663 break;
664
665 case TGSI_OPCODE_LG2:
666 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
667 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
668 dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
669 break;
670
671 case TGSI_OPCODE_POW:
672 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
673 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
674 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
675 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
676 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
677 break;
678
679 case TGSI_OPCODE_XPD:
680 return FALSE;
681
682 case TGSI_OPCODE_RCC:
683 /* deprecated? */
684 assert(0);
685 return FALSE;
686
687 case TGSI_OPCODE_DPH:
688 return FALSE;
689
690 case TGSI_OPCODE_COS:
691 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
692 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
693 dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
694 break;
695
696 case TGSI_OPCODE_DDX:
697 return FALSE;
698
699 case TGSI_OPCODE_DDY:
700 return FALSE;
701
702 case TGSI_OPCODE_KILL:
703 return FALSE;
704
705 case TGSI_OPCODE_KILL_IF:
706 return FALSE;
707
708 case TGSI_OPCODE_PK2H:
709 return FALSE;
710 break;
711
712 case TGSI_OPCODE_PK2US:
713 return FALSE;
714 break;
715
716 case TGSI_OPCODE_PK4B:
717 return FALSE;
718 break;
719
720 case TGSI_OPCODE_PK4UB:
721 return FALSE;
722
723 case TGSI_OPCODE_RFL:
724 return FALSE;
725
726 case TGSI_OPCODE_SEQ:
727 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
728 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
729 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
730 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
731 break;
732
733 case TGSI_OPCODE_SFL:
734 dst0 = bld->bld_base.base.zero;
735 break;
736
737 case TGSI_OPCODE_SGT:
738 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
739 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
740 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
741 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
742 break;
743
744 case TGSI_OPCODE_SIN:
745 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
746 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
747 dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
748 break;
749
750 case TGSI_OPCODE_SLE:
751 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
752 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
753 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
754 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
755 break;
756
757 case TGSI_OPCODE_SNE:
758 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
759 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
760 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
761 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
762 break;
763
764 case TGSI_OPCODE_STR:
765 dst0 = bld->bld_base.base.one;
766 break;
767
768 case TGSI_OPCODE_TEX:
769 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
770 break;
771
772 case TGSI_OPCODE_TXD:
773 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
774 break;
775
776 case TGSI_OPCODE_UP2H:
777 /* deprecated */
778 assert (0);
779 return FALSE;
780 break;
781
782 case TGSI_OPCODE_UP2US:
783 /* deprecated */
784 assert(0);
785 return FALSE;
786 break;
787
788 case TGSI_OPCODE_UP4B:
789 /* deprecated */
790 assert(0);
791 return FALSE;
792 break;
793
794 case TGSI_OPCODE_UP4UB:
795 /* deprecated */
796 assert(0);
797 return FALSE;
798 break;
799
800 case TGSI_OPCODE_X2D:
801 /* deprecated? */
802 assert(0);
803 return FALSE;
804 break;
805
806 case TGSI_OPCODE_ARA:
807 /* deprecated */
808 assert(0);
809 return FALSE;
810 break;
811
812 case TGSI_OPCODE_ARR:
813 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
814 dst0 = lp_build_round(&bld->bld_base.base, src0);
815 break;
816
817 case TGSI_OPCODE_BRA:
818 /* deprecated */
819 assert(0);
820 return FALSE;
821 break;
822
823 case TGSI_OPCODE_CAL:
824 return FALSE;
825
826 case TGSI_OPCODE_RET:
827 /* safe to ignore at end */
828 break;
829
830 case TGSI_OPCODE_END:
831 *pc = -1;
832 break;
833
834 case TGSI_OPCODE_SSG:
835 /* TGSI_OPCODE_SGN */
836 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
837 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
838 break;
839
840 case TGSI_OPCODE_CMP:
841 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
842 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
843 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
844 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
845 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
846 break;
847
848 case TGSI_OPCODE_SCS:
849 return FALSE;
850
851 case TGSI_OPCODE_TXB:
852 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
853 break;
854
855 case TGSI_OPCODE_NRM:
856 /* fall-through */
857 case TGSI_OPCODE_NRM4:
858 return FALSE;
859
860 case TGSI_OPCODE_DIV:
861 assert(0);
862 return FALSE;
863 break;
864
865 case TGSI_OPCODE_DP2:
866 return FALSE;
867
868 case TGSI_OPCODE_TXL:
869 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
870 break;
871
872 case TGSI_OPCODE_TXP:
873 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
874 break;
875
876 case TGSI_OPCODE_BRK:
877 return FALSE;
878
879 case TGSI_OPCODE_IF:
880 case TGSI_OPCODE_UIF:
881 return FALSE;
882
883 case TGSI_OPCODE_BGNLOOP:
884 return FALSE;
885
886 case TGSI_OPCODE_BGNSUB:
887 return FALSE;
888
889 case TGSI_OPCODE_ELSE:
890 return FALSE;
891
892 case TGSI_OPCODE_ENDIF:
893 return FALSE;
894
895 case TGSI_OPCODE_ENDLOOP:
896 return FALSE;
897
898 case TGSI_OPCODE_ENDSUB:
899 return FALSE;
900
901 case TGSI_OPCODE_PUSHA:
902 /* deprecated? */
903 assert(0);
904 return FALSE;
905 break;
906
907 case TGSI_OPCODE_POPA:
908 /* deprecated? */
909 assert(0);
910 return FALSE;
911 break;
912
913 case TGSI_OPCODE_CEIL:
914 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
915 dst0 = lp_build_ceil(&bld->bld_base.base, src0);
916 break;
917
918 case TGSI_OPCODE_I2F:
919 assert(0);
920 return FALSE;
921 break;
922
923 case TGSI_OPCODE_NOT:
924 assert(0);
925 return FALSE;
926 break;
927
928 case TGSI_OPCODE_TRUNC:
929 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
930 dst0 = lp_build_trunc(&bld->bld_base.base, src0);
931 break;
932
933 case TGSI_OPCODE_SHL:
934 assert(0);
935 return FALSE;
936 break;
937
938 case TGSI_OPCODE_ISHR:
939 assert(0);
940 return FALSE;
941 break;
942
943 case TGSI_OPCODE_AND:
944 assert(0);
945 return FALSE;
946 break;
947
948 case TGSI_OPCODE_OR:
949 assert(0);
950 return FALSE;
951 break;
952
953 case TGSI_OPCODE_MOD:
954 assert(0);
955 return FALSE;
956 break;
957
958 case TGSI_OPCODE_XOR:
959 assert(0);
960 return FALSE;
961 break;
962
963 case TGSI_OPCODE_SAD:
964 assert(0);
965 return FALSE;
966 break;
967
968 case TGSI_OPCODE_TXF:
969 assert(0);
970 return FALSE;
971 break;
972
973 case TGSI_OPCODE_TXQ:
974 assert(0);
975 return FALSE;
976 break;
977
978 case TGSI_OPCODE_CONT:
979 return FALSE;
980
981 case TGSI_OPCODE_EMIT:
982 return FALSE;
983 break;
984
985 case TGSI_OPCODE_ENDPRIM:
986 return FALSE;
987 break;
988
989 case TGSI_OPCODE_NOP:
990 break;
991
992 case TGSI_OPCODE_SAMPLE:
993 dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
994 break;
995
996 default:
997 return FALSE;
998 }
999
1000 if (info->num_dst) {
1001 lp_emit_store_aos(bld, inst, 0, dst0);
1002 }
1003
1004 return TRUE;
1005 }
1006
1007
1008 void
1009 lp_build_tgsi_aos(struct gallivm_state *gallivm,
1010 const struct tgsi_token *tokens,
1011 struct lp_type type,
1012 const unsigned char swizzles[4],
1013 LLVMValueRef consts_ptr,
1014 const LLVMValueRef *inputs,
1015 LLVMValueRef *outputs,
1016 struct lp_build_sampler_aos *sampler,
1017 const struct tgsi_shader_info *info)
1018 {
1019 struct lp_build_tgsi_aos_context bld;
1020 struct tgsi_parse_context parse;
1021 uint num_immediates = 0;
1022 unsigned chan;
1023 int pc = 0;
1024
1025 /* Setup build context */
1026 memset(&bld, 0, sizeof bld);
1027 lp_build_context_init(&bld.bld_base.base, gallivm, type);
1028 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1029 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1030 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1031
1032 for (chan = 0; chan < 4; ++chan) {
1033 bld.swizzles[chan] = swizzles[chan];
1034 bld.inv_swizzles[swizzles[chan]] = chan;
1035 }
1036
1037 bld.inputs = inputs;
1038 bld.outputs = outputs;
1039 bld.consts_ptr = consts_ptr;
1040 bld.sampler = sampler;
1041 bld.indirect_files = info->indirect_files;
1042 bld.bld_base.emit_swizzle = swizzle_aos;
1043 bld.bld_base.info = info;
1044
1045 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1046 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1047 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1048 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1049
1050 /* Set opcode actions */
1051 lp_set_default_actions_cpu(&bld.bld_base);
1052
1053 if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1054 return;
1055 }
1056
1057 tgsi_parse_init(&parse, tokens);
1058
1059 while (!tgsi_parse_end_of_tokens(&parse)) {
1060 tgsi_parse_token(&parse);
1061
1062 switch(parse.FullToken.Token.Type) {
1063 case TGSI_TOKEN_TYPE_DECLARATION:
1064 /* Inputs already interpolated */
1065 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1066 break;
1067
1068 case TGSI_TOKEN_TYPE_INSTRUCTION:
1069 /* save expanded instruction */
1070 lp_bld_tgsi_add_instruction(&bld.bld_base,
1071 &parse.FullToken.FullInstruction);
1072 break;
1073
1074 case TGSI_TOKEN_TYPE_IMMEDIATE:
1075 /* simply copy the immediate values into the next immediates[] slot */
1076 {
1077 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1078 float imm[4];
1079 assert(size <= 4);
1080 assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
1081 for (chan = 0; chan < 4; ++chan) {
1082 imm[chan] = 0.0f;
1083 }
1084 for (chan = 0; chan < size; ++chan) {
1085 unsigned swizzle = bld.swizzles[chan];
1086 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1087 }
1088 bld.immediates[num_immediates] =
1089 lp_build_const_aos(gallivm, type,
1090 imm[0], imm[1], imm[2], imm[3],
1091 NULL);
1092 num_immediates++;
1093 }
1094 break;
1095
1096 case TGSI_TOKEN_TYPE_PROPERTY:
1097 break;
1098
1099 default:
1100 assert(0);
1101 }
1102 }
1103
1104 while (pc != -1) {
1105 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1106 const struct tgsi_opcode_info *opcode_info =
1107 tgsi_get_opcode_info(instr->Instruction.Opcode);
1108 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1109 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1110 opcode_info->mnemonic);
1111 }
1112
1113 if (0) {
1114 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1115 LLVMValueRef function = LLVMGetBasicBlockParent(block);
1116 debug_printf("11111111111111111111111111111 \n");
1117 tgsi_dump(tokens, 0);
1118 lp_debug_dump_value(function);
1119 debug_printf("2222222222222222222222222222 \n");
1120 }
1121 tgsi_parse_free(&parse);
1122 FREE(bld.bld_base.instructions);
1123
1124 if (0) {
1125 LLVMModuleRef module = LLVMGetGlobalParent(
1126 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1127 LLVMDumpModule(module);
1128 }
1129
1130 }
1131