Merge branch 'mesa_7_6_branch'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/* Capacity of the fixed-size per-shader register tables kept in
 * struct lp_build_tgsi_soa_context (first dimension of temps[] and
 * immediates[] respectively). */
#define LP_MAX_TEMPS       256
#define LP_MAX_IMMEDIATES  256
61
62
/* Symbolic names for the four register channels. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Loop header iterating CHAN over every channel index. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when channel CHAN is set in the write mask of INST's first
 * destination register. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Statement prefix: the statement that follows runs only when channel CHAN
 * of the first destination register is write-enabled.  This expands to a
 * bare `if`, so beware of a trailing `else` binding to it. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop header visiting only the write-enabled channels of the first
 * destination register of INST. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
80
81
82 struct lp_build_tgsi_soa_context
83 {
84 struct lp_build_context base;
85
86 LLVMValueRef consts_ptr;
87 const LLVMValueRef *pos;
88 const LLVMValueRef (*inputs)[NUM_CHANNELS];
89 LLVMValueRef (*outputs)[NUM_CHANNELS];
90
91 struct lp_build_sampler_soa *sampler;
92
93 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
94 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
95
96 struct lp_build_mask_context *mask;
97 };
98
99
100 /**
101 * Register fetch.
102 */
103 static LLVMValueRef
104 emit_fetch(
105 struct lp_build_tgsi_soa_context *bld,
106 const struct tgsi_full_instruction *inst,
107 unsigned index,
108 const unsigned chan_index )
109 {
110 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
111 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
112 LLVMValueRef res;
113
114 switch (swizzle) {
115 case TGSI_EXTSWIZZLE_X:
116 case TGSI_EXTSWIZZLE_Y:
117 case TGSI_EXTSWIZZLE_Z:
118 case TGSI_EXTSWIZZLE_W:
119
120 switch (reg->SrcRegister.File) {
121 case TGSI_FILE_CONSTANT: {
122 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
123 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
124 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
125 res = lp_build_broadcast_scalar(&bld->base, scalar);
126 break;
127 }
128
129 case TGSI_FILE_IMMEDIATE:
130 res = bld->immediates[reg->SrcRegister.Index][swizzle];
131 assert(res);
132 break;
133
134 case TGSI_FILE_INPUT:
135 res = bld->inputs[reg->SrcRegister.Index][swizzle];
136 assert(res);
137 break;
138
139 case TGSI_FILE_TEMPORARY:
140 res = bld->temps[reg->SrcRegister.Index][swizzle];
141 if(!res)
142 return bld->base.undef;
143 break;
144
145 default:
146 assert( 0 );
147 return bld->base.undef;
148 }
149 break;
150
151 case TGSI_EXTSWIZZLE_ZERO:
152 res = bld->base.zero;
153 break;
154
155 case TGSI_EXTSWIZZLE_ONE:
156 res = bld->base.one;
157 break;
158
159 default:
160 assert( 0 );
161 return bld->base.undef;
162 }
163
164 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
165 case TGSI_UTIL_SIGN_CLEAR:
166 res = lp_build_abs( &bld->base, res );
167 break;
168
169 case TGSI_UTIL_SIGN_SET:
170 res = lp_build_abs( &bld->base, res );
171 res = LLVMBuildNeg( bld->base.builder, res, "" );
172 break;
173
174 case TGSI_UTIL_SIGN_TOGGLE:
175 res = LLVMBuildNeg( bld->base.builder, res, "" );
176 break;
177
178 case TGSI_UTIL_SIGN_KEEP:
179 break;
180 }
181
182 return res;
183 }
184
185
186 /**
187 * Register store.
188 */
189 static void
190 emit_store(
191 struct lp_build_tgsi_soa_context *bld,
192 const struct tgsi_full_instruction *inst,
193 unsigned index,
194 unsigned chan_index,
195 LLVMValueRef value)
196 {
197 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
198
199 switch( inst->Instruction.Saturate ) {
200 case TGSI_SAT_NONE:
201 break;
202
203 case TGSI_SAT_ZERO_ONE:
204 value = lp_build_max(&bld->base, value, bld->base.zero);
205 value = lp_build_min(&bld->base, value, bld->base.one);
206 break;
207
208 case TGSI_SAT_MINUS_PLUS_ONE:
209 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
210 value = lp_build_min(&bld->base, value, bld->base.one);
211 break;
212
213 default:
214 assert(0);
215 }
216
217 switch( reg->DstRegister.File ) {
218 case TGSI_FILE_OUTPUT:
219 bld->outputs[reg->DstRegister.Index][chan_index] = value;
220 break;
221
222 case TGSI_FILE_TEMPORARY:
223 bld->temps[reg->DstRegister.Index][chan_index] = value;
224 break;
225
226 case TGSI_FILE_ADDRESS:
227 /* FIXME */
228 assert(0);
229 break;
230
231 default:
232 assert( 0 );
233 }
234 }
235
236
237 /**
238 * High-level instruction translators.
239 */
240
241 static void
242 emit_tex( struct lp_build_tgsi_soa_context *bld,
243 const struct tgsi_full_instruction *inst,
244 boolean apply_lodbias,
245 boolean projected)
246 {
247 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
248 LLVMValueRef lodbias;
249 LLVMValueRef oow;
250 LLVMValueRef coords[3];
251 LLVMValueRef texel[4];
252 unsigned num_coords;
253 unsigned i;
254
255 switch (inst->InstructionExtTexture.Texture) {
256 case TGSI_TEXTURE_1D:
257 num_coords = 1;
258 break;
259 case TGSI_TEXTURE_2D:
260 case TGSI_TEXTURE_RECT:
261 num_coords = 2;
262 break;
263 case TGSI_TEXTURE_SHADOW1D:
264 case TGSI_TEXTURE_SHADOW2D:
265 case TGSI_TEXTURE_SHADOWRECT:
266 case TGSI_TEXTURE_3D:
267 case TGSI_TEXTURE_CUBE:
268 num_coords = 3;
269 break;
270 default:
271 assert(0);
272 return;
273 }
274
275 if(apply_lodbias)
276 lodbias = emit_fetch( bld, inst, 0, 3 );
277 else
278 lodbias = bld->base.zero;
279
280 if (projected) {
281 oow = emit_fetch( bld, inst, 0, 3 );
282 oow = lp_build_rcp(&bld->base, oow);
283 }
284
285 for (i = 0; i < num_coords; i++) {
286 coords[i] = emit_fetch( bld, inst, 0, i );
287 if (projected)
288 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
289 }
290
291 bld->sampler->emit_fetch_texel(bld->sampler,
292 bld->base.builder,
293 bld->base.type,
294 unit, num_coords, coords, lodbias,
295 texel);
296
297 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
298 emit_store( bld, inst, 0, i, texel[i] );
299 }
300 }
301
302
303 static void
304 emit_kil(
305 struct lp_build_tgsi_soa_context *bld,
306 const struct tgsi_full_instruction *inst )
307 {
308 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
309 LLVMValueRef terms[NUM_CHANNELS];
310 LLVMValueRef mask;
311 unsigned chan_index;
312
313 memset(&terms, 0, sizeof terms);
314
315 FOR_EACH_CHANNEL( chan_index ) {
316 unsigned swizzle;
317
318 /* Unswizzle channel */
319 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
320
321 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
322 * not to be tested. */
323 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
324 continue;
325
326 /* Check if the component has not been already tested. */
327 assert(swizzle < NUM_CHANNELS);
328 if( !terms[swizzle] )
329 /* TODO: change the comparison operator instead of setting the sign */
330 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
331 }
332
333 mask = NULL;
334 FOR_EACH_CHANNEL( chan_index ) {
335 if(terms[chan_index]) {
336 LLVMValueRef chan_mask;
337
338 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
339
340 if(mask)
341 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
342 else
343 mask = chan_mask;
344 }
345 }
346
347 if(mask)
348 lp_build_mask_update(bld->mask, mask);
349 }
350
351
/**
 * Predicated kill -- not implemented; currently emits nothing.
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   /* XXX todo / fix me */
   (void) bld;
}
358
359
360 /**
361 * Check if inst src/dest regs use indirect addressing into temporary
362 * register file.
363 */
364 static boolean
365 indirect_temp_reference(const struct tgsi_full_instruction *inst)
366 {
367 uint i;
368 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
369 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
370 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
371 reg->SrcRegister.Indirect)
372 return TRUE;
373 }
374 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
375 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
376 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
377 reg->DstRegister.Indirect)
378 return TRUE;
379 }
380 return FALSE;
381 }
382
383
384 static int
385 emit_instruction(
386 struct lp_build_tgsi_soa_context *bld,
387 struct tgsi_full_instruction *inst )
388 {
389 unsigned chan_index;
390 LLVMValueRef src0, src1, src2;
391 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
392 LLVMValueRef dst0;
393
394 /* we can't handle indirect addressing into temp register file yet */
395 if (indirect_temp_reference(inst))
396 return FALSE;
397
398 switch (inst->Instruction.Opcode) {
399 #if 0
400 case TGSI_OPCODE_ARL:
401 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
402 tmp0 = emit_fetch( bld, inst, 0, chan_index );
403 emit_flr(bld, 0, 0);
404 emit_f2it( bld, 0 );
405 emit_store( bld, inst, 0, chan_index, tmp0);
406 }
407 break;
408 #endif
409
410 case TGSI_OPCODE_MOV:
411 case TGSI_OPCODE_SWZ:
412 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
413 tmp0 = emit_fetch( bld, inst, 0, chan_index );
414 emit_store( bld, inst, 0, chan_index, tmp0);
415 }
416 break;
417
418 case TGSI_OPCODE_LIT:
419 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
420 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
421 }
422 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
423 src0 = emit_fetch( bld, inst, 0, CHAN_X );
424 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
425 emit_store( bld, inst, 0, CHAN_Y, dst0);
426 }
427 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
428 /* XMM[1] = SrcReg[0].yyyy */
429 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
430 /* XMM[1] = max(XMM[1], 0) */
431 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
432 /* XMM[2] = SrcReg[0].wwww */
433 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
434 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
435 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
436 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
437 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
438 emit_store( bld, inst, 0, CHAN_Z, dst0);
439 }
440 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
441 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
442 }
443 break;
444
445 case TGSI_OPCODE_RCP:
446 /* TGSI_OPCODE_RECIP */
447 src0 = emit_fetch( bld, inst, 0, CHAN_X );
448 dst0 = lp_build_rcp(&bld->base, src0);
449 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
450 emit_store( bld, inst, 0, chan_index, dst0 );
451 }
452 break;
453
454 case TGSI_OPCODE_RSQ:
455 /* TGSI_OPCODE_RECIPSQRT */
456 src0 = emit_fetch( bld, inst, 0, CHAN_X );
457 src0 = lp_build_abs(&bld->base, src0);
458 dst0 = lp_build_rsqrt(&bld->base, src0);
459 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
460 emit_store( bld, inst, 0, chan_index, dst0 );
461 }
462 break;
463
464 case TGSI_OPCODE_EXP:
465 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
466 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
467 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
468 LLVMValueRef *p_exp2_int_part = NULL;
469 LLVMValueRef *p_frac_part = NULL;
470 LLVMValueRef *p_exp2 = NULL;
471
472 src0 = emit_fetch( bld, inst, 0, CHAN_X );
473
474 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
475 p_exp2_int_part = &tmp0;
476 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
477 p_frac_part = &tmp1;
478 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
479 p_exp2 = &tmp2;
480
481 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
482
483 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
484 emit_store( bld, inst, 0, CHAN_X, tmp0);
485 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
486 emit_store( bld, inst, 0, CHAN_Y, tmp1);
487 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
488 emit_store( bld, inst, 0, CHAN_Z, tmp2);
489 }
490 /* dst.w = 1.0 */
491 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
492 tmp0 = bld->base.one;
493 emit_store( bld, inst, 0, CHAN_W, tmp0);
494 }
495 break;
496
497 case TGSI_OPCODE_LOG:
498 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
499 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
500 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
501 LLVMValueRef *p_floor_log2;
502 LLVMValueRef *p_exp;
503 LLVMValueRef *p_log2;
504
505 src0 = emit_fetch( bld, inst, 0, CHAN_X );
506 src0 = lp_build_abs( &bld->base, src0 );
507
508 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
509 p_floor_log2 = &tmp0;
510 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
511 p_exp = &tmp1;
512 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
513 p_log2 = &tmp2;
514
515 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
516
517 /* dst.x = floor(lg2(abs(src.x))) */
518 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
519 emit_store( bld, inst, 0, CHAN_X, tmp0);
520 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
521 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
522 tmp1 = lp_build_div( &bld->base, src0, tmp1);
523 emit_store( bld, inst, 0, CHAN_Y, tmp1);
524 }
525 /* dst.z = lg2(abs(src.x)) */
526 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
527 emit_store( bld, inst, 0, CHAN_Z, tmp2);
528 }
529 /* dst.w = 1.0 */
530 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
531 tmp0 = bld->base.one;
532 emit_store( bld, inst, 0, CHAN_W, tmp0);
533 }
534 break;
535
536 case TGSI_OPCODE_MUL:
537 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
538 src0 = emit_fetch( bld, inst, 0, chan_index );
539 src1 = emit_fetch( bld, inst, 1, chan_index );
540 dst0 = lp_build_mul(&bld->base, src0, src1);
541 emit_store( bld, inst, 0, chan_index, dst0);
542 }
543 break;
544
545 case TGSI_OPCODE_ADD:
546 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
547 src0 = emit_fetch( bld, inst, 0, chan_index );
548 src1 = emit_fetch( bld, inst, 1, chan_index );
549 dst0 = lp_build_add(&bld->base, src0, src1);
550 emit_store( bld, inst, 0, chan_index, dst0);
551 }
552 break;
553
554 case TGSI_OPCODE_DP3:
555 /* TGSI_OPCODE_DOT3 */
556 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
557 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
558 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
559 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
560 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
561 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
562 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
563 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
564 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
565 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
566 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
567 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
568 emit_store( bld, inst, 0, chan_index, tmp0);
569 }
570 break;
571
572 case TGSI_OPCODE_DP4:
573 /* TGSI_OPCODE_DOT4 */
574 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
575 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
576 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
577 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
578 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
579 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
580 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
581 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
582 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
583 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
584 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
585 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
586 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
587 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
588 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
589 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
590 emit_store( bld, inst, 0, chan_index, tmp0);
591 }
592 break;
593
594 case TGSI_OPCODE_DST:
595 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
596 tmp0 = bld->base.one;
597 emit_store( bld, inst, 0, CHAN_X, tmp0);
598 }
599 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
600 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
601 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
602 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
603 emit_store( bld, inst, 0, CHAN_Y, tmp0);
604 }
605 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
606 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
607 emit_store( bld, inst, 0, CHAN_Z, tmp0);
608 }
609 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
610 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
611 emit_store( bld, inst, 0, CHAN_W, tmp0);
612 }
613 break;
614
615 case TGSI_OPCODE_MIN:
616 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
617 src0 = emit_fetch( bld, inst, 0, chan_index );
618 src1 = emit_fetch( bld, inst, 1, chan_index );
619 dst0 = lp_build_min( &bld->base, src0, src1 );
620 emit_store( bld, inst, 0, chan_index, dst0);
621 }
622 break;
623
624 case TGSI_OPCODE_MAX:
625 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
626 src0 = emit_fetch( bld, inst, 0, chan_index );
627 src1 = emit_fetch( bld, inst, 1, chan_index );
628 dst0 = lp_build_max( &bld->base, src0, src1 );
629 emit_store( bld, inst, 0, chan_index, dst0);
630 }
631 break;
632
633 case TGSI_OPCODE_SLT:
634 /* TGSI_OPCODE_SETLT */
635 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
636 src0 = emit_fetch( bld, inst, 0, chan_index );
637 src1 = emit_fetch( bld, inst, 1, chan_index );
638 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
639 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
640 emit_store( bld, inst, 0, chan_index, dst0);
641 }
642 break;
643
644 case TGSI_OPCODE_SGE:
645 /* TGSI_OPCODE_SETGE */
646 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
647 src0 = emit_fetch( bld, inst, 0, chan_index );
648 src1 = emit_fetch( bld, inst, 1, chan_index );
649 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
650 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
651 emit_store( bld, inst, 0, chan_index, dst0);
652 }
653 break;
654
655 case TGSI_OPCODE_MAD:
656 /* TGSI_OPCODE_MADD */
657 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
658 tmp0 = emit_fetch( bld, inst, 0, chan_index );
659 tmp1 = emit_fetch( bld, inst, 1, chan_index );
660 tmp2 = emit_fetch( bld, inst, 2, chan_index );
661 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
662 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
663 emit_store( bld, inst, 0, chan_index, tmp0);
664 }
665 break;
666
667 case TGSI_OPCODE_SUB:
668 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
669 tmp0 = emit_fetch( bld, inst, 0, chan_index );
670 tmp1 = emit_fetch( bld, inst, 1, chan_index );
671 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
672 emit_store( bld, inst, 0, chan_index, tmp0);
673 }
674 break;
675
676 case TGSI_OPCODE_LRP:
677 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
678 src0 = emit_fetch( bld, inst, 0, chan_index );
679 src1 = emit_fetch( bld, inst, 1, chan_index );
680 src2 = emit_fetch( bld, inst, 2, chan_index );
681 tmp0 = lp_build_sub( &bld->base, src1, src2 );
682 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
683 dst0 = lp_build_add( &bld->base, tmp0, src2 );
684 emit_store( bld, inst, 0, chan_index, dst0 );
685 }
686 break;
687
688 case TGSI_OPCODE_CND:
689 return 0;
690 break;
691
692 case TGSI_OPCODE_DP2A:
693 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
694 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
695 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
696 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
697 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
698 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
699 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
700 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
701 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
702 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
703 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
704 }
705 break;
706
707 #if 0
708 case TGSI_OPCODE_FRC:
709 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
710 tmp0 = emit_fetch( bld, inst, 0, chan_index );
711 emit_frc( bld, 0, 0 );
712 emit_store( bld, inst, 0, chan_index, tmp0);
713 }
714 break;
715
716 case TGSI_OPCODE_CLAMP:
717 return 0;
718 break;
719
720 case TGSI_OPCODE_FLR:
721 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
722 tmp0 = emit_fetch( bld, inst, 0, chan_index );
723 emit_flr( bld, 0, 0 );
724 emit_store( bld, inst, 0, chan_index, tmp0);
725 }
726 break;
727
728 case TGSI_OPCODE_ROUND:
729 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
730 tmp0 = emit_fetch( bld, inst, 0, chan_index );
731 emit_rnd( bld, 0, 0 );
732 emit_store( bld, inst, 0, chan_index, tmp0);
733 }
734 break;
735 #endif
736
737 case TGSI_OPCODE_EX2: {
738 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
739 tmp0 = lp_build_exp2( &bld->base, tmp0);
740 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
741 emit_store( bld, inst, 0, chan_index, tmp0);
742 }
743 break;
744 }
745
746 case TGSI_OPCODE_LG2:
747 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
748 tmp0 = lp_build_log2( &bld->base, tmp0);
749 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
750 emit_store( bld, inst, 0, chan_index, tmp0);
751 }
752 break;
753
754 case TGSI_OPCODE_POW:
755 src0 = emit_fetch( bld, inst, 0, CHAN_X );
756 src1 = emit_fetch( bld, inst, 1, CHAN_X );
757 dst0 = lp_build_pow( &bld->base, src0, src1 );
758 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
759 emit_store( bld, inst, 0, chan_index, dst0 );
760 }
761 break;
762
763 case TGSI_OPCODE_XPD:
764 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
765 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
766 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
767 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
768 }
769 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
770 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
771 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
772 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
773 }
774 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
775 tmp2 = tmp0;
776 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
777 tmp5 = tmp3;
778 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
779 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
780 emit_store( bld, inst, 0, CHAN_X, tmp2);
781 }
782 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
783 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
784 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
785 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
786 }
787 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
788 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
789 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
790 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
791 emit_store( bld, inst, 0, CHAN_Y, tmp3);
792 }
793 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
794 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
795 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
796 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
797 emit_store( bld, inst, 0, CHAN_Z, tmp5);
798 }
799 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
800 tmp0 = bld->base.one;
801 emit_store( bld, inst, 0, CHAN_W, tmp0);
802 }
803 break;
804
805 case TGSI_OPCODE_ABS:
806 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
807 tmp0 = emit_fetch( bld, inst, 0, chan_index );
808 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
809 emit_store( bld, inst, 0, chan_index, tmp0);
810 }
811 break;
812
813 case TGSI_OPCODE_RCC:
814 return 0;
815 break;
816
817 case TGSI_OPCODE_DPH:
818 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
819 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
820 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
821 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
822 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
823 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
824 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
825 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
826 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
827 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
828 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
829 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
830 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
831 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
832 emit_store( bld, inst, 0, chan_index, tmp0);
833 }
834 break;
835
836 case TGSI_OPCODE_COS:
837 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
838 tmp0 = lp_build_cos( &bld->base, tmp0 );
839 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
840 emit_store( bld, inst, 0, chan_index, tmp0);
841 }
842 break;
843
844 case TGSI_OPCODE_DDX:
845 return 0;
846 break;
847
848 case TGSI_OPCODE_DDY:
849 return 0;
850 break;
851
852 #if 0
853 case TGSI_OPCODE_KILP:
854 /* predicated kill */
855 emit_kilp( bld );
856 return 0; /* XXX fix me */
857 break;
858 #endif
859
860 case TGSI_OPCODE_KIL:
861 /* conditional kill */
862 emit_kil( bld, inst );
863 break;
864
865 case TGSI_OPCODE_PK2H:
866 return 0;
867 break;
868
869 case TGSI_OPCODE_PK2US:
870 return 0;
871 break;
872
873 case TGSI_OPCODE_PK4B:
874 return 0;
875 break;
876
877 case TGSI_OPCODE_PK4UB:
878 return 0;
879 break;
880
881 case TGSI_OPCODE_RFL:
882 return 0;
883 break;
884
885 case TGSI_OPCODE_SEQ:
886 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
887 src0 = emit_fetch( bld, inst, 0, chan_index );
888 src1 = emit_fetch( bld, inst, 1, chan_index );
889 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
890 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
891 emit_store( bld, inst, 0, chan_index, dst0);
892 }
893 break;
894
895 case TGSI_OPCODE_SFL:
896 return 0;
897 break;
898
899 case TGSI_OPCODE_SGT:
900 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
901 src0 = emit_fetch( bld, inst, 0, chan_index );
902 src1 = emit_fetch( bld, inst, 1, chan_index );
903 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
904 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
905 emit_store( bld, inst, 0, chan_index, dst0);
906 }
907 break;
908
909 case TGSI_OPCODE_SIN:
910 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
911 tmp0 = lp_build_sin( &bld->base, tmp0 );
912 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
913 emit_store( bld, inst, 0, chan_index, tmp0);
914 }
915 break;
916
917 case TGSI_OPCODE_SLE:
918 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
919 src0 = emit_fetch( bld, inst, 0, chan_index );
920 src1 = emit_fetch( bld, inst, 1, chan_index );
921 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
922 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
923 emit_store( bld, inst, 0, chan_index, dst0);
924 }
925 break;
926
927 case TGSI_OPCODE_SNE:
928 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
929 src0 = emit_fetch( bld, inst, 0, chan_index );
930 src1 = emit_fetch( bld, inst, 1, chan_index );
931 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
932 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
933 emit_store( bld, inst, 0, chan_index, dst0);
934 }
935 break;
936
937 case TGSI_OPCODE_STR:
938 return 0;
939 break;
940
941 case TGSI_OPCODE_TEX:
942 emit_tex( bld, inst, FALSE, FALSE );
943 break;
944
945 case TGSI_OPCODE_TXD:
946 return 0;
947 break;
948
949 case TGSI_OPCODE_UP2H:
950 return 0;
951 break;
952
953 case TGSI_OPCODE_UP2US:
954 return 0;
955 break;
956
957 case TGSI_OPCODE_UP4B:
958 return 0;
959 break;
960
961 case TGSI_OPCODE_UP4UB:
962 return 0;
963 break;
964
965 case TGSI_OPCODE_X2D:
966 return 0;
967 break;
968
969 case TGSI_OPCODE_ARA:
970 return 0;
971 break;
972
973 #if 0
974 case TGSI_OPCODE_ARR:
975 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
976 tmp0 = emit_fetch( bld, inst, 0, chan_index );
977 emit_rnd( bld, 0, 0 );
978 emit_f2it( bld, 0 );
979 emit_store( bld, inst, 0, chan_index, tmp0);
980 }
981 break;
982 #endif
983
984 case TGSI_OPCODE_BRA:
985 return 0;
986 break;
987
988 case TGSI_OPCODE_CAL:
989 return 0;
990 break;
991
992 #if 0
993 case TGSI_OPCODE_RET:
994 emit_ret( bld );
995 break;
996 #endif
997
998 case TGSI_OPCODE_END:
999 break;
1000
1001 #if 0
1002 case TGSI_OPCODE_SSG:
1003 /* TGSI_OPCODE_SGN */
1004 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1005 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1006 emit_sgn( bld, 0, 0 );
1007 emit_store( bld, inst, 0, chan_index, tmp0);
1008 }
1009 break;
1010 #endif
1011
1012 case TGSI_OPCODE_CMP:
1013 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1014 src0 = emit_fetch( bld, inst, 0, chan_index );
1015 src1 = emit_fetch( bld, inst, 1, chan_index );
1016 src2 = emit_fetch( bld, inst, 2, chan_index );
1017 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1018 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1019 emit_store( bld, inst, 0, chan_index, dst0);
1020 }
1021 break;
1022
1023 case TGSI_OPCODE_SCS:
1024 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1025 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1026 tmp0 = lp_build_cos( &bld->base, tmp0 );
1027 emit_store( bld, inst, 0, CHAN_X, tmp0);
1028 }
1029 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1030 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1031 tmp0 = lp_build_sin( &bld->base, tmp0 );
1032 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1033 }
1034 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1035 tmp0 = bld->base.zero;
1036 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1037 }
1038 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1039 tmp0 = bld->base.one;
1040 emit_store( bld, inst, 0, CHAN_W, tmp0);
1041 }
1042 break;
1043
1044 case TGSI_OPCODE_TXB:
1045 emit_tex( bld, inst, TRUE, FALSE );
1046 break;
1047
1048 case TGSI_OPCODE_NRM:
1049 /* fall-through */
1050 case TGSI_OPCODE_NRM4:
1051 /* 3 or 4-component normalization */
1052 {
1053 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1054
1055 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1056 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1057 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1058 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1059
1060 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1061
1062 /* xmm4 = src.x */
1063 /* xmm0 = src.x * src.x */
1064 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1065 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1066 tmp4 = tmp0;
1067 }
1068 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1069
1070 /* xmm5 = src.y */
1071 /* xmm0 = xmm0 + src.y * src.y */
1072 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1073 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1074 tmp5 = tmp1;
1075 }
1076 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1077 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1078
1079 /* xmm6 = src.z */
1080 /* xmm0 = xmm0 + src.z * src.z */
1081 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1082 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1083 tmp6 = tmp1;
1084 }
1085 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1086 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1087
1088 if (dims == 4) {
1089 /* xmm7 = src.w */
1090 /* xmm0 = xmm0 + src.w * src.w */
1091 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1092 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1093 tmp7 = tmp1;
1094 }
1095 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1096 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1097 }
1098
1099 /* xmm1 = 1 / sqrt(xmm0) */
1100 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1101
1102 /* dst.x = xmm1 * src.x */
1103 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1104 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1105 emit_store(bld, inst, 0, CHAN_X, tmp4);
1106 }
1107
1108 /* dst.y = xmm1 * src.y */
1109 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1110 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1111 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1112 }
1113
1114 /* dst.z = xmm1 * src.z */
1115 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1116 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1117 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1118 }
1119
1120 /* dst.w = xmm1 * src.w */
1121 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1122 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1123 emit_store(bld, inst, 0, CHAN_W, tmp7);
1124 }
1125 }
1126
1127 /* dst0.w = 1.0 */
1128 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1129 tmp0 = bld->base.one;
1130 emit_store(bld, inst, 0, CHAN_W, tmp0);
1131 }
1132 }
1133 break;
1134
1135 case TGSI_OPCODE_DIV:
1136 return 0;
1137 break;
1138
1139 case TGSI_OPCODE_DP2:
1140 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1141 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1142 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1143 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1144 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1145 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1146 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1147 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1148 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1149 }
1150 break;
1151
1152 case TGSI_OPCODE_TXL:
1153 emit_tex( bld, inst, TRUE, FALSE );
1154 break;
1155
1156 case TGSI_OPCODE_TXP:
1157 emit_tex( bld, inst, FALSE, TRUE );
1158 break;
1159
1160 case TGSI_OPCODE_BRK:
1161 return 0;
1162 break;
1163
1164 case TGSI_OPCODE_IF:
1165 return 0;
1166 break;
1167
1168 case TGSI_OPCODE_BGNFOR:
1169 return 0;
1170 break;
1171
1172 case TGSI_OPCODE_REP:
1173 return 0;
1174 break;
1175
1176 case TGSI_OPCODE_ELSE:
1177 return 0;
1178 break;
1179
1180 case TGSI_OPCODE_ENDIF:
1181 return 0;
1182 break;
1183
1184 case TGSI_OPCODE_ENDFOR:
1185 return 0;
1186 break;
1187
1188 case TGSI_OPCODE_ENDREP:
1189 return 0;
1190 break;
1191
1192 case TGSI_OPCODE_PUSHA:
1193 return 0;
1194 break;
1195
1196 case TGSI_OPCODE_POPA:
1197 return 0;
1198 break;
1199
1200 case TGSI_OPCODE_CEIL:
1201 return 0;
1202 break;
1203
1204 case TGSI_OPCODE_I2F:
1205 return 0;
1206 break;
1207
1208 case TGSI_OPCODE_NOT:
1209 return 0;
1210 break;
1211
1212 #if 0
1213 case TGSI_OPCODE_TRUNC:
1214 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1215 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1216 emit_f2it( bld, 0 );
1217 emit_i2f( bld, 0 );
1218 emit_store( bld, inst, 0, chan_index, tmp0);
1219 }
1220 break;
1221 #endif
1222
1223 case TGSI_OPCODE_SHL:
1224 return 0;
1225 break;
1226
1227 case TGSI_OPCODE_SHR:
1228 return 0;
1229 break;
1230
1231 case TGSI_OPCODE_AND:
1232 return 0;
1233 break;
1234
1235 case TGSI_OPCODE_OR:
1236 return 0;
1237 break;
1238
1239 case TGSI_OPCODE_MOD:
1240 return 0;
1241 break;
1242
1243 case TGSI_OPCODE_XOR:
1244 return 0;
1245 break;
1246
1247 case TGSI_OPCODE_SAD:
1248 return 0;
1249 break;
1250
1251 case TGSI_OPCODE_TXF:
1252 return 0;
1253 break;
1254
1255 case TGSI_OPCODE_TXQ:
1256 return 0;
1257 break;
1258
1259 case TGSI_OPCODE_CONT:
1260 return 0;
1261 break;
1262
1263 case TGSI_OPCODE_EMIT:
1264 return 0;
1265 break;
1266
1267 case TGSI_OPCODE_ENDPRIM:
1268 return 0;
1269 break;
1270
1271 default:
1272 return 0;
1273 }
1274
1275 return 1;
1276 }
1277
1278
1279 void
1280 lp_build_tgsi_soa(LLVMBuilderRef builder,
1281 const struct tgsi_token *tokens,
1282 union lp_type type,
1283 struct lp_build_mask_context *mask,
1284 LLVMValueRef consts_ptr,
1285 const LLVMValueRef *pos,
1286 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1287 LLVMValueRef (*outputs)[NUM_CHANNELS],
1288 struct lp_build_sampler_soa *sampler)
1289 {
1290 struct lp_build_tgsi_soa_context bld;
1291 struct tgsi_parse_context parse;
1292 uint num_immediates = 0;
1293 unsigned i;
1294
1295 /* Setup build context */
1296 memset(&bld, 0, sizeof bld);
1297 lp_build_context_init(&bld.base, builder, type);
1298 bld.mask = mask;
1299 bld.pos = pos;
1300 bld.inputs = inputs;
1301 bld.outputs = outputs;
1302 bld.consts_ptr = consts_ptr;
1303 bld.sampler = sampler;
1304
1305 tgsi_parse_init( &parse, tokens );
1306
1307 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1308 tgsi_parse_token( &parse );
1309
1310 switch( parse.FullToken.Token.Type ) {
1311 case TGSI_TOKEN_TYPE_DECLARATION:
1312 /* Input already interpolated */
1313 break;
1314
1315 case TGSI_TOKEN_TYPE_INSTRUCTION:
1316 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1317 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1318 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1319 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1320 info ? info->mnemonic : "<invalid>");
1321 }
1322 break;
1323
1324 case TGSI_TOKEN_TYPE_IMMEDIATE:
1325 /* simply copy the immediate values into the next immediates[] slot */
1326 {
1327 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1328 assert(size <= 4);
1329 assert(num_immediates < LP_MAX_IMMEDIATES);
1330 for( i = 0; i < size; ++i )
1331 bld.immediates[num_immediates][i] =
1332 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1333 for( i = size; i < 4; ++i )
1334 bld.immediates[num_immediates][i] = bld.base.undef;
1335 num_immediates++;
1336 }
1337 break;
1338
1339 default:
1340 assert( 0 );
1341 }
1342 }
1343
1344 tgsi_parse_free( &parse );
1345 }
1346