Merge branch 'mesa_7_5_branch' into mesa_7_6_branch
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/* First-dimension sizes of the temps[] and immediates[] tables kept in
 * struct lp_build_tgsi_soa_context. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/*
 * Loop header that steps CHAN from 0 up to NUM_CHANNELS - 1.
 * CHAN must be an assignable variable already declared by the caller; the
 * statement following the macro becomes the loop body.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of the first destination
 * register of INST. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Expands to a bare `if (...)`, so the statement that follows is its body.
 * NOTE: an `else` written after that statement would bind to this hidden `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop over all channels, executing the following statement only for the
 * channels enabled in the first destination's write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Symbolic names for the four channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
80
81
/**
 * State shared by the emit_* helpers while translating TGSI instructions
 * into SoA LLVM IR.
 */
struct lp_build_tgsi_soa_context
{
   /* Supplies the LLVM builder, the vector type and the zero/one/undef
    * values used by the emit_* helpers. */
   struct lp_build_context base;

   /* Base pointer for constant loads; emit_fetch indexes it with
    * register_index*4 + swizzle. */
   LLVMValueRef consts_ptr;
   /* Not referenced by the helpers visible in this part of the file. */
   const LLVMValueRef *pos;
   /* Read by emit_fetch for TGSI_FILE_INPUT, indexed [register][channel]. */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* Written by emit_store for TGSI_FILE_OUTPUT, indexed [register][channel]. */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Callback invoked by emit_tex to produce the texel values, together
    * with the opaque pointer handed back to it. */
   lp_emit_fetch_texel_soa_callback emit_fetch_texel;
   void *emit_fetch_texel_context;

   /* Read by emit_fetch for TGSI_FILE_IMMEDIATE; entries are asserted to
    * be non-NULL when read. */
   LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
   /* Written by emit_store and read by emit_fetch for TGSI_FILE_TEMPORARY;
    * a NULL entry makes emit_fetch return the undef value. */
   LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];

   /* Mask context that emit_kil updates through lp_build_mask_update(). */
   struct lp_build_mask_context *mask;
};
99
100
101 /**
102 * Register fetch.
103 */
104 static LLVMValueRef
105 emit_fetch(
106 struct lp_build_tgsi_soa_context *bld,
107 const struct tgsi_full_instruction *inst,
108 unsigned index,
109 const unsigned chan_index )
110 {
111 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
112 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
113 LLVMValueRef res;
114
115 switch (swizzle) {
116 case TGSI_EXTSWIZZLE_X:
117 case TGSI_EXTSWIZZLE_Y:
118 case TGSI_EXTSWIZZLE_Z:
119 case TGSI_EXTSWIZZLE_W:
120
121 switch (reg->SrcRegister.File) {
122 case TGSI_FILE_CONSTANT: {
123 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
124 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
125 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
126 res = lp_build_broadcast_scalar(&bld->base, scalar);
127 break;
128 }
129
130 case TGSI_FILE_IMMEDIATE:
131 res = bld->immediates[reg->SrcRegister.Index][swizzle];
132 assert(res);
133 break;
134
135 case TGSI_FILE_INPUT:
136 res = bld->inputs[reg->SrcRegister.Index][swizzle];
137 assert(res);
138 break;
139
140 case TGSI_FILE_TEMPORARY:
141 res = bld->temps[reg->SrcRegister.Index][swizzle];
142 if(!res)
143 return bld->base.undef;
144 break;
145
146 default:
147 assert( 0 );
148 return bld->base.undef;
149 }
150 break;
151
152 case TGSI_EXTSWIZZLE_ZERO:
153 res = bld->base.zero;
154 break;
155
156 case TGSI_EXTSWIZZLE_ONE:
157 res = bld->base.one;
158 break;
159
160 default:
161 assert( 0 );
162 return bld->base.undef;
163 }
164
165 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
166 case TGSI_UTIL_SIGN_CLEAR:
167 res = lp_build_abs( &bld->base, res );
168 break;
169
170 case TGSI_UTIL_SIGN_SET:
171 res = lp_build_abs( &bld->base, res );
172 res = LLVMBuildNeg( bld->base.builder, res, "" );
173 break;
174
175 case TGSI_UTIL_SIGN_TOGGLE:
176 res = LLVMBuildNeg( bld->base.builder, res, "" );
177 break;
178
179 case TGSI_UTIL_SIGN_KEEP:
180 break;
181 }
182
183 return res;
184 }
185
186
187 /**
188 * Register store.
189 */
190 static void
191 emit_store(
192 struct lp_build_tgsi_soa_context *bld,
193 const struct tgsi_full_instruction *inst,
194 unsigned index,
195 unsigned chan_index,
196 LLVMValueRef value)
197 {
198 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
199
200 switch( inst->Instruction.Saturate ) {
201 case TGSI_SAT_NONE:
202 break;
203
204 case TGSI_SAT_ZERO_ONE:
205 value = lp_build_max(&bld->base, value, bld->base.zero);
206 value = lp_build_min(&bld->base, value, bld->base.one);
207 break;
208
209 case TGSI_SAT_MINUS_PLUS_ONE:
210 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
211 value = lp_build_min(&bld->base, value, bld->base.one);
212 break;
213
214 default:
215 assert(0);
216 }
217
218 switch( reg->DstRegister.File ) {
219 case TGSI_FILE_OUTPUT:
220 bld->outputs[reg->DstRegister.Index][chan_index] = value;
221 break;
222
223 case TGSI_FILE_TEMPORARY:
224 bld->temps[reg->DstRegister.Index][chan_index] = value;
225 break;
226
227 case TGSI_FILE_ADDRESS:
228 /* FIXME */
229 assert(0);
230 break;
231
232 default:
233 assert( 0 );
234 }
235 }
236
237
238 /**
239 * High-level instruction translators.
240 */
241
242 static void
243 emit_tex( struct lp_build_tgsi_soa_context *bld,
244 const struct tgsi_full_instruction *inst,
245 boolean apply_lodbias,
246 boolean projected)
247 {
248 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
249 LLVMValueRef lodbias;
250 LLVMValueRef oow;
251 LLVMValueRef coords[3];
252 LLVMValueRef texel[4];
253 unsigned num_coords;
254 unsigned i;
255
256 switch (inst->InstructionExtTexture.Texture) {
257 case TGSI_TEXTURE_1D:
258 num_coords = 1;
259 break;
260 case TGSI_TEXTURE_2D:
261 case TGSI_TEXTURE_RECT:
262 num_coords = 2;
263 break;
264 case TGSI_TEXTURE_SHADOW1D:
265 case TGSI_TEXTURE_SHADOW2D:
266 case TGSI_TEXTURE_SHADOWRECT:
267 case TGSI_TEXTURE_3D:
268 case TGSI_TEXTURE_CUBE:
269 num_coords = 3;
270 break;
271 default:
272 assert(0);
273 return;
274 }
275
276 if(apply_lodbias)
277 lodbias = emit_fetch( bld, inst, 0, 3 );
278 else
279 lodbias = bld->base.zero;
280
281 if (projected) {
282 oow = emit_fetch( bld, inst, 0, 3 );
283 oow = lp_build_rcp(&bld->base, oow);
284 }
285
286 for (i = 0; i < num_coords; i++) {
287 coords[i] = emit_fetch( bld, inst, 0, i );
288 if (projected)
289 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
290 }
291
292 bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context,
293 unit, num_coords, coords, lodbias, texel);
294
295 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
296 emit_store( bld, inst, 0, i, texel[i] );
297 }
298 }
299
300
301 static void
302 emit_kil(
303 struct lp_build_tgsi_soa_context *bld,
304 const struct tgsi_full_instruction *inst )
305 {
306 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
307 LLVMValueRef terms[NUM_CHANNELS];
308 LLVMValueRef mask;
309 unsigned chan_index;
310
311 memset(&terms, 0, sizeof terms);
312
313 FOR_EACH_CHANNEL( chan_index ) {
314 unsigned swizzle;
315
316 /* Unswizzle channel */
317 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
318
319 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
320 * not to be tested. */
321 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
322 continue;
323
324 /* Check if the component has not been already tested. */
325 assert(swizzle < NUM_CHANNELS);
326 if( !terms[swizzle] )
327 /* TODO: change the comparison operator instead of setting the sign */
328 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
329 }
330
331 mask = NULL;
332 FOR_EACH_CHANNEL( chan_index ) {
333 if(terms[chan_index]) {
334 LLVMValueRef chan_mask;
335
336 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
337
338 if(mask)
339 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
340 else
341 mask = chan_mask;
342 }
343 }
344
345 if(mask)
346 lp_build_mask_update(bld->mask, mask);
347 }
348
349
/**
 * Placeholder for the predicated kill instruction; currently emits nothing.
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   /* XXX todo / fix me */
   (void) bld;
}
356
357
358 /**
359 * Check if inst src/dest regs use indirect addressing into temporary
360 * register file.
361 */
362 static boolean
363 indirect_temp_reference(const struct tgsi_full_instruction *inst)
364 {
365 uint i;
366 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
367 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
368 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
369 reg->SrcRegister.Indirect)
370 return TRUE;
371 }
372 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
373 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
374 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
375 reg->DstRegister.Indirect)
376 return TRUE;
377 }
378 return FALSE;
379 }
380
381
382 static int
383 emit_instruction(
384 struct lp_build_tgsi_soa_context *bld,
385 struct tgsi_full_instruction *inst )
386 {
387 unsigned chan_index;
388 LLVMValueRef src0, src1, src2;
389 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
390 LLVMValueRef dst0;
391
392 /* we can't handle indirect addressing into temp register file yet */
393 if (indirect_temp_reference(inst))
394 return FALSE;
395
396 switch (inst->Instruction.Opcode) {
397 #if 0
398 case TGSI_OPCODE_ARL:
399 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
400 tmp0 = emit_fetch( bld, inst, 0, chan_index );
401 emit_flr(bld, 0, 0);
402 emit_f2it( bld, 0 );
403 emit_store( bld, inst, 0, chan_index, tmp0);
404 }
405 break;
406 #endif
407
408 case TGSI_OPCODE_MOV:
409 case TGSI_OPCODE_SWZ:
410 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
411 tmp0 = emit_fetch( bld, inst, 0, chan_index );
412 emit_store( bld, inst, 0, chan_index, tmp0);
413 }
414 break;
415
416 case TGSI_OPCODE_LIT:
417 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
418 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
419 }
420 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
421 src0 = emit_fetch( bld, inst, 0, CHAN_X );
422 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
423 emit_store( bld, inst, 0, CHAN_Y, dst0);
424 }
425 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
426 /* XMM[1] = SrcReg[0].yyyy */
427 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
428 /* XMM[1] = max(XMM[1], 0) */
429 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
430 /* XMM[2] = SrcReg[0].wwww */
431 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
432 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
433 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
434 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
435 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
436 emit_store( bld, inst, 0, CHAN_Z, dst0);
437 }
438 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
439 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
440 }
441 break;
442
443 case TGSI_OPCODE_RCP:
444 /* TGSI_OPCODE_RECIP */
445 src0 = emit_fetch( bld, inst, 0, CHAN_X );
446 dst0 = lp_build_rcp(&bld->base, src0);
447 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
448 emit_store( bld, inst, 0, chan_index, dst0 );
449 }
450 break;
451
452 case TGSI_OPCODE_RSQ:
453 /* TGSI_OPCODE_RECIPSQRT */
454 src0 = emit_fetch( bld, inst, 0, CHAN_X );
455 src0 = lp_build_abs(&bld->base, src0);
456 dst0 = lp_build_rsqrt(&bld->base, src0);
457 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
458 emit_store( bld, inst, 0, chan_index, dst0 );
459 }
460 break;
461
462 case TGSI_OPCODE_EXP:
463 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
464 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
465 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
466 LLVMValueRef *p_exp2_int_part = NULL;
467 LLVMValueRef *p_frac_part = NULL;
468 LLVMValueRef *p_exp2 = NULL;
469
470 src0 = emit_fetch( bld, inst, 0, CHAN_X );
471
472 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
473 p_exp2_int_part = &tmp0;
474 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
475 p_frac_part = &tmp1;
476 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
477 p_exp2 = &tmp2;
478
479 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
480
481 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
482 emit_store( bld, inst, 0, CHAN_X, tmp0);
483 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
484 emit_store( bld, inst, 0, CHAN_Y, tmp1);
485 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
486 emit_store( bld, inst, 0, CHAN_Z, tmp2);
487 }
488 /* dst.w = 1.0 */
489 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
490 tmp0 = bld->base.one;
491 emit_store( bld, inst, 0, CHAN_W, tmp0);
492 }
493 break;
494
495 case TGSI_OPCODE_LOG:
496 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
497 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
498 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
499 LLVMValueRef *p_floor_log2;
500 LLVMValueRef *p_exp;
501 LLVMValueRef *p_log2;
502
503 src0 = emit_fetch( bld, inst, 0, CHAN_X );
504 src0 = lp_build_abs( &bld->base, src0 );
505
506 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
507 p_floor_log2 = &tmp0;
508 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
509 p_exp = &tmp1;
510 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
511 p_log2 = &tmp2;
512
513 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
514
515 /* dst.x = floor(lg2(abs(src.x))) */
516 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
517 emit_store( bld, inst, 0, CHAN_X, tmp0);
518 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
519 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
520 tmp1 = lp_build_div( &bld->base, src0, tmp1);
521 emit_store( bld, inst, 0, CHAN_Y, tmp1);
522 }
523 /* dst.z = lg2(abs(src.x)) */
524 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
525 emit_store( bld, inst, 0, CHAN_Z, tmp2);
526 }
527 /* dst.w = 1.0 */
528 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
529 tmp0 = bld->base.one;
530 emit_store( bld, inst, 0, CHAN_W, tmp0);
531 }
532 break;
533
534 case TGSI_OPCODE_MUL:
535 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
536 src0 = emit_fetch( bld, inst, 0, chan_index );
537 src1 = emit_fetch( bld, inst, 1, chan_index );
538 dst0 = lp_build_mul(&bld->base, src0, src1);
539 emit_store( bld, inst, 0, chan_index, dst0);
540 }
541 break;
542
543 case TGSI_OPCODE_ADD:
544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
545 src0 = emit_fetch( bld, inst, 0, chan_index );
546 src1 = emit_fetch( bld, inst, 1, chan_index );
547 dst0 = lp_build_add(&bld->base, src0, src1);
548 emit_store( bld, inst, 0, chan_index, dst0);
549 }
550 break;
551
552 case TGSI_OPCODE_DP3:
553 /* TGSI_OPCODE_DOT3 */
554 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
555 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
556 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
557 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
558 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
559 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
560 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
561 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
562 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
563 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
564 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
565 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
566 emit_store( bld, inst, 0, chan_index, tmp0);
567 }
568 break;
569
570 case TGSI_OPCODE_DP4:
571 /* TGSI_OPCODE_DOT4 */
572 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
573 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
574 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
575 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
576 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
577 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
578 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
579 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
580 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
581 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
582 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
583 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
584 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
585 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
586 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
587 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
588 emit_store( bld, inst, 0, chan_index, tmp0);
589 }
590 break;
591
592 case TGSI_OPCODE_DST:
593 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
594 tmp0 = bld->base.one;
595 emit_store( bld, inst, 0, CHAN_X, tmp0);
596 }
597 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
598 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
599 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
600 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
601 emit_store( bld, inst, 0, CHAN_Y, tmp0);
602 }
603 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
604 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
605 emit_store( bld, inst, 0, CHAN_Z, tmp0);
606 }
607 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
608 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
609 emit_store( bld, inst, 0, CHAN_W, tmp0);
610 }
611 break;
612
613 case TGSI_OPCODE_MIN:
614 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
615 src0 = emit_fetch( bld, inst, 0, chan_index );
616 src1 = emit_fetch( bld, inst, 1, chan_index );
617 dst0 = lp_build_min( &bld->base, src0, src1 );
618 emit_store( bld, inst, 0, chan_index, dst0);
619 }
620 break;
621
622 case TGSI_OPCODE_MAX:
623 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
624 src0 = emit_fetch( bld, inst, 0, chan_index );
625 src1 = emit_fetch( bld, inst, 1, chan_index );
626 dst0 = lp_build_max( &bld->base, src0, src1 );
627 emit_store( bld, inst, 0, chan_index, dst0);
628 }
629 break;
630
631 case TGSI_OPCODE_SLT:
632 /* TGSI_OPCODE_SETLT */
633 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
634 src0 = emit_fetch( bld, inst, 0, chan_index );
635 src1 = emit_fetch( bld, inst, 1, chan_index );
636 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
637 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
638 emit_store( bld, inst, 0, chan_index, dst0);
639 }
640 break;
641
642 case TGSI_OPCODE_SGE:
643 /* TGSI_OPCODE_SETGE */
644 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
645 src0 = emit_fetch( bld, inst, 0, chan_index );
646 src1 = emit_fetch( bld, inst, 1, chan_index );
647 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
648 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
649 emit_store( bld, inst, 0, chan_index, dst0);
650 }
651 break;
652
653 case TGSI_OPCODE_MAD:
654 /* TGSI_OPCODE_MADD */
655 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
656 tmp0 = emit_fetch( bld, inst, 0, chan_index );
657 tmp1 = emit_fetch( bld, inst, 1, chan_index );
658 tmp2 = emit_fetch( bld, inst, 2, chan_index );
659 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
660 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
661 emit_store( bld, inst, 0, chan_index, tmp0);
662 }
663 break;
664
665 case TGSI_OPCODE_SUB:
666 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
667 tmp0 = emit_fetch( bld, inst, 0, chan_index );
668 tmp1 = emit_fetch( bld, inst, 1, chan_index );
669 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
670 emit_store( bld, inst, 0, chan_index, tmp0);
671 }
672 break;
673
674 case TGSI_OPCODE_LRP:
675 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
676 src0 = emit_fetch( bld, inst, 0, chan_index );
677 src1 = emit_fetch( bld, inst, 1, chan_index );
678 src2 = emit_fetch( bld, inst, 2, chan_index );
679 tmp0 = lp_build_sub( &bld->base, src1, src2 );
680 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
681 dst0 = lp_build_add( &bld->base, tmp0, src2 );
682 emit_store( bld, inst, 0, chan_index, dst0 );
683 }
684 break;
685
686 case TGSI_OPCODE_CND:
687 return 0;
688 break;
689
690 case TGSI_OPCODE_DP2A:
691 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
692 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
693 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
694 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
695 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
696 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
697 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
698 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
699 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
700 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
701 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
702 }
703 break;
704
705 #if 0
706 case TGSI_OPCODE_FRC:
707 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
708 tmp0 = emit_fetch( bld, inst, 0, chan_index );
709 emit_frc( bld, 0, 0 );
710 emit_store( bld, inst, 0, chan_index, tmp0);
711 }
712 break;
713
714 case TGSI_OPCODE_CLAMP:
715 return 0;
716 break;
717
718 case TGSI_OPCODE_FLR:
719 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
720 tmp0 = emit_fetch( bld, inst, 0, chan_index );
721 emit_flr( bld, 0, 0 );
722 emit_store( bld, inst, 0, chan_index, tmp0);
723 }
724 break;
725
726 case TGSI_OPCODE_ROUND:
727 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
728 tmp0 = emit_fetch( bld, inst, 0, chan_index );
729 emit_rnd( bld, 0, 0 );
730 emit_store( bld, inst, 0, chan_index, tmp0);
731 }
732 break;
733 #endif
734
735 case TGSI_OPCODE_EX2: {
736 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
737 tmp0 = lp_build_exp2( &bld->base, tmp0);
738 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
739 emit_store( bld, inst, 0, chan_index, tmp0);
740 }
741 break;
742 }
743
744 case TGSI_OPCODE_LG2:
745 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
746 tmp0 = lp_build_log2( &bld->base, tmp0);
747 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
748 emit_store( bld, inst, 0, chan_index, tmp0);
749 }
750 break;
751
752 case TGSI_OPCODE_POW:
753 src0 = emit_fetch( bld, inst, 0, CHAN_X );
754 src1 = emit_fetch( bld, inst, 1, CHAN_X );
755 dst0 = lp_build_pow( &bld->base, src0, src1 );
756 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
757 emit_store( bld, inst, 0, chan_index, dst0 );
758 }
759 break;
760
761 case TGSI_OPCODE_XPD:
762 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
763 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
764 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
765 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
766 }
767 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
768 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
769 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
770 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
771 }
772 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
773 tmp2 = tmp0;
774 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
775 tmp5 = tmp3;
776 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
777 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
778 emit_store( bld, inst, 0, CHAN_X, tmp2);
779 }
780 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
781 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
782 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
783 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
784 }
785 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
786 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
787 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
788 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
789 emit_store( bld, inst, 0, CHAN_Y, tmp3);
790 }
791 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
792 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
793 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
794 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
795 emit_store( bld, inst, 0, CHAN_Z, tmp5);
796 }
797 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
798 tmp0 = bld->base.one;
799 emit_store( bld, inst, 0, CHAN_W, tmp0);
800 }
801 break;
802
803 case TGSI_OPCODE_ABS:
804 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
805 tmp0 = emit_fetch( bld, inst, 0, chan_index );
806 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
807 emit_store( bld, inst, 0, chan_index, tmp0);
808 }
809 break;
810
811 case TGSI_OPCODE_RCC:
812 return 0;
813 break;
814
815 case TGSI_OPCODE_DPH:
816 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
817 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
818 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
819 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
820 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
821 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
822 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
823 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
824 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
825 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
826 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
827 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
828 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
829 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
830 emit_store( bld, inst, 0, chan_index, tmp0);
831 }
832 break;
833
834 case TGSI_OPCODE_COS:
835 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
836 tmp0 = lp_build_cos( &bld->base, tmp0 );
837 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
838 emit_store( bld, inst, 0, chan_index, tmp0);
839 }
840 break;
841
842 case TGSI_OPCODE_DDX:
843 return 0;
844 break;
845
846 case TGSI_OPCODE_DDY:
847 return 0;
848 break;
849
850 #if 0
851 case TGSI_OPCODE_KILP:
852 /* predicated kill */
853 emit_kilp( bld );
854 return 0; /* XXX fix me */
855 break;
856 #endif
857
858 case TGSI_OPCODE_KIL:
859 /* conditional kill */
860 emit_kil( bld, inst );
861 break;
862
863 case TGSI_OPCODE_PK2H:
864 return 0;
865 break;
866
867 case TGSI_OPCODE_PK2US:
868 return 0;
869 break;
870
871 case TGSI_OPCODE_PK4B:
872 return 0;
873 break;
874
875 case TGSI_OPCODE_PK4UB:
876 return 0;
877 break;
878
879 case TGSI_OPCODE_RFL:
880 return 0;
881 break;
882
883 case TGSI_OPCODE_SEQ:
884 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
885 src0 = emit_fetch( bld, inst, 0, chan_index );
886 src1 = emit_fetch( bld, inst, 1, chan_index );
887 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
888 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
889 emit_store( bld, inst, 0, chan_index, dst0);
890 }
891 break;
892
893 case TGSI_OPCODE_SFL:
894 return 0;
895 break;
896
897 case TGSI_OPCODE_SGT:
898 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
899 src0 = emit_fetch( bld, inst, 0, chan_index );
900 src1 = emit_fetch( bld, inst, 1, chan_index );
901 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
902 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
903 emit_store( bld, inst, 0, chan_index, dst0);
904 }
905 break;
906
907 case TGSI_OPCODE_SIN:
908 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
909 tmp0 = lp_build_sin( &bld->base, tmp0 );
910 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
911 emit_store( bld, inst, 0, chan_index, tmp0);
912 }
913 break;
914
915 case TGSI_OPCODE_SLE:
916 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
917 src0 = emit_fetch( bld, inst, 0, chan_index );
918 src1 = emit_fetch( bld, inst, 1, chan_index );
919 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
920 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
921 emit_store( bld, inst, 0, chan_index, dst0);
922 }
923 break;
924
925 case TGSI_OPCODE_SNE:
926 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
927 src0 = emit_fetch( bld, inst, 0, chan_index );
928 src1 = emit_fetch( bld, inst, 1, chan_index );
929 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
930 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
931 emit_store( bld, inst, 0, chan_index, dst0);
932 }
933 break;
934
935 case TGSI_OPCODE_STR:
936 return 0;
937 break;
938
939 case TGSI_OPCODE_TEX:
940 emit_tex( bld, inst, FALSE, FALSE );
941 break;
942
943 case TGSI_OPCODE_TXD:
944 return 0;
945 break;
946
947 case TGSI_OPCODE_UP2H:
948 return 0;
949 break;
950
951 case TGSI_OPCODE_UP2US:
952 return 0;
953 break;
954
955 case TGSI_OPCODE_UP4B:
956 return 0;
957 break;
958
959 case TGSI_OPCODE_UP4UB:
960 return 0;
961 break;
962
963 case TGSI_OPCODE_X2D:
964 return 0;
965 break;
966
967 case TGSI_OPCODE_ARA:
968 return 0;
969 break;
970
971 #if 0
972 case TGSI_OPCODE_ARR:
973 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
974 tmp0 = emit_fetch( bld, inst, 0, chan_index );
975 emit_rnd( bld, 0, 0 );
976 emit_f2it( bld, 0 );
977 emit_store( bld, inst, 0, chan_index, tmp0);
978 }
979 break;
980 #endif
981
982 case TGSI_OPCODE_BRA:
983 return 0;
984 break;
985
986 case TGSI_OPCODE_CAL:
987 return 0;
988 break;
989
990 #if 0
991 case TGSI_OPCODE_RET:
992 emit_ret( bld );
993 break;
994 #endif
995
996 case TGSI_OPCODE_END:
997 break;
998
999 #if 0
1000 case TGSI_OPCODE_SSG:
1001 /* TGSI_OPCODE_SGN */
1002 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1003 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1004 emit_sgn( bld, 0, 0 );
1005 emit_store( bld, inst, 0, chan_index, tmp0);
1006 }
1007 break;
1008 #endif
1009
1010 case TGSI_OPCODE_CMP:
1011 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1012 src0 = emit_fetch( bld, inst, 0, chan_index );
1013 src1 = emit_fetch( bld, inst, 1, chan_index );
1014 src2 = emit_fetch( bld, inst, 2, chan_index );
1015 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1016 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1017 emit_store( bld, inst, 0, chan_index, dst0);
1018 }
1019 break;
1020
1021 case TGSI_OPCODE_SCS:
1022 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1023 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1024 tmp0 = lp_build_cos( &bld->base, tmp0 );
1025 emit_store( bld, inst, 0, CHAN_X, tmp0);
1026 }
1027 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1028 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1029 tmp0 = lp_build_sin( &bld->base, tmp0 );
1030 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1031 }
1032 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1033 tmp0 = bld->base.zero;
1034 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1035 }
1036 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1037 tmp0 = bld->base.one;
1038 emit_store( bld, inst, 0, CHAN_W, tmp0);
1039 }
1040 break;
1041
1042 case TGSI_OPCODE_TXB:
1043 emit_tex( bld, inst, TRUE, FALSE );
1044 break;
1045
1046 case TGSI_OPCODE_NRM:
1047 /* fall-through */
1048 case TGSI_OPCODE_NRM4:
1049 /* 3 or 4-component normalization */
1050 {
1051 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1052
1053 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1054 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1055 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1056 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1057
1058 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1059
1060 /* xmm4 = src.x */
1061 /* xmm0 = src.x * src.x */
1062 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1063 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1064 tmp4 = tmp0;
1065 }
1066 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1067
1068 /* xmm5 = src.y */
1069 /* xmm0 = xmm0 + src.y * src.y */
1070 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1071 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1072 tmp5 = tmp1;
1073 }
1074 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1075 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1076
1077 /* xmm6 = src.z */
1078 /* xmm0 = xmm0 + src.z * src.z */
1079 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1080 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1081 tmp6 = tmp1;
1082 }
1083 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1084 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1085
1086 if (dims == 4) {
1087 /* xmm7 = src.w */
1088 /* xmm0 = xmm0 + src.w * src.w */
1089 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1090 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1091 tmp7 = tmp1;
1092 }
1093 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1094 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1095 }
1096
1097 /* xmm1 = 1 / sqrt(xmm0) */
1098 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1099
1100 /* dst.x = xmm1 * src.x */
1101 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1102 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1103 emit_store(bld, inst, 0, CHAN_X, tmp4);
1104 }
1105
1106 /* dst.y = xmm1 * src.y */
1107 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1108 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1109 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1110 }
1111
1112 /* dst.z = xmm1 * src.z */
1113 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1114 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1115 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1116 }
1117
1118 /* dst.w = xmm1 * src.w */
1119 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1120 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1121 emit_store(bld, inst, 0, CHAN_W, tmp7);
1122 }
1123 }
1124
1125 /* dst0.w = 1.0 */
1126 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1127 tmp0 = bld->base.one;
1128 emit_store(bld, inst, 0, CHAN_W, tmp0);
1129 }
1130 }
1131 break;
1132
1133 case TGSI_OPCODE_DIV:
1134 return 0;
1135 break;
1136
1137 case TGSI_OPCODE_DP2:
1138 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1139 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1140 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1141 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1142 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1143 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1144 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1145 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1146 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1147 }
1148 break;
1149
1150 case TGSI_OPCODE_TXL:
1151 emit_tex( bld, inst, TRUE, FALSE );
1152 break;
1153
1154 case TGSI_OPCODE_TXP:
1155 emit_tex( bld, inst, FALSE, TRUE );
1156 break;
1157
1158 case TGSI_OPCODE_BRK:
1159 return 0;
1160 break;
1161
1162 case TGSI_OPCODE_IF:
1163 return 0;
1164 break;
1165
1166 case TGSI_OPCODE_BGNFOR:
1167 return 0;
1168 break;
1169
1170 case TGSI_OPCODE_REP:
1171 return 0;
1172 break;
1173
1174 case TGSI_OPCODE_ELSE:
1175 return 0;
1176 break;
1177
1178 case TGSI_OPCODE_ENDIF:
1179 return 0;
1180 break;
1181
1182 case TGSI_OPCODE_ENDFOR:
1183 return 0;
1184 break;
1185
1186 case TGSI_OPCODE_ENDREP:
1187 return 0;
1188 break;
1189
1190 case TGSI_OPCODE_PUSHA:
1191 return 0;
1192 break;
1193
1194 case TGSI_OPCODE_POPA:
1195 return 0;
1196 break;
1197
1198 case TGSI_OPCODE_CEIL:
1199 return 0;
1200 break;
1201
1202 case TGSI_OPCODE_I2F:
1203 return 0;
1204 break;
1205
1206 case TGSI_OPCODE_NOT:
1207 return 0;
1208 break;
1209
1210 #if 0
1211 case TGSI_OPCODE_TRUNC:
1212 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1213 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1214 emit_f2it( bld, 0 );
1215 emit_i2f( bld, 0 );
1216 emit_store( bld, inst, 0, chan_index, tmp0);
1217 }
1218 break;
1219 #endif
1220
1221 case TGSI_OPCODE_SHL:
1222 return 0;
1223 break;
1224
1225 case TGSI_OPCODE_SHR:
1226 return 0;
1227 break;
1228
1229 case TGSI_OPCODE_AND:
1230 return 0;
1231 break;
1232
1233 case TGSI_OPCODE_OR:
1234 return 0;
1235 break;
1236
1237 case TGSI_OPCODE_MOD:
1238 return 0;
1239 break;
1240
1241 case TGSI_OPCODE_XOR:
1242 return 0;
1243 break;
1244
1245 case TGSI_OPCODE_SAD:
1246 return 0;
1247 break;
1248
1249 case TGSI_OPCODE_TXF:
1250 return 0;
1251 break;
1252
1253 case TGSI_OPCODE_TXQ:
1254 return 0;
1255 break;
1256
1257 case TGSI_OPCODE_CONT:
1258 return 0;
1259 break;
1260
1261 case TGSI_OPCODE_EMIT:
1262 return 0;
1263 break;
1264
1265 case TGSI_OPCODE_ENDPRIM:
1266 return 0;
1267 break;
1268
1269 default:
1270 return 0;
1271 }
1272
1273 return 1;
1274 }
1275
1276
1277 void
1278 lp_build_tgsi_soa(LLVMBuilderRef builder,
1279 const struct tgsi_token *tokens,
1280 union lp_type type,
1281 struct lp_build_mask_context *mask,
1282 LLVMValueRef consts_ptr,
1283 const LLVMValueRef *pos,
1284 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1285 LLVMValueRef (*outputs)[NUM_CHANNELS],
1286 lp_emit_fetch_texel_soa_callback emit_fetch_texel,
1287 void *emit_fetch_texel_context)
1288 {
1289 struct lp_build_tgsi_soa_context bld;
1290 struct tgsi_parse_context parse;
1291 uint num_immediates = 0;
1292 unsigned i;
1293
1294 /* Setup build context */
1295 memset(&bld, 0, sizeof bld);
1296 lp_build_context_init(&bld.base, builder, type);
1297 bld.mask = mask;
1298 bld.pos = pos;
1299 bld.inputs = inputs;
1300 bld.outputs = outputs;
1301 bld.consts_ptr = consts_ptr;
1302 bld.emit_fetch_texel = emit_fetch_texel;
1303 bld.emit_fetch_texel_context = emit_fetch_texel_context;
1304
1305 tgsi_parse_init( &parse, tokens );
1306
1307 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1308 tgsi_parse_token( &parse );
1309
1310 switch( parse.FullToken.Token.Type ) {
1311 case TGSI_TOKEN_TYPE_DECLARATION:
1312 /* Input already interpolated */
1313 break;
1314
1315 case TGSI_TOKEN_TYPE_INSTRUCTION:
1316 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1317 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1318 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1319 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1320 info ? info->mnemonic : "<invalid>");
1321 }
1322 break;
1323
1324 case TGSI_TOKEN_TYPE_IMMEDIATE:
1325 /* simply copy the immediate values into the next immediates[] slot */
1326 {
1327 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1328 assert(size <= 4);
1329 assert(num_immediates < LP_MAX_IMMEDIATES);
1330 for( i = 0; i < size; ++i )
1331 bld.immediates[num_immediates][i] =
1332 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1333 for( i = size; i < 4; ++i )
1334 bld.immediates[num_immediates][i] = bld.base.undef;
1335 num_immediates++;
1336 }
1337 break;
1338
1339 default:
1340 assert( 0 );
1341 }
1342 }
1343
1344 tgsi_parse_free( &parse );
1345 }
1346