llvmpipe: Isolate sampling from TGSI translation.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/* Capacity of the per-shader temporary and immediate register tables. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/* Iterate CHAN over every register channel, 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/* Non-zero when channel CHAN is set in the write mask of INST's first
 * destination register. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Statement prefix: run the following statement only if CHAN is enabled. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over the channels enabled in the first destination's
 * write mask; the statement that follows is the loop body. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Symbolic channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
80
81
82 struct lp_build_tgsi_soa_context
83 {
84 struct lp_build_context base;
85
86 LLVMValueRef consts_ptr;
87 const LLVMValueRef *pos;
88 const LLVMValueRef (*inputs)[NUM_CHANNELS];
89 LLVMValueRef (*outputs)[NUM_CHANNELS];
90
91 lp_emit_fetch_texel_soa_callback emit_fetch_texel;
92 void *emit_fetch_texel_context;
93
94 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
95 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
96
97 struct lp_build_mask_context *mask;
98 };
99
100
101 /**
102 * Register fetch.
103 */
104 static LLVMValueRef
105 emit_fetch(
106 struct lp_build_tgsi_soa_context *bld,
107 const struct tgsi_full_instruction *inst,
108 unsigned index,
109 const unsigned chan_index )
110 {
111 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
112 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
113 LLVMValueRef res;
114
115 switch (swizzle) {
116 case TGSI_EXTSWIZZLE_X:
117 case TGSI_EXTSWIZZLE_Y:
118 case TGSI_EXTSWIZZLE_Z:
119 case TGSI_EXTSWIZZLE_W:
120
121 switch (reg->SrcRegister.File) {
122 case TGSI_FILE_CONSTANT: {
123 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
124 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
125 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
126 res = lp_build_broadcast_scalar(&bld->base, scalar);
127 break;
128 }
129
130 case TGSI_FILE_IMMEDIATE:
131 res = bld->immediates[reg->SrcRegister.Index][swizzle];
132 assert(res);
133 break;
134
135 case TGSI_FILE_INPUT:
136 res = bld->inputs[reg->SrcRegister.Index][swizzle];
137 assert(res);
138 break;
139
140 case TGSI_FILE_TEMPORARY:
141 res = bld->temps[reg->SrcRegister.Index][swizzle];
142 if(!res)
143 return bld->base.undef;
144 break;
145
146 default:
147 assert( 0 );
148 return bld->base.undef;
149 }
150 break;
151
152 case TGSI_EXTSWIZZLE_ZERO:
153 res = bld->base.zero;
154 break;
155
156 case TGSI_EXTSWIZZLE_ONE:
157 res = bld->base.one;
158 break;
159
160 default:
161 assert( 0 );
162 return bld->base.undef;
163 }
164
165 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
166 case TGSI_UTIL_SIGN_CLEAR:
167 res = lp_build_abs( &bld->base, res );
168 break;
169
170 case TGSI_UTIL_SIGN_SET:
171 res = lp_build_abs( &bld->base, res );
172 res = LLVMBuildNeg( bld->base.builder, res, "" );
173 break;
174
175 case TGSI_UTIL_SIGN_TOGGLE:
176 res = LLVMBuildNeg( bld->base.builder, res, "" );
177 break;
178
179 case TGSI_UTIL_SIGN_KEEP:
180 break;
181 }
182
183 return res;
184 }
185
186
187 /**
188 * Register store.
189 */
190 static void
191 emit_store(
192 struct lp_build_tgsi_soa_context *bld,
193 const struct tgsi_full_instruction *inst,
194 unsigned index,
195 unsigned chan_index,
196 LLVMValueRef value)
197 {
198 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
199
200 switch( inst->Instruction.Saturate ) {
201 case TGSI_SAT_NONE:
202 break;
203
204 case TGSI_SAT_ZERO_ONE:
205 value = lp_build_max(&bld->base, value, bld->base.zero);
206 value = lp_build_min(&bld->base, value, bld->base.one);
207 break;
208
209 case TGSI_SAT_MINUS_PLUS_ONE:
210 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
211 value = lp_build_min(&bld->base, value, bld->base.one);
212 break;
213
214 default:
215 assert(0);
216 }
217
218 switch( reg->DstRegister.File ) {
219 case TGSI_FILE_OUTPUT:
220 bld->outputs[reg->DstRegister.Index][chan_index] = value;
221 break;
222
223 case TGSI_FILE_TEMPORARY:
224 bld->temps[reg->DstRegister.Index][chan_index] = value;
225 break;
226
227 case TGSI_FILE_ADDRESS:
228 /* FIXME */
229 assert(0);
230 break;
231
232 default:
233 assert( 0 );
234 }
235 }
236
237
238 /**
239 * High-level instruction translators.
240 */
241
242 static void
243 emit_tex( struct lp_build_tgsi_soa_context *bld,
244 const struct tgsi_full_instruction *inst,
245 boolean apply_lodbias,
246 boolean projected)
247 {
248 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
249 LLVMValueRef lodbias;
250 LLVMValueRef oow;
251 LLVMValueRef coords[3];
252 LLVMValueRef texel[4];
253 unsigned num_coords;
254 unsigned i;
255
256 switch (inst->InstructionExtTexture.Texture) {
257 case TGSI_TEXTURE_1D:
258 case TGSI_TEXTURE_SHADOW1D:
259 num_coords = 1;
260 break;
261 case TGSI_TEXTURE_2D:
262 case TGSI_TEXTURE_RECT:
263 case TGSI_TEXTURE_SHADOW2D:
264 case TGSI_TEXTURE_SHADOWRECT:
265 num_coords = 2;
266 break;
267 case TGSI_TEXTURE_3D:
268 case TGSI_TEXTURE_CUBE:
269 num_coords = 3;
270 break;
271 default:
272 assert(0);
273 return;
274 }
275
276 if(apply_lodbias)
277 lodbias = emit_fetch( bld, inst, 0, 3 );
278 else
279 lodbias = bld->base.zero;
280
281 if (projected) {
282 oow = emit_fetch( bld, inst, 0, 3 );
283 oow = lp_build_rcp(&bld->base, oow);
284 }
285
286 for (i = 0; i < num_coords; i++) {
287 coords[i] = emit_fetch( bld, inst, 0, i );
288 if (projected)
289 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
290 }
291
292 bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context,
293 unit, num_coords, coords, lodbias, texel);
294
295 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
296 emit_store( bld, inst, 0, i, texel[i] );
297 }
298 }
299
300
301 static void
302 emit_kil(
303 struct lp_build_tgsi_soa_context *bld,
304 const struct tgsi_full_instruction *inst )
305 {
306 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
307 LLVMValueRef terms[NUM_CHANNELS];
308 LLVMValueRef mask;
309 unsigned chan_index;
310
311 memset(&terms, 0, sizeof terms);
312
313 FOR_EACH_CHANNEL( chan_index ) {
314 unsigned swizzle;
315
316 /* Unswizzle channel */
317 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
318
319 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
320 * not to be tested. */
321 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
322 continue;
323
324 /* Check if the component has not been already tested. */
325 assert(swizzle < NUM_CHANNELS);
326 if( !terms[swizzle] )
327 /* TODO: change the comparison operator instead of setting the sign */
328 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
329 }
330
331 mask = NULL;
332 FOR_EACH_CHANNEL( chan_index ) {
333 if(terms[chan_index]) {
334 LLVMValueRef chan_mask;
335
336 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
337
338 if(mask)
339 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
340 else
341 mask = chan_mask;
342 }
343 }
344
345 if(mask)
346 lp_build_mask_update(bld->mask, mask);
347 }
348
349
/**
 * Translate the predicated kill instruction.
 *
 * Not implemented: this is a placeholder that emits no code.
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   /* XXX todo / fix me */
   (void) bld;
}
356
357
358 /**
359 * Check if inst src/dest regs use indirect addressing into temporary
360 * register file.
361 */
362 static boolean
363 indirect_temp_reference(const struct tgsi_full_instruction *inst)
364 {
365 uint i;
366 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
367 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
368 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
369 reg->SrcRegister.Indirect)
370 return TRUE;
371 }
372 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
373 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
374 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
375 reg->DstRegister.Indirect)
376 return TRUE;
377 }
378 return FALSE;
379 }
380
381
382 static int
383 emit_instruction(
384 struct lp_build_tgsi_soa_context *bld,
385 struct tgsi_full_instruction *inst )
386 {
387 unsigned chan_index;
388 LLVMValueRef src0, src1, src2;
389 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
390 LLVMValueRef dst0;
391
392 /* we can't handle indirect addressing into temp register file yet */
393 if (indirect_temp_reference(inst))
394 return FALSE;
395
396 switch (inst->Instruction.Opcode) {
397 #if 0
398 case TGSI_OPCODE_ARL:
399 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
400 tmp0 = emit_fetch( bld, inst, 0, chan_index );
401 emit_flr(bld, 0, 0);
402 emit_f2it( bld, 0 );
403 emit_store( bld, inst, 0, chan_index, tmp0);
404 }
405 break;
406 #endif
407
408 case TGSI_OPCODE_MOV:
409 case TGSI_OPCODE_SWZ:
410 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
411 tmp0 = emit_fetch( bld, inst, 0, chan_index );
412 emit_store( bld, inst, 0, chan_index, tmp0);
413 }
414 break;
415
416 case TGSI_OPCODE_LIT:
417 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
418 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
419 }
420 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
421 src0 = emit_fetch( bld, inst, 0, CHAN_X );
422 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
423 emit_store( bld, inst, 0, CHAN_Y, dst0);
424 }
425 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
426 /* XMM[1] = SrcReg[0].yyyy */
427 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
428 /* XMM[1] = max(XMM[1], 0) */
429 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
430 /* XMM[2] = SrcReg[0].wwww */
431 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
432 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
433 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
434 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
435 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
436 emit_store( bld, inst, 0, CHAN_Z, dst0);
437 }
438 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
439 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
440 }
441 break;
442
443 case TGSI_OPCODE_RCP:
444 /* TGSI_OPCODE_RECIP */
445 src0 = emit_fetch( bld, inst, 0, CHAN_X );
446 dst0 = lp_build_rcp(&bld->base, src0);
447 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
448 emit_store( bld, inst, 0, chan_index, dst0 );
449 }
450 break;
451
452 case TGSI_OPCODE_RSQ:
453 /* TGSI_OPCODE_RECIPSQRT */
454 src0 = emit_fetch( bld, inst, 0, CHAN_X );
455 src0 = lp_build_abs(&bld->base, src0);
456 dst0 = lp_build_rsqrt(&bld->base, src0);
457 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
458 emit_store( bld, inst, 0, chan_index, dst0 );
459 }
460 break;
461
462 case TGSI_OPCODE_EXP:
463 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
464 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
465 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
466 LLVMValueRef *p_exp2_int_part = NULL;
467 LLVMValueRef *p_frac_part = NULL;
468 LLVMValueRef *p_exp2 = NULL;
469
470 src0 = emit_fetch( bld, inst, 0, CHAN_X );
471
472 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
473 p_exp2_int_part = &tmp0;
474 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
475 p_frac_part = &tmp1;
476 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
477 p_exp2 = &tmp2;
478
479 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
480
481 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
482 emit_store( bld, inst, 0, CHAN_X, tmp0);
483 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
484 emit_store( bld, inst, 0, CHAN_Y, tmp1);
485 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
486 emit_store( bld, inst, 0, CHAN_Z, tmp2);
487 }
488 /* dst.w = 1.0 */
489 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
490 tmp0 = bld->base.one;
491 emit_store( bld, inst, 0, CHAN_W, tmp0);
492 }
493 break;
494
495 case TGSI_OPCODE_LOG:
496 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
497 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
498 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
499 LLVMValueRef *p_floor_log2;
500 LLVMValueRef *p_exp;
501 LLVMValueRef *p_log2;
502
503 src0 = emit_fetch( bld, inst, 0, CHAN_X );
504 src0 = lp_build_abs( &bld->base, src0 );
505
506 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
507 p_floor_log2 = &tmp0;
508 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
509 p_exp = &tmp1;
510 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
511 p_log2 = &tmp2;
512
513 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
514
515 /* dst.x = floor(lg2(abs(src.x))) */
516 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
517 emit_store( bld, inst, 0, CHAN_X, tmp0);
518 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
519 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
520 tmp1 = lp_build_div( &bld->base, src0, tmp1);
521 emit_store( bld, inst, 0, CHAN_Y, tmp1);
522 }
523 /* dst.z = lg2(abs(src.x)) */
524 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
525 emit_store( bld, inst, 0, CHAN_Z, tmp2);
526 }
527 /* dst.w = 1.0 */
528 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
529 tmp0 = bld->base.one;
530 emit_store( bld, inst, 0, CHAN_W, tmp0);
531 }
532 break;
533
534 case TGSI_OPCODE_MUL:
535 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
536 src0 = emit_fetch( bld, inst, 0, chan_index );
537 src1 = emit_fetch( bld, inst, 1, chan_index );
538 dst0 = lp_build_mul(&bld->base, src0, src1);
539 emit_store( bld, inst, 0, chan_index, dst0);
540 }
541 break;
542
543 case TGSI_OPCODE_ADD:
544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
545 src0 = emit_fetch( bld, inst, 0, chan_index );
546 src1 = emit_fetch( bld, inst, 1, chan_index );
547 dst0 = lp_build_add(&bld->base, src0, src1);
548 emit_store( bld, inst, 0, chan_index, dst0);
549 }
550 break;
551
552 case TGSI_OPCODE_DP3:
553 /* TGSI_OPCODE_DOT3 */
554 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
555 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
556 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
557 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
558 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
559 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
560 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
561 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
562 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
563 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
564 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
565 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
566 emit_store( bld, inst, 0, chan_index, tmp0);
567 }
568 break;
569
570 case TGSI_OPCODE_DP4:
571 /* TGSI_OPCODE_DOT4 */
572 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
573 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
574 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
575 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
576 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
577 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
578 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
579 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
580 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
581 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
582 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
583 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
584 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
585 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
586 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
587 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
588 emit_store( bld, inst, 0, chan_index, tmp0);
589 }
590 break;
591
592 case TGSI_OPCODE_DST:
593 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
594 tmp0 = bld->base.one;
595 emit_store( bld, inst, 0, CHAN_X, tmp0);
596 }
597 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
598 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
599 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
600 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
601 emit_store( bld, inst, 0, CHAN_Y, tmp0);
602 }
603 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
604 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
605 emit_store( bld, inst, 0, CHAN_Z, tmp0);
606 }
607 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
608 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
609 emit_store( bld, inst, 0, CHAN_W, tmp0);
610 }
611 break;
612
613 case TGSI_OPCODE_MIN:
614 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
615 src0 = emit_fetch( bld, inst, 0, chan_index );
616 src1 = emit_fetch( bld, inst, 1, chan_index );
617 dst0 = lp_build_min( &bld->base, src0, src1 );
618 emit_store( bld, inst, 0, chan_index, dst0);
619 }
620 break;
621
622 case TGSI_OPCODE_MAX:
623 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
624 src0 = emit_fetch( bld, inst, 0, chan_index );
625 src1 = emit_fetch( bld, inst, 1, chan_index );
626 dst0 = lp_build_max( &bld->base, src0, src1 );
627 emit_store( bld, inst, 0, chan_index, dst0);
628 }
629 break;
630
631 case TGSI_OPCODE_SLT:
632 /* TGSI_OPCODE_SETLT */
633 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
634 src0 = emit_fetch( bld, inst, 0, chan_index );
635 src1 = emit_fetch( bld, inst, 1, chan_index );
636 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
637 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
638 emit_store( bld, inst, 0, chan_index, dst0);
639 }
640 break;
641
642 case TGSI_OPCODE_SGE:
643 /* TGSI_OPCODE_SETGE */
644 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
645 src0 = emit_fetch( bld, inst, 0, chan_index );
646 src1 = emit_fetch( bld, inst, 1, chan_index );
647 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
648 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
649 emit_store( bld, inst, 0, chan_index, dst0);
650 }
651 break;
652
653 case TGSI_OPCODE_MAD:
654 /* TGSI_OPCODE_MADD */
655 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
656 tmp0 = emit_fetch( bld, inst, 0, chan_index );
657 tmp1 = emit_fetch( bld, inst, 1, chan_index );
658 tmp2 = emit_fetch( bld, inst, 2, chan_index );
659 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
660 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
661 emit_store( bld, inst, 0, chan_index, tmp0);
662 }
663 break;
664
665 case TGSI_OPCODE_SUB:
666 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
667 tmp0 = emit_fetch( bld, inst, 0, chan_index );
668 tmp1 = emit_fetch( bld, inst, 1, chan_index );
669 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
670 emit_store( bld, inst, 0, chan_index, tmp0);
671 }
672 break;
673
674 case TGSI_OPCODE_LRP:
675 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
676 src0 = emit_fetch( bld, inst, 0, chan_index );
677 src1 = emit_fetch( bld, inst, 1, chan_index );
678 src2 = emit_fetch( bld, inst, 2, chan_index );
679 tmp0 = lp_build_sub( &bld->base, src1, src2 );
680 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
681 dst0 = lp_build_add( &bld->base, tmp0, src2 );
682 emit_store( bld, inst, 0, chan_index, dst0 );
683 }
684 break;
685
686 case TGSI_OPCODE_CND:
687 return 0;
688 break;
689
690 case TGSI_OPCODE_CND0:
691 return 0;
692 break;
693
694 case TGSI_OPCODE_DP2A:
695 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
696 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
697 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
698 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
699 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
700 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
701 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
702 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
703 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
704 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
705 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
706 }
707 break;
708
709 #if 0
710 case TGSI_OPCODE_FRC:
711 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
712 tmp0 = emit_fetch( bld, inst, 0, chan_index );
713 emit_frc( bld, 0, 0 );
714 emit_store( bld, inst, 0, chan_index, tmp0);
715 }
716 break;
717
718 case TGSI_OPCODE_CLAMP:
719 return 0;
720 break;
721
722 case TGSI_OPCODE_FLR:
723 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
724 tmp0 = emit_fetch( bld, inst, 0, chan_index );
725 emit_flr( bld, 0, 0 );
726 emit_store( bld, inst, 0, chan_index, tmp0);
727 }
728 break;
729
730 case TGSI_OPCODE_ROUND:
731 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
732 tmp0 = emit_fetch( bld, inst, 0, chan_index );
733 emit_rnd( bld, 0, 0 );
734 emit_store( bld, inst, 0, chan_index, tmp0);
735 }
736 break;
737 #endif
738
739 case TGSI_OPCODE_EX2: {
740 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
741 tmp0 = lp_build_exp2( &bld->base, tmp0);
742 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
743 emit_store( bld, inst, 0, chan_index, tmp0);
744 }
745 break;
746 }
747
748 case TGSI_OPCODE_LG2:
749 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
750 tmp0 = lp_build_log2( &bld->base, tmp0);
751 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
752 emit_store( bld, inst, 0, chan_index, tmp0);
753 }
754 break;
755
756 case TGSI_OPCODE_POW:
757 src0 = emit_fetch( bld, inst, 0, CHAN_X );
758 src1 = emit_fetch( bld, inst, 1, CHAN_X );
759 dst0 = lp_build_pow( &bld->base, src0, src1 );
760 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
761 emit_store( bld, inst, 0, chan_index, dst0 );
762 }
763 break;
764
765 case TGSI_OPCODE_XPD:
766 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
767 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
768 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
769 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
770 }
771 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
772 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
773 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
774 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
775 }
776 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
777 tmp2 = tmp0;
778 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
779 tmp5 = tmp3;
780 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
781 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
782 emit_store( bld, inst, 0, CHAN_X, tmp2);
783 }
784 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
785 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
786 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
787 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
788 }
789 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
790 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
791 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
792 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
793 emit_store( bld, inst, 0, CHAN_Y, tmp3);
794 }
795 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
796 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
797 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
798 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
799 emit_store( bld, inst, 0, CHAN_Z, tmp5);
800 }
801 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
802 tmp0 = bld->base.one;
803 emit_store( bld, inst, 0, CHAN_W, tmp0);
804 }
805 break;
806
807 case TGSI_OPCODE_ABS:
808 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
809 tmp0 = emit_fetch( bld, inst, 0, chan_index );
810 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
811 emit_store( bld, inst, 0, chan_index, tmp0);
812 }
813 break;
814
815 case TGSI_OPCODE_RCC:
816 return 0;
817 break;
818
819 case TGSI_OPCODE_DPH:
820 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
821 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
822 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
823 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
824 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
825 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
826 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
827 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
828 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
829 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
830 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
831 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
832 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
833 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
834 emit_store( bld, inst, 0, chan_index, tmp0);
835 }
836 break;
837
838 case TGSI_OPCODE_COS:
839 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
840 tmp0 = lp_build_cos( &bld->base, tmp0 );
841 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
842 emit_store( bld, inst, 0, chan_index, tmp0);
843 }
844 break;
845
846 case TGSI_OPCODE_DDX:
847 return 0;
848 break;
849
850 case TGSI_OPCODE_DDY:
851 return 0;
852 break;
853
854 #if 0
855 case TGSI_OPCODE_KILP:
856 /* predicated kill */
857 emit_kilp( bld );
858 return 0; /* XXX fix me */
859 break;
860 #endif
861
862 case TGSI_OPCODE_KIL:
863 /* conditional kill */
864 emit_kil( bld, inst );
865 break;
866
867 case TGSI_OPCODE_PK2H:
868 return 0;
869 break;
870
871 case TGSI_OPCODE_PK2US:
872 return 0;
873 break;
874
875 case TGSI_OPCODE_PK4B:
876 return 0;
877 break;
878
879 case TGSI_OPCODE_PK4UB:
880 return 0;
881 break;
882
883 case TGSI_OPCODE_RFL:
884 return 0;
885 break;
886
887 case TGSI_OPCODE_SEQ:
888 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
889 src0 = emit_fetch( bld, inst, 0, chan_index );
890 src1 = emit_fetch( bld, inst, 1, chan_index );
891 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
892 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
893 emit_store( bld, inst, 0, chan_index, dst0);
894 }
895 break;
896
897 case TGSI_OPCODE_SFL:
898 return 0;
899 break;
900
901 case TGSI_OPCODE_SGT:
902 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
903 src0 = emit_fetch( bld, inst, 0, chan_index );
904 src1 = emit_fetch( bld, inst, 1, chan_index );
905 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
906 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
907 emit_store( bld, inst, 0, chan_index, dst0);
908 }
909 break;
910
911 case TGSI_OPCODE_SIN:
912 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
913 tmp0 = lp_build_sin( &bld->base, tmp0 );
914 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
915 emit_store( bld, inst, 0, chan_index, tmp0);
916 }
917 break;
918
919 case TGSI_OPCODE_SLE:
920 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
921 src0 = emit_fetch( bld, inst, 0, chan_index );
922 src1 = emit_fetch( bld, inst, 1, chan_index );
923 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
924 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
925 emit_store( bld, inst, 0, chan_index, dst0);
926 }
927 break;
928
929 case TGSI_OPCODE_SNE:
930 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
931 src0 = emit_fetch( bld, inst, 0, chan_index );
932 src1 = emit_fetch( bld, inst, 1, chan_index );
933 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
934 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
935 emit_store( bld, inst, 0, chan_index, dst0);
936 }
937 break;
938
939 case TGSI_OPCODE_STR:
940 return 0;
941 break;
942
943 case TGSI_OPCODE_TEX:
944 emit_tex( bld, inst, FALSE, FALSE );
945 break;
946
947 case TGSI_OPCODE_TXD:
948 return 0;
949 break;
950
951 case TGSI_OPCODE_UP2H:
952 return 0;
953 break;
954
955 case TGSI_OPCODE_UP2US:
956 return 0;
957 break;
958
959 case TGSI_OPCODE_UP4B:
960 return 0;
961 break;
962
963 case TGSI_OPCODE_UP4UB:
964 return 0;
965 break;
966
967 case TGSI_OPCODE_X2D:
968 return 0;
969 break;
970
971 case TGSI_OPCODE_ARA:
972 return 0;
973 break;
974
975 #if 0
976 case TGSI_OPCODE_ARR:
977 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
978 tmp0 = emit_fetch( bld, inst, 0, chan_index );
979 emit_rnd( bld, 0, 0 );
980 emit_f2it( bld, 0 );
981 emit_store( bld, inst, 0, chan_index, tmp0);
982 }
983 break;
984 #endif
985
986 case TGSI_OPCODE_BRA:
987 return 0;
988 break;
989
990 case TGSI_OPCODE_CAL:
991 return 0;
992 break;
993
994 #if 0
995 case TGSI_OPCODE_RET:
996 emit_ret( bld );
997 break;
998 #endif
999
1000 case TGSI_OPCODE_END:
1001 break;
1002
1003 #if 0
1004 case TGSI_OPCODE_SSG:
1005 /* TGSI_OPCODE_SGN */
1006 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1007 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1008 emit_sgn( bld, 0, 0 );
1009 emit_store( bld, inst, 0, chan_index, tmp0);
1010 }
1011 break;
1012 #endif
1013
1014 case TGSI_OPCODE_CMP:
1015 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1016 src0 = emit_fetch( bld, inst, 0, chan_index );
1017 src1 = emit_fetch( bld, inst, 1, chan_index );
1018 src2 = emit_fetch( bld, inst, 2, chan_index );
1019 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1020 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1021 emit_store( bld, inst, 0, chan_index, dst0);
1022 }
1023 break;
1024
1025 case TGSI_OPCODE_SCS:
1026 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1027 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1028 tmp0 = lp_build_cos( &bld->base, tmp0 );
1029 emit_store( bld, inst, 0, CHAN_X, tmp0);
1030 }
1031 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1032 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1033 tmp0 = lp_build_sin( &bld->base, tmp0 );
1034 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1035 }
1036 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1037 tmp0 = bld->base.zero;
1038 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1039 }
1040 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1041 tmp0 = bld->base.one;
1042 emit_store( bld, inst, 0, CHAN_W, tmp0);
1043 }
1044 break;
1045
1046 case TGSI_OPCODE_TXB:
1047 emit_tex( bld, inst, TRUE, FALSE );
1048 break;
1049
1050 case TGSI_OPCODE_NRM:
1051 /* fall-through */
1052 case TGSI_OPCODE_NRM4:
1053 /* 3 or 4-component normalization */
1054 {
1055 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1056
1057 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1058 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1059 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1060 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1061
1062 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1063
1064 /* xmm4 = src.x */
1065 /* xmm0 = src.x * src.x */
1066 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1067 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1068 tmp4 = tmp0;
1069 }
1070 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1071
1072 /* xmm5 = src.y */
1073 /* xmm0 = xmm0 + src.y * src.y */
1074 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1075 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1076 tmp5 = tmp1;
1077 }
1078 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1079 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1080
1081 /* xmm6 = src.z */
1082 /* xmm0 = xmm0 + src.z * src.z */
1083 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1084 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1085 tmp6 = tmp1;
1086 }
1087 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1088 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1089
1090 if (dims == 4) {
1091 /* xmm7 = src.w */
1092 /* xmm0 = xmm0 + src.w * src.w */
1093 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1094 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1095 tmp7 = tmp1;
1096 }
1097 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1098 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1099 }
1100
1101 /* xmm1 = 1 / sqrt(xmm0) */
1102 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1103
1104 /* dst.x = xmm1 * src.x */
1105 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1106 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1107 emit_store(bld, inst, 0, CHAN_X, tmp4);
1108 }
1109
1110 /* dst.y = xmm1 * src.y */
1111 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1112 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1113 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1114 }
1115
1116 /* dst.z = xmm1 * src.z */
1117 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1118 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1119 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1120 }
1121
1122 /* dst.w = xmm1 * src.w */
1123 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1124 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1125 emit_store(bld, inst, 0, CHAN_W, tmp7);
1126 }
1127 }
1128
1129 /* dst0.w = 1.0 */
1130 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1131 tmp0 = bld->base.one;
1132 emit_store(bld, inst, 0, CHAN_W, tmp0);
1133 }
1134 }
1135 break;
1136
1137 case TGSI_OPCODE_DIV:
1138 return 0;
1139 break;
1140
1141 case TGSI_OPCODE_DP2:
1142 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1143 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1144 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1145 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1146 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1147 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1148 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1149 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1150 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1151 }
1152 break;
1153
1154 case TGSI_OPCODE_TXL:
1155 emit_tex( bld, inst, TRUE, FALSE );
1156 break;
1157
1158 case TGSI_OPCODE_TXP:
1159 emit_tex( bld, inst, FALSE, TRUE );
1160 break;
1161
1162 case TGSI_OPCODE_BRK:
1163 return 0;
1164 break;
1165
1166 case TGSI_OPCODE_IF:
1167 return 0;
1168 break;
1169
1170 case TGSI_OPCODE_LOOP:
1171 return 0;
1172 break;
1173
1174 case TGSI_OPCODE_REP:
1175 return 0;
1176 break;
1177
1178 case TGSI_OPCODE_ELSE:
1179 return 0;
1180 break;
1181
1182 case TGSI_OPCODE_ENDIF:
1183 return 0;
1184 break;
1185
1186 case TGSI_OPCODE_ENDLOOP:
1187 return 0;
1188 break;
1189
1190 case TGSI_OPCODE_ENDREP:
1191 return 0;
1192 break;
1193
1194 case TGSI_OPCODE_PUSHA:
1195 return 0;
1196 break;
1197
1198 case TGSI_OPCODE_POPA:
1199 return 0;
1200 break;
1201
1202 case TGSI_OPCODE_CEIL:
1203 return 0;
1204 break;
1205
1206 case TGSI_OPCODE_I2F:
1207 return 0;
1208 break;
1209
1210 case TGSI_OPCODE_NOT:
1211 return 0;
1212 break;
1213
1214 #if 0
1215 case TGSI_OPCODE_TRUNC:
1216 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1217 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1218 emit_f2it( bld, 0 );
1219 emit_i2f( bld, 0 );
1220 emit_store( bld, inst, 0, chan_index, tmp0);
1221 }
1222 break;
1223 #endif
1224
1225 case TGSI_OPCODE_SHL:
1226 return 0;
1227 break;
1228
1229 case TGSI_OPCODE_SHR:
1230 return 0;
1231 break;
1232
1233 case TGSI_OPCODE_AND:
1234 return 0;
1235 break;
1236
1237 case TGSI_OPCODE_OR:
1238 return 0;
1239 break;
1240
1241 case TGSI_OPCODE_MOD:
1242 return 0;
1243 break;
1244
1245 case TGSI_OPCODE_XOR:
1246 return 0;
1247 break;
1248
1249 case TGSI_OPCODE_SAD:
1250 return 0;
1251 break;
1252
1253 case TGSI_OPCODE_TXF:
1254 return 0;
1255 break;
1256
1257 case TGSI_OPCODE_TXQ:
1258 return 0;
1259 break;
1260
1261 case TGSI_OPCODE_CONT:
1262 return 0;
1263 break;
1264
1265 case TGSI_OPCODE_EMIT:
1266 return 0;
1267 break;
1268
1269 case TGSI_OPCODE_ENDPRIM:
1270 return 0;
1271 break;
1272
1273 default:
1274 return 0;
1275 }
1276
1277 return 1;
1278 }
1279
1280
1281 void
1282 lp_build_tgsi_soa(LLVMBuilderRef builder,
1283 const struct tgsi_token *tokens,
1284 union lp_type type,
1285 struct lp_build_mask_context *mask,
1286 LLVMValueRef consts_ptr,
1287 const LLVMValueRef *pos,
1288 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1289 LLVMValueRef (*outputs)[NUM_CHANNELS],
1290 lp_emit_fetch_texel_soa_callback emit_fetch_texel,
1291 void *emit_fetch_texel_context)
1292 {
1293 struct lp_build_tgsi_soa_context bld;
1294 struct tgsi_parse_context parse;
1295 uint num_immediates = 0;
1296 unsigned i;
1297
1298 /* Setup build context */
1299 memset(&bld, 0, sizeof bld);
1300 lp_build_context_init(&bld.base, builder, type);
1301 bld.mask = mask;
1302 bld.pos = pos;
1303 bld.inputs = inputs;
1304 bld.outputs = outputs;
1305 bld.consts_ptr = consts_ptr;
1306 bld.emit_fetch_texel = emit_fetch_texel;
1307 bld.emit_fetch_texel_context = emit_fetch_texel_context;
1308
1309 tgsi_parse_init( &parse, tokens );
1310
1311 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1312 tgsi_parse_token( &parse );
1313
1314 switch( parse.FullToken.Token.Type ) {
1315 case TGSI_TOKEN_TYPE_DECLARATION:
1316 /* Input already interpolated */
1317 break;
1318
1319 case TGSI_TOKEN_TYPE_INSTRUCTION:
1320 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1321 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1322 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1323 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1324 info ? info->mnemonic : "<invalid>");
1325 }
1326 break;
1327
1328 case TGSI_TOKEN_TYPE_IMMEDIATE:
1329 /* simply copy the immediate values into the next immediates[] slot */
1330 {
1331 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1332 assert(size <= 4);
1333 assert(num_immediates < LP_MAX_IMMEDIATES);
1334 for( i = 0; i < size; ++i )
1335 bld.immediates[num_immediates][i] =
1336 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1337 for( i = size; i < 4; ++i )
1338 bld.immediates[num_immediates][i] = bld.base.undef;
1339 num_immediates++;
1340 }
1341 break;
1342
1343 default:
1344 assert( 0 );
1345 }
1346 }
1347
1348 tgsi_parse_free( &parse );
1349 }
1350