4756811dc35b61128d05fab7565bd8702f6a5e27
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/* Number of rows in the temps[] and immediates[] tables of the context. */
#define LP_MAX_TEMPS        256
#define LP_MAX_IMMEDIATES   256


/*
 * Loop header: steps CHAN through 0 .. NUM_CHANNELS-1.
 * CHAN must be an already declared integer lvalue.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Statement prefix: guards the following statement on the test above. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/*
 * Loop header visiting only the channels enabled in destination 0.
 *
 * Expands to a "for" immediately followed by an "if", so an "else" written
 * after the loop body would bind to that hidden "if".
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
80
81
82 struct lp_build_tgsi_soa_context
83 {
84 struct lp_build_context base;
85
86 LLVMValueRef consts_ptr;
87 const LLVMValueRef *pos;
88 const LLVMValueRef (*inputs)[NUM_CHANNELS];
89 LLVMValueRef (*outputs)[NUM_CHANNELS];
90
91 struct lp_build_sampler_soa *sampler;
92
93 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
94 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
95
96 struct lp_build_mask_context *mask;
97 };
98
99
100 /**
101 * Register fetch.
102 */
103 static LLVMValueRef
104 emit_fetch(
105 struct lp_build_tgsi_soa_context *bld,
106 const struct tgsi_full_instruction *inst,
107 unsigned index,
108 const unsigned chan_index )
109 {
110 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
111 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
112 LLVMValueRef res;
113
114 switch (swizzle) {
115 case TGSI_EXTSWIZZLE_X:
116 case TGSI_EXTSWIZZLE_Y:
117 case TGSI_EXTSWIZZLE_Z:
118 case TGSI_EXTSWIZZLE_W:
119
120 switch (reg->SrcRegister.File) {
121 case TGSI_FILE_CONSTANT: {
122 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
123 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
124 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
125 res = lp_build_broadcast_scalar(&bld->base, scalar);
126 break;
127 }
128
129 case TGSI_FILE_IMMEDIATE:
130 res = bld->immediates[reg->SrcRegister.Index][swizzle];
131 assert(res);
132 break;
133
134 case TGSI_FILE_INPUT:
135 res = bld->inputs[reg->SrcRegister.Index][swizzle];
136 assert(res);
137 break;
138
139 case TGSI_FILE_TEMPORARY:
140 res = bld->temps[reg->SrcRegister.Index][swizzle];
141 if(!res)
142 return bld->base.undef;
143 break;
144
145 default:
146 assert( 0 );
147 return bld->base.undef;
148 }
149 break;
150
151 case TGSI_EXTSWIZZLE_ZERO:
152 res = bld->base.zero;
153 break;
154
155 case TGSI_EXTSWIZZLE_ONE:
156 res = bld->base.one;
157 break;
158
159 default:
160 assert( 0 );
161 return bld->base.undef;
162 }
163
164 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
165 case TGSI_UTIL_SIGN_CLEAR:
166 res = lp_build_abs( &bld->base, res );
167 break;
168
169 case TGSI_UTIL_SIGN_SET:
170 /* TODO: Use bitwese OR for floating point */
171 res = lp_build_abs( &bld->base, res );
172 res = LLVMBuildNeg( bld->base.builder, res, "" );
173 break;
174
175 case TGSI_UTIL_SIGN_TOGGLE:
176 res = LLVMBuildNeg( bld->base.builder, res, "" );
177 break;
178
179 case TGSI_UTIL_SIGN_KEEP:
180 break;
181 }
182
183 return res;
184 }
185
186
187 /**
188 * Register store.
189 */
190 static void
191 emit_store(
192 struct lp_build_tgsi_soa_context *bld,
193 const struct tgsi_full_instruction *inst,
194 unsigned index,
195 unsigned chan_index,
196 LLVMValueRef value)
197 {
198 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
199
200 switch( inst->Instruction.Saturate ) {
201 case TGSI_SAT_NONE:
202 break;
203
204 case TGSI_SAT_ZERO_ONE:
205 value = lp_build_max(&bld->base, value, bld->base.zero);
206 value = lp_build_min(&bld->base, value, bld->base.one);
207 break;
208
209 case TGSI_SAT_MINUS_PLUS_ONE:
210 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
211 value = lp_build_min(&bld->base, value, bld->base.one);
212 break;
213
214 default:
215 assert(0);
216 }
217
218 switch( reg->DstRegister.File ) {
219 case TGSI_FILE_OUTPUT:
220 bld->outputs[reg->DstRegister.Index][chan_index] = value;
221 break;
222
223 case TGSI_FILE_TEMPORARY:
224 bld->temps[reg->DstRegister.Index][chan_index] = value;
225 break;
226
227 case TGSI_FILE_ADDRESS:
228 /* FIXME */
229 assert(0);
230 break;
231
232 default:
233 assert( 0 );
234 }
235 }
236
237
238 /**
239 * High-level instruction translators.
240 */
241
242 static void
243 emit_tex( struct lp_build_tgsi_soa_context *bld,
244 const struct tgsi_full_instruction *inst,
245 boolean apply_lodbias,
246 boolean projected)
247 {
248 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
249 LLVMValueRef lodbias;
250 LLVMValueRef oow;
251 LLVMValueRef coords[3];
252 LLVMValueRef texel[4];
253 unsigned num_coords;
254 unsigned i;
255
256 switch (inst->InstructionExtTexture.Texture) {
257 case TGSI_TEXTURE_1D:
258 num_coords = 1;
259 break;
260 case TGSI_TEXTURE_2D:
261 case TGSI_TEXTURE_RECT:
262 num_coords = 2;
263 break;
264 case TGSI_TEXTURE_SHADOW1D:
265 case TGSI_TEXTURE_SHADOW2D:
266 case TGSI_TEXTURE_SHADOWRECT:
267 case TGSI_TEXTURE_3D:
268 case TGSI_TEXTURE_CUBE:
269 num_coords = 3;
270 break;
271 default:
272 assert(0);
273 return;
274 }
275
276 if(apply_lodbias)
277 lodbias = emit_fetch( bld, inst, 0, 3 );
278 else
279 lodbias = bld->base.zero;
280
281 if (projected) {
282 oow = emit_fetch( bld, inst, 0, 3 );
283 oow = lp_build_rcp(&bld->base, oow);
284 }
285
286 for (i = 0; i < num_coords; i++) {
287 coords[i] = emit_fetch( bld, inst, 0, i );
288 if (projected)
289 coords[i] = lp_build_mul(&bld->base, coords[i], oow);
290 }
291
292 bld->sampler->emit_fetch_texel(bld->sampler,
293 bld->base.builder,
294 bld->base.type,
295 unit, num_coords, coords, lodbias,
296 texel);
297
298 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
299 emit_store( bld, inst, 0, i, texel[i] );
300 }
301 }
302
303
304 static void
305 emit_kil(
306 struct lp_build_tgsi_soa_context *bld,
307 const struct tgsi_full_instruction *inst )
308 {
309 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
310 LLVMValueRef terms[NUM_CHANNELS];
311 LLVMValueRef mask;
312 unsigned chan_index;
313
314 memset(&terms, 0, sizeof terms);
315
316 FOR_EACH_CHANNEL( chan_index ) {
317 unsigned swizzle;
318
319 /* Unswizzle channel */
320 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
321
322 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
323 * not to be tested. */
324 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
325 continue;
326
327 /* Check if the component has not been already tested. */
328 assert(swizzle < NUM_CHANNELS);
329 if( !terms[swizzle] )
330 /* TODO: change the comparison operator instead of setting the sign */
331 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
332 }
333
334 mask = NULL;
335 FOR_EACH_CHANNEL( chan_index ) {
336 if(terms[chan_index]) {
337 LLVMValueRef chan_mask;
338
339 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
340
341 if(mask)
342 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
343 else
344 mask = chan_mask;
345 }
346 }
347
348 if(mask)
349 lp_build_mask_update(bld->mask, mask);
350 }
351
352
353 /**
354 * Check if inst src/dest regs use indirect addressing into temporary
355 * register file.
356 */
357 static boolean
358 indirect_temp_reference(const struct tgsi_full_instruction *inst)
359 {
360 uint i;
361 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
362 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
363 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
364 reg->SrcRegister.Indirect)
365 return TRUE;
366 }
367 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
368 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
369 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
370 reg->DstRegister.Indirect)
371 return TRUE;
372 }
373 return FALSE;
374 }
375
376
377 static int
378 emit_instruction(
379 struct lp_build_tgsi_soa_context *bld,
380 struct tgsi_full_instruction *inst )
381 {
382 unsigned chan_index;
383 LLVMValueRef src0, src1, src2;
384 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
385 LLVMValueRef dst0;
386
387 /* we can't handle indirect addressing into temp register file yet */
388 if (indirect_temp_reference(inst))
389 return FALSE;
390
391 switch (inst->Instruction.Opcode) {
392 #if 0
393 case TGSI_OPCODE_ARL:
394 /* FIXME */
395 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
396 tmp0 = emit_fetch( bld, inst, 0, chan_index );
397 emit_flr(bld, 0, 0);
398 emit_f2it( bld, 0 );
399 emit_store( bld, inst, 0, chan_index, tmp0);
400 }
401 break;
402 #endif
403
404 case TGSI_OPCODE_MOV:
405 case TGSI_OPCODE_SWZ:
406 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
407 tmp0 = emit_fetch( bld, inst, 0, chan_index );
408 emit_store( bld, inst, 0, chan_index, tmp0);
409 }
410 break;
411
412 case TGSI_OPCODE_LIT:
413 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
414 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
415 }
416 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
417 src0 = emit_fetch( bld, inst, 0, CHAN_X );
418 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
419 emit_store( bld, inst, 0, CHAN_Y, dst0);
420 }
421 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
422 /* XMM[1] = SrcReg[0].yyyy */
423 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
424 /* XMM[1] = max(XMM[1], 0) */
425 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
426 /* XMM[2] = SrcReg[0].wwww */
427 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
428 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
429 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
430 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
431 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
432 emit_store( bld, inst, 0, CHAN_Z, dst0);
433 }
434 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
435 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
436 }
437 break;
438
439 case TGSI_OPCODE_RCP:
440 /* TGSI_OPCODE_RECIP */
441 src0 = emit_fetch( bld, inst, 0, CHAN_X );
442 dst0 = lp_build_rcp(&bld->base, src0);
443 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
444 emit_store( bld, inst, 0, chan_index, dst0 );
445 }
446 break;
447
448 case TGSI_OPCODE_RSQ:
449 /* TGSI_OPCODE_RECIPSQRT */
450 src0 = emit_fetch( bld, inst, 0, CHAN_X );
451 src0 = lp_build_abs(&bld->base, src0);
452 dst0 = lp_build_rsqrt(&bld->base, src0);
453 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
454 emit_store( bld, inst, 0, chan_index, dst0 );
455 }
456 break;
457
458 case TGSI_OPCODE_EXP:
459 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
460 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
461 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
462 LLVMValueRef *p_exp2_int_part = NULL;
463 LLVMValueRef *p_frac_part = NULL;
464 LLVMValueRef *p_exp2 = NULL;
465
466 src0 = emit_fetch( bld, inst, 0, CHAN_X );
467
468 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
469 p_exp2_int_part = &tmp0;
470 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
471 p_frac_part = &tmp1;
472 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
473 p_exp2 = &tmp2;
474
475 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
476
477 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
478 emit_store( bld, inst, 0, CHAN_X, tmp0);
479 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
480 emit_store( bld, inst, 0, CHAN_Y, tmp1);
481 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
482 emit_store( bld, inst, 0, CHAN_Z, tmp2);
483 }
484 /* dst.w = 1.0 */
485 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
486 tmp0 = bld->base.one;
487 emit_store( bld, inst, 0, CHAN_W, tmp0);
488 }
489 break;
490
491 case TGSI_OPCODE_LOG:
492 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
493 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
494 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
495 LLVMValueRef *p_floor_log2;
496 LLVMValueRef *p_exp;
497 LLVMValueRef *p_log2;
498
499 src0 = emit_fetch( bld, inst, 0, CHAN_X );
500 src0 = lp_build_abs( &bld->base, src0 );
501
502 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
503 p_floor_log2 = &tmp0;
504 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
505 p_exp = &tmp1;
506 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
507 p_log2 = &tmp2;
508
509 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
510
511 /* dst.x = floor(lg2(abs(src.x))) */
512 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
513 emit_store( bld, inst, 0, CHAN_X, tmp0);
514 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
515 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
516 tmp1 = lp_build_div( &bld->base, src0, tmp1);
517 emit_store( bld, inst, 0, CHAN_Y, tmp1);
518 }
519 /* dst.z = lg2(abs(src.x)) */
520 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
521 emit_store( bld, inst, 0, CHAN_Z, tmp2);
522 }
523 /* dst.w = 1.0 */
524 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
525 tmp0 = bld->base.one;
526 emit_store( bld, inst, 0, CHAN_W, tmp0);
527 }
528 break;
529
530 case TGSI_OPCODE_MUL:
531 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
532 src0 = emit_fetch( bld, inst, 0, chan_index );
533 src1 = emit_fetch( bld, inst, 1, chan_index );
534 dst0 = lp_build_mul(&bld->base, src0, src1);
535 emit_store( bld, inst, 0, chan_index, dst0);
536 }
537 break;
538
539 case TGSI_OPCODE_ADD:
540 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
541 src0 = emit_fetch( bld, inst, 0, chan_index );
542 src1 = emit_fetch( bld, inst, 1, chan_index );
543 dst0 = lp_build_add(&bld->base, src0, src1);
544 emit_store( bld, inst, 0, chan_index, dst0);
545 }
546 break;
547
548 case TGSI_OPCODE_DP3:
549 /* TGSI_OPCODE_DOT3 */
550 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
551 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
552 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
553 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
554 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
555 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
556 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
557 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
558 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
559 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
560 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
561 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
562 emit_store( bld, inst, 0, chan_index, tmp0);
563 }
564 break;
565
566 case TGSI_OPCODE_DP4:
567 /* TGSI_OPCODE_DOT4 */
568 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
569 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
570 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
571 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
572 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
573 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
574 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
575 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
576 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
577 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
578 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
579 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
580 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
581 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
582 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
583 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
584 emit_store( bld, inst, 0, chan_index, tmp0);
585 }
586 break;
587
588 case TGSI_OPCODE_DST:
589 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
590 tmp0 = bld->base.one;
591 emit_store( bld, inst, 0, CHAN_X, tmp0);
592 }
593 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
594 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
595 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
596 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
597 emit_store( bld, inst, 0, CHAN_Y, tmp0);
598 }
599 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
600 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
601 emit_store( bld, inst, 0, CHAN_Z, tmp0);
602 }
603 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
604 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
605 emit_store( bld, inst, 0, CHAN_W, tmp0);
606 }
607 break;
608
609 case TGSI_OPCODE_MIN:
610 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
611 src0 = emit_fetch( bld, inst, 0, chan_index );
612 src1 = emit_fetch( bld, inst, 1, chan_index );
613 dst0 = lp_build_min( &bld->base, src0, src1 );
614 emit_store( bld, inst, 0, chan_index, dst0);
615 }
616 break;
617
618 case TGSI_OPCODE_MAX:
619 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
620 src0 = emit_fetch( bld, inst, 0, chan_index );
621 src1 = emit_fetch( bld, inst, 1, chan_index );
622 dst0 = lp_build_max( &bld->base, src0, src1 );
623 emit_store( bld, inst, 0, chan_index, dst0);
624 }
625 break;
626
627 case TGSI_OPCODE_SLT:
628 /* TGSI_OPCODE_SETLT */
629 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
630 src0 = emit_fetch( bld, inst, 0, chan_index );
631 src1 = emit_fetch( bld, inst, 1, chan_index );
632 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
633 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
634 emit_store( bld, inst, 0, chan_index, dst0);
635 }
636 break;
637
638 case TGSI_OPCODE_SGE:
639 /* TGSI_OPCODE_SETGE */
640 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
641 src0 = emit_fetch( bld, inst, 0, chan_index );
642 src1 = emit_fetch( bld, inst, 1, chan_index );
643 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
644 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
645 emit_store( bld, inst, 0, chan_index, dst0);
646 }
647 break;
648
649 case TGSI_OPCODE_MAD:
650 /* TGSI_OPCODE_MADD */
651 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
652 tmp0 = emit_fetch( bld, inst, 0, chan_index );
653 tmp1 = emit_fetch( bld, inst, 1, chan_index );
654 tmp2 = emit_fetch( bld, inst, 2, chan_index );
655 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
656 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
657 emit_store( bld, inst, 0, chan_index, tmp0);
658 }
659 break;
660
661 case TGSI_OPCODE_SUB:
662 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
663 tmp0 = emit_fetch( bld, inst, 0, chan_index );
664 tmp1 = emit_fetch( bld, inst, 1, chan_index );
665 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
666 emit_store( bld, inst, 0, chan_index, tmp0);
667 }
668 break;
669
670 case TGSI_OPCODE_LRP:
671 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
672 src0 = emit_fetch( bld, inst, 0, chan_index );
673 src1 = emit_fetch( bld, inst, 1, chan_index );
674 src2 = emit_fetch( bld, inst, 2, chan_index );
675 tmp0 = lp_build_sub( &bld->base, src1, src2 );
676 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
677 dst0 = lp_build_add( &bld->base, tmp0, src2 );
678 emit_store( bld, inst, 0, chan_index, dst0 );
679 }
680 break;
681
682 case TGSI_OPCODE_CND:
683 /* FIXME */
684 return 0;
685 break;
686
687 case TGSI_OPCODE_DP2A:
688 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
689 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
690 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
691 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
692 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
693 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
694 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
695 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
696 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
697 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
698 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
699 }
700 break;
701
702 #if 0
703 case TGSI_OPCODE_FRC:
704 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
705 tmp0 = emit_fetch( bld, inst, 0, chan_index );
706 emit_frc( bld, 0, 0 );
707 emit_store( bld, inst, 0, chan_index, tmp0);
708 }
709 break;
710
711 case TGSI_OPCODE_CLAMP:
712 return 0;
713 break;
714
715 case TGSI_OPCODE_FLR:
716 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
717 tmp0 = emit_fetch( bld, inst, 0, chan_index );
718 emit_flr( bld, 0, 0 );
719 emit_store( bld, inst, 0, chan_index, tmp0);
720 }
721 break;
722
723 case TGSI_OPCODE_ROUND:
724 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
725 tmp0 = emit_fetch( bld, inst, 0, chan_index );
726 emit_rnd( bld, 0, 0 );
727 emit_store( bld, inst, 0, chan_index, tmp0);
728 }
729 break;
730 #endif
731
732 case TGSI_OPCODE_EX2: {
733 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
734 tmp0 = lp_build_exp2( &bld->base, tmp0);
735 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
736 emit_store( bld, inst, 0, chan_index, tmp0);
737 }
738 break;
739 }
740
741 case TGSI_OPCODE_LG2:
742 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
743 tmp0 = lp_build_log2( &bld->base, tmp0);
744 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
745 emit_store( bld, inst, 0, chan_index, tmp0);
746 }
747 break;
748
749 case TGSI_OPCODE_POW:
750 src0 = emit_fetch( bld, inst, 0, CHAN_X );
751 src1 = emit_fetch( bld, inst, 1, CHAN_X );
752 dst0 = lp_build_pow( &bld->base, src0, src1 );
753 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
754 emit_store( bld, inst, 0, chan_index, dst0 );
755 }
756 break;
757
758 case TGSI_OPCODE_XPD:
759 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
760 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
761 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
762 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
763 }
764 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
765 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
766 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
767 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
768 }
769 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
770 tmp2 = tmp0;
771 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
772 tmp5 = tmp3;
773 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
774 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
775 emit_store( bld, inst, 0, CHAN_X, tmp2);
776 }
777 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
778 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
779 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
780 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
781 }
782 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
783 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
784 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
785 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
786 emit_store( bld, inst, 0, CHAN_Y, tmp3);
787 }
788 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
789 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
790 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
791 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
792 emit_store( bld, inst, 0, CHAN_Z, tmp5);
793 }
794 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
795 tmp0 = bld->base.one;
796 emit_store( bld, inst, 0, CHAN_W, tmp0);
797 }
798 break;
799
800 case TGSI_OPCODE_ABS:
801 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
802 tmp0 = emit_fetch( bld, inst, 0, chan_index );
803 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
804 emit_store( bld, inst, 0, chan_index, tmp0);
805 }
806 break;
807
808 case TGSI_OPCODE_RCC:
809 return 0;
810 break;
811
812 case TGSI_OPCODE_DPH:
813 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
814 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
815 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
816 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
817 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
818 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
819 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
820 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
821 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
822 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
823 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
824 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
825 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
826 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
827 emit_store( bld, inst, 0, chan_index, tmp0);
828 }
829 break;
830
831 case TGSI_OPCODE_COS:
832 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
833 tmp0 = lp_build_cos( &bld->base, tmp0 );
834 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
835 emit_store( bld, inst, 0, chan_index, tmp0);
836 }
837 break;
838
839 case TGSI_OPCODE_DDX:
840 return 0;
841 break;
842
843 case TGSI_OPCODE_DDY:
844 return 0;
845 break;
846
847 case TGSI_OPCODE_KILP:
848 /* predicated kill */
849 /* FIXME */
850 return 0;
851 break;
852
853 case TGSI_OPCODE_KIL:
854 /* conditional kill */
855 emit_kil( bld, inst );
856 break;
857
858 case TGSI_OPCODE_PK2H:
859 return 0;
860 break;
861
862 case TGSI_OPCODE_PK2US:
863 return 0;
864 break;
865
866 case TGSI_OPCODE_PK4B:
867 return 0;
868 break;
869
870 case TGSI_OPCODE_PK4UB:
871 return 0;
872 break;
873
874 case TGSI_OPCODE_RFL:
875 return 0;
876 break;
877
878 case TGSI_OPCODE_SEQ:
879 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
880 src0 = emit_fetch( bld, inst, 0, chan_index );
881 src1 = emit_fetch( bld, inst, 1, chan_index );
882 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
883 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
884 emit_store( bld, inst, 0, chan_index, dst0);
885 }
886 break;
887
888 case TGSI_OPCODE_SFL:
889 return 0;
890 break;
891
892 case TGSI_OPCODE_SGT:
893 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
894 src0 = emit_fetch( bld, inst, 0, chan_index );
895 src1 = emit_fetch( bld, inst, 1, chan_index );
896 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
897 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
898 emit_store( bld, inst, 0, chan_index, dst0);
899 }
900 break;
901
902 case TGSI_OPCODE_SIN:
903 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
904 tmp0 = lp_build_sin( &bld->base, tmp0 );
905 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
906 emit_store( bld, inst, 0, chan_index, tmp0);
907 }
908 break;
909
910 case TGSI_OPCODE_SLE:
911 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
912 src0 = emit_fetch( bld, inst, 0, chan_index );
913 src1 = emit_fetch( bld, inst, 1, chan_index );
914 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
915 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
916 emit_store( bld, inst, 0, chan_index, dst0);
917 }
918 break;
919
920 case TGSI_OPCODE_SNE:
921 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
922 src0 = emit_fetch( bld, inst, 0, chan_index );
923 src1 = emit_fetch( bld, inst, 1, chan_index );
924 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
925 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
926 emit_store( bld, inst, 0, chan_index, dst0);
927 }
928 break;
929
930 case TGSI_OPCODE_STR:
931 return 0;
932 break;
933
934 case TGSI_OPCODE_TEX:
935 emit_tex( bld, inst, FALSE, FALSE );
936 break;
937
938 case TGSI_OPCODE_TXD:
939 return 0;
940 break;
941
942 case TGSI_OPCODE_UP2H:
943 return 0;
944 break;
945
946 case TGSI_OPCODE_UP2US:
947 return 0;
948 break;
949
950 case TGSI_OPCODE_UP4B:
951 return 0;
952 break;
953
954 case TGSI_OPCODE_UP4UB:
955 return 0;
956 break;
957
958 case TGSI_OPCODE_X2D:
959 return 0;
960 break;
961
962 case TGSI_OPCODE_ARA:
963 return 0;
964 break;
965
966 #if 0
967 case TGSI_OPCODE_ARR:
968 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
969 tmp0 = emit_fetch( bld, inst, 0, chan_index );
970 emit_rnd( bld, 0, 0 );
971 emit_f2it( bld, 0 );
972 emit_store( bld, inst, 0, chan_index, tmp0);
973 }
974 break;
975 #endif
976
977 case TGSI_OPCODE_BRA:
978 return 0;
979 break;
980
981 case TGSI_OPCODE_CAL:
982 return 0;
983 break;
984
985 #if 0
986 case TGSI_OPCODE_RET:
987 emit_ret( bld );
988 break;
989 #endif
990
991 case TGSI_OPCODE_END:
992 break;
993
994 #if 0
995 case TGSI_OPCODE_SSG:
996 /* TGSI_OPCODE_SGN */
997 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
998 tmp0 = emit_fetch( bld, inst, 0, chan_index );
999 emit_sgn( bld, 0, 0 );
1000 emit_store( bld, inst, 0, chan_index, tmp0);
1001 }
1002 break;
1003 #endif
1004
1005 case TGSI_OPCODE_CMP:
1006 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1007 src0 = emit_fetch( bld, inst, 0, chan_index );
1008 src1 = emit_fetch( bld, inst, 1, chan_index );
1009 src2 = emit_fetch( bld, inst, 2, chan_index );
1010 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1011 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1012 emit_store( bld, inst, 0, chan_index, dst0);
1013 }
1014 break;
1015
1016 case TGSI_OPCODE_SCS:
1017 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1018 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1019 tmp0 = lp_build_cos( &bld->base, tmp0 );
1020 emit_store( bld, inst, 0, CHAN_X, tmp0);
1021 }
1022 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1023 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1024 tmp0 = lp_build_sin( &bld->base, tmp0 );
1025 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1026 }
1027 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1028 tmp0 = bld->base.zero;
1029 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1030 }
1031 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1032 tmp0 = bld->base.one;
1033 emit_store( bld, inst, 0, CHAN_W, tmp0);
1034 }
1035 break;
1036
1037 case TGSI_OPCODE_TXB:
1038 emit_tex( bld, inst, TRUE, FALSE );
1039 break;
1040
1041 case TGSI_OPCODE_NRM:
1042 /* fall-through */
1043 case TGSI_OPCODE_NRM4:
1044 /* 3 or 4-component normalization */
1045 {
1046 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1047
1048 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1049 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1050 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1051 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1052
1053 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1054
1055 /* xmm4 = src.x */
1056 /* xmm0 = src.x * src.x */
1057 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1058 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1059 tmp4 = tmp0;
1060 }
1061 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1062
1063 /* xmm5 = src.y */
1064 /* xmm0 = xmm0 + src.y * src.y */
1065 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1066 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1067 tmp5 = tmp1;
1068 }
1069 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1070 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1071
1072 /* xmm6 = src.z */
1073 /* xmm0 = xmm0 + src.z * src.z */
1074 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1075 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1076 tmp6 = tmp1;
1077 }
1078 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1079 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1080
1081 if (dims == 4) {
1082 /* xmm7 = src.w */
1083 /* xmm0 = xmm0 + src.w * src.w */
1084 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1085 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1086 tmp7 = tmp1;
1087 }
1088 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1089 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1090 }
1091
1092 /* xmm1 = 1 / sqrt(xmm0) */
1093 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1094
1095 /* dst.x = xmm1 * src.x */
1096 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1097 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1098 emit_store(bld, inst, 0, CHAN_X, tmp4);
1099 }
1100
1101 /* dst.y = xmm1 * src.y */
1102 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1103 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1104 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1105 }
1106
1107 /* dst.z = xmm1 * src.z */
1108 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1109 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1110 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1111 }
1112
1113 /* dst.w = xmm1 * src.w */
1114 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1115 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1116 emit_store(bld, inst, 0, CHAN_W, tmp7);
1117 }
1118 }
1119
1120 /* dst0.w = 1.0 */
1121 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1122 tmp0 = bld->base.one;
1123 emit_store(bld, inst, 0, CHAN_W, tmp0);
1124 }
1125 }
1126 break;
1127
1128 case TGSI_OPCODE_DIV:
1129 return 0;
1130 break;
1131
1132 case TGSI_OPCODE_DP2:
1133 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1134 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1135 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1136 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1137 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1138 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1139 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1140 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1141 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1142 }
1143 break;
1144
1145 case TGSI_OPCODE_TXL:
1146 emit_tex( bld, inst, TRUE, FALSE );
1147 break;
1148
1149 case TGSI_OPCODE_TXP:
1150 emit_tex( bld, inst, FALSE, TRUE );
1151 break;
1152
1153 case TGSI_OPCODE_BRK:
1154 return 0;
1155 break;
1156
1157 case TGSI_OPCODE_IF:
1158 return 0;
1159 break;
1160
1161 case TGSI_OPCODE_BGNFOR:
1162 return 0;
1163 break;
1164
1165 case TGSI_OPCODE_REP:
1166 return 0;
1167 break;
1168
1169 case TGSI_OPCODE_ELSE:
1170 return 0;
1171 break;
1172
1173 case TGSI_OPCODE_ENDIF:
1174 return 0;
1175 break;
1176
1177 case TGSI_OPCODE_ENDFOR:
1178 return 0;
1179 break;
1180
1181 case TGSI_OPCODE_ENDREP:
1182 return 0;
1183 break;
1184
1185 case TGSI_OPCODE_PUSHA:
1186 return 0;
1187 break;
1188
1189 case TGSI_OPCODE_POPA:
1190 return 0;
1191 break;
1192
1193 case TGSI_OPCODE_CEIL:
1194 return 0;
1195 break;
1196
1197 case TGSI_OPCODE_I2F:
1198 return 0;
1199 break;
1200
1201 case TGSI_OPCODE_NOT:
1202 return 0;
1203 break;
1204
1205 #if 0
1206 case TGSI_OPCODE_TRUNC:
1207 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1208 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1209 emit_f2it( bld, 0 );
1210 emit_i2f( bld, 0 );
1211 emit_store( bld, inst, 0, chan_index, tmp0);
1212 }
1213 break;
1214 #endif
1215
1216 case TGSI_OPCODE_SHL:
1217 return 0;
1218 break;
1219
1220 case TGSI_OPCODE_SHR:
1221 return 0;
1222 break;
1223
1224 case TGSI_OPCODE_AND:
1225 return 0;
1226 break;
1227
1228 case TGSI_OPCODE_OR:
1229 return 0;
1230 break;
1231
1232 case TGSI_OPCODE_MOD:
1233 return 0;
1234 break;
1235
1236 case TGSI_OPCODE_XOR:
1237 return 0;
1238 break;
1239
1240 case TGSI_OPCODE_SAD:
1241 return 0;
1242 break;
1243
1244 case TGSI_OPCODE_TXF:
1245 return 0;
1246 break;
1247
1248 case TGSI_OPCODE_TXQ:
1249 return 0;
1250 break;
1251
1252 case TGSI_OPCODE_CONT:
1253 return 0;
1254 break;
1255
1256 case TGSI_OPCODE_EMIT:
1257 return 0;
1258 break;
1259
1260 case TGSI_OPCODE_ENDPRIM:
1261 return 0;
1262 break;
1263
1264 default:
1265 return 0;
1266 }
1267
1268 return 1;
1269 }
1270
1271
1272 void
1273 lp_build_tgsi_soa(LLVMBuilderRef builder,
1274 const struct tgsi_token *tokens,
1275 union lp_type type,
1276 struct lp_build_mask_context *mask,
1277 LLVMValueRef consts_ptr,
1278 const LLVMValueRef *pos,
1279 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1280 LLVMValueRef (*outputs)[NUM_CHANNELS],
1281 struct lp_build_sampler_soa *sampler)
1282 {
1283 struct lp_build_tgsi_soa_context bld;
1284 struct tgsi_parse_context parse;
1285 uint num_immediates = 0;
1286 unsigned i;
1287
1288 /* Setup build context */
1289 memset(&bld, 0, sizeof bld);
1290 lp_build_context_init(&bld.base, builder, type);
1291 bld.mask = mask;
1292 bld.pos = pos;
1293 bld.inputs = inputs;
1294 bld.outputs = outputs;
1295 bld.consts_ptr = consts_ptr;
1296 bld.sampler = sampler;
1297
1298 tgsi_parse_init( &parse, tokens );
1299
1300 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1301 tgsi_parse_token( &parse );
1302
1303 switch( parse.FullToken.Token.Type ) {
1304 case TGSI_TOKEN_TYPE_DECLARATION:
1305 /* Inputs already interpolated */
1306 break;
1307
1308 case TGSI_TOKEN_TYPE_INSTRUCTION:
1309 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1310 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1311 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1312 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1313 info ? info->mnemonic : "<invalid>");
1314 }
1315 break;
1316
1317 case TGSI_TOKEN_TYPE_IMMEDIATE:
1318 /* simply copy the immediate values into the next immediates[] slot */
1319 {
1320 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1321 assert(size <= 4);
1322 assert(num_immediates < LP_MAX_IMMEDIATES);
1323 for( i = 0; i < size; ++i )
1324 bld.immediates[num_immediates][i] =
1325 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1326 for( i = size; i < 4; ++i )
1327 bld.immediates[num_immediates][i] = bld.base.undef;
1328 num_immediates++;
1329 }
1330 break;
1331
1332 default:
1333 assert( 0 );
1334 }
1335 }
1336
1337 tgsi_parse_free( &parse );
1338 }
1339