451915f23f648e1c59098c6046055ba65b94b006
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_util.h"
37 #include "tgsi/tgsi_exec.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_intr.h"
41 #include "lp_bld_arit.h"
42 #include "lp_bld_logic.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_tgsi.h"
45 #include "lp_bld_debug.h"
46
47
/*
 * Capacity of the per-shader register tables kept in
 * struct lp_build_tgsi_soa_context (temps[] and immediates[]).
 */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/*
 * Channel iteration helpers.
 *
 * These expand to a bare `for` and/or `if` header, so the statement or
 * block that follows the macro invocation becomes its body.
 */

/* Step CHAN through every channel index, 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/* Non-zero when bit CHAN is set in the write mask of destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* `if` header guarding code that only applies to a written channel. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Step CHAN through the channels enabled in destination 0's write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Symbolic channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
69
70
71 struct lp_build_tgsi_soa_context
72 {
73 struct lp_build_context base;
74
75 LLVMValueRef x, y, w;
76 LLVMValueRef a0_ptr;
77 LLVMValueRef dadx_ptr;
78 LLVMValueRef dady_ptr;
79
80 LLVMValueRef consts_ptr;
81 LLVMValueRef (*outputs)[NUM_CHANNELS];
82 LLVMValueRef samplers_ptr;
83
84 LLVMValueRef oow;
85
86 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
87
88 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
89 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
90
91 LLVMValueRef mask;
92
93 /** Coords/texels store */
94 LLVMValueRef store_ptr;
95 };
96
97
98 /**
99 * Register fetch.
100 */
101
102 static LLVMValueRef
103 emit_fetch(
104 struct lp_build_tgsi_soa_context *bld,
105 const struct tgsi_full_src_register *reg,
106 const unsigned chan_index )
107 {
108 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
109 LLVMValueRef res;
110
111 switch (swizzle) {
112 case TGSI_EXTSWIZZLE_X:
113 case TGSI_EXTSWIZZLE_Y:
114 case TGSI_EXTSWIZZLE_Z:
115 case TGSI_EXTSWIZZLE_W:
116
117 switch (reg->SrcRegister.File) {
118 case TGSI_FILE_CONSTANT: {
119 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
120 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
121 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
122 res = lp_build_broadcast_scalar(&bld->base, scalar);
123 break;
124 }
125
126 case TGSI_FILE_IMMEDIATE:
127 res = bld->immediates[reg->SrcRegister.Index][swizzle];
128 assert(res);
129 break;
130
131 case TGSI_FILE_INPUT:
132 res = bld->inputs[reg->SrcRegister.Index][swizzle];
133 assert(res);
134 break;
135
136 case TGSI_FILE_TEMPORARY:
137 res = bld->temps[reg->SrcRegister.Index][swizzle];
138 if(!res)
139 return bld->base.undef;
140 break;
141
142 default:
143 assert( 0 );
144 return bld->base.undef;
145 }
146 break;
147
148 case TGSI_EXTSWIZZLE_ZERO:
149 res = bld->base.zero;
150 break;
151
152 case TGSI_EXTSWIZZLE_ONE:
153 res = bld->base.one;
154 break;
155
156 default:
157 assert( 0 );
158 return bld->base.undef;
159 }
160
161 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
162 case TGSI_UTIL_SIGN_CLEAR:
163 res = lp_build_abs( &bld->base, res );
164 break;
165
166 case TGSI_UTIL_SIGN_SET:
167 res = lp_build_abs( &bld->base, res );
168 res = LLVMBuildNeg( bld->base.builder, res, "" );
169 break;
170
171 case TGSI_UTIL_SIGN_TOGGLE:
172 res = LLVMBuildNeg( bld->base.builder, res, "" );
173 break;
174
175 case TGSI_UTIL_SIGN_KEEP:
176 break;
177 }
178
179 return res;
180 }
181
182 #define FETCH( FUNC, INST, INDEX, CHAN )\
183 emit_fetch( FUNC, &(INST).FullSrcRegisters[INDEX], CHAN )
184
185 /**
186 * Register store.
187 */
188
189 static void
190 emit_store(
191 struct lp_build_tgsi_soa_context *bld,
192 const struct tgsi_full_dst_register *reg,
193 const struct tgsi_full_instruction *inst,
194 unsigned chan_index,
195 LLVMValueRef value)
196 {
197 switch( inst->Instruction.Saturate ) {
198 case TGSI_SAT_NONE:
199 break;
200
201 case TGSI_SAT_ZERO_ONE:
202 /* assert( 0 ); */
203 break;
204
205 case TGSI_SAT_MINUS_PLUS_ONE:
206 assert( 0 );
207 break;
208 }
209
210 switch( reg->DstRegister.File ) {
211 case TGSI_FILE_OUTPUT:
212 bld->outputs[reg->DstRegister.Index][chan_index] = value;
213 break;
214
215 case TGSI_FILE_TEMPORARY:
216 bld->temps[reg->DstRegister.Index][chan_index] = value;
217 break;
218
219 case TGSI_FILE_ADDRESS:
220 /* FIXME */
221 assert(0);
222 break;
223
224 default:
225 assert( 0 );
226 }
227 }
228
229 #define STORE( FUNC, INST, INDEX, CHAN, VAL )\
230 emit_store( FUNC, &(INST).FullDstRegisters[INDEX], &(INST), CHAN, VAL )
231
232
233 void PIPE_CDECL
234 lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
235 uint32_t unit,
236 float *store )
237 {
238 struct tgsi_sampler *sampler = samplers[unit];
239
240 #if 0
241 uint j;
242
243 debug_printf("%s sampler: %p (%p) store: %p\n",
244 __FUNCTION__,
245 sampler, *sampler,
246 store );
247
248 debug_printf("lodbias %f\n", store[12]);
249
250 for (j = 0; j < 4; j++)
251 debug_printf("sample %d texcoord %f %f\n",
252 j,
253 store[0+j],
254 store[4+j]);
255 #endif
256
257 {
258 float rgba[NUM_CHANNELS][QUAD_SIZE];
259 sampler->get_samples(sampler,
260 &store[0],
261 &store[4],
262 &store[8],
263 0.0f, /*store[12], lodbias */
264 rgba);
265 memcpy(store, rgba, sizeof rgba);
266 }
267
268 #if 0
269 for (j = 0; j < 4; j++)
270 debug_printf("sample %d result %f %f %f %f\n",
271 j,
272 store[0+j],
273 store[4+j],
274 store[8+j],
275 store[12+j]);
276 #endif
277 }
278
279 /**
280 * High-level instruction translators.
281 */
282
283 static void
284 emit_tex( struct lp_build_tgsi_soa_context *bld,
285 const struct tgsi_full_instruction *inst,
286 boolean apply_lodbias,
287 boolean projected)
288 {
289 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
290 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
291 LLVMValueRef lodbias;
292 LLVMValueRef oow;
293 LLVMValueRef args[3];
294 unsigned count;
295 unsigned i;
296
297 switch (inst->InstructionExtTexture.Texture) {
298 case TGSI_TEXTURE_1D:
299 case TGSI_TEXTURE_SHADOW1D:
300 count = 1;
301 break;
302 case TGSI_TEXTURE_2D:
303 case TGSI_TEXTURE_RECT:
304 case TGSI_TEXTURE_SHADOW2D:
305 case TGSI_TEXTURE_SHADOWRECT:
306 count = 2;
307 break;
308 case TGSI_TEXTURE_3D:
309 case TGSI_TEXTURE_CUBE:
310 count = 3;
311 break;
312 default:
313 assert(0);
314 return;
315 }
316
317 if(apply_lodbias)
318 lodbias = FETCH( bld, *inst, 0, 3 );
319 else
320 lodbias = bld->base.zero;
321
322 if(!bld->store_ptr)
323 bld->store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
324 vec_type,
325 LLVMConstInt(LLVMInt32Type(), 4, 0),
326 "store");
327
328 if (projected) {
329 oow = FETCH( bld, *inst, 0, 3 );
330 oow = lp_build_rcp(&bld->base, oow);
331 }
332
333 for (i = 0; i < count; i++) {
334 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
335 LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
336 LLVMValueRef coord;
337
338 coord = FETCH( bld, *inst, 0, i );
339
340 if (projected)
341 coord = lp_build_mul(&bld->base, coord, oow);
342
343 LLVMBuildStore(bld->base.builder, coord, coord_ptr);
344 }
345
346 args[0] = bld->samplers_ptr;
347 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
348 args[2] = bld->store_ptr;
349
350 lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
351
352 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) {
353 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
354 LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
355 LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
356 STORE( bld, *inst, 0, i, res );
357 }
358 }
359
360
361 static void
362 emit_kil(
363 struct lp_build_tgsi_soa_context *bld,
364 const struct tgsi_full_src_register *reg )
365 {
366 LLVMValueRef terms[NUM_CHANNELS];
367 unsigned chan_index;
368
369 memset(&terms, 0, sizeof terms);
370
371 FOR_EACH_CHANNEL( chan_index ) {
372 unsigned swizzle;
373
374 /* Unswizzle channel */
375 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
376
377 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
378 * not to be tested. */
379 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
380 continue;
381
382 /* Check if the component has not been already tested. */
383 assert(swizzle < NUM_CHANNELS);
384 if( !terms[swizzle] )
385 /* TODO: change the comparison operator instead of setting the sign */
386 terms[swizzle] = emit_fetch(bld, reg, chan_index );
387 }
388
389 FOR_EACH_CHANNEL( chan_index ) {
390 if(terms[chan_index]) {
391 LLVMValueRef mask;
392
393 mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
394
395 if(bld->mask)
396 bld->mask = LLVMBuildAnd(bld->base.builder, bld->mask, mask, "");
397 else
398 bld->mask = mask;
399 }
400 }
401 }
402
403
/**
 * Translate the predicated kill instruction.
 *
 * Not implemented: this is currently a stub that emits nothing.
 */
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld)
{
   /* XXX todo / fix me */
   (void) bld;
}
410
411
412 /**
413 * Check if inst src/dest regs use indirect addressing into temporary
414 * register file.
415 */
416 static boolean
417 indirect_temp_reference(const struct tgsi_full_instruction *inst)
418 {
419 uint i;
420 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
421 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
422 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
423 reg->SrcRegister.Indirect)
424 return TRUE;
425 }
426 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
427 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
428 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
429 reg->DstRegister.Indirect)
430 return TRUE;
431 }
432 return FALSE;
433 }
434
435
436 static int
437 emit_instruction(
438 struct lp_build_tgsi_soa_context *bld,
439 struct tgsi_full_instruction *inst )
440 {
441 unsigned chan_index;
442 LLVMValueRef src0, src1, src2;
443 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
444 LLVMValueRef dst0;
445
446 /* we can't handle indirect addressing into temp register file yet */
447 if (indirect_temp_reference(inst))
448 return FALSE;
449
450 switch (inst->Instruction.Opcode) {
451 #if 0
452 case TGSI_OPCODE_ARL:
453 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
454 tmp0 = FETCH( bld, *inst, 0, chan_index );
455 emit_flr(bld, 0, 0);
456 emit_f2it( bld, 0 );
457 STORE( bld, *inst, 0, chan_index, tmp0);
458 }
459 break;
460 #endif
461
462 case TGSI_OPCODE_MOV:
463 case TGSI_OPCODE_SWZ:
464 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
465 tmp0 = FETCH( bld, *inst, 0, chan_index );
466 STORE( bld, *inst, 0, chan_index, tmp0);
467 }
468 break;
469
470 case TGSI_OPCODE_LIT:
471 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
472 STORE( bld, *inst, 0, CHAN_X, bld->base.one);
473 }
474 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
475 src0 = FETCH( bld, *inst, 0, CHAN_X );
476 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
477 STORE( bld, *inst, 0, CHAN_Y, dst0);
478 }
479 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
480 /* XMM[1] = SrcReg[0].yyyy */
481 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
482 /* XMM[1] = max(XMM[1], 0) */
483 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
484 /* XMM[2] = SrcReg[0].wwww */
485 tmp2 = FETCH( bld, *inst, 0, CHAN_W );
486 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
487 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
488 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
489 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
490 STORE( bld, *inst, 0, CHAN_Z, dst0);
491 }
492 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
493 STORE( bld, *inst, 0, CHAN_W, bld->base.one);
494 }
495 break;
496
497 case TGSI_OPCODE_RCP:
498 /* TGSI_OPCODE_RECIP */
499 src0 = FETCH( bld, *inst, 0, CHAN_X );
500 dst0 = lp_build_rcp(&bld->base, src0);
501 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
502 STORE( bld, *inst, 0, chan_index, dst0 );
503 }
504 break;
505
506 case TGSI_OPCODE_RSQ:
507 /* TGSI_OPCODE_RECIPSQRT */
508 src0 = FETCH( bld, *inst, 0, CHAN_X );
509 src0 = lp_build_abs(&bld->base, src0);
510 dst0 = lp_build_rsqrt(&bld->base, src0);
511 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
512 STORE( bld, *inst, 0, chan_index, dst0 );
513 }
514 break;
515
516 case TGSI_OPCODE_EXP:
517 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
518 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
519 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
520 LLVMValueRef *p_exp2_int_part = NULL;
521 LLVMValueRef *p_frac_part = NULL;
522 LLVMValueRef *p_exp2 = NULL;
523
524 src0 = FETCH( bld, *inst, 0, CHAN_X );
525
526 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
527 p_exp2_int_part = &tmp0;
528 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
529 p_frac_part = &tmp1;
530 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
531 p_exp2 = &tmp2;
532
533 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
534
535 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
536 STORE( bld, *inst, 0, CHAN_X, tmp0);
537 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
538 STORE( bld, *inst, 0, CHAN_Y, tmp1);
539 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
540 STORE( bld, *inst, 0, CHAN_Z, tmp2);
541 }
542 /* dst.w = 1.0 */
543 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
544 tmp0 = bld->base.one;
545 STORE( bld, *inst, 0, CHAN_W, tmp0);
546 }
547 break;
548
549 case TGSI_OPCODE_LOG:
550 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
551 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
552 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
553 LLVMValueRef *p_floor_log2;
554 LLVMValueRef *p_exp;
555 LLVMValueRef *p_log2;
556
557 src0 = FETCH( bld, *inst, 0, CHAN_X );
558 src0 = lp_build_abs( &bld->base, src0 );
559
560 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
561 p_floor_log2 = &tmp0;
562 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
563 p_exp = &tmp1;
564 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
565 p_log2 = &tmp2;
566
567 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
568
569 /* dst.x = floor(lg2(abs(src.x))) */
570 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
571 STORE( bld, *inst, 0, CHAN_X, tmp0);
572 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
573 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
574 tmp1 = lp_build_div( &bld->base, src0, tmp1);
575 STORE( bld, *inst, 0, CHAN_Y, tmp1);
576 }
577 /* dst.z = lg2(abs(src.x)) */
578 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
579 STORE( bld, *inst, 0, CHAN_Z, tmp2);
580 }
581 /* dst.w = 1.0 */
582 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
583 tmp0 = bld->base.one;
584 STORE( bld, *inst, 0, CHAN_W, tmp0);
585 }
586 break;
587
588 case TGSI_OPCODE_MUL:
589 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
590 src0 = FETCH( bld, *inst, 0, chan_index );
591 src1 = FETCH( bld, *inst, 1, chan_index );
592 dst0 = lp_build_mul(&bld->base, src0, src1);
593 STORE( bld, *inst, 0, chan_index, dst0);
594 }
595 break;
596
597 case TGSI_OPCODE_ADD:
598 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
599 src0 = FETCH( bld, *inst, 0, chan_index );
600 src1 = FETCH( bld, *inst, 1, chan_index );
601 dst0 = lp_build_add(&bld->base, src0, src1);
602 STORE( bld, *inst, 0, chan_index, dst0);
603 }
604 break;
605
606 case TGSI_OPCODE_DP3:
607 /* TGSI_OPCODE_DOT3 */
608 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
609 tmp1 = FETCH( bld, *inst, 1, CHAN_X );
610 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
611 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
612 tmp2 = FETCH( bld, *inst, 1, CHAN_Y );
613 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
614 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
615 tmp1 = FETCH( bld, *inst, 0, CHAN_Z );
616 tmp2 = FETCH( bld, *inst, 1, CHAN_Z );
617 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
618 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
619 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
620 STORE( bld, *inst, 0, chan_index, tmp0);
621 }
622 break;
623
624 case TGSI_OPCODE_DP4:
625 /* TGSI_OPCODE_DOT4 */
626 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
627 tmp1 = FETCH( bld, *inst, 1, CHAN_X );
628 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
629 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
630 tmp2 = FETCH( bld, *inst, 1, CHAN_Y );
631 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
632 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
633 tmp1 = FETCH( bld, *inst, 0, CHAN_Z );
634 tmp2 = FETCH( bld, *inst, 1, CHAN_Z );
635 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
636 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
637 tmp1 = FETCH( bld, *inst, 0, CHAN_W );
638 tmp2 = FETCH( bld, *inst, 1, CHAN_W );
639 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
640 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
641 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
642 STORE( bld, *inst, 0, chan_index, tmp0);
643 }
644 break;
645
646 case TGSI_OPCODE_DST:
647 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
648 tmp0 = bld->base.one;
649 STORE( bld, *inst, 0, CHAN_X, tmp0);
650 }
651 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
652 tmp0 = FETCH( bld, *inst, 0, CHAN_Y );
653 tmp1 = FETCH( bld, *inst, 1, CHAN_Y );
654 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
655 STORE( bld, *inst, 0, CHAN_Y, tmp0);
656 }
657 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
658 tmp0 = FETCH( bld, *inst, 0, CHAN_Z );
659 STORE( bld, *inst, 0, CHAN_Z, tmp0);
660 }
661 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
662 tmp0 = FETCH( bld, *inst, 1, CHAN_W );
663 STORE( bld, *inst, 0, CHAN_W, tmp0);
664 }
665 break;
666
667 case TGSI_OPCODE_MIN:
668 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
669 src0 = FETCH( bld, *inst, 0, chan_index );
670 src1 = FETCH( bld, *inst, 1, chan_index );
671 dst0 = lp_build_min( &bld->base, src0, src1 );
672 STORE( bld, *inst, 0, chan_index, dst0);
673 }
674 break;
675
676 case TGSI_OPCODE_MAX:
677 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
678 src0 = FETCH( bld, *inst, 0, chan_index );
679 src1 = FETCH( bld, *inst, 1, chan_index );
680 dst0 = lp_build_max( &bld->base, src0, src1 );
681 STORE( bld, *inst, 0, chan_index, dst0);
682 }
683 break;
684
685 case TGSI_OPCODE_SLT:
686 /* TGSI_OPCODE_SETLT */
687 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
688 src0 = FETCH( bld, *inst, 0, chan_index );
689 src1 = FETCH( bld, *inst, 1, chan_index );
690 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
691 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
692 STORE( bld, *inst, 0, chan_index, dst0);
693 }
694 break;
695
696 case TGSI_OPCODE_SGE:
697 /* TGSI_OPCODE_SETGE */
698 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
699 src0 = FETCH( bld, *inst, 0, chan_index );
700 src1 = FETCH( bld, *inst, 1, chan_index );
701 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
702 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
703 STORE( bld, *inst, 0, chan_index, dst0);
704 }
705 break;
706
707 case TGSI_OPCODE_MAD:
708 /* TGSI_OPCODE_MADD */
709 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
710 tmp0 = FETCH( bld, *inst, 0, chan_index );
711 tmp1 = FETCH( bld, *inst, 1, chan_index );
712 tmp2 = FETCH( bld, *inst, 2, chan_index );
713 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
714 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
715 STORE( bld, *inst, 0, chan_index, tmp0);
716 }
717 break;
718
719 case TGSI_OPCODE_SUB:
720 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
721 tmp0 = FETCH( bld, *inst, 0, chan_index );
722 tmp1 = FETCH( bld, *inst, 1, chan_index );
723 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
724 STORE( bld, *inst, 0, chan_index, tmp0);
725 }
726 break;
727
728 case TGSI_OPCODE_LRP:
729 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
730 src0 = FETCH( bld, *inst, 0, chan_index );
731 src1 = FETCH( bld, *inst, 1, chan_index );
732 src2 = FETCH( bld, *inst, 2, chan_index );
733 tmp0 = lp_build_sub( &bld->base, src1, src2 );
734 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
735 dst0 = lp_build_add( &bld->base, tmp0, src2 );
736 STORE( bld, *inst, 0, chan_index, dst0 );
737 }
738 break;
739
740 case TGSI_OPCODE_CND:
741 return 0;
742 break;
743
744 case TGSI_OPCODE_CND0:
745 return 0;
746 break;
747
748 case TGSI_OPCODE_DP2A:
749 tmp0 = FETCH( bld, *inst, 0, CHAN_X ); /* xmm0 = src[0].x */
750 tmp1 = FETCH( bld, *inst, 1, CHAN_X ); /* xmm1 = src[1].x */
751 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
752 tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
753 tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
754 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
755 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
756 tmp1 = FETCH( bld, *inst, 2, CHAN_X ); /* xmm1 = src[2].x */
757 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
758 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
759 STORE( bld, *inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
760 }
761 break;
762
763 #if 0
764 case TGSI_OPCODE_FRC:
765 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
766 tmp0 = FETCH( bld, *inst, 0, chan_index );
767 emit_frc( bld, 0, 0 );
768 STORE( bld, *inst, 0, chan_index, tmp0);
769 }
770 break;
771
772 case TGSI_OPCODE_CLAMP:
773 return 0;
774 break;
775
776 case TGSI_OPCODE_FLR:
777 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
778 tmp0 = FETCH( bld, *inst, 0, chan_index );
779 emit_flr( bld, 0, 0 );
780 STORE( bld, *inst, 0, chan_index, tmp0);
781 }
782 break;
783
784 case TGSI_OPCODE_ROUND:
785 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
786 tmp0 = FETCH( bld, *inst, 0, chan_index );
787 emit_rnd( bld, 0, 0 );
788 STORE( bld, *inst, 0, chan_index, tmp0);
789 }
790 break;
791 #endif
792
793 case TGSI_OPCODE_EX2: {
794 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
795 tmp0 = lp_build_exp2( &bld->base, tmp0);
796 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
797 STORE( bld, *inst, 0, chan_index, tmp0);
798 }
799 break;
800 }
801
802 case TGSI_OPCODE_LG2:
803 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
804 tmp0 = lp_build_log2( &bld->base, tmp0);
805 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
806 STORE( bld, *inst, 0, chan_index, tmp0);
807 }
808 break;
809
810 case TGSI_OPCODE_POW:
811 src0 = FETCH( bld, *inst, 0, CHAN_X );
812 src1 = FETCH( bld, *inst, 1, CHAN_X );
813 dst0 = lp_build_pow( &bld->base, src0, src1 );
814 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
815 STORE( bld, *inst, 0, chan_index, dst0 );
816 }
817 break;
818
819 case TGSI_OPCODE_XPD:
820 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
821 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
822 tmp1 = FETCH( bld, *inst, 1, CHAN_Z );
823 tmp3 = FETCH( bld, *inst, 0, CHAN_Z );
824 }
825 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
826 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
827 tmp0 = FETCH( bld, *inst, 0, CHAN_Y );
828 tmp4 = FETCH( bld, *inst, 1, CHAN_Y );
829 }
830 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
831 tmp2 = tmp0;
832 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
833 tmp5 = tmp3;
834 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
835 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
836 STORE( bld, *inst, 0, CHAN_X, tmp2);
837 }
838 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
839 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
840 tmp2 = FETCH( bld, *inst, 1, CHAN_X );
841 tmp5 = FETCH( bld, *inst, 0, CHAN_X );
842 }
843 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
844 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
845 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
846 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
847 STORE( bld, *inst, 0, CHAN_Y, tmp3);
848 }
849 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
850 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
851 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
852 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
853 STORE( bld, *inst, 0, CHAN_Z, tmp5);
854 }
855 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
856 tmp0 = bld->base.one;
857 STORE( bld, *inst, 0, CHAN_W, tmp0);
858 }
859 break;
860
861 case TGSI_OPCODE_ABS:
862 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
863 tmp0 = FETCH( bld, *inst, 0, chan_index );
864 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
865 STORE( bld, *inst, 0, chan_index, tmp0);
866 }
867 break;
868
869 case TGSI_OPCODE_RCC:
870 return 0;
871 break;
872
873 case TGSI_OPCODE_DPH:
874 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
875 tmp1 = FETCH( bld, *inst, 1, CHAN_X );
876 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
877 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
878 tmp2 = FETCH( bld, *inst, 1, CHAN_Y );
879 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
880 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
881 tmp1 = FETCH( bld, *inst, 0, CHAN_Z );
882 tmp2 = FETCH( bld, *inst, 1, CHAN_Z );
883 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
884 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
885 tmp1 = FETCH( bld, *inst, 1, CHAN_W );
886 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
887 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
888 STORE( bld, *inst, 0, chan_index, tmp0);
889 }
890 break;
891
892 case TGSI_OPCODE_COS:
893 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
894 tmp0 = lp_build_cos( &bld->base, tmp0 );
895 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
896 STORE( bld, *inst, 0, chan_index, tmp0);
897 }
898 break;
899
900 case TGSI_OPCODE_DDX:
901 return 0;
902 break;
903
904 case TGSI_OPCODE_DDY:
905 return 0;
906 break;
907
908 #if 0
909 case TGSI_OPCODE_KILP:
910 /* predicated kill */
911 emit_kilp( bld );
912 return 0; /* XXX fix me */
913 break;
914 #endif
915
916 case TGSI_OPCODE_KIL:
917 /* conditional kill */
918 emit_kil( bld, &inst->FullSrcRegisters[0] );
919 break;
920
921 case TGSI_OPCODE_PK2H:
922 return 0;
923 break;
924
925 case TGSI_OPCODE_PK2US:
926 return 0;
927 break;
928
929 case TGSI_OPCODE_PK4B:
930 return 0;
931 break;
932
933 case TGSI_OPCODE_PK4UB:
934 return 0;
935 break;
936
937 case TGSI_OPCODE_RFL:
938 return 0;
939 break;
940
941 case TGSI_OPCODE_SEQ:
942 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
943 src0 = FETCH( bld, *inst, 0, chan_index );
944 src1 = FETCH( bld, *inst, 1, chan_index );
945 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
946 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
947 STORE( bld, *inst, 0, chan_index, dst0);
948 }
949 break;
950
951 case TGSI_OPCODE_SFL:
952 return 0;
953 break;
954
955 case TGSI_OPCODE_SGT:
956 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
957 src0 = FETCH( bld, *inst, 0, chan_index );
958 src1 = FETCH( bld, *inst, 1, chan_index );
959 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
960 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
961 STORE( bld, *inst, 0, chan_index, dst0);
962 }
963 break;
964
965 case TGSI_OPCODE_SIN:
966 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
967 tmp0 = lp_build_sin( &bld->base, tmp0 );
968 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
969 STORE( bld, *inst, 0, chan_index, tmp0);
970 }
971 break;
972
973 case TGSI_OPCODE_SLE:
974 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
975 src0 = FETCH( bld, *inst, 0, chan_index );
976 src1 = FETCH( bld, *inst, 1, chan_index );
977 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
978 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
979 STORE( bld, *inst, 0, chan_index, dst0);
980 }
981 break;
982
983 case TGSI_OPCODE_SNE:
984 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
985 src0 = FETCH( bld, *inst, 0, chan_index );
986 src1 = FETCH( bld, *inst, 1, chan_index );
987 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
988 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
989 STORE( bld, *inst, 0, chan_index, dst0);
990 }
991 break;
992
993 case TGSI_OPCODE_STR:
994 return 0;
995 break;
996
997 case TGSI_OPCODE_TEX:
998 emit_tex( bld, inst, FALSE, FALSE );
999 break;
1000
1001 case TGSI_OPCODE_TXD:
1002 return 0;
1003 break;
1004
1005 case TGSI_OPCODE_UP2H:
1006 return 0;
1007 break;
1008
1009 case TGSI_OPCODE_UP2US:
1010 return 0;
1011 break;
1012
1013 case TGSI_OPCODE_UP4B:
1014 return 0;
1015 break;
1016
1017 case TGSI_OPCODE_UP4UB:
1018 return 0;
1019 break;
1020
1021 case TGSI_OPCODE_X2D:
1022 return 0;
1023 break;
1024
1025 case TGSI_OPCODE_ARA:
1026 return 0;
1027 break;
1028
1029 #if 0
1030 case TGSI_OPCODE_ARR:
1031 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1032 tmp0 = FETCH( bld, *inst, 0, chan_index );
1033 emit_rnd( bld, 0, 0 );
1034 emit_f2it( bld, 0 );
1035 STORE( bld, *inst, 0, chan_index, tmp0);
1036 }
1037 break;
1038 #endif
1039
1040 case TGSI_OPCODE_BRA:
1041 return 0;
1042 break;
1043
1044 case TGSI_OPCODE_CAL:
1045 return 0;
1046 break;
1047
1048 #if 0
1049 case TGSI_OPCODE_RET:
1050 emit_ret( bld );
1051 break;
1052 #endif
1053
1054 case TGSI_OPCODE_END:
1055 break;
1056
1057 #if 0
1058 case TGSI_OPCODE_SSG:
1059 /* TGSI_OPCODE_SGN */
1060 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1061 tmp0 = FETCH( bld, *inst, 0, chan_index );
1062 emit_sgn( bld, 0, 0 );
1063 STORE( bld, *inst, 0, chan_index, tmp0);
1064 }
1065 break;
1066 #endif
1067
1068 case TGSI_OPCODE_CMP:
1069 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1070 src0 = FETCH( bld, *inst, 0, chan_index );
1071 src1 = FETCH( bld, *inst, 1, chan_index );
1072 src2 = FETCH( bld, *inst, 2, chan_index );
1073 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1074 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1075 STORE( bld, *inst, 0, chan_index, dst0);
1076 }
1077 break;
1078
1079 case TGSI_OPCODE_SCS:
1080 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1081 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
1082 tmp0 = lp_build_cos( &bld->base, tmp0 );
1083 STORE( bld, *inst, 0, CHAN_X, tmp0);
1084 }
1085 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1086 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
1087 tmp0 = lp_build_sin( &bld->base, tmp0 );
1088 STORE( bld, *inst, 0, CHAN_Y, tmp0);
1089 }
1090 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1091 tmp0 = bld->base.zero;
1092 STORE( bld, *inst, 0, CHAN_Z, tmp0);
1093 }
1094 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1095 tmp0 = bld->base.one;
1096 STORE( bld, *inst, 0, CHAN_W, tmp0);
1097 }
1098 break;
1099
1100 case TGSI_OPCODE_TXB:
1101 emit_tex( bld, inst, TRUE, FALSE );
1102 break;
1103
1104 case TGSI_OPCODE_NRM:
1105 /* fall-through */
1106 case TGSI_OPCODE_NRM4:
1107 /* 3 or 4-component normalization */
1108 {
1109 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1110
1111 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
1112 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
1113 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
1114 (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
1115
1116 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1117
1118 /* xmm4 = src.x */
1119 /* xmm0 = src.x * src.x */
1120 tmp0 = FETCH(bld, *inst, 0, CHAN_X);
1121 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
1122 tmp4 = tmp0;
1123 }
1124 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1125
1126 /* xmm5 = src.y */
1127 /* xmm0 = xmm0 + src.y * src.y */
1128 tmp1 = FETCH(bld, *inst, 0, CHAN_Y);
1129 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1130 tmp5 = tmp1;
1131 }
1132 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1133 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1134
1135 /* xmm6 = src.z */
1136 /* xmm0 = xmm0 + src.z * src.z */
1137 tmp1 = FETCH(bld, *inst, 0, CHAN_Z);
1138 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1139 tmp6 = tmp1;
1140 }
1141 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1142 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1143
1144 if (dims == 4) {
1145 /* xmm7 = src.w */
1146 /* xmm0 = xmm0 + src.w * src.w */
1147 tmp1 = FETCH(bld, *inst, 0, CHAN_W);
1148 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
1149 tmp7 = tmp1;
1150 }
1151 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1152 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1153 }
1154
1155 /* xmm1 = 1 / sqrt(xmm0) */
1156 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1157
1158 /* dst.x = xmm1 * src.x */
1159 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
1160 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1161 STORE(bld, *inst, 0, CHAN_X, tmp4);
1162 }
1163
1164 /* dst.y = xmm1 * src.y */
1165 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1166 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1167 STORE(bld, *inst, 0, CHAN_Y, tmp5);
1168 }
1169
1170 /* dst.z = xmm1 * src.z */
1171 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1172 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1173 STORE(bld, *inst, 0, CHAN_Z, tmp6);
1174 }
1175
1176 /* dst.w = xmm1 * src.w */
1177 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) {
1178 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1179 STORE(bld, *inst, 0, CHAN_W, tmp7);
1180 }
1181 }
1182
1183 /* dst0.w = 1.0 */
1184 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
1185 tmp0 = bld->base.one;
1186 STORE(bld, *inst, 0, CHAN_W, tmp0);
1187 }
1188 }
1189 break;
1190
1191 case TGSI_OPCODE_DIV:
1192 return 0;
1193 break;
1194
1195 case TGSI_OPCODE_DP2:
1196 tmp0 = FETCH( bld, *inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1197 tmp1 = FETCH( bld, *inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1198 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1199 tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1200 tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1201 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1202 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1203 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1204 STORE( bld, *inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1205 }
1206 break;
1207
1208 case TGSI_OPCODE_TXL:
1209 emit_tex( bld, inst, TRUE, FALSE );
1210 break;
1211
1212 case TGSI_OPCODE_TXP:
1213 emit_tex( bld, inst, FALSE, TRUE );
1214 break;
1215
1216 case TGSI_OPCODE_BRK:
1217 return 0;
1218 break;
1219
1220 case TGSI_OPCODE_IF:
1221 return 0;
1222 break;
1223
1224 case TGSI_OPCODE_LOOP:
1225 return 0;
1226 break;
1227
1228 case TGSI_OPCODE_REP:
1229 return 0;
1230 break;
1231
1232 case TGSI_OPCODE_ELSE:
1233 return 0;
1234 break;
1235
1236 case TGSI_OPCODE_ENDIF:
1237 return 0;
1238 break;
1239
1240 case TGSI_OPCODE_ENDLOOP:
1241 return 0;
1242 break;
1243
1244 case TGSI_OPCODE_ENDREP:
1245 return 0;
1246 break;
1247
1248 case TGSI_OPCODE_PUSHA:
1249 return 0;
1250 break;
1251
1252 case TGSI_OPCODE_POPA:
1253 return 0;
1254 break;
1255
1256 case TGSI_OPCODE_CEIL:
1257 return 0;
1258 break;
1259
1260 case TGSI_OPCODE_I2F:
1261 return 0;
1262 break;
1263
1264 case TGSI_OPCODE_NOT:
1265 return 0;
1266 break;
1267
1268 #if 0
1269 case TGSI_OPCODE_TRUNC:
1270 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1271 tmp0 = FETCH( bld, *inst, 0, chan_index );
1272 emit_f2it( bld, 0 );
1273 emit_i2f( bld, 0 );
1274 STORE( bld, *inst, 0, chan_index, tmp0);
1275 }
1276 break;
1277 #endif
1278
1279 case TGSI_OPCODE_SHL:
1280 return 0;
1281 break;
1282
1283 case TGSI_OPCODE_SHR:
1284 return 0;
1285 break;
1286
1287 case TGSI_OPCODE_AND:
1288 return 0;
1289 break;
1290
1291 case TGSI_OPCODE_OR:
1292 return 0;
1293 break;
1294
1295 case TGSI_OPCODE_MOD:
1296 return 0;
1297 break;
1298
1299 case TGSI_OPCODE_XOR:
1300 return 0;
1301 break;
1302
1303 case TGSI_OPCODE_SAD:
1304 return 0;
1305 break;
1306
1307 case TGSI_OPCODE_TXF:
1308 return 0;
1309 break;
1310
1311 case TGSI_OPCODE_TXQ:
1312 return 0;
1313 break;
1314
1315 case TGSI_OPCODE_CONT:
1316 return 0;
1317 break;
1318
1319 case TGSI_OPCODE_EMIT:
1320 return 0;
1321 break;
1322
1323 case TGSI_OPCODE_ENDPRIM:
1324 return 0;
1325 break;
1326
1327 default:
1328 return 0;
1329 }
1330
1331 return 1;
1332 }
1333
1334 static void
1335 emit_declaration(
1336 struct lp_build_tgsi_soa_context *bld,
1337 struct tgsi_full_declaration *decl )
1338 {
1339 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1340 LLVMBuilderRef builder = bld->base.builder;
1341 unsigned first, last, mask;
1342 unsigned attrib, chan;
1343
1344 first = decl->DeclarationRange.First;
1345 last = decl->DeclarationRange.Last;
1346 mask = decl->Declaration.UsageMask;
1347
1348 for( attrib = first; attrib <= last; attrib++ ) {
1349 for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
1350 LLVMValueRef input = bld->base.undef;
1351
1352 if( mask & (1 << chan) ) {
1353 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
1354 LLVMValueRef a0;
1355 LLVMValueRef dadx;
1356 LLVMValueRef dady;
1357
1358 switch( decl->Declaration.Interpolate ) {
1359 case TGSI_INTERPOLATE_PERSPECTIVE:
1360 /* fall-through */
1361
1362 case TGSI_INTERPOLATE_LINEAR: {
1363 LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
1364 LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
1365 dadx = LLVMBuildLoad(builder, dadx_ptr, "");
1366 dady = LLVMBuildLoad(builder, dady_ptr, "");
1367 dadx = lp_build_broadcast_scalar(&bld->base, dadx);
1368 dady = lp_build_broadcast_scalar(&bld->base, dady);
1369 lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
1370 lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
1371 /* fall-through */
1372 }
1373
1374 case TGSI_INTERPOLATE_CONSTANT: {
1375 LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
1376 a0 = LLVMBuildLoad(builder, a0_ptr, "");
1377 a0 = lp_build_broadcast_scalar(&bld->base, a0);
1378 lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
1379 break;
1380 }
1381
1382 default:
1383 assert(0);
1384 break;
1385 }
1386
1387 input = a0;
1388
1389 if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
1390 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
1391 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
1392 }
1393
1394 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
1395 if(!bld->oow)
1396 bld->oow = lp_build_rcp(&bld->base, bld->w);
1397 input = lp_build_mul(&bld->base, input, bld->oow);
1398 }
1399
1400 lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
1401 }
1402
1403 bld->inputs[attrib][chan] = input;
1404 }
1405 }
1406 }
1407 }
1408
1409 /**
1410 * Translate a TGSI vertex/fragment shader to SSE2 code.
1411 * Slightly different things are done for vertex vs. fragment shaders.
1412 *
1413 * \param tokens the TGSI input shader
1414 * \param bld the output SSE code/function
1415 * \param immediates buffer to place immediates, later passed to SSE bld
1416 * \param return 1 for success, 0 if translation failed
1417 */
1418 LLVMValueRef
1419 lp_build_tgsi_soa(LLVMBuilderRef builder,
1420 const struct tgsi_token *tokens,
1421 union lp_type type,
1422 LLVMValueRef *pos,
1423 LLVMValueRef a0_ptr,
1424 LLVMValueRef dadx_ptr,
1425 LLVMValueRef dady_ptr,
1426 LLVMValueRef consts_ptr,
1427 LLVMValueRef (*outputs)[4],
1428 LLVMValueRef samplers_ptr)
1429 {
1430 struct lp_build_tgsi_soa_context bld;
1431 struct tgsi_parse_context parse;
1432 uint num_immediates = 0;
1433 unsigned i;
1434
1435 /* Setup build context */
1436 memset(&bld, 0, sizeof bld);
1437 lp_build_context_init(&bld.base, builder, type);
1438 bld.x = pos[0];
1439 bld.y = pos[1];
1440 bld.w = pos[3];
1441 bld.a0_ptr = a0_ptr;
1442 bld.dadx_ptr = dadx_ptr;
1443 bld.dady_ptr = dady_ptr;
1444 bld.outputs = outputs;
1445 bld.consts_ptr = consts_ptr;
1446 bld.samplers_ptr = samplers_ptr;
1447
1448 tgsi_parse_init( &parse, tokens );
1449
1450 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1451 tgsi_parse_token( &parse );
1452
1453 switch( parse.FullToken.Token.Type ) {
1454 case TGSI_TOKEN_TYPE_DECLARATION:
1455 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
1456 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1457 }
1458 break;
1459
1460 case TGSI_TOKEN_TYPE_INSTRUCTION:
1461 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1462 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1463 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1464 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1465 info ? info->mnemonic : "<invalid>");
1466 }
1467 break;
1468
1469 case TGSI_TOKEN_TYPE_IMMEDIATE:
1470 /* simply copy the immediate values into the next immediates[] slot */
1471 {
1472 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1473 assert(size <= 4);
1474 assert(num_immediates < LP_MAX_IMMEDIATES);
1475 for( i = 0; i < size; ++i )
1476 bld.immediates[num_immediates][i] =
1477 lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float);
1478 for( i = size; i < 4; ++i )
1479 bld.immediates[num_immediates][i] = bld.base.undef;
1480 num_immediates++;
1481 }
1482 break;
1483
1484 default:
1485 assert( 0 );
1486 }
1487 }
1488
1489 tgsi_parse_free( &parse );
1490
1491 return bld.mask;
1492 }
1493