llvmpipe: Implement LIT.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_util.h"
37 #include "tgsi/tgsi_exec.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_intr.h"
41 #include "lp_bld_arit.h"
42 #include "lp_bld_logic.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_tgsi.h"
45
46
/** Capacity of the temporary and immediate register tables kept in the context. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/** Loop header stepping CHAN through every channel index below NUM_CHANNELS. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/** Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** `if` header guarding the following statement on IS_DST0_CHANNEL_ENABLED. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header visiting only the channels enabled in the first destination's
 * write mask.  Expands to a `for` followed by an `if`, so the statement that
 * follows is the body of that `if`.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++) \
      if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Symbolic channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
68
69
70 struct lp_build_tgsi_soa_context
71 {
72 struct lp_build_context base;
73
74 LLVMValueRef x, y, w;
75 LLVMValueRef a0_ptr;
76 LLVMValueRef dadx_ptr;
77 LLVMValueRef dady_ptr;
78
79 LLVMValueRef consts_ptr;
80 LLVMValueRef (*outputs)[NUM_CHANNELS];
81 LLVMValueRef samplers_ptr;
82
83 LLVMValueRef oow;
84
85 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
86
87 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
88 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
89
90 LLVMValueRef mask;
91
92 /** Coords/texels store */
93 LLVMValueRef store_ptr;
94 };
95
96
97 /**
98 * Register fetch.
99 */
100
101 static LLVMValueRef
102 emit_fetch(
103 struct lp_build_tgsi_soa_context *bld,
104 const struct tgsi_full_src_register *reg,
105 const unsigned chan_index )
106 {
107 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
108 LLVMValueRef res;
109
110 switch (swizzle) {
111 case TGSI_EXTSWIZZLE_X:
112 case TGSI_EXTSWIZZLE_Y:
113 case TGSI_EXTSWIZZLE_Z:
114 case TGSI_EXTSWIZZLE_W:
115
116 switch (reg->SrcRegister.File) {
117 case TGSI_FILE_CONSTANT: {
118 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
119 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
120 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
121 res = lp_build_broadcast_scalar(&bld->base, scalar);
122 break;
123 }
124
125 case TGSI_FILE_IMMEDIATE:
126 res = bld->immediates[reg->SrcRegister.Index][swizzle];
127 assert(res);
128 break;
129
130 case TGSI_FILE_INPUT:
131 res = bld->inputs[reg->SrcRegister.Index][swizzle];
132 assert(res);
133 break;
134
135 case TGSI_FILE_TEMPORARY:
136 res = bld->temps[reg->SrcRegister.Index][swizzle];
137 if(!res)
138 return bld->base.undef;
139 break;
140
141 default:
142 assert( 0 );
143 return bld->base.undef;
144 }
145 break;
146
147 case TGSI_EXTSWIZZLE_ZERO:
148 res = bld->base.zero;
149 break;
150
151 case TGSI_EXTSWIZZLE_ONE:
152 res = bld->base.one;
153 break;
154
155 default:
156 assert( 0 );
157 return bld->base.undef;
158 }
159
160 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
161 case TGSI_UTIL_SIGN_CLEAR:
162 res = lp_build_abs( &bld->base, res );
163 break;
164
165 case TGSI_UTIL_SIGN_SET:
166 res = lp_build_abs( &bld->base, res );
167 res = LLVMBuildNeg( bld->base.builder, res, "" );
168 break;
169
170 case TGSI_UTIL_SIGN_TOGGLE:
171 res = LLVMBuildNeg( bld->base.builder, res, "" );
172 break;
173
174 case TGSI_UTIL_SIGN_KEEP:
175 break;
176 }
177
178 return res;
179 }
180
181 #define FETCH( FUNC, INST, INDEX, CHAN )\
182 emit_fetch( FUNC, &(INST).FullSrcRegisters[INDEX], CHAN )
183
184 /**
185 * Register store.
186 */
187
188 static void
189 emit_store(
190 struct lp_build_tgsi_soa_context *bld,
191 const struct tgsi_full_dst_register *reg,
192 const struct tgsi_full_instruction *inst,
193 unsigned chan_index,
194 LLVMValueRef value)
195 {
196 switch( inst->Instruction.Saturate ) {
197 case TGSI_SAT_NONE:
198 break;
199
200 case TGSI_SAT_ZERO_ONE:
201 /* assert( 0 ); */
202 break;
203
204 case TGSI_SAT_MINUS_PLUS_ONE:
205 assert( 0 );
206 break;
207 }
208
209 switch( reg->DstRegister.File ) {
210 case TGSI_FILE_OUTPUT:
211 bld->outputs[reg->DstRegister.Index][chan_index] = value;
212 break;
213
214 case TGSI_FILE_TEMPORARY:
215 bld->temps[reg->DstRegister.Index][chan_index] = value;
216 break;
217
218 case TGSI_FILE_ADDRESS:
219 /* FIXME */
220 assert(0);
221 break;
222
223 default:
224 assert( 0 );
225 }
226 }
227
228 #define STORE( FUNC, INST, INDEX, CHAN, VAL )\
229 emit_store( FUNC, &(INST).FullDstRegisters[INDEX], &(INST), CHAN, VAL )
230
231
232 void PIPE_CDECL
233 lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
234 uint32_t unit,
235 float *store )
236 {
237 struct tgsi_sampler *sampler = samplers[unit];
238
239 #if 0
240 uint j;
241
242 debug_printf("%s sampler: %p (%p) store: %p\n",
243 __FUNCTION__,
244 sampler, *sampler,
245 store );
246
247 debug_printf("lodbias %f\n", store[12]);
248
249 for (j = 0; j < 4; j++)
250 debug_printf("sample %d texcoord %f %f\n",
251 j,
252 store[0+j],
253 store[4+j]);
254 #endif
255
256 {
257 float rgba[NUM_CHANNELS][QUAD_SIZE];
258 sampler->get_samples(sampler,
259 &store[0],
260 &store[4],
261 &store[8],
262 0.0f, /*store[12], lodbias */
263 rgba);
264 memcpy(store, rgba, sizeof rgba);
265 }
266
267 #if 0
268 for (j = 0; j < 4; j++)
269 debug_printf("sample %d result %f %f %f %f\n",
270 j,
271 store[0+j],
272 store[4+j],
273 store[8+j],
274 store[12+j]);
275 #endif
276 }
277
278 /**
279 * High-level instruction translators.
280 */
281
282 static void
283 emit_tex( struct lp_build_tgsi_soa_context *bld,
284 const struct tgsi_full_instruction *inst,
285 boolean apply_lodbias,
286 boolean projected)
287 {
288 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
289 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
290 LLVMValueRef lodbias;
291 LLVMValueRef oow;
292 LLVMValueRef args[3];
293 unsigned count;
294 unsigned i;
295
296 switch (inst->InstructionExtTexture.Texture) {
297 case TGSI_TEXTURE_1D:
298 case TGSI_TEXTURE_SHADOW1D:
299 count = 1;
300 break;
301 case TGSI_TEXTURE_2D:
302 case TGSI_TEXTURE_RECT:
303 case TGSI_TEXTURE_SHADOW2D:
304 case TGSI_TEXTURE_SHADOWRECT:
305 count = 2;
306 break;
307 case TGSI_TEXTURE_3D:
308 case TGSI_TEXTURE_CUBE:
309 count = 3;
310 break;
311 default:
312 assert(0);
313 return;
314 }
315
316 if(apply_lodbias)
317 lodbias = FETCH( bld, *inst, 0, 3 );
318 else
319 lodbias = bld->base.zero;
320
321 if(!bld->store_ptr)
322 bld->store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
323 vec_type,
324 LLVMConstInt(LLVMInt32Type(), 4, 0),
325 "store");
326
327 if (projected) {
328 oow = FETCH( bld, *inst, 0, 3 );
329 oow = lp_build_rcp(&bld->base, oow);
330 }
331
332 for (i = 0; i < count; i++) {
333 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
334 LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
335 LLVMValueRef coord;
336
337 coord = FETCH( bld, *inst, 0, i );
338
339 if (projected)
340 coord = lp_build_mul(&bld->base, coord, oow);
341
342 LLVMBuildStore(bld->base.builder, coord, coord_ptr);
343 }
344
345 args[0] = bld->samplers_ptr;
346 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
347 args[2] = bld->store_ptr;
348
349 lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
350
351 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) {
352 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
353 LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
354 LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
355 STORE( bld, *inst, 0, i, res );
356 }
357 }
358
359
360 static void
361 emit_kil(
362 struct lp_build_tgsi_soa_context *bld,
363 const struct tgsi_full_src_register *reg )
364 {
365 LLVMValueRef terms[NUM_CHANNELS];
366 unsigned chan_index;
367
368 memset(&terms, 0, sizeof terms);
369
370 FOR_EACH_CHANNEL( chan_index ) {
371 unsigned swizzle;
372
373 /* Unswizzle channel */
374 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
375
376 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
377 * not to be tested. */
378 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
379 continue;
380
381 /* Check if the component has not been already tested. */
382 assert(swizzle < NUM_CHANNELS);
383 if( !terms[swizzle] )
384 /* TODO: change the comparison operator instead of setting the sign */
385 terms[swizzle] = emit_fetch(bld, reg, chan_index );
386 }
387
388 FOR_EACH_CHANNEL( chan_index ) {
389 if(terms[chan_index]) {
390 LLVMValueRef mask;
391
392 mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
393
394 if(bld->mask)
395 bld->mask = LLVMBuildAnd(bld->base.builder, bld->mask, mask, "");
396 else
397 bld->mask = mask;
398 }
399 }
400 }
401
402
/**
 * Placeholder for the predicated kill instruction: emits nothing.
 */
static void
emit_kilp( struct lp_build_tgsi_soa_context *bld )
{
   /* XXX todo / fix me */
   (void) bld;
}
409
410
411 /**
412 * Check if inst src/dest regs use indirect addressing into temporary
413 * register file.
414 */
415 static boolean
416 indirect_temp_reference(const struct tgsi_full_instruction *inst)
417 {
418 uint i;
419 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
420 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
421 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
422 reg->SrcRegister.Indirect)
423 return TRUE;
424 }
425 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
426 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
427 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
428 reg->DstRegister.Indirect)
429 return TRUE;
430 }
431 return FALSE;
432 }
433
434
435 static int
436 emit_instruction(
437 struct lp_build_tgsi_soa_context *bld,
438 struct tgsi_full_instruction *inst )
439 {
440 unsigned chan_index;
441 LLVMValueRef src0, src1, src2;
442 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
443 LLVMValueRef dst0;
444
445 /* we can't handle indirect addressing into temp register file yet */
446 if (indirect_temp_reference(inst))
447 return FALSE;
448
449 switch (inst->Instruction.Opcode) {
450 #if 0
451 case TGSI_OPCODE_ARL:
452 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
453 tmp0 = FETCH( bld, *inst, 0, chan_index );
454 emit_flr(bld, 0, 0);
455 emit_f2it( bld, 0 );
456 STORE( bld, *inst, 0, chan_index, tmp0);
457 }
458 break;
459 #endif
460
461 case TGSI_OPCODE_MOV:
462 case TGSI_OPCODE_SWZ:
463 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
464 tmp0 = FETCH( bld, *inst, 0, chan_index );
465 STORE( bld, *inst, 0, chan_index, tmp0);
466 }
467 break;
468
469 case TGSI_OPCODE_LIT:
470 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
471 STORE( bld, *inst, 0, CHAN_X, bld->base.one);
472 }
473 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
474 src0 = FETCH( bld, *inst, 0, CHAN_X );
475 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
476 STORE( bld, *inst, 0, CHAN_Y, dst0);
477 }
478 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
479 /* XMM[1] = SrcReg[0].yyyy */
480 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
481 /* XMM[1] = max(XMM[1], 0) */
482 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
483 /* XMM[2] = SrcReg[0].wwww */
484 tmp2 = FETCH( bld, *inst, 0, CHAN_W );
485 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
486 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
487 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
488 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
489 STORE( bld, *inst, 0, CHAN_Z, dst0);
490 }
491 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
492 STORE( bld, *inst, 0, CHAN_W, bld->base.one);
493 }
494 break;
495
496 case TGSI_OPCODE_RCP:
497 /* TGSI_OPCODE_RECIP */
498 src0 = FETCH( bld, *inst, 0, CHAN_X );
499 dst0 = lp_build_rcp(&bld->base, src0);
500 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
501 STORE( bld, *inst, 0, chan_index, dst0 );
502 }
503 break;
504
505 case TGSI_OPCODE_RSQ:
506 /* TGSI_OPCODE_RECIPSQRT */
507 src0 = FETCH( bld, *inst, 0, CHAN_X );
508 src0 = lp_build_abs(&bld->base, src0);
509 dst0 = lp_build_rsqrt(&bld->base, src0);
510 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
511 STORE( bld, *inst, 0, chan_index, dst0 );
512 }
513 break;
514
515 case TGSI_OPCODE_EXP:
516 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
517 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
518 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
519 LLVMValueRef *p_exp2_int_part = NULL;
520 LLVMValueRef *p_frac_part = NULL;
521 LLVMValueRef *p_exp2 = NULL;
522
523 src0 = FETCH( bld, *inst, 0, CHAN_X );
524
525 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
526 p_exp2_int_part = &tmp0;
527 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
528 p_frac_part = &tmp1;
529 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
530 p_exp2 = &tmp2;
531
532 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
533
534 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
535 STORE( bld, *inst, 0, CHAN_X, tmp0);
536 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
537 STORE( bld, *inst, 0, CHAN_Y, tmp1);
538 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
539 STORE( bld, *inst, 0, CHAN_Z, tmp2);
540 }
541 /* dst.w = 1.0 */
542 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
543 tmp0 = bld->base.one;
544 STORE( bld, *inst, 0, CHAN_W, tmp0);
545 }
546 break;
547
548 case TGSI_OPCODE_LOG:
549 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
550 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
551 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
552 LLVMValueRef *p_floor_log2;
553 LLVMValueRef *p_exp;
554 LLVMValueRef *p_log2;
555
556 src0 = FETCH( bld, *inst, 0, CHAN_X );
557 src0 = lp_build_abs( &bld->base, src0 );
558
559 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
560 p_floor_log2 = &tmp0;
561 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
562 p_exp = &tmp1;
563 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
564 p_log2 = &tmp2;
565
566 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
567
568 /* dst.x = floor(lg2(abs(src.x))) */
569 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
570 STORE( bld, *inst, 0, CHAN_X, tmp0);
571 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
572 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
573 tmp1 = lp_build_div( &bld->base, src0, tmp1);
574 STORE( bld, *inst, 0, CHAN_Y, tmp1);
575 }
576 /* dst.z = lg2(abs(src.x)) */
577 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
578 STORE( bld, *inst, 0, CHAN_Z, tmp2);
579 }
580 /* dst.w = 1.0 */
581 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
582 tmp0 = bld->base.one;
583 STORE( bld, *inst, 0, CHAN_W, tmp0);
584 }
585 break;
586
587 case TGSI_OPCODE_MUL:
588 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
589 src0 = FETCH( bld, *inst, 0, chan_index );
590 src1 = FETCH( bld, *inst, 1, chan_index );
591 dst0 = lp_build_mul(&bld->base, src0, src1);
592 STORE( bld, *inst, 0, chan_index, dst0);
593 }
594 break;
595
596 case TGSI_OPCODE_ADD:
597 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
598 src0 = FETCH( bld, *inst, 0, chan_index );
599 src1 = FETCH( bld, *inst, 1, chan_index );
600 dst0 = lp_build_add(&bld->base, src0, src1);
601 STORE( bld, *inst, 0, chan_index, dst0);
602 }
603 break;
604
605 case TGSI_OPCODE_DP3:
606 /* TGSI_OPCODE_DOT3 */
607 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
608 tmp1 = FETCH( bld, *inst, 1, CHAN_X );
609 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
610 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
611 tmp2 = FETCH( bld, *inst, 1, CHAN_Y );
612 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
613 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
614 tmp1 = FETCH( bld, *inst, 0, CHAN_Z );
615 tmp2 = FETCH( bld, *inst, 1, CHAN_Z );
616 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
617 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
618 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
619 STORE( bld, *inst, 0, chan_index, tmp0);
620 }
621 break;
622
623 case TGSI_OPCODE_DP4:
624 /* TGSI_OPCODE_DOT4 */
625 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
626 tmp1 = FETCH( bld, *inst, 1, CHAN_X );
627 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
628 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
629 tmp2 = FETCH( bld, *inst, 1, CHAN_Y );
630 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
631 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
632 tmp1 = FETCH( bld, *inst, 0, CHAN_Z );
633 tmp2 = FETCH( bld, *inst, 1, CHAN_Z );
634 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
635 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
636 tmp1 = FETCH( bld, *inst, 0, CHAN_W );
637 tmp2 = FETCH( bld, *inst, 1, CHAN_W );
638 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
639 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
640 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
641 STORE( bld, *inst, 0, chan_index, tmp0);
642 }
643 break;
644
645 case TGSI_OPCODE_DST:
646 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
647 tmp0 = bld->base.one;
648 STORE( bld, *inst, 0, CHAN_X, tmp0);
649 }
650 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
651 tmp0 = FETCH( bld, *inst, 0, CHAN_Y );
652 tmp1 = FETCH( bld, *inst, 1, CHAN_Y );
653 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
654 STORE( bld, *inst, 0, CHAN_Y, tmp0);
655 }
656 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
657 tmp0 = FETCH( bld, *inst, 0, CHAN_Z );
658 STORE( bld, *inst, 0, CHAN_Z, tmp0);
659 }
660 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
661 tmp0 = FETCH( bld, *inst, 1, CHAN_W );
662 STORE( bld, *inst, 0, CHAN_W, tmp0);
663 }
664 break;
665
666 case TGSI_OPCODE_MIN:
667 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
668 src0 = FETCH( bld, *inst, 0, chan_index );
669 src1 = FETCH( bld, *inst, 1, chan_index );
670 dst0 = lp_build_min( &bld->base, src0, src1 );
671 STORE( bld, *inst, 0, chan_index, dst0);
672 }
673 break;
674
675 case TGSI_OPCODE_MAX:
676 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
677 src0 = FETCH( bld, *inst, 0, chan_index );
678 src1 = FETCH( bld, *inst, 1, chan_index );
679 dst0 = lp_build_max( &bld->base, src0, src1 );
680 STORE( bld, *inst, 0, chan_index, dst0);
681 }
682 break;
683
684 case TGSI_OPCODE_SLT:
685 /* TGSI_OPCODE_SETLT */
686 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
687 src0 = FETCH( bld, *inst, 0, chan_index );
688 src1 = FETCH( bld, *inst, 1, chan_index );
689 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
690 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
691 STORE( bld, *inst, 0, chan_index, dst0);
692 }
693 break;
694
695 case TGSI_OPCODE_SGE:
696 /* TGSI_OPCODE_SETGE */
697 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
698 src0 = FETCH( bld, *inst, 0, chan_index );
699 src1 = FETCH( bld, *inst, 1, chan_index );
700 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
701 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
702 STORE( bld, *inst, 0, chan_index, dst0);
703 }
704 break;
705
706 case TGSI_OPCODE_MAD:
707 /* TGSI_OPCODE_MADD */
708 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
709 tmp0 = FETCH( bld, *inst, 0, chan_index );
710 tmp1 = FETCH( bld, *inst, 1, chan_index );
711 tmp2 = FETCH( bld, *inst, 2, chan_index );
712 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
713 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
714 STORE( bld, *inst, 0, chan_index, tmp0);
715 }
716 break;
717
718 case TGSI_OPCODE_SUB:
719 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
720 tmp0 = FETCH( bld, *inst, 0, chan_index );
721 tmp1 = FETCH( bld, *inst, 1, chan_index );
722 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
723 STORE( bld, *inst, 0, chan_index, tmp0);
724 }
725 break;
726
727 case TGSI_OPCODE_LRP:
728 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
729 src0 = FETCH( bld, *inst, 0, chan_index );
730 src1 = FETCH( bld, *inst, 1, chan_index );
731 src2 = FETCH( bld, *inst, 2, chan_index );
732 tmp0 = lp_build_sub( &bld->base, src1, src2 );
733 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
734 dst0 = lp_build_add( &bld->base, tmp0, src2 );
735 STORE( bld, *inst, 0, chan_index, dst0 );
736 }
737 break;
738
739 case TGSI_OPCODE_CND:
740 return 0;
741 break;
742
743 case TGSI_OPCODE_CND0:
744 return 0;
745 break;
746
747 case TGSI_OPCODE_DP2A:
748 tmp0 = FETCH( bld, *inst, 0, CHAN_X ); /* xmm0 = src[0].x */
749 tmp1 = FETCH( bld, *inst, 1, CHAN_X ); /* xmm1 = src[1].x */
750 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
751 tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
752 tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
753 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
754 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
755 tmp1 = FETCH( bld, *inst, 2, CHAN_X ); /* xmm1 = src[2].x */
756 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
757 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
758 STORE( bld, *inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
759 }
760 break;
761
762 #if 0
763 case TGSI_OPCODE_FRC:
764 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
765 tmp0 = FETCH( bld, *inst, 0, chan_index );
766 emit_frc( bld, 0, 0 );
767 STORE( bld, *inst, 0, chan_index, tmp0);
768 }
769 break;
770
771 case TGSI_OPCODE_CLAMP:
772 return 0;
773 break;
774
775 case TGSI_OPCODE_FLR:
776 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
777 tmp0 = FETCH( bld, *inst, 0, chan_index );
778 emit_flr( bld, 0, 0 );
779 STORE( bld, *inst, 0, chan_index, tmp0);
780 }
781 break;
782
783 case TGSI_OPCODE_ROUND:
784 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
785 tmp0 = FETCH( bld, *inst, 0, chan_index );
786 emit_rnd( bld, 0, 0 );
787 STORE( bld, *inst, 0, chan_index, tmp0);
788 }
789 break;
790 #endif
791
792 case TGSI_OPCODE_EX2: {
793 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
794 tmp0 = lp_build_exp2( &bld->base, tmp0);
795 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
796 STORE( bld, *inst, 0, chan_index, tmp0);
797 }
798 break;
799 }
800
801 case TGSI_OPCODE_LG2:
802 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
803 tmp0 = lp_build_log2( &bld->base, tmp0);
804 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
805 STORE( bld, *inst, 0, chan_index, tmp0);
806 }
807 break;
808
809 case TGSI_OPCODE_POW:
810 src0 = FETCH( bld, *inst, 0, CHAN_X );
811 src1 = FETCH( bld, *inst, 1, CHAN_X );
812 dst0 = lp_build_pow( &bld->base, src0, src1 );
813 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
814 STORE( bld, *inst, 0, chan_index, dst0 );
815 }
816 break;
817
818 case TGSI_OPCODE_XPD:
819 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
820 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
821 tmp1 = FETCH( bld, *inst, 1, CHAN_Z );
822 tmp3 = FETCH( bld, *inst, 0, CHAN_Z );
823 }
824 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
825 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
826 tmp0 = FETCH( bld, *inst, 0, CHAN_Y );
827 tmp4 = FETCH( bld, *inst, 1, CHAN_Y );
828 }
829 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
830 tmp2 = tmp0;
831 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
832 tmp5 = tmp3;
833 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
834 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
835 STORE( bld, *inst, 0, CHAN_X, tmp2);
836 }
837 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
838 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
839 tmp2 = FETCH( bld, *inst, 1, CHAN_X );
840 tmp5 = FETCH( bld, *inst, 0, CHAN_X );
841 }
842 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
843 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
844 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
845 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
846 STORE( bld, *inst, 0, CHAN_Y, tmp3);
847 }
848 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
849 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
850 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
851 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
852 STORE( bld, *inst, 0, CHAN_Z, tmp5);
853 }
854 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
855 tmp0 = bld->base.one;
856 STORE( bld, *inst, 0, CHAN_W, tmp0);
857 }
858 break;
859
860 case TGSI_OPCODE_ABS:
861 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
862 tmp0 = FETCH( bld, *inst, 0, chan_index );
863 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
864 STORE( bld, *inst, 0, chan_index, tmp0);
865 }
866 break;
867
868 case TGSI_OPCODE_RCC:
869 return 0;
870 break;
871
872 case TGSI_OPCODE_DPH:
873 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
874 tmp1 = FETCH( bld, *inst, 1, CHAN_X );
875 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
876 tmp1 = FETCH( bld, *inst, 0, CHAN_Y );
877 tmp2 = FETCH( bld, *inst, 1, CHAN_Y );
878 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
879 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
880 tmp1 = FETCH( bld, *inst, 0, CHAN_Z );
881 tmp2 = FETCH( bld, *inst, 1, CHAN_Z );
882 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
883 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
884 tmp1 = FETCH( bld, *inst, 1, CHAN_W );
885 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
886 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
887 STORE( bld, *inst, 0, chan_index, tmp0);
888 }
889 break;
890
891 case TGSI_OPCODE_COS:
892 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
893 tmp0 = lp_build_cos( &bld->base, tmp0 );
894 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
895 STORE( bld, *inst, 0, chan_index, tmp0);
896 }
897 break;
898
899 case TGSI_OPCODE_DDX:
900 return 0;
901 break;
902
903 case TGSI_OPCODE_DDY:
904 return 0;
905 break;
906
907 #if 0
908 case TGSI_OPCODE_KILP:
909 /* predicated kill */
910 emit_kilp( bld );
911 return 0; /* XXX fix me */
912 break;
913 #endif
914
915 case TGSI_OPCODE_KIL:
916 /* conditional kill */
917 emit_kil( bld, &inst->FullSrcRegisters[0] );
918 break;
919
920 case TGSI_OPCODE_PK2H:
921 return 0;
922 break;
923
924 case TGSI_OPCODE_PK2US:
925 return 0;
926 break;
927
928 case TGSI_OPCODE_PK4B:
929 return 0;
930 break;
931
932 case TGSI_OPCODE_PK4UB:
933 return 0;
934 break;
935
936 case TGSI_OPCODE_RFL:
937 return 0;
938 break;
939
940 case TGSI_OPCODE_SEQ:
941 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
942 src0 = FETCH( bld, *inst, 0, chan_index );
943 src1 = FETCH( bld, *inst, 1, chan_index );
944 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
945 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
946 STORE( bld, *inst, 0, chan_index, dst0);
947 }
948 break;
949
950 case TGSI_OPCODE_SFL:
951 return 0;
952 break;
953
954 case TGSI_OPCODE_SGT:
955 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
956 src0 = FETCH( bld, *inst, 0, chan_index );
957 src1 = FETCH( bld, *inst, 1, chan_index );
958 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
959 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
960 STORE( bld, *inst, 0, chan_index, dst0);
961 }
962 break;
963
964 case TGSI_OPCODE_SIN:
965 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
966 tmp0 = lp_build_sin( &bld->base, tmp0 );
967 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
968 STORE( bld, *inst, 0, chan_index, tmp0);
969 }
970 break;
971
972 case TGSI_OPCODE_SLE:
973 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
974 src0 = FETCH( bld, *inst, 0, chan_index );
975 src1 = FETCH( bld, *inst, 1, chan_index );
976 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
977 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
978 STORE( bld, *inst, 0, chan_index, dst0);
979 }
980 break;
981
982 case TGSI_OPCODE_SNE:
983 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
984 src0 = FETCH( bld, *inst, 0, chan_index );
985 src1 = FETCH( bld, *inst, 1, chan_index );
986 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
987 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
988 STORE( bld, *inst, 0, chan_index, dst0);
989 }
990 break;
991
992 case TGSI_OPCODE_STR:
993 return 0;
994 break;
995
996 case TGSI_OPCODE_TEX:
997 emit_tex( bld, inst, FALSE, FALSE );
998 break;
999
1000 case TGSI_OPCODE_TXD:
1001 return 0;
1002 break;
1003
1004 case TGSI_OPCODE_UP2H:
1005 return 0;
1006 break;
1007
1008 case TGSI_OPCODE_UP2US:
1009 return 0;
1010 break;
1011
1012 case TGSI_OPCODE_UP4B:
1013 return 0;
1014 break;
1015
1016 case TGSI_OPCODE_UP4UB:
1017 return 0;
1018 break;
1019
1020 case TGSI_OPCODE_X2D:
1021 return 0;
1022 break;
1023
1024 case TGSI_OPCODE_ARA:
1025 return 0;
1026 break;
1027
1028 #if 0
1029 case TGSI_OPCODE_ARR:
1030 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1031 tmp0 = FETCH( bld, *inst, 0, chan_index );
1032 emit_rnd( bld, 0, 0 );
1033 emit_f2it( bld, 0 );
1034 STORE( bld, *inst, 0, chan_index, tmp0);
1035 }
1036 break;
1037 #endif
1038
1039 case TGSI_OPCODE_BRA:
1040 return 0;
1041 break;
1042
1043 case TGSI_OPCODE_CAL:
1044 return 0;
1045 break;
1046
1047 #if 0
1048 case TGSI_OPCODE_RET:
1049 emit_ret( bld );
1050 break;
1051 #endif
1052
1053 case TGSI_OPCODE_END:
1054 break;
1055
1056 #if 0
1057 case TGSI_OPCODE_SSG:
1058 /* TGSI_OPCODE_SGN */
1059 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1060 tmp0 = FETCH( bld, *inst, 0, chan_index );
1061 emit_sgn( bld, 0, 0 );
1062 STORE( bld, *inst, 0, chan_index, tmp0);
1063 }
1064 break;
1065 #endif
1066
1067 case TGSI_OPCODE_CMP:
1068 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1069 src0 = FETCH( bld, *inst, 0, chan_index );
1070 src1 = FETCH( bld, *inst, 1, chan_index );
1071 src2 = FETCH( bld, *inst, 2, chan_index );
1072 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1073 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1074 STORE( bld, *inst, 0, chan_index, dst0);
1075 }
1076 break;
1077
1078 case TGSI_OPCODE_SCS:
1079 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1080 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
1081 tmp0 = lp_build_cos( &bld->base, tmp0 );
1082 STORE( bld, *inst, 0, CHAN_X, tmp0);
1083 }
1084 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1085 tmp0 = FETCH( bld, *inst, 0, CHAN_X );
1086 tmp0 = lp_build_sin( &bld->base, tmp0 );
1087 STORE( bld, *inst, 0, CHAN_Y, tmp0);
1088 }
1089 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1090 tmp0 = bld->base.zero;
1091 STORE( bld, *inst, 0, CHAN_Z, tmp0);
1092 }
1093 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1094 tmp0 = bld->base.one;
1095 STORE( bld, *inst, 0, CHAN_W, tmp0);
1096 }
1097 break;
1098
1099 case TGSI_OPCODE_TXB:
1100 emit_tex( bld, inst, TRUE, FALSE );
1101 break;
1102
1103 case TGSI_OPCODE_NRM:
1104 /* fall-through */
1105 case TGSI_OPCODE_NRM4:
1106 /* 3 or 4-component normalization */
1107 {
1108 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1109
1110 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
1111 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
1112 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
1113 (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
1114
1115 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1116
1117 /* xmm4 = src.x */
1118 /* xmm0 = src.x * src.x */
1119 tmp0 = FETCH(bld, *inst, 0, CHAN_X);
1120 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
1121 tmp4 = tmp0;
1122 }
1123 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1124
1125 /* xmm5 = src.y */
1126 /* xmm0 = xmm0 + src.y * src.y */
1127 tmp1 = FETCH(bld, *inst, 0, CHAN_Y);
1128 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1129 tmp5 = tmp1;
1130 }
1131 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1132 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1133
1134 /* xmm6 = src.z */
1135 /* xmm0 = xmm0 + src.z * src.z */
1136 tmp1 = FETCH(bld, *inst, 0, CHAN_Z);
1137 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1138 tmp6 = tmp1;
1139 }
1140 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1141 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1142
1143 if (dims == 4) {
1144 /* xmm7 = src.w */
1145 /* xmm0 = xmm0 + src.w * src.w */
1146 tmp1 = FETCH(bld, *inst, 0, CHAN_W);
1147 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
1148 tmp7 = tmp1;
1149 }
1150 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1151 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1152 }
1153
1154 /* xmm1 = 1 / sqrt(xmm0) */
1155 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1156
1157 /* dst.x = xmm1 * src.x */
1158 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
1159 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1160 STORE(bld, *inst, 0, CHAN_X, tmp4);
1161 }
1162
1163 /* dst.y = xmm1 * src.y */
1164 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1165 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1166 STORE(bld, *inst, 0, CHAN_Y, tmp5);
1167 }
1168
1169 /* dst.z = xmm1 * src.z */
1170 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1171 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1172 STORE(bld, *inst, 0, CHAN_Z, tmp6);
1173 }
1174
1175 /* dst.w = xmm1 * src.w */
1176 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) {
1177 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1178 STORE(bld, *inst, 0, CHAN_W, tmp7);
1179 }
1180 }
1181
1182 /* dst0.w = 1.0 */
1183 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
1184 tmp0 = bld->base.one;
1185 STORE(bld, *inst, 0, CHAN_W, tmp0);
1186 }
1187 }
1188 break;
1189
1190 case TGSI_OPCODE_DIV:
1191 return 0;
1192 break;
1193
1194 case TGSI_OPCODE_DP2:
1195 tmp0 = FETCH( bld, *inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1196 tmp1 = FETCH( bld, *inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1197 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1198 tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1199 tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1200 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1201 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1202 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1203 STORE( bld, *inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1204 }
1205 break;
1206
1207 case TGSI_OPCODE_TXL:
1208 emit_tex( bld, inst, TRUE, FALSE );
1209 break;
1210
1211 case TGSI_OPCODE_TXP:
1212 emit_tex( bld, inst, FALSE, TRUE );
1213 break;
1214
1215 case TGSI_OPCODE_BRK:
1216 return 0;
1217 break;
1218
1219 case TGSI_OPCODE_IF:
1220 return 0;
1221 break;
1222
1223 case TGSI_OPCODE_LOOP:
1224 return 0;
1225 break;
1226
1227 case TGSI_OPCODE_REP:
1228 return 0;
1229 break;
1230
1231 case TGSI_OPCODE_ELSE:
1232 return 0;
1233 break;
1234
1235 case TGSI_OPCODE_ENDIF:
1236 return 0;
1237 break;
1238
1239 case TGSI_OPCODE_ENDLOOP:
1240 return 0;
1241 break;
1242
1243 case TGSI_OPCODE_ENDREP:
1244 return 0;
1245 break;
1246
1247 case TGSI_OPCODE_PUSHA:
1248 return 0;
1249 break;
1250
1251 case TGSI_OPCODE_POPA:
1252 return 0;
1253 break;
1254
1255 case TGSI_OPCODE_CEIL:
1256 return 0;
1257 break;
1258
1259 case TGSI_OPCODE_I2F:
1260 return 0;
1261 break;
1262
1263 case TGSI_OPCODE_NOT:
1264 return 0;
1265 break;
1266
1267 #if 0
1268 case TGSI_OPCODE_TRUNC:
1269 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1270 tmp0 = FETCH( bld, *inst, 0, chan_index );
1271 emit_f2it( bld, 0 );
1272 emit_i2f( bld, 0 );
1273 STORE( bld, *inst, 0, chan_index, tmp0);
1274 }
1275 break;
1276 #endif
1277
1278 case TGSI_OPCODE_SHL:
1279 return 0;
1280 break;
1281
1282 case TGSI_OPCODE_SHR:
1283 return 0;
1284 break;
1285
1286 case TGSI_OPCODE_AND:
1287 return 0;
1288 break;
1289
1290 case TGSI_OPCODE_OR:
1291 return 0;
1292 break;
1293
1294 case TGSI_OPCODE_MOD:
1295 return 0;
1296 break;
1297
1298 case TGSI_OPCODE_XOR:
1299 return 0;
1300 break;
1301
1302 case TGSI_OPCODE_SAD:
1303 return 0;
1304 break;
1305
1306 case TGSI_OPCODE_TXF:
1307 return 0;
1308 break;
1309
1310 case TGSI_OPCODE_TXQ:
1311 return 0;
1312 break;
1313
1314 case TGSI_OPCODE_CONT:
1315 return 0;
1316 break;
1317
1318 case TGSI_OPCODE_EMIT:
1319 return 0;
1320 break;
1321
1322 case TGSI_OPCODE_ENDPRIM:
1323 return 0;
1324 break;
1325
1326 default:
1327 return 0;
1328 }
1329
1330 return 1;
1331 }
1332
1333 static void
1334 emit_declaration(
1335 struct lp_build_tgsi_soa_context *bld,
1336 struct tgsi_full_declaration *decl )
1337 {
1338 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1339 LLVMBuilderRef builder = bld->base.builder;
1340 unsigned first, last, mask;
1341 unsigned attrib, chan;
1342
1343 first = decl->DeclarationRange.First;
1344 last = decl->DeclarationRange.Last;
1345 mask = decl->Declaration.UsageMask;
1346
1347 for( attrib = first; attrib <= last; attrib++ ) {
1348 for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
1349 LLVMValueRef input = bld->base.undef;
1350
1351 if( mask & (1 << chan) ) {
1352 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
1353 LLVMValueRef a0;
1354 LLVMValueRef dadx;
1355 LLVMValueRef dady;
1356 char name[32];
1357
1358 switch( decl->Declaration.Interpolate ) {
1359 case TGSI_INTERPOLATE_PERSPECTIVE:
1360 case TGSI_INTERPOLATE_LINEAR: {
1361 LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
1362 LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
1363 dadx = LLVMBuildLoad(builder, dadx_ptr, "");
1364 dady = LLVMBuildLoad(builder, dady_ptr, "");
1365 dadx = lp_build_broadcast_scalar(&bld->base, dadx);
1366 dady = lp_build_broadcast_scalar(&bld->base, dady);
1367 util_snprintf(name, sizeof name, "dadx_%u.%c", attrib, "xyzw"[chan]);
1368 LLVMSetValueName(dadx, name);
1369 util_snprintf(name, sizeof name, "dady_%u.%c", attrib, "xyzw"[chan]);
1370 LLVMSetValueName(dady, name);
1371 }
1372
1373 case TGSI_INTERPOLATE_CONSTANT: {
1374 LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
1375 a0 = LLVMBuildLoad(builder, a0_ptr, "");
1376 a0 = lp_build_broadcast_scalar(&bld->base, a0);
1377 util_snprintf(name, sizeof name, "a0_%u.%c", attrib, "xyzw"[chan]);
1378 LLVMSetValueName(a0, name);
1379 break;
1380 }
1381
1382 default:
1383 assert(0);
1384 break;
1385 }
1386
1387 input = a0;
1388
1389 if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
1390 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
1391 input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
1392 }
1393
1394 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
1395 if(!bld->oow)
1396 bld->oow = lp_build_rcp(&bld->base, bld->w);
1397 input = lp_build_mul(&bld->base, input, bld->oow);
1398 }
1399
1400 util_snprintf(name, sizeof name, "input%u.%c", attrib, "xyzw"[chan]);
1401 LLVMSetValueName(input, name);
1402 }
1403
1404 bld->inputs[attrib][chan] = input;
1405 }
1406 }
1407 }
1408 }
1409
1410 /**
1411 * Translate a TGSI vertex/fragment shader to SSE2 code.
1412 * Slightly different things are done for vertex vs. fragment shaders.
1413 *
1414 * \param tokens the TGSI input shader
1415 * \param bld the output SSE code/function
1416 * \param immediates buffer to place immediates, later passed to SSE bld
1417 * \param return 1 for success, 0 if translation failed
1418 */
1419 LLVMValueRef
1420 lp_build_tgsi_soa(LLVMBuilderRef builder,
1421 const struct tgsi_token *tokens,
1422 union lp_type type,
1423 LLVMValueRef *pos,
1424 LLVMValueRef a0_ptr,
1425 LLVMValueRef dadx_ptr,
1426 LLVMValueRef dady_ptr,
1427 LLVMValueRef consts_ptr,
1428 LLVMValueRef (*outputs)[4],
1429 LLVMValueRef samplers_ptr)
1430 {
1431 struct lp_build_tgsi_soa_context bld;
1432 struct tgsi_parse_context parse;
1433 uint num_immediates = 0;
1434 unsigned i;
1435
1436 /* Setup build context */
1437 memset(&bld, 0, sizeof bld);
1438 lp_build_context_init(&bld.base, builder, type);
1439 bld.x = pos[0];
1440 bld.y = pos[1];
1441 bld.w = pos[3];
1442 bld.a0_ptr = a0_ptr;
1443 bld.dadx_ptr = dadx_ptr;
1444 bld.dady_ptr = dady_ptr;
1445 bld.outputs = outputs;
1446 bld.consts_ptr = consts_ptr;
1447 bld.samplers_ptr = samplers_ptr;
1448
1449 tgsi_parse_init( &parse, tokens );
1450
1451 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1452 tgsi_parse_token( &parse );
1453
1454 switch( parse.FullToken.Token.Type ) {
1455 case TGSI_TOKEN_TYPE_DECLARATION:
1456 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
1457 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1458 }
1459 break;
1460
1461 case TGSI_TOKEN_TYPE_INSTRUCTION:
1462 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1463 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1464 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1465 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1466 info ? info->mnemonic : "<invalid>");
1467 }
1468 break;
1469
1470 case TGSI_TOKEN_TYPE_IMMEDIATE:
1471 /* simply copy the immediate values into the next immediates[] slot */
1472 {
1473 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1474 assert(size <= 4);
1475 assert(num_immediates < LP_MAX_IMMEDIATES);
1476 for( i = 0; i < size; ++i )
1477 bld.immediates[num_immediates][i] =
1478 lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float);
1479 for( i = size; i < 4; ++i )
1480 bld.immediates[num_immediates][i] = bld.base.undef;
1481 num_immediates++;
1482 }
1483 break;
1484
1485 default:
1486 assert( 0 );
1487 }
1488 }
1489
1490 tgsi_parse_free( &parse );
1491
1492 return bld.mask;
1493 }
1494