llvmpipe: Factor out and optimize the input interpolation.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_info.h"
45 #include "tgsi/tgsi_parse.h"
46 #include "tgsi/tgsi_util.h"
47 #include "tgsi/tgsi_exec.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_intr.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_tgsi.h"
56 #include "lp_bld_debug.h"
57
58
/* Row counts of the fixed-size register tables in lp_build_tgsi_soa_context. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/* Step CHAN through every channel index, 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/* Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Statement prefix: run the following statement only for an enabled channel. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Statement prefix: run the following statement once per enabled channel. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
80
81
82 struct lp_build_tgsi_soa_context
83 {
84 struct lp_build_context base;
85
86 LLVMValueRef consts_ptr;
87 const LLVMValueRef *pos;
88 const LLVMValueRef (*inputs)[NUM_CHANNELS];
89 LLVMValueRef (*outputs)[NUM_CHANNELS];
90 LLVMValueRef samplers_ptr;
91
92 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
93 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
94
95 struct lp_build_mask_context *mask;
96
97 /** Coords/texels store */
98 LLVMValueRef store_ptr;
99 };
100
101
102 /**
103 * Register fetch.
104 */
105 static LLVMValueRef
106 emit_fetch(
107 struct lp_build_tgsi_soa_context *bld,
108 const struct tgsi_full_instruction *inst,
109 unsigned index,
110 const unsigned chan_index )
111 {
112 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
113 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
114 LLVMValueRef res;
115
116 switch (swizzle) {
117 case TGSI_EXTSWIZZLE_X:
118 case TGSI_EXTSWIZZLE_Y:
119 case TGSI_EXTSWIZZLE_Z:
120 case TGSI_EXTSWIZZLE_W:
121
122 switch (reg->SrcRegister.File) {
123 case TGSI_FILE_CONSTANT: {
124 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
125 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
126 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
127 res = lp_build_broadcast_scalar(&bld->base, scalar);
128 break;
129 }
130
131 case TGSI_FILE_IMMEDIATE:
132 res = bld->immediates[reg->SrcRegister.Index][swizzle];
133 assert(res);
134 break;
135
136 case TGSI_FILE_INPUT:
137 res = bld->inputs[reg->SrcRegister.Index][swizzle];
138 assert(res);
139 break;
140
141 case TGSI_FILE_TEMPORARY:
142 res = bld->temps[reg->SrcRegister.Index][swizzle];
143 if(!res)
144 return bld->base.undef;
145 break;
146
147 default:
148 assert( 0 );
149 return bld->base.undef;
150 }
151 break;
152
153 case TGSI_EXTSWIZZLE_ZERO:
154 res = bld->base.zero;
155 break;
156
157 case TGSI_EXTSWIZZLE_ONE:
158 res = bld->base.one;
159 break;
160
161 default:
162 assert( 0 );
163 return bld->base.undef;
164 }
165
166 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
167 case TGSI_UTIL_SIGN_CLEAR:
168 res = lp_build_abs( &bld->base, res );
169 break;
170
171 case TGSI_UTIL_SIGN_SET:
172 res = lp_build_abs( &bld->base, res );
173 res = LLVMBuildNeg( bld->base.builder, res, "" );
174 break;
175
176 case TGSI_UTIL_SIGN_TOGGLE:
177 res = LLVMBuildNeg( bld->base.builder, res, "" );
178 break;
179
180 case TGSI_UTIL_SIGN_KEEP:
181 break;
182 }
183
184 return res;
185 }
186
187
188 /**
189 * Register store.
190 */
191 static void
192 emit_store(
193 struct lp_build_tgsi_soa_context *bld,
194 const struct tgsi_full_instruction *inst,
195 unsigned index,
196 unsigned chan_index,
197 LLVMValueRef value)
198 {
199 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
200
201 switch( inst->Instruction.Saturate ) {
202 case TGSI_SAT_NONE:
203 break;
204
205 case TGSI_SAT_ZERO_ONE:
206 value = lp_build_max(&bld->base, value, bld->base.zero);
207 value = lp_build_min(&bld->base, value, bld->base.one);
208 break;
209
210 case TGSI_SAT_MINUS_PLUS_ONE:
211 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
212 value = lp_build_min(&bld->base, value, bld->base.one);
213 break;
214
215 default:
216 assert(0);
217 }
218
219 switch( reg->DstRegister.File ) {
220 case TGSI_FILE_OUTPUT:
221 bld->outputs[reg->DstRegister.Index][chan_index] = value;
222 break;
223
224 case TGSI_FILE_TEMPORARY:
225 bld->temps[reg->DstRegister.Index][chan_index] = value;
226 break;
227
228 case TGSI_FILE_ADDRESS:
229 /* FIXME */
230 assert(0);
231 break;
232
233 default:
234 assert( 0 );
235 }
236 }
237
238
239 void PIPE_CDECL
240 lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
241 uint32_t unit,
242 float *store )
243 {
244 struct tgsi_sampler *sampler = samplers[unit];
245
246 #if 0
247 uint j;
248
249 debug_printf("%s sampler: %p (%p) store: %p\n",
250 __FUNCTION__,
251 sampler, *sampler,
252 store );
253
254 debug_printf("lodbias %f\n", store[12]);
255
256 for (j = 0; j < 4; j++)
257 debug_printf("sample %d texcoord %f %f\n",
258 j,
259 store[0+j],
260 store[4+j]);
261 #endif
262
263 {
264 float rgba[NUM_CHANNELS][QUAD_SIZE];
265 sampler->get_samples(sampler,
266 &store[0],
267 &store[4],
268 &store[8],
269 0.0f, /*store[12], lodbias */
270 rgba);
271 memcpy(store, rgba, sizeof rgba);
272 }
273
274 #if 0
275 for (j = 0; j < 4; j++)
276 debug_printf("sample %d result %f %f %f %f\n",
277 j,
278 store[0+j],
279 store[4+j],
280 store[8+j],
281 store[12+j]);
282 #endif
283 }
284
285 /**
286 * High-level instruction translators.
287 */
288
289 static void
290 emit_tex( struct lp_build_tgsi_soa_context *bld,
291 const struct tgsi_full_instruction *inst,
292 boolean apply_lodbias,
293 boolean projected)
294 {
295 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
296 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
297 LLVMValueRef lodbias;
298 LLVMValueRef oow;
299 LLVMValueRef args[3];
300 unsigned count;
301 unsigned i;
302
303 switch (inst->InstructionExtTexture.Texture) {
304 case TGSI_TEXTURE_1D:
305 case TGSI_TEXTURE_SHADOW1D:
306 count = 1;
307 break;
308 case TGSI_TEXTURE_2D:
309 case TGSI_TEXTURE_RECT:
310 case TGSI_TEXTURE_SHADOW2D:
311 case TGSI_TEXTURE_SHADOWRECT:
312 count = 2;
313 break;
314 case TGSI_TEXTURE_3D:
315 case TGSI_TEXTURE_CUBE:
316 count = 3;
317 break;
318 default:
319 assert(0);
320 return;
321 }
322
323 if(apply_lodbias)
324 lodbias = emit_fetch( bld, inst, 0, 3 );
325 else
326 lodbias = bld->base.zero;
327
328 if(!bld->store_ptr)
329 bld->store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
330 vec_type,
331 LLVMConstInt(LLVMInt32Type(), 4, 0),
332 "store");
333
334 if (projected) {
335 oow = emit_fetch( bld, inst, 0, 3 );
336 oow = lp_build_rcp(&bld->base, oow);
337 }
338
339 for (i = 0; i < count; i++) {
340 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
341 LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
342 LLVMValueRef coord;
343
344 coord = emit_fetch( bld, inst, 0, i );
345
346 if (projected)
347 coord = lp_build_mul(&bld->base, coord, oow);
348
349 LLVMBuildStore(bld->base.builder, coord, coord_ptr);
350 }
351
352 args[0] = bld->samplers_ptr;
353 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
354 args[2] = bld->store_ptr;
355
356 lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
357
358 FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
359 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
360 LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, bld->store_ptr, &index, 1, "");
361 LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
362 emit_store( bld, inst, 0, i, res );
363 }
364 }
365
366
367 static void
368 emit_kil(
369 struct lp_build_tgsi_soa_context *bld,
370 const struct tgsi_full_instruction *inst )
371 {
372 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
373 LLVMValueRef terms[NUM_CHANNELS];
374 LLVMValueRef mask;
375 unsigned chan_index;
376
377 memset(&terms, 0, sizeof terms);
378
379 FOR_EACH_CHANNEL( chan_index ) {
380 unsigned swizzle;
381
382 /* Unswizzle channel */
383 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
384
385 /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
386 * not to be tested. */
387 if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
388 continue;
389
390 /* Check if the component has not been already tested. */
391 assert(swizzle < NUM_CHANNELS);
392 if( !terms[swizzle] )
393 /* TODO: change the comparison operator instead of setting the sign */
394 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
395 }
396
397 mask = NULL;
398 FOR_EACH_CHANNEL( chan_index ) {
399 if(terms[chan_index]) {
400 LLVMValueRef chan_mask;
401
402 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
403
404 if(mask)
405 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
406 else
407 mask = chan_mask;
408 }
409 }
410
411 if(mask)
412 lp_build_mask_update(bld->mask, mask);
413 }
414
415
/**
 * Predicated kill -- placeholder that emits nothing.
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   /* XXX todo / fix me */
   (void) bld;
}
422
423
424 /**
425 * Check if inst src/dest regs use indirect addressing into temporary
426 * register file.
427 */
428 static boolean
429 indirect_temp_reference(const struct tgsi_full_instruction *inst)
430 {
431 uint i;
432 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
433 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
434 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
435 reg->SrcRegister.Indirect)
436 return TRUE;
437 }
438 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
439 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
440 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
441 reg->DstRegister.Indirect)
442 return TRUE;
443 }
444 return FALSE;
445 }
446
447
448 static int
449 emit_instruction(
450 struct lp_build_tgsi_soa_context *bld,
451 struct tgsi_full_instruction *inst )
452 {
453 unsigned chan_index;
454 LLVMValueRef src0, src1, src2;
455 LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
456 LLVMValueRef dst0;
457
458 /* we can't handle indirect addressing into temp register file yet */
459 if (indirect_temp_reference(inst))
460 return FALSE;
461
462 switch (inst->Instruction.Opcode) {
463 #if 0
464 case TGSI_OPCODE_ARL:
465 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
466 tmp0 = emit_fetch( bld, inst, 0, chan_index );
467 emit_flr(bld, 0, 0);
468 emit_f2it( bld, 0 );
469 emit_store( bld, inst, 0, chan_index, tmp0);
470 }
471 break;
472 #endif
473
474 case TGSI_OPCODE_MOV:
475 case TGSI_OPCODE_SWZ:
476 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
477 tmp0 = emit_fetch( bld, inst, 0, chan_index );
478 emit_store( bld, inst, 0, chan_index, tmp0);
479 }
480 break;
481
482 case TGSI_OPCODE_LIT:
483 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
484 emit_store( bld, inst, 0, CHAN_X, bld->base.one);
485 }
486 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
487 src0 = emit_fetch( bld, inst, 0, CHAN_X );
488 dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
489 emit_store( bld, inst, 0, CHAN_Y, dst0);
490 }
491 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
492 /* XMM[1] = SrcReg[0].yyyy */
493 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
494 /* XMM[1] = max(XMM[1], 0) */
495 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
496 /* XMM[2] = SrcReg[0].wwww */
497 tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
498 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
499 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
500 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
501 dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
502 emit_store( bld, inst, 0, CHAN_Z, dst0);
503 }
504 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
505 emit_store( bld, inst, 0, CHAN_W, bld->base.one);
506 }
507 break;
508
509 case TGSI_OPCODE_RCP:
510 /* TGSI_OPCODE_RECIP */
511 src0 = emit_fetch( bld, inst, 0, CHAN_X );
512 dst0 = lp_build_rcp(&bld->base, src0);
513 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
514 emit_store( bld, inst, 0, chan_index, dst0 );
515 }
516 break;
517
518 case TGSI_OPCODE_RSQ:
519 /* TGSI_OPCODE_RECIPSQRT */
520 src0 = emit_fetch( bld, inst, 0, CHAN_X );
521 src0 = lp_build_abs(&bld->base, src0);
522 dst0 = lp_build_rsqrt(&bld->base, src0);
523 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
524 emit_store( bld, inst, 0, chan_index, dst0 );
525 }
526 break;
527
528 case TGSI_OPCODE_EXP:
529 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
530 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
531 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
532 LLVMValueRef *p_exp2_int_part = NULL;
533 LLVMValueRef *p_frac_part = NULL;
534 LLVMValueRef *p_exp2 = NULL;
535
536 src0 = emit_fetch( bld, inst, 0, CHAN_X );
537
538 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
539 p_exp2_int_part = &tmp0;
540 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
541 p_frac_part = &tmp1;
542 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
543 p_exp2 = &tmp2;
544
545 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
546
547 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
548 emit_store( bld, inst, 0, CHAN_X, tmp0);
549 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
550 emit_store( bld, inst, 0, CHAN_Y, tmp1);
551 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
552 emit_store( bld, inst, 0, CHAN_Z, tmp2);
553 }
554 /* dst.w = 1.0 */
555 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
556 tmp0 = bld->base.one;
557 emit_store( bld, inst, 0, CHAN_W, tmp0);
558 }
559 break;
560
561 case TGSI_OPCODE_LOG:
562 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
563 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
564 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
565 LLVMValueRef *p_floor_log2;
566 LLVMValueRef *p_exp;
567 LLVMValueRef *p_log2;
568
569 src0 = emit_fetch( bld, inst, 0, CHAN_X );
570 src0 = lp_build_abs( &bld->base, src0 );
571
572 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
573 p_floor_log2 = &tmp0;
574 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
575 p_exp = &tmp1;
576 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
577 p_log2 = &tmp2;
578
579 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
580
581 /* dst.x = floor(lg2(abs(src.x))) */
582 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
583 emit_store( bld, inst, 0, CHAN_X, tmp0);
584 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
585 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
586 tmp1 = lp_build_div( &bld->base, src0, tmp1);
587 emit_store( bld, inst, 0, CHAN_Y, tmp1);
588 }
589 /* dst.z = lg2(abs(src.x)) */
590 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
591 emit_store( bld, inst, 0, CHAN_Z, tmp2);
592 }
593 /* dst.w = 1.0 */
594 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
595 tmp0 = bld->base.one;
596 emit_store( bld, inst, 0, CHAN_W, tmp0);
597 }
598 break;
599
600 case TGSI_OPCODE_MUL:
601 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
602 src0 = emit_fetch( bld, inst, 0, chan_index );
603 src1 = emit_fetch( bld, inst, 1, chan_index );
604 dst0 = lp_build_mul(&bld->base, src0, src1);
605 emit_store( bld, inst, 0, chan_index, dst0);
606 }
607 break;
608
609 case TGSI_OPCODE_ADD:
610 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
611 src0 = emit_fetch( bld, inst, 0, chan_index );
612 src1 = emit_fetch( bld, inst, 1, chan_index );
613 dst0 = lp_build_add(&bld->base, src0, src1);
614 emit_store( bld, inst, 0, chan_index, dst0);
615 }
616 break;
617
618 case TGSI_OPCODE_DP3:
619 /* TGSI_OPCODE_DOT3 */
620 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
621 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
622 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
623 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
624 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
625 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
626 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
627 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
628 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
629 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
630 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
631 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
632 emit_store( bld, inst, 0, chan_index, tmp0);
633 }
634 break;
635
636 case TGSI_OPCODE_DP4:
637 /* TGSI_OPCODE_DOT4 */
638 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
639 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
640 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
641 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
642 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
643 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
644 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
645 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
646 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
647 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
648 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
649 tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
650 tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
651 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
652 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
653 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
654 emit_store( bld, inst, 0, chan_index, tmp0);
655 }
656 break;
657
658 case TGSI_OPCODE_DST:
659 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
660 tmp0 = bld->base.one;
661 emit_store( bld, inst, 0, CHAN_X, tmp0);
662 }
663 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
664 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
665 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
666 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
667 emit_store( bld, inst, 0, CHAN_Y, tmp0);
668 }
669 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
670 tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
671 emit_store( bld, inst, 0, CHAN_Z, tmp0);
672 }
673 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
674 tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
675 emit_store( bld, inst, 0, CHAN_W, tmp0);
676 }
677 break;
678
679 case TGSI_OPCODE_MIN:
680 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
681 src0 = emit_fetch( bld, inst, 0, chan_index );
682 src1 = emit_fetch( bld, inst, 1, chan_index );
683 dst0 = lp_build_min( &bld->base, src0, src1 );
684 emit_store( bld, inst, 0, chan_index, dst0);
685 }
686 break;
687
688 case TGSI_OPCODE_MAX:
689 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
690 src0 = emit_fetch( bld, inst, 0, chan_index );
691 src1 = emit_fetch( bld, inst, 1, chan_index );
692 dst0 = lp_build_max( &bld->base, src0, src1 );
693 emit_store( bld, inst, 0, chan_index, dst0);
694 }
695 break;
696
697 case TGSI_OPCODE_SLT:
698 /* TGSI_OPCODE_SETLT */
699 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
700 src0 = emit_fetch( bld, inst, 0, chan_index );
701 src1 = emit_fetch( bld, inst, 1, chan_index );
702 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
703 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
704 emit_store( bld, inst, 0, chan_index, dst0);
705 }
706 break;
707
708 case TGSI_OPCODE_SGE:
709 /* TGSI_OPCODE_SETGE */
710 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
711 src0 = emit_fetch( bld, inst, 0, chan_index );
712 src1 = emit_fetch( bld, inst, 1, chan_index );
713 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
714 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
715 emit_store( bld, inst, 0, chan_index, dst0);
716 }
717 break;
718
719 case TGSI_OPCODE_MAD:
720 /* TGSI_OPCODE_MADD */
721 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
722 tmp0 = emit_fetch( bld, inst, 0, chan_index );
723 tmp1 = emit_fetch( bld, inst, 1, chan_index );
724 tmp2 = emit_fetch( bld, inst, 2, chan_index );
725 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
726 tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
727 emit_store( bld, inst, 0, chan_index, tmp0);
728 }
729 break;
730
731 case TGSI_OPCODE_SUB:
732 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
733 tmp0 = emit_fetch( bld, inst, 0, chan_index );
734 tmp1 = emit_fetch( bld, inst, 1, chan_index );
735 tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
736 emit_store( bld, inst, 0, chan_index, tmp0);
737 }
738 break;
739
740 case TGSI_OPCODE_LRP:
741 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
742 src0 = emit_fetch( bld, inst, 0, chan_index );
743 src1 = emit_fetch( bld, inst, 1, chan_index );
744 src2 = emit_fetch( bld, inst, 2, chan_index );
745 tmp0 = lp_build_sub( &bld->base, src1, src2 );
746 tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
747 dst0 = lp_build_add( &bld->base, tmp0, src2 );
748 emit_store( bld, inst, 0, chan_index, dst0 );
749 }
750 break;
751
752 case TGSI_OPCODE_CND:
753 return 0;
754 break;
755
756 case TGSI_OPCODE_CND0:
757 return 0;
758 break;
759
760 case TGSI_OPCODE_DP2A:
761 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
762 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
763 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
764 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
765 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
766 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
767 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
768 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
769 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
770 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
771 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
772 }
773 break;
774
775 #if 0
776 case TGSI_OPCODE_FRC:
777 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
778 tmp0 = emit_fetch( bld, inst, 0, chan_index );
779 emit_frc( bld, 0, 0 );
780 emit_store( bld, inst, 0, chan_index, tmp0);
781 }
782 break;
783
784 case TGSI_OPCODE_CLAMP:
785 return 0;
786 break;
787
788 case TGSI_OPCODE_FLR:
789 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
790 tmp0 = emit_fetch( bld, inst, 0, chan_index );
791 emit_flr( bld, 0, 0 );
792 emit_store( bld, inst, 0, chan_index, tmp0);
793 }
794 break;
795
796 case TGSI_OPCODE_ROUND:
797 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
798 tmp0 = emit_fetch( bld, inst, 0, chan_index );
799 emit_rnd( bld, 0, 0 );
800 emit_store( bld, inst, 0, chan_index, tmp0);
801 }
802 break;
803 #endif
804
805 case TGSI_OPCODE_EX2: {
806 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
807 tmp0 = lp_build_exp2( &bld->base, tmp0);
808 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
809 emit_store( bld, inst, 0, chan_index, tmp0);
810 }
811 break;
812 }
813
814 case TGSI_OPCODE_LG2:
815 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
816 tmp0 = lp_build_log2( &bld->base, tmp0);
817 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
818 emit_store( bld, inst, 0, chan_index, tmp0);
819 }
820 break;
821
822 case TGSI_OPCODE_POW:
823 src0 = emit_fetch( bld, inst, 0, CHAN_X );
824 src1 = emit_fetch( bld, inst, 1, CHAN_X );
825 dst0 = lp_build_pow( &bld->base, src0, src1 );
826 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
827 emit_store( bld, inst, 0, chan_index, dst0 );
828 }
829 break;
830
831 case TGSI_OPCODE_XPD:
832 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
833 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
834 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
835 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
836 }
837 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
838 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
839 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
840 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
841 }
842 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
843 tmp2 = tmp0;
844 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
845 tmp5 = tmp3;
846 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
847 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
848 emit_store( bld, inst, 0, CHAN_X, tmp2);
849 }
850 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
851 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
852 tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
853 tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
854 }
855 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
856 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
857 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
858 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
859 emit_store( bld, inst, 0, CHAN_Y, tmp3);
860 }
861 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
862 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
863 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
864 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
865 emit_store( bld, inst, 0, CHAN_Z, tmp5);
866 }
867 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
868 tmp0 = bld->base.one;
869 emit_store( bld, inst, 0, CHAN_W, tmp0);
870 }
871 break;
872
873 case TGSI_OPCODE_ABS:
874 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
875 tmp0 = emit_fetch( bld, inst, 0, chan_index );
876 tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
877 emit_store( bld, inst, 0, chan_index, tmp0);
878 }
879 break;
880
881 case TGSI_OPCODE_RCC:
882 return 0;
883 break;
884
885 case TGSI_OPCODE_DPH:
886 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
887 tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
888 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
889 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
890 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
891 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
892 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
893 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
894 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
895 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
896 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
897 tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
898 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
899 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
900 emit_store( bld, inst, 0, chan_index, tmp0);
901 }
902 break;
903
904 case TGSI_OPCODE_COS:
905 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
906 tmp0 = lp_build_cos( &bld->base, tmp0 );
907 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
908 emit_store( bld, inst, 0, chan_index, tmp0);
909 }
910 break;
911
912 case TGSI_OPCODE_DDX:
913 return 0;
914 break;
915
916 case TGSI_OPCODE_DDY:
917 return 0;
918 break;
919
920 #if 0
921 case TGSI_OPCODE_KILP:
922 /* predicated kill */
923 emit_kilp( bld );
924 return 0; /* XXX fix me */
925 break;
926 #endif
927
928 case TGSI_OPCODE_KIL:
929 /* conditional kill */
930 emit_kil( bld, inst );
931 break;
932
933 case TGSI_OPCODE_PK2H:
934 return 0;
935 break;
936
937 case TGSI_OPCODE_PK2US:
938 return 0;
939 break;
940
941 case TGSI_OPCODE_PK4B:
942 return 0;
943 break;
944
945 case TGSI_OPCODE_PK4UB:
946 return 0;
947 break;
948
949 case TGSI_OPCODE_RFL:
950 return 0;
951 break;
952
953 case TGSI_OPCODE_SEQ:
954 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
955 src0 = emit_fetch( bld, inst, 0, chan_index );
956 src1 = emit_fetch( bld, inst, 1, chan_index );
957 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
958 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
959 emit_store( bld, inst, 0, chan_index, dst0);
960 }
961 break;
962
963 case TGSI_OPCODE_SFL:
964 return 0;
965 break;
966
967 case TGSI_OPCODE_SGT:
968 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
969 src0 = emit_fetch( bld, inst, 0, chan_index );
970 src1 = emit_fetch( bld, inst, 1, chan_index );
971 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
972 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
973 emit_store( bld, inst, 0, chan_index, dst0);
974 }
975 break;
976
977 case TGSI_OPCODE_SIN:
978 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
979 tmp0 = lp_build_sin( &bld->base, tmp0 );
980 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
981 emit_store( bld, inst, 0, chan_index, tmp0);
982 }
983 break;
984
985 case TGSI_OPCODE_SLE:
986 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
987 src0 = emit_fetch( bld, inst, 0, chan_index );
988 src1 = emit_fetch( bld, inst, 1, chan_index );
989 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
990 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
991 emit_store( bld, inst, 0, chan_index, dst0);
992 }
993 break;
994
995 case TGSI_OPCODE_SNE:
996 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
997 src0 = emit_fetch( bld, inst, 0, chan_index );
998 src1 = emit_fetch( bld, inst, 1, chan_index );
999 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1000 dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1001 emit_store( bld, inst, 0, chan_index, dst0);
1002 }
1003 break;
1004
1005 case TGSI_OPCODE_STR:
1006 return 0;
1007 break;
1008
1009 case TGSI_OPCODE_TEX:
1010 emit_tex( bld, inst, FALSE, FALSE );
1011 break;
1012
1013 case TGSI_OPCODE_TXD:
1014 return 0;
1015 break;
1016
1017 case TGSI_OPCODE_UP2H:
1018 return 0;
1019 break;
1020
1021 case TGSI_OPCODE_UP2US:
1022 return 0;
1023 break;
1024
1025 case TGSI_OPCODE_UP4B:
1026 return 0;
1027 break;
1028
1029 case TGSI_OPCODE_UP4UB:
1030 return 0;
1031 break;
1032
1033 case TGSI_OPCODE_X2D:
1034 return 0;
1035 break;
1036
1037 case TGSI_OPCODE_ARA:
1038 return 0;
1039 break;
1040
1041 #if 0
1042 case TGSI_OPCODE_ARR:
1043 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1044 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1045 emit_rnd( bld, 0, 0 );
1046 emit_f2it( bld, 0 );
1047 emit_store( bld, inst, 0, chan_index, tmp0);
1048 }
1049 break;
1050 #endif
1051
1052 case TGSI_OPCODE_BRA:
1053 return 0;
1054 break;
1055
1056 case TGSI_OPCODE_CAL:
1057 return 0;
1058 break;
1059
1060 #if 0
1061 case TGSI_OPCODE_RET:
1062 emit_ret( bld );
1063 break;
1064 #endif
1065
1066 case TGSI_OPCODE_END:
1067 break;
1068
1069 #if 0
1070 case TGSI_OPCODE_SSG:
1071 /* TGSI_OPCODE_SGN */
1072 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1073 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1074 emit_sgn( bld, 0, 0 );
1075 emit_store( bld, inst, 0, chan_index, tmp0);
1076 }
1077 break;
1078 #endif
1079
1080 case TGSI_OPCODE_CMP:
1081 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1082 src0 = emit_fetch( bld, inst, 0, chan_index );
1083 src1 = emit_fetch( bld, inst, 1, chan_index );
1084 src2 = emit_fetch( bld, inst, 2, chan_index );
1085 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1086 dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
1087 emit_store( bld, inst, 0, chan_index, dst0);
1088 }
1089 break;
1090
1091 case TGSI_OPCODE_SCS:
1092 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1093 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1094 tmp0 = lp_build_cos( &bld->base, tmp0 );
1095 emit_store( bld, inst, 0, CHAN_X, tmp0);
1096 }
1097 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1098 tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1099 tmp0 = lp_build_sin( &bld->base, tmp0 );
1100 emit_store( bld, inst, 0, CHAN_Y, tmp0);
1101 }
1102 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1103 tmp0 = bld->base.zero;
1104 emit_store( bld, inst, 0, CHAN_Z, tmp0);
1105 }
1106 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1107 tmp0 = bld->base.one;
1108 emit_store( bld, inst, 0, CHAN_W, tmp0);
1109 }
1110 break;
1111
1112 case TGSI_OPCODE_TXB:
1113 emit_tex( bld, inst, TRUE, FALSE );
1114 break;
1115
1116 case TGSI_OPCODE_NRM:
1117 /* fall-through */
1118 case TGSI_OPCODE_NRM4:
1119 /* 3 or 4-component normalization */
1120 {
1121 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1122
1123 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1124 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1125 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1126 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1127
1128 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1129
1130 /* xmm4 = src.x */
1131 /* xmm0 = src.x * src.x */
1132 tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1133 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1134 tmp4 = tmp0;
1135 }
1136 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1137
1138 /* xmm5 = src.y */
1139 /* xmm0 = xmm0 + src.y * src.y */
1140 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1141 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1142 tmp5 = tmp1;
1143 }
1144 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1145 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1146
1147 /* xmm6 = src.z */
1148 /* xmm0 = xmm0 + src.z * src.z */
1149 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1150 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1151 tmp6 = tmp1;
1152 }
1153 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1154 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1155
1156 if (dims == 4) {
1157 /* xmm7 = src.w */
1158 /* xmm0 = xmm0 + src.w * src.w */
1159 tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1160 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1161 tmp7 = tmp1;
1162 }
1163 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1164 tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1165 }
1166
1167 /* xmm1 = 1 / sqrt(xmm0) */
1168 tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1169
1170 /* dst.x = xmm1 * src.x */
1171 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1172 tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
1173 emit_store(bld, inst, 0, CHAN_X, tmp4);
1174 }
1175
1176 /* dst.y = xmm1 * src.y */
1177 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1178 tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
1179 emit_store(bld, inst, 0, CHAN_Y, tmp5);
1180 }
1181
1182 /* dst.z = xmm1 * src.z */
1183 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1184 tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
1185 emit_store(bld, inst, 0, CHAN_Z, tmp6);
1186 }
1187
1188 /* dst.w = xmm1 * src.w */
1189 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1190 tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
1191 emit_store(bld, inst, 0, CHAN_W, tmp7);
1192 }
1193 }
1194
1195 /* dst0.w = 1.0 */
1196 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1197 tmp0 = bld->base.one;
1198 emit_store(bld, inst, 0, CHAN_W, tmp0);
1199 }
1200 }
1201 break;
1202
1203 case TGSI_OPCODE_DIV:
1204 return 0;
1205 break;
1206
1207 case TGSI_OPCODE_DP2:
1208 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
1209 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
1210 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
1211 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
1212 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
1213 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
1214 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
1215 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1216 emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
1217 }
1218 break;
1219
1220 case TGSI_OPCODE_TXL:
1221 emit_tex( bld, inst, TRUE, FALSE );
1222 break;
1223
1224 case TGSI_OPCODE_TXP:
1225 emit_tex( bld, inst, FALSE, TRUE );
1226 break;
1227
1228 case TGSI_OPCODE_BRK:
1229 return 0;
1230 break;
1231
1232 case TGSI_OPCODE_IF:
1233 return 0;
1234 break;
1235
1236 case TGSI_OPCODE_LOOP:
1237 return 0;
1238 break;
1239
1240 case TGSI_OPCODE_REP:
1241 return 0;
1242 break;
1243
1244 case TGSI_OPCODE_ELSE:
1245 return 0;
1246 break;
1247
1248 case TGSI_OPCODE_ENDIF:
1249 return 0;
1250 break;
1251
1252 case TGSI_OPCODE_ENDLOOP:
1253 return 0;
1254 break;
1255
1256 case TGSI_OPCODE_ENDREP:
1257 return 0;
1258 break;
1259
1260 case TGSI_OPCODE_PUSHA:
1261 return 0;
1262 break;
1263
1264 case TGSI_OPCODE_POPA:
1265 return 0;
1266 break;
1267
1268 case TGSI_OPCODE_CEIL:
1269 return 0;
1270 break;
1271
1272 case TGSI_OPCODE_I2F:
1273 return 0;
1274 break;
1275
1276 case TGSI_OPCODE_NOT:
1277 return 0;
1278 break;
1279
1280 #if 0
1281 case TGSI_OPCODE_TRUNC:
1282 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1283 tmp0 = emit_fetch( bld, inst, 0, chan_index );
1284 emit_f2it( bld, 0 );
1285 emit_i2f( bld, 0 );
1286 emit_store( bld, inst, 0, chan_index, tmp0);
1287 }
1288 break;
1289 #endif
1290
1291 case TGSI_OPCODE_SHL:
1292 return 0;
1293 break;
1294
1295 case TGSI_OPCODE_SHR:
1296 return 0;
1297 break;
1298
1299 case TGSI_OPCODE_AND:
1300 return 0;
1301 break;
1302
1303 case TGSI_OPCODE_OR:
1304 return 0;
1305 break;
1306
1307 case TGSI_OPCODE_MOD:
1308 return 0;
1309 break;
1310
1311 case TGSI_OPCODE_XOR:
1312 return 0;
1313 break;
1314
1315 case TGSI_OPCODE_SAD:
1316 return 0;
1317 break;
1318
1319 case TGSI_OPCODE_TXF:
1320 return 0;
1321 break;
1322
1323 case TGSI_OPCODE_TXQ:
1324 return 0;
1325 break;
1326
1327 case TGSI_OPCODE_CONT:
1328 return 0;
1329 break;
1330
1331 case TGSI_OPCODE_EMIT:
1332 return 0;
1333 break;
1334
1335 case TGSI_OPCODE_ENDPRIM:
1336 return 0;
1337 break;
1338
1339 default:
1340 return 0;
1341 }
1342
1343 return 1;
1344 }
1345
1346
1347 void
1348 lp_build_tgsi_soa(LLVMBuilderRef builder,
1349 const struct tgsi_token *tokens,
1350 union lp_type type,
1351 struct lp_build_mask_context *mask,
1352 LLVMValueRef consts_ptr,
1353 const LLVMValueRef *pos,
1354 const LLVMValueRef (*inputs)[NUM_CHANNELS],
1355 LLVMValueRef (*outputs)[NUM_CHANNELS],
1356 LLVMValueRef samplers_ptr)
1357 {
1358 struct lp_build_tgsi_soa_context bld;
1359 struct tgsi_parse_context parse;
1360 uint num_immediates = 0;
1361 unsigned i;
1362
1363 /* Setup build context */
1364 memset(&bld, 0, sizeof bld);
1365 lp_build_context_init(&bld.base, builder, type);
1366 bld.mask = mask;
1367 bld.pos = pos;
1368 bld.inputs = inputs;
1369 bld.outputs = outputs;
1370 bld.consts_ptr = consts_ptr;
1371 bld.samplers_ptr = samplers_ptr;
1372
1373 tgsi_parse_init( &parse, tokens );
1374
1375 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1376 tgsi_parse_token( &parse );
1377
1378 switch( parse.FullToken.Token.Type ) {
1379 case TGSI_TOKEN_TYPE_DECLARATION:
1380 /* Input already interpolated */
1381 break;
1382
1383 case TGSI_TOKEN_TYPE_INSTRUCTION:
1384 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1385 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1386 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1387 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1388 info ? info->mnemonic : "<invalid>");
1389 }
1390 break;
1391
1392 case TGSI_TOKEN_TYPE_IMMEDIATE:
1393 /* simply copy the immediate values into the next immediates[] slot */
1394 {
1395 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1396 assert(size <= 4);
1397 assert(num_immediates < LP_MAX_IMMEDIATES);
1398 for( i = 0; i < size; ++i )
1399 bld.immediates[num_immediates][i] =
1400 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1401 for( i = size; i < 4; ++i )
1402 bld.immediates[num_immediates][i] = bld.base.undef;
1403 num_immediates++;
1404 }
1405 break;
1406
1407 default:
1408 assert( 0 );
1409 }
1410 }
1411
1412 tgsi_parse_free( &parse );
1413 }
1414