d5acafe840ce43c93240f72bd0abe652ffaf8116
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_tgsi_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "tgsi/tgsi_parse.h"
35 #include "tgsi/tgsi_util.h"
36 #include "tgsi/tgsi_exec.h"
37 #include "lp_bld_type.h"
38 #include "lp_bld_const.h"
39 #include "lp_bld_intr.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_swizzle.h"
42 #include "lp_bld_tgsi.h"
43
44
/* Number of rows in the temps[] table of struct lp_build_tgsi_soa_context. */
#define LP_MAX_TEMPS 256
/* Number of rows in the immediates[] table of the same struct. */
#define LP_MAX_IMMEDIATES 256


/*
 * Loop header iterating CHAN over 0 .. NUM_CHANNELS-1.
 * NUM_CHANNELS is not defined in this file (presumably it comes from one of
 * the tgsi headers -- confirm).
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * "if" header guarding the following statement on IS_DST0_CHANNEL_ENABLED.
 * Because it expands to a bare "if", an "else" written after the guarded
 * statement binds to this hidden "if".
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/*
 * Loop header visiting only the channels enabled in the first destination's
 * write mask: expands to the "for" above followed by the "if" above.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices used with the macros above and with emit_fetch/emit_store. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
66
67
/**
 * State shared by the TGSI -> LLVM IR translation helpers in this file.
 *
 * The register tables are indexed as [register index][channel].
 */
struct lp_build_tgsi_soa_context
{
   /* Generic builder state; supplies builder, type, zero, one and undef. */
   struct lp_build_context base;

   /* Input register values, read by emit_fetch() for TGSI_FILE_INPUT. */
   LLVMValueRef (*inputs)[4];
   /*
    * Pointer to the constant buffer. emit_fetch() loads the scalar at element
    * Index*4 + channel and broadcasts it.
    */
   LLVMValueRef consts_ptr;
   /* Output register values, written by emit_store() for TGSI_FILE_OUTPUT. */
   LLVMValueRef (*outputs)[4];
   /* Passed as the first argument of the "fetch_texel" call in emit_tex(). */
   LLVMValueRef samplers_ptr;

   /* Immediate values, read by emit_fetch() for TGSI_FILE_IMMEDIATE. */
   LLVMValueRef immediates[LP_MAX_IMMEDIATES][4];
   /*
    * Current value of each temporary channel. An entry that is still NULL is
    * fetched as base.undef by emit_fetch().
    */
   LLVMValueRef temps[LP_MAX_TEMPS][4];
};
80
81
82 /**
83 * Function call helpers.
84 */
85
86 /**
87 * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be
88 * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
89 * that the stack pointer is 16 byte aligned, as expected.
90 */
91 static void
92 emit_func_call(
93 struct lp_build_tgsi_soa_context *bld,
94 const LLVMValueRef *args,
95 unsigned nr_args,
96 void (PIPE_CDECL *code)() )
97 {
98 #if 0
99 LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
100 void* Addr);
101 #endif
102
103 }
104
105
106 /**
107 * Register fetch.
108 */
109
110 static LLVMValueRef
111 emit_fetch(
112 struct lp_build_tgsi_soa_context *bld,
113 const struct tgsi_full_src_register *reg,
114 const unsigned chan_index )
115 {
116 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
117 LLVMValueRef res;
118
119 switch (swizzle) {
120 case TGSI_EXTSWIZZLE_X:
121 case TGSI_EXTSWIZZLE_Y:
122 case TGSI_EXTSWIZZLE_Z:
123 case TGSI_EXTSWIZZLE_W:
124
125 switch (reg->SrcRegister.File) {
126 case TGSI_FILE_CONSTANT: {
127 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
128 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
129 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
130 res = lp_build_broadcast_scalar(&bld->base, scalar);
131 break;
132 }
133
134 case TGSI_FILE_IMMEDIATE:
135 res = bld->immediates[reg->SrcRegister.Index][swizzle];
136 assert(res);
137 break;
138
139 case TGSI_FILE_INPUT:
140 res = bld->inputs[reg->SrcRegister.Index][swizzle];
141 assert(res);
142 break;
143
144 case TGSI_FILE_TEMPORARY:
145 res = bld->temps[reg->SrcRegister.Index][swizzle];
146 if(!res)
147 return bld->base.undef;
148 break;
149
150 default:
151 assert( 0 );
152 }
153 break;
154
155 case TGSI_EXTSWIZZLE_ZERO:
156 res = bld->base.zero;
157 break;
158
159 case TGSI_EXTSWIZZLE_ONE:
160 res = bld->base.one;
161 break;
162
163 default:
164 assert( 0 );
165 }
166
167 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
168 case TGSI_UTIL_SIGN_CLEAR:
169 res = lp_build_abs( &bld->base, res );
170 break;
171
172 case TGSI_UTIL_SIGN_SET:
173 res = lp_build_abs( &bld->base, res );
174 res = LLVMBuildNeg( bld->base.builder, res, "" );
175 break;
176
177 case TGSI_UTIL_SIGN_TOGGLE:
178 res = LLVMBuildNeg( bld->base.builder, res, "" );
179 break;
180
181 case TGSI_UTIL_SIGN_KEEP:
182 break;
183 }
184
185 return res;
186 }
187
188 #define FETCH( FUNC, INST, INDEX, CHAN )\
189 emit_fetch( FUNC, &(INST).FullSrcRegisters[INDEX], CHAN )
190
191 /**
192 * Register store.
193 */
194
195 static void
196 emit_store(
197 struct lp_build_tgsi_soa_context *bld,
198 const struct tgsi_full_dst_register *reg,
199 const struct tgsi_full_instruction *inst,
200 unsigned chan_index,
201 LLVMValueRef value)
202 {
203 switch( inst->Instruction.Saturate ) {
204 case TGSI_SAT_NONE:
205 break;
206
207 case TGSI_SAT_ZERO_ONE:
208 /* assert( 0 ); */
209 break;
210
211 case TGSI_SAT_MINUS_PLUS_ONE:
212 assert( 0 );
213 break;
214 }
215
216 switch( reg->DstRegister.File ) {
217 case TGSI_FILE_OUTPUT:
218 bld->outputs[reg->DstRegister.Index][chan_index] = value;
219 break;
220
221 case TGSI_FILE_TEMPORARY:
222 bld->temps[reg->DstRegister.Index][chan_index] = value;
223 break;
224
225 case TGSI_FILE_ADDRESS:
226 /* FIXME */
227 assert(0);
228 break;
229
230 default:
231 assert( 0 );
232 }
233 }
234
235 #define STORE( FUNC, INST, INDEX, CHAN, VAL )\
236 emit_store( FUNC, &(INST).FullDstRegisters[INDEX], &(INST), CHAN, VAL )
237
238
239 void PIPE_CDECL
240 lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
241 uint32_t unit,
242 float *store )
243 {
244 struct tgsi_sampler *sampler = samplers[unit];
245
246 #if 0
247 uint j;
248
249 debug_printf("%s sampler: %p (%p) store: %p\n",
250 __FUNCTION__,
251 sampler, *sampler,
252 store );
253
254 debug_printf("lodbias %f\n", store[12]);
255
256 for (j = 0; j < 4; j++)
257 debug_printf("sample %d texcoord %f %f\n",
258 j,
259 store[0+j],
260 store[4+j]);
261 #endif
262
263 {
264 float rgba[NUM_CHANNELS][QUAD_SIZE];
265 sampler->get_samples(sampler,
266 &store[0],
267 &store[4],
268 &store[8],
269 0.0f, /*store[12], lodbias */
270 rgba);
271 memcpy(store, rgba, sizeof rgba);
272 }
273
274 #if 0
275 for (j = 0; j < 4; j++)
276 debug_printf("sample %d result %f %f %f %f\n",
277 j,
278 store[0+j],
279 store[4+j],
280 store[8+j],
281 store[12+j]);
282 #endif
283 }
284
285 /**
286 * High-level instruction translators.
287 */
288
289 static void
290 emit_tex( struct lp_build_tgsi_soa_context *bld,
291 const struct tgsi_full_instruction *inst,
292 boolean apply_lodbias,
293 boolean projected)
294 {
295 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
296 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
297 LLVMValueRef lodbias;
298 LLVMValueRef oow;
299 LLVMValueRef store_ptr;
300 LLVMValueRef args[3];
301 unsigned count;
302 unsigned i;
303
304 switch (inst->InstructionExtTexture.Texture) {
305 case TGSI_TEXTURE_1D:
306 case TGSI_TEXTURE_SHADOW1D:
307 count = 1;
308 break;
309 case TGSI_TEXTURE_2D:
310 case TGSI_TEXTURE_RECT:
311 case TGSI_TEXTURE_SHADOW2D:
312 case TGSI_TEXTURE_SHADOWRECT:
313 count = 2;
314 break;
315 case TGSI_TEXTURE_3D:
316 case TGSI_TEXTURE_CUBE:
317 count = 3;
318 break;
319 default:
320 assert(0);
321 return;
322 }
323
324 if(apply_lodbias)
325 lodbias = FETCH( bld, *inst, 0, 3 );
326 else
327 lodbias = bld->base.zero;
328
329 store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
330 vec_type,
331 LLVMConstInt(LLVMInt32Type(), 4, 0),
332 "store");
333
334 if (projected) {
335 oow = FETCH( bld, *inst, 0, 3 );
336 oow = lp_build_rcp(&bld->base, oow);
337 }
338
339 for (i = 0; i < count; i++) {
340 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
341 LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, store_ptr, &index, 1, "");
342 LLVMValueRef coord;
343
344 coord = FETCH( bld, *inst, 0, i );
345
346 if (projected)
347 coord = lp_build_mul(&bld->base, coord, oow);
348
349 LLVMBuildStore(bld->base.builder, coord, coord_ptr);
350 }
351
352 args[0] = bld->samplers_ptr;
353 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
354 args[2] = store_ptr;
355
356 lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
357
358 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) {
359 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
360 LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, store_ptr, &index, 1, "");
361 LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
362 STORE( bld, *inst, 0, i, res );
363 }
364 }
365
366
/**
 * Conditional kill (KIL).
 *
 * Not implemented: nothing is emitted. The disabled block below is x86/SSE
 * emitter code kept for reference only; it does not match the helpers
 * defined in this file.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_src_register *reg )
{
#if 0
   unsigned uniquemask;
   unsigned unique_count = 0;
   unsigned chan_index;
   unsigned i;

   /* This mask stores component bits that were already tested. Note that
    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
    * tested. */
   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );

      /* check if the component has not been already tested */
      if( !(uniquemask & (1 << swizzle)) ) {
         uniquemask |= 1 << swizzle;

         /* allocate register */
         emit_fetch( bld, unique_count++, reg, chan_index );
      }
   }

   x86_push( bld, x86_make_reg( file_REG32, reg_AX ) );
   x86_push( bld, x86_make_reg( file_REG32, reg_DX ) );

   for (i = 0 ; i < unique_count; i++ ) {
      LLVMValueRef dataXMM = make_xmm(i);

      sse_cmpps(
         bld,
         dataXMM,
         get_temp( TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ),
         cc_LessThan );

      if( i == 0 ) {
         sse_movmskps( bld, x86_make_reg( file_REG32, reg_AX ), dataXMM );
      }
      else {
         sse_movmskps( bld, x86_make_reg( file_REG32, reg_DX ), dataXMM );
         x86_or(
            bld,
            x86_make_reg( file_REG32, reg_AX ),
            x86_make_reg( file_REG32, reg_DX ) );
      }
   }

   x86_or(
      bld,
      get_temp( TGSI_EXEC_TEMP_KILMASK_I, TGSI_EXEC_TEMP_KILMASK_C ),
      x86_make_reg( file_REG32, reg_AX ) );

   x86_pop( bld, x86_make_reg( file_REG32, reg_DX ) );
   x86_pop( bld, x86_make_reg( file_REG32, reg_AX ) );
#endif

   (void) bld;
   (void) reg;
}
455
456
/**
 * Predicated kill (KILP).
 *
 * XXX todo / fix me: not implemented, nothing is emitted.
 */
static void
emit_kilp(
   struct lp_build_tgsi_soa_context *bld )
{
   (void) bld;
}
463
464
465 /**
466 * Check if inst src/dest regs use indirect addressing into temporary
467 * register file.
468 */
469 static boolean
470 indirect_temp_reference(const struct tgsi_full_instruction *inst)
471 {
472 uint i;
473 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
474 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
475 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
476 reg->SrcRegister.Indirect)
477 return TRUE;
478 }
479 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
480 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
481 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
482 reg->DstRegister.Indirect)
483 return TRUE;
484 }
485 return FALSE;
486 }
487
488
489 static int
490 emit_instruction(
491 struct lp_build_tgsi_soa_context *bld,
492 struct tgsi_full_instruction *inst )
493 {
494 unsigned chan_index;
495 LLVMValueRef tmp;
496
497 /* we can't handle indirect addressing into temp register file yet */
498 if (indirect_temp_reference(inst))
499 return FALSE;
500
501 switch (inst->Instruction.Opcode) {
502 #if 0
503 case TGSI_OPCODE_ARL:
504 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
505 FETCH( bld, *inst, 0, 0, chan_index );
506 emit_flr(bld, 0, 0);
507 emit_f2it( bld, 0 );
508 STORE( bld, *inst, 0, 0, chan_index );
509 }
510 break;
511 #endif
512
513 case TGSI_OPCODE_MOV:
514 case TGSI_OPCODE_SWZ:
515 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
516 STORE( bld, *inst, 0, chan_index, FETCH( bld, *inst, 0, chan_index ) );
517 }
518 break;
519
520 #if 0
521 case TGSI_OPCODE_LIT:
522 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
523 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
524 emit_tempf(
525 bld,
526 0,
527 TEMP_ONE_I,
528 TEMP_ONE_C);
529 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
530 STORE( bld, *inst, 0, 0, CHAN_X );
531 }
532 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
533 STORE( bld, *inst, 0, 0, CHAN_W );
534 }
535 }
536 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
537 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
538 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
539 tmp = FETCH( bld, *inst, 0, 0, CHAN_X );
540 sse_maxps(
541 bld,
542 make_xmm( 0 ),
543 get_temp(
544 TGSI_EXEC_TEMP_00000000_I,
545 TGSI_EXEC_TEMP_00000000_C ) );
546 STORE( bld, *inst, 0, 0, CHAN_Y );
547 }
548 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
549 /* XMM[1] = SrcReg[0].yyyy */
550 FETCH( bld, *inst, 1, 0, CHAN_Y );
551 /* XMM[1] = max(XMM[1], 0) */
552 sse_maxps(
553 bld,
554 make_xmm( 1 ),
555 get_temp(
556 TGSI_EXEC_TEMP_00000000_I,
557 TGSI_EXEC_TEMP_00000000_C ) );
558 /* XMM[2] = SrcReg[0].wwww */
559 FETCH( bld, *inst, 2, 0, CHAN_W );
560 /* XMM[2] = min(XMM[2], 128.0) */
561 sse_minps(
562 bld,
563 make_xmm( 2 ),
564 get_temp(
565 TGSI_EXEC_TEMP_128_I,
566 TGSI_EXEC_TEMP_128_C ) );
567 /* XMM[2] = max(XMM[2], -128.0) */
568 sse_maxps(
569 bld,
570 make_xmm( 2 ),
571 get_temp(
572 TGSI_EXEC_TEMP_MINUS_128_I,
573 TGSI_EXEC_TEMP_MINUS_128_C ) );
574 emit_pow( bld, 3, 1, 1, 2 );
575 FETCH( bld, *inst, 0, 0, CHAN_X );
576 sse_xorps(
577 bld,
578 make_xmm( 2 ),
579 make_xmm( 2 ) );
580 sse_cmpps(
581 bld,
582 make_xmm( 2 ),
583 make_xmm( 0 ),
584 cc_LessThan );
585 sse_andps(
586 bld,
587 make_xmm( 2 ),
588 make_xmm( 1 ) );
589 STORE( bld, *inst, 2, 0, CHAN_Z );
590 }
591 }
592 break;
593 #endif
594
595 case TGSI_OPCODE_RCP:
596 /* TGSI_OPCODE_RECIP */
597 tmp = FETCH( bld, *inst, 0, CHAN_X );
598 tmp = lp_build_rcp(&bld->base, tmp);
599 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
600 STORE( bld, *inst, 0, chan_index, tmp );
601 }
602 break;
603
604 case TGSI_OPCODE_RSQ:
605 /* TGSI_OPCODE_RECIPSQRT */
606 tmp = FETCH( bld, *inst, 0, CHAN_X );
607 tmp = lp_build_abs(&bld->base, tmp);
608 tmp = lp_build_rsqrt(&bld->base, tmp);
609 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
610 STORE( bld, *inst, 0, chan_index, tmp );
611 }
612 break;
613
614 #if 0
615 case TGSI_OPCODE_EXP:
616 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
617 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
618 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
619 FETCH( bld, *inst, 0, 0, CHAN_X );
620 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
621 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
622 emit_MOV( bld, 1, 0 );
623 emit_flr( bld, 2, 1 );
624 /* dst.x = ex2(floor(src.x)) */
625 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
626 emit_MOV( bld, 2, 1 );
627 emit_ex2( bld, 3, 2 );
628 STORE( bld, *inst, 2, 0, CHAN_X );
629 }
630 /* dst.y = src.x - floor(src.x) */
631 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
632 emit_MOV( bld, 2, 0 );
633 emit_sub( bld, 2, 1 );
634 STORE( bld, *inst, 2, 0, CHAN_Y );
635 }
636 }
637 /* dst.z = ex2(src.x) */
638 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
639 emit_ex2( bld, 3, 0 );
640 STORE( bld, *inst, 0, 0, CHAN_Z );
641 }
642 }
643 /* dst.w = 1.0 */
644 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
645 emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
646 STORE( bld, *inst, 0, 0, CHAN_W );
647 }
648 break;
649 #endif
650
651 #if 0
652 case TGSI_OPCODE_LOG:
653 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
654 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
655 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
656 FETCH( bld, *inst, 0, 0, CHAN_X );
657 emit_abs( bld, 0 );
658 emit_MOV( bld, 1, 0 );
659 emit_lg2( bld, 2, 1 );
660 /* dst.z = lg2(abs(src.x)) */
661 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
662 STORE( bld, *inst, 1, 0, CHAN_Z );
663 }
664 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
665 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
666 emit_flr( bld, 2, 1 );
667 /* dst.x = floor(lg2(abs(src.x))) */
668 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
669 STORE( bld, *inst, 1, 0, CHAN_X );
670 }
671 /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
672 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
673 emit_ex2( bld, 2, 1 );
674 emit_rcp( bld, 1, 1 );
675 emit_mul( bld, 0, 1 );
676 STORE( bld, *inst, 0, 0, CHAN_Y );
677 }
678 }
679 }
680 /* dst.w = 1.0 */
681 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
682 emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
683 STORE( bld, *inst, 0, 0, CHAN_W );
684 }
685 break;
686 #endif
687
688 case TGSI_OPCODE_MUL:
689 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
690 LLVMValueRef a = FETCH( bld, *inst, 0, chan_index );
691 LLVMValueRef b = FETCH( bld, *inst, 1, chan_index );
692 tmp = lp_build_mul(&bld->base, a, b);
693 STORE( bld, *inst, 0, chan_index, tmp );
694 }
695 break;
696
697 case TGSI_OPCODE_ADD:
698 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
699 LLVMValueRef a = FETCH( bld, *inst, 0, chan_index );
700 LLVMValueRef b = FETCH( bld, *inst, 1, chan_index );
701 tmp = lp_build_add(&bld->base, a, b);
702 STORE( bld, *inst, 0, chan_index, tmp );
703 }
704 break;
705
706 #if 0
707 case TGSI_OPCODE_DP3:
708 /* TGSI_OPCODE_DOT3 */
709 FETCH( bld, *inst, 0, 0, CHAN_X );
710 FETCH( bld, *inst, 1, 1, CHAN_X );
711 emit_mul( bld, 0, 1 );
712 FETCH( bld, *inst, 1, 0, CHAN_Y );
713 FETCH( bld, *inst, 2, 1, CHAN_Y );
714 emit_mul( bld, 1, 2 );
715 emit_add( bld, 0, 1 );
716 FETCH( bld, *inst, 1, 0, CHAN_Z );
717 FETCH( bld, *inst, 2, 1, CHAN_Z );
718 emit_mul( bld, 1, 2 );
719 emit_add( bld, 0, 1 );
720 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
721 STORE( bld, *inst, 0, 0, chan_index );
722 }
723 break;
724
725 case TGSI_OPCODE_DP4:
726 /* TGSI_OPCODE_DOT4 */
727 FETCH( bld, *inst, 0, 0, CHAN_X );
728 FETCH( bld, *inst, 1, 1, CHAN_X );
729 emit_mul( bld, 0, 1 );
730 FETCH( bld, *inst, 1, 0, CHAN_Y );
731 FETCH( bld, *inst, 2, 1, CHAN_Y );
732 emit_mul( bld, 1, 2 );
733 emit_add( bld, 0, 1 );
734 FETCH( bld, *inst, 1, 0, CHAN_Z );
735 FETCH( bld, *inst, 2, 1, CHAN_Z );
736 emit_mul(bld, 1, 2 );
737 emit_add(bld, 0, 1 );
738 FETCH( bld, *inst, 1, 0, CHAN_W );
739 FETCH( bld, *inst, 2, 1, CHAN_W );
740 emit_mul( bld, 1, 2 );
741 emit_add( bld, 0, 1 );
742 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
743 STORE( bld, *inst, 0, 0, chan_index );
744 }
745 break;
746
747 case TGSI_OPCODE_DST:
748 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
749 emit_tempf(
750 bld,
751 0,
752 TEMP_ONE_I,
753 TEMP_ONE_C );
754 STORE( bld, *inst, 0, 0, CHAN_X );
755 }
756 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
757 FETCH( bld, *inst, 0, 0, CHAN_Y );
758 FETCH( bld, *inst, 1, 1, CHAN_Y );
759 emit_mul( bld, 0, 1 );
760 STORE( bld, *inst, 0, 0, CHAN_Y );
761 }
762 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
763 FETCH( bld, *inst, 0, 0, CHAN_Z );
764 STORE( bld, *inst, 0, 0, CHAN_Z );
765 }
766 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
767 FETCH( bld, *inst, 0, 1, CHAN_W );
768 STORE( bld, *inst, 0, 0, CHAN_W );
769 }
770 break;
771
772 case TGSI_OPCODE_MIN:
773 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
774 FETCH( bld, *inst, 0, 0, chan_index );
775 FETCH( bld, *inst, 1, 1, chan_index );
776 sse_minps(
777 bld,
778 make_xmm( 0 ),
779 make_xmm( 1 ) );
780 STORE( bld, *inst, 0, 0, chan_index );
781 }
782 break;
783
784 case TGSI_OPCODE_MAX:
785 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
786 FETCH( bld, *inst, 0, 0, chan_index );
787 FETCH( bld, *inst, 1, 1, chan_index );
788 sse_maxps(
789 bld,
790 make_xmm( 0 ),
791 make_xmm( 1 ) );
792 STORE( bld, *inst, 0, 0, chan_index );
793 }
794 break;
795
796 case TGSI_OPCODE_SLT:
797 /* TGSI_OPCODE_SETLT */
798 emit_setcc( bld, inst, cc_LessThan );
799 break;
800
801 case TGSI_OPCODE_SGE:
802 /* TGSI_OPCODE_SETGE */
803 emit_setcc( bld, inst, cc_NotLessThan );
804 break;
805
806 case TGSI_OPCODE_MAD:
807 /* TGSI_OPCODE_MADD */
808 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
809 FETCH( bld, *inst, 0, 0, chan_index );
810 FETCH( bld, *inst, 1, 1, chan_index );
811 FETCH( bld, *inst, 2, 2, chan_index );
812 emit_mul( bld, 0, 1 );
813 emit_add( bld, 0, 2 );
814 STORE( bld, *inst, 0, 0, chan_index );
815 }
816 break;
817
818 case TGSI_OPCODE_SUB:
819 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
820 FETCH( bld, *inst, 0, 0, chan_index );
821 FETCH( bld, *inst, 1, 1, chan_index );
822 emit_sub( bld, 0, 1 );
823 STORE( bld, *inst, 0, 0, chan_index );
824 }
825 break;
826
827 case TGSI_OPCODE_LRP:
828 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
829 FETCH( bld, *inst, 0, 0, chan_index );
830 FETCH( bld, *inst, 1, 1, chan_index );
831 FETCH( bld, *inst, 2, 2, chan_index );
832 emit_sub( bld, 1, 2 );
833 emit_mul( bld, 0, 1 );
834 emit_add( bld, 0, 2 );
835 STORE( bld, *inst, 0, 0, chan_index );
836 }
837 break;
838
839 case TGSI_OPCODE_CND:
840 return 0;
841 break;
842
843 case TGSI_OPCODE_CND0:
844 return 0;
845 break;
846
847 case TGSI_OPCODE_DP2A:
848 FETCH( bld, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
849 FETCH( bld, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
850 emit_mul( bld, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
851 FETCH( bld, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
852 FETCH( bld, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
853 emit_mul( bld, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
854 emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
855 FETCH( bld, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */
856 emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
857 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
858 STORE( bld, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
859 }
860 break;
861
862 case TGSI_OPCODE_FRC:
863 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
864 FETCH( bld, *inst, 0, 0, chan_index );
865 emit_frc( bld, 0, 0 );
866 STORE( bld, *inst, 0, 0, chan_index );
867 }
868 break;
869
870 case TGSI_OPCODE_CLAMP:
871 return 0;
872 break;
873
874 case TGSI_OPCODE_FLR:
875 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
876 FETCH( bld, *inst, 0, 0, chan_index );
877 emit_flr( bld, 0, 0 );
878 STORE( bld, *inst, 0, 0, chan_index );
879 }
880 break;
881
882 case TGSI_OPCODE_ROUND:
883 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
884 FETCH( bld, *inst, 0, 0, chan_index );
885 emit_rnd( bld, 0, 0 );
886 STORE( bld, *inst, 0, 0, chan_index );
887 }
888 break;
889
890 case TGSI_OPCODE_EX2:
891 FETCH( bld, *inst, 0, 0, CHAN_X );
892 emit_ex2( bld, 0, 0 );
893 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
894 STORE( bld, *inst, 0, 0, chan_index );
895 }
896 break;
897
898 case TGSI_OPCODE_LG2:
899 FETCH( bld, *inst, 0, 0, CHAN_X );
900 emit_lg2( bld, 0, 0 );
901 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
902 STORE( bld, *inst, 0, 0, chan_index );
903 }
904 break;
905
906 case TGSI_OPCODE_POW:
907 FETCH( bld, *inst, 0, 0, CHAN_X );
908 FETCH( bld, *inst, 1, 1, CHAN_X );
909 emit_pow( bld, 0, 0, 0, 1 );
910 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
911 STORE( bld, *inst, 0, 0, chan_index );
912 }
913 break;
914
915 case TGSI_OPCODE_XPD:
916 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
917 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
918 FETCH( bld, *inst, 1, 1, CHAN_Z );
919 FETCH( bld, *inst, 3, 0, CHAN_Z );
920 }
921 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
922 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
923 FETCH( bld, *inst, 0, 0, CHAN_Y );
924 FETCH( bld, *inst, 4, 1, CHAN_Y );
925 }
926 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
927 emit_MOV( bld, 2, 0 );
928 emit_mul( bld, 2, 1 );
929 emit_MOV( bld, 5, 3 );
930 emit_mul( bld, 5, 4 );
931 emit_sub( bld, 2, 5 );
932 STORE( bld, *inst, 2, 0, CHAN_X );
933 }
934 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
935 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
936 FETCH( bld, *inst, 2, 1, CHAN_X );
937 FETCH( bld, *inst, 5, 0, CHAN_X );
938 }
939 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
940 emit_mul( bld, 3, 2 );
941 emit_mul( bld, 1, 5 );
942 emit_sub( bld, 3, 1 );
943 STORE( bld, *inst, 3, 0, CHAN_Y );
944 }
945 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
946 emit_mul( bld, 5, 4 );
947 emit_mul( bld, 0, 2 );
948 emit_sub( bld, 5, 0 );
949 STORE( bld, *inst, 5, 0, CHAN_Z );
950 }
951 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
952 emit_tempf(
953 bld,
954 0,
955 TEMP_ONE_I,
956 TEMP_ONE_C );
957 STORE( bld, *inst, 0, 0, CHAN_W );
958 }
959 break;
960
961 case TGSI_OPCODE_ABS:
962 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
963 FETCH( bld, *inst, 0, 0, chan_index );
964 emit_abs( bld, 0) ;
965
966 STORE( bld, *inst, 0, 0, chan_index );
967 }
968 break;
969
970 case TGSI_OPCODE_RCC:
971 return 0;
972 break;
973
974 case TGSI_OPCODE_DPH:
975 FETCH( bld, *inst, 0, 0, CHAN_X );
976 FETCH( bld, *inst, 1, 1, CHAN_X );
977 emit_mul( bld, 0, 1 );
978 FETCH( bld, *inst, 1, 0, CHAN_Y );
979 FETCH( bld, *inst, 2, 1, CHAN_Y );
980 emit_mul( bld, 1, 2 );
981 emit_add( bld, 0, 1 );
982 FETCH( bld, *inst, 1, 0, CHAN_Z );
983 FETCH( bld, *inst, 2, 1, CHAN_Z );
984 emit_mul( bld, 1, 2 );
985 emit_add( bld, 0, 1 );
986 FETCH( bld, *inst, 1, 1, CHAN_W );
987 emit_add( bld, 0, 1 );
988 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
989 STORE( bld, *inst, 0, 0, chan_index );
990 }
991 break;
992
993 case TGSI_OPCODE_COS:
994 FETCH( bld, *inst, 0, 0, CHAN_X );
995 emit_cos( bld, 0, 0 );
996 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
997 STORE( bld, *inst, 0, 0, chan_index );
998 }
999 break;
1000
1001 case TGSI_OPCODE_DDX:
1002 return 0;
1003 break;
1004
1005 case TGSI_OPCODE_DDY:
1006 return 0;
1007 break;
1008
1009 case TGSI_OPCODE_KILP:
1010 /* predicated kill */
1011 emit_kilp( bld );
1012 return 0; /* XXX fix me */
1013 break;
1014
1015 case TGSI_OPCODE_KIL:
1016 /* conditional kill */
1017 emit_kil( bld, &inst->FullSrcRegisters[0] );
1018 break;
1019
1020 case TGSI_OPCODE_PK2H:
1021 return 0;
1022 break;
1023
1024 case TGSI_OPCODE_PK2US:
1025 return 0;
1026 break;
1027
1028 case TGSI_OPCODE_PK4B:
1029 return 0;
1030 break;
1031
1032 case TGSI_OPCODE_PK4UB:
1033 return 0;
1034 break;
1035
1036 case TGSI_OPCODE_RFL:
1037 return 0;
1038 break;
1039
1040 case TGSI_OPCODE_SEQ:
1041 return 0;
1042 break;
1043
1044 case TGSI_OPCODE_SFL:
1045 return 0;
1046 break;
1047
1048 case TGSI_OPCODE_SGT:
1049 return 0;
1050 break;
1051
1052 case TGSI_OPCODE_SIN:
1053 FETCH( bld, *inst, 0, 0, CHAN_X );
1054 emit_sin( bld, 0, 0 );
1055 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1056 STORE( bld, *inst, 0, 0, chan_index );
1057 }
1058 break;
1059
1060 case TGSI_OPCODE_SLE:
1061 return 0;
1062 break;
1063
1064 case TGSI_OPCODE_SNE:
1065 return 0;
1066 break;
1067
1068 case TGSI_OPCODE_STR:
1069 return 0;
1070 break;
1071 #endif
1072
1073 case TGSI_OPCODE_TEX:
1074 emit_tex( bld, inst, FALSE, FALSE );
1075 break;
1076
1077 #if 0
1078 case TGSI_OPCODE_TXD:
1079 return 0;
1080 break;
1081
1082 case TGSI_OPCODE_UP2H:
1083 return 0;
1084 break;
1085
1086 case TGSI_OPCODE_UP2US:
1087 return 0;
1088 break;
1089
1090 case TGSI_OPCODE_UP4B:
1091 return 0;
1092 break;
1093
1094 case TGSI_OPCODE_UP4UB:
1095 return 0;
1096 break;
1097
1098 case TGSI_OPCODE_X2D:
1099 return 0;
1100 break;
1101
1102 case TGSI_OPCODE_ARA:
1103 return 0;
1104 break;
1105
1106 case TGSI_OPCODE_ARR:
1107 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1108 FETCH( bld, *inst, 0, 0, chan_index );
1109 emit_rnd( bld, 0, 0 );
1110 emit_f2it( bld, 0 );
1111 STORE( bld, *inst, 0, 0, chan_index );
1112 }
1113 break;
1114
1115 case TGSI_OPCODE_BRA:
1116 return 0;
1117 break;
1118
1119 case TGSI_OPCODE_CAL:
1120 return 0;
1121 break;
1122
1123 case TGSI_OPCODE_RET:
1124 emit_ret( bld );
1125 break;
1126 #endif
1127
1128 case TGSI_OPCODE_END:
1129 break;
1130
1131 #if 0
1132 case TGSI_OPCODE_SSG:
1133 /* TGSI_OPCODE_SGN */
1134 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1135 FETCH( bld, *inst, 0, 0, chan_index );
1136 emit_sgn( bld, 0, 0 );
1137 STORE( bld, *inst, 0, 0, chan_index );
1138 }
1139 break;
1140
1141 case TGSI_OPCODE_CMP:
1142 emit_cmp (bld, inst);
1143 break;
1144
1145 case TGSI_OPCODE_SCS:
1146 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1147 FETCH( bld, *inst, 0, 0, CHAN_X );
1148 emit_cos( bld, 0, 0 );
1149 STORE( bld, *inst, 0, 0, CHAN_X );
1150 }
1151 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1152 FETCH( bld, *inst, 0, 0, CHAN_X );
1153 emit_sin( bld, 0, 0 );
1154 STORE( bld, *inst, 0, 0, CHAN_Y );
1155 }
1156 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1157 emit_tempf(
1158 bld,
1159 0,
1160 TGSI_EXEC_TEMP_00000000_I,
1161 TGSI_EXEC_TEMP_00000000_C );
1162 STORE( bld, *inst, 0, 0, CHAN_Z );
1163 }
1164 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1165 emit_tempf(
1166 bld,
1167 0,
1168 TEMP_ONE_I,
1169 TEMP_ONE_C );
1170 STORE( bld, *inst, 0, 0, CHAN_W );
1171 }
1172 break;
1173 #endif
1174
1175 case TGSI_OPCODE_TXB:
1176 emit_tex( bld, inst, TRUE, FALSE );
1177 break;
1178
1179 #if 0
1180 case TGSI_OPCODE_NRM:
1181 /* fall-through */
1182 case TGSI_OPCODE_NRM4:
1183 /* 3 or 4-component normalization */
1184 {
1185 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1186
1187 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
1188 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
1189 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
1190 (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
1191
1192 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1193
1194 /* xmm4 = src.x */
1195 /* xmm0 = src.x * src.x */
1196 FETCH(bld, *inst, 0, 0, CHAN_X);
1197 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
1198 emit_MOV(bld, 4, 0);
1199 }
1200 emit_mul(bld, 0, 0);
1201
1202 /* xmm5 = src.y */
1203 /* xmm0 = xmm0 + src.y * src.y */
1204 FETCH(bld, *inst, 1, 0, CHAN_Y);
1205 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1206 emit_MOV(bld, 5, 1);
1207 }
1208 emit_mul(bld, 1, 1);
1209 emit_add(bld, 0, 1);
1210
1211 /* xmm6 = src.z */
1212 /* xmm0 = xmm0 + src.z * src.z */
1213 FETCH(bld, *inst, 1, 0, CHAN_Z);
1214 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1215 emit_MOV(bld, 6, 1);
1216 }
1217 emit_mul(bld, 1, 1);
1218 emit_add(bld, 0, 1);
1219
1220 if (dims == 4) {
1221 /* xmm7 = src.w */
1222 /* xmm0 = xmm0 + src.w * src.w */
1223 FETCH(bld, *inst, 1, 0, CHAN_W);
1224 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
1225 emit_MOV(bld, 7, 1);
1226 }
1227 emit_mul(bld, 1, 1);
1228 emit_add(bld, 0, 1);
1229 }
1230
1231 /* xmm1 = 1 / sqrt(xmm0) */
1232 emit_rsqrt(bld, 1, 0);
1233
1234 /* dst.x = xmm1 * src.x */
1235 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
1236 emit_mul(bld, 4, 1);
1237 STORE(bld, *inst, 4, 0, CHAN_X);
1238 }
1239
1240 /* dst.y = xmm1 * src.y */
1241 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
1242 emit_mul(bld, 5, 1);
1243 STORE(bld, *inst, 5, 0, CHAN_Y);
1244 }
1245
1246 /* dst.z = xmm1 * src.z */
1247 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
1248 emit_mul(bld, 6, 1);
1249 STORE(bld, *inst, 6, 0, CHAN_Z);
1250 }
1251
1252 /* dst.w = xmm1 * src.w */
1253 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) {
1254 emit_mul(bld, 7, 1);
1255 STORE(bld, *inst, 7, 0, CHAN_W);
1256 }
1257 }
1258
1259 /* dst0.w = 1.0 */
1260 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
1261 emit_tempf(bld, 0, TEMP_ONE_I, TEMP_ONE_C);
1262 STORE(bld, *inst, 0, 0, CHAN_W);
1263 }
1264 }
1265 break;
1266
1267 case TGSI_OPCODE_DIV:
1268 return 0;
1269 break;
1270
1271 case TGSI_OPCODE_DP2:
1272 FETCH( bld, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
1273 FETCH( bld, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
1274 emit_mul( bld, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
1275 FETCH( bld, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
1276 FETCH( bld, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
1277 emit_mul( bld, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
1278 emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
1279 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1280 STORE( bld, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
1281 }
1282 break;
1283 #endif
1284
1285 case TGSI_OPCODE_TXL:
1286 emit_tex( bld, inst, TRUE, FALSE );
1287 break;
1288
1289 case TGSI_OPCODE_TXP:
1290 emit_tex( bld, inst, FALSE, TRUE );
1291 break;
1292
1293 #if 0
1294 case TGSI_OPCODE_BRK:
1295 return 0;
1296 break;
1297
1298 case TGSI_OPCODE_IF:
1299 return 0;
1300 break;
1301
1302 case TGSI_OPCODE_LOOP:
1303 return 0;
1304 break;
1305
1306 case TGSI_OPCODE_REP:
1307 return 0;
1308 break;
1309
1310 case TGSI_OPCODE_ELSE:
1311 return 0;
1312 break;
1313
1314 case TGSI_OPCODE_ENDIF:
1315 return 0;
1316 break;
1317
1318 case TGSI_OPCODE_ENDLOOP:
1319 return 0;
1320 break;
1321
1322 case TGSI_OPCODE_ENDREP:
1323 return 0;
1324 break;
1325
1326 case TGSI_OPCODE_PUSHA:
1327 return 0;
1328 break;
1329
1330 case TGSI_OPCODE_POPA:
1331 return 0;
1332 break;
1333
1334 case TGSI_OPCODE_CEIL:
1335 return 0;
1336 break;
1337
1338 case TGSI_OPCODE_I2F:
1339 return 0;
1340 break;
1341
1342 case TGSI_OPCODE_NOT:
1343 return 0;
1344 break;
1345
1346 case TGSI_OPCODE_TRUNC:
1347 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1348 FETCH( bld, *inst, 0, 0, chan_index );
1349 emit_f2it( bld, 0 );
1350 emit_i2f( bld, 0 );
1351 STORE( bld, *inst, 0, 0, chan_index );
1352 }
1353 break;
1354
1355 case TGSI_OPCODE_SHL:
1356 return 0;
1357 break;
1358
1359 case TGSI_OPCODE_SHR:
1360 return 0;
1361 break;
1362
1363 case TGSI_OPCODE_AND:
1364 return 0;
1365 break;
1366
1367 case TGSI_OPCODE_OR:
1368 return 0;
1369 break;
1370
1371 case TGSI_OPCODE_MOD:
1372 return 0;
1373 break;
1374
1375 case TGSI_OPCODE_XOR:
1376 return 0;
1377 break;
1378
1379 case TGSI_OPCODE_SAD:
1380 return 0;
1381 break;
1382
1383 case TGSI_OPCODE_TXF:
1384 return 0;
1385 break;
1386
1387 case TGSI_OPCODE_TXQ:
1388 return 0;
1389 break;
1390
1391 case TGSI_OPCODE_CONT:
1392 return 0;
1393 break;
1394
1395 case TGSI_OPCODE_EMIT:
1396 return 0;
1397 break;
1398
1399 case TGSI_OPCODE_ENDPRIM:
1400 return 0;
1401 break;
1402 #endif
1403
1404 default:
1405 return 0;
1406 }
1407
1408 return 1;
1409 }
1410
/**
 * Handle one TGSI declaration token.
 *
 * At present this generates nothing: the only logic, which derives the
 * value of each used input channel from its interpolation coefficients,
 * is compiled out with "#if 0", so neither argument is read.
 *
 * \param bld   the build context
 * \param decl  the declaration being processed
 */
static void
emit_declaration(
   struct lp_build_tgsi_soa_context *bld,
   struct tgsi_full_declaration *decl )
{
#if 0
   const unsigned first_attrib = decl->DeclarationRange.First;
   const unsigned last_attrib = decl->DeclarationRange.Last;
   const unsigned usage_mask = decl->Declaration.UsageMask;
   unsigned attrib, chan;
   LLVMValueRef value;

   /* Only input declarations need any values computed. */
   if( decl->Declaration.File != TGSI_FILE_INPUT )
      return;

   for( attrib = first_attrib; attrib <= last_attrib; attrib++ ) {
      for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
         /* Skip channels the declaration does not use. */
         if( !(usage_mask & (1 << chan)) )
            continue;

         switch( decl->Declaration.Interpolate ) {
         case TGSI_INTERPOLATE_CONSTANT:
            /* value = a0 */
            bld->inputs[attrib][chan] = bld->interp_coefs[attrib].a0[chan];
            break;

         case TGSI_INTERPOLATE_LINEAR:
         case TGSI_INTERPOLATE_PERSPECTIVE:
            /* value = a0 + pos.x * dadx + pos.y * dady */
            value = bld->interp_coefs[attrib].a0[chan];
            value = lp_build_add(&bld->base, value,
                                 lp_build_mul(&bld->base, bld->pos[0],
                                              bld->interp_coefs[attrib].dadx[chan]));
            value = lp_build_add(&bld->base, value,
                                 lp_build_mul(&bld->base, bld->pos[1],
                                              bld->interp_coefs[attrib].dady[chan]));

            /* Perspective interpolation additionally divides by pos[3]. */
            if( decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE )
               value = lp_build_div(&bld->base, value, bld->pos[3]);

            bld->inputs[attrib][chan] = value;
            break;

         default:
            assert( 0 );
            break;
         }
      }
   }
#endif

   /* Nothing above is compiled in, so both arguments are unused. */
   (void) bld;
   (void) decl;
}
1459
1460 /**
1461 * Translate a TGSI vertex/fragment shader to SSE2 code.
1462 * Slightly different things are done for vertex vs. fragment shaders.
1463 *
1464 * \param tokens the TGSI input shader
1465 * \param bld the output SSE code/function
1466 * \param immediates buffer to place immediates, later passed to SSE bld
1467 * \param return 1 for success, 0 if translation failed
1468 */
1469 void
1470 lp_build_tgsi_soa(LLVMBuilderRef builder,
1471 const struct tgsi_token *tokens,
1472 union lp_type type,
1473 LLVMValueRef (*inputs)[4],
1474 LLVMValueRef consts_ptr,
1475 LLVMValueRef (*outputs)[4],
1476 LLVMValueRef samplers_ptr)
1477 {
1478 struct lp_build_tgsi_soa_context bld;
1479 struct tgsi_parse_context parse;
1480 uint num_immediates = 0;
1481 unsigned i;
1482
1483 /* Setup build context */
1484 memset(&bld, 0, sizeof bld);
1485 lp_build_context_init(&bld.base, builder, type);
1486 bld.inputs = inputs;
1487 bld.outputs = outputs;
1488 bld.consts_ptr = consts_ptr;
1489 bld.samplers_ptr = samplers_ptr;
1490
1491 tgsi_parse_init( &parse, tokens );
1492
1493 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1494 tgsi_parse_token( &parse );
1495
1496 switch( parse.FullToken.Token.Type ) {
1497 case TGSI_TOKEN_TYPE_DECLARATION:
1498 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
1499 emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1500 }
1501 break;
1502
1503 case TGSI_TOKEN_TYPE_INSTRUCTION:
1504 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
1505 debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n",
1506 parse.FullToken.FullInstruction.Instruction.Opcode,
1507 parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
1508 "vertex shader" : "fragment shader");
1509 }
1510 break;
1511
1512 case TGSI_TOKEN_TYPE_IMMEDIATE:
1513 /* simply copy the immediate values into the next immediates[] slot */
1514 {
1515 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1516 assert(size <= 4);
1517 assert(num_immediates < LP_MAX_IMMEDIATES);
1518 for( i = 0; i < size; ++i )
1519 bld.immediates[num_immediates][i] =
1520 lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float);
1521 for( i = size; i < 4; ++i )
1522 bld.immediates[num_immediates][i] = bld.base.undef;
1523 num_immediates++;
1524 }
1525 break;
1526
1527 default:
1528 assert( 0 );
1529 }
1530 }
1531
1532 tgsi_parse_free( &parse );
1533 }
1534