Code re-org. Add comments.
[mesa.git] / src / mesa / pipe / tgsi / exec / tgsi_sse2.c
1 #include "tgsi_platform.h"
2 #include "tgsi_core.h"
3 #include "x86/rtasm/x86sse.h"
4
/*
 * Iterate CHAN over the four channel indices 0..3.
 *
 * CHAN is parenthesised at every use so that any lvalue expression
 * (for example *ptr) is assigned, compared and incremented as a whole.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for( (CHAN) = 0; (CHAN) < 4; (CHAN)++ )

/*
 * Non-zero when bit CHAN is set in the write mask of destination
 * register 0 of instruction INST.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Statement prefix: the statement that follows runs only when channel
 * CHAN of destination 0 is write-enabled.  This expands to a bare `if`,
 * so an `else` placed after the guarded statement binds to it.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/*
 * Loop prefix: runs the following statement once for every channel of
 * destination 0 that is write-enabled, with CHAN set to that channel.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Symbolic channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Short alias for the scratch temporary used by the call helpers. */
#define TEMP_R0 TGSI_EXEC_TEMP_R0
24
25 /**
26 * X86 utility functions.
27 */
28
29 static struct x86_reg
30 make_xmm(
31 unsigned xmm )
32 {
33 return x86_make_reg(
34 file_XMM,
35 (enum x86_reg_name) xmm );
36 }
37
38 /**
39 * X86 register mapping helpers.
40 */
41
42 static struct x86_reg
43 get_const_base( void )
44 {
45 return x86_make_reg(
46 file_REG32,
47 reg_CX );
48 }
49
50 static struct x86_reg
51 get_input_base( void )
52 {
53 return x86_make_reg(
54 file_REG32,
55 reg_AX );
56 }
57
58 static struct x86_reg
59 get_output_base( void )
60 {
61 return x86_make_reg(
62 file_REG32,
63 reg_DX );
64 }
65
66 static struct x86_reg
67 get_temp_base( void )
68 {
69 return x86_make_reg(
70 file_REG32,
71 reg_BX );
72 }
73
74 static struct x86_reg
75 get_coef_base( void )
76 {
77 return get_output_base();
78 }
79
80 /**
81 * Data access helpers.
82 */
83
84 static struct x86_reg
85 get_argument(
86 unsigned index )
87 {
88 return x86_make_disp(
89 x86_make_reg( file_REG32, reg_SP ),
90 (index + 1) * 4 );
91 }
92
93 static struct x86_reg
94 get_const(
95 unsigned vec,
96 unsigned chan )
97 {
98 return x86_make_disp(
99 get_const_base(),
100 (vec * 4 + chan) * 4 );
101 }
102
103 static struct x86_reg
104 get_input(
105 unsigned vec,
106 unsigned chan )
107 {
108 return x86_make_disp(
109 get_input_base(),
110 (vec * 4 + chan) * 16 );
111 }
112
113 static struct x86_reg
114 get_output(
115 unsigned vec,
116 unsigned chan )
117 {
118 return x86_make_disp(
119 get_output_base(),
120 (vec * 4 + chan) * 16 );
121 }
122
123 static struct x86_reg
124 get_temp(
125 unsigned vec,
126 unsigned chan )
127 {
128 return x86_make_disp(
129 get_temp_base(),
130 (vec * 4 + chan) * 16 );
131 }
132
133 static struct x86_reg
134 get_coef(
135 unsigned vec,
136 unsigned chan,
137 unsigned member )
138 {
139 return x86_make_disp(
140 get_coef_base(),
141 ((vec * 3 + member) * 4 + chan) * 4 );
142 }
143
144 /**
145 * Data fetch helpers.
146 */
147
148 static void
149 emit_const(
150 struct x86_function *func,
151 unsigned xmm,
152 unsigned vec,
153 unsigned chan )
154 {
155 sse_movss(
156 func,
157 make_xmm( xmm ),
158 get_const( vec, chan ) );
159 sse_shufps(
160 func,
161 make_xmm( xmm ),
162 make_xmm( xmm ),
163 SHUF( 0, 0, 0, 0 ) );
164 }
165
166 static void
167 emit_inputf(
168 struct x86_function *func,
169 unsigned xmm,
170 unsigned vec,
171 unsigned chan )
172 {
173 sse_movups(
174 func,
175 make_xmm( xmm ),
176 get_input( vec, chan ) );
177 }
178
179 static void
180 emit_output(
181 struct x86_function *func,
182 unsigned xmm,
183 unsigned vec,
184 unsigned chan )
185 {
186 sse_movups(
187 func,
188 get_output( vec, chan ),
189 make_xmm( xmm ) );
190 }
191
192 static void
193 emit_tempf(
194 struct x86_function *func,
195 unsigned xmm,
196 unsigned vec,
197 unsigned chan )
198 {
199 sse_movaps(
200 func,
201 make_xmm( xmm ),
202 get_temp( vec, chan ) );
203 }
204
205 static void
206 emit_coef(
207 struct x86_function *func,
208 unsigned xmm,
209 unsigned vec,
210 unsigned chan,
211 unsigned member )
212 {
213 sse_movss(
214 func,
215 make_xmm( xmm ),
216 get_coef( vec, chan, member ) );
217 sse_shufps(
218 func,
219 make_xmm( xmm ),
220 make_xmm( xmm ),
221 SHUF( 0, 0, 0, 0 ) );
222 }
223
224 /**
225 * Data store helpers.
226 */
227
228 static void
229 emit_inputs(
230 struct x86_function *func,
231 unsigned xmm,
232 unsigned vec,
233 unsigned chan )
234 {
235 sse_movups(
236 func,
237 get_input( vec, chan ),
238 make_xmm( xmm ) );
239 }
240
241 static void
242 emit_temps(
243 struct x86_function *func,
244 unsigned xmm,
245 unsigned vec,
246 unsigned chan )
247 {
248 sse_movaps(
249 func,
250 get_temp( vec, chan ),
251 make_xmm( xmm ) );
252 }
253
254 static void
255 emit_addrs(
256 struct x86_function *func,
257 unsigned xmm,
258 unsigned vec,
259 unsigned chan )
260 {
261 emit_temps(
262 func,
263 xmm,
264 vec + TGSI_EXEC_NUM_TEMPS,
265 chan );
266 }
267
268 /**
269 * Coefficient fetch helpers.
270 */
271
/**
 * Load coefficient member 0 of (vec, chan) into XMM register `xmm`
 * (the "a0" term, going by the function name).
 */
static void
emit_coef_a0( struct x86_function *func, unsigned xmm, unsigned vec, unsigned chan )
{
   const unsigned member_a0 = 0;

   emit_coef( func, xmm, vec, chan, member_a0 );
}
286
/**
 * Load coefficient member 1 of (vec, chan) into XMM register `xmm`
 * (the "da/dx" term, going by the function name).
 */
static void
emit_coef_dadx( struct x86_function *func, unsigned xmm, unsigned vec, unsigned chan )
{
   const unsigned member_dadx = 1;

   emit_coef( func, xmm, vec, chan, member_dadx );
}
301
/**
 * Load coefficient member 2 of (vec, chan) into XMM register `xmm`
 * (the "da/dy" term, going by the function name).
 */
static void
emit_coef_dady( struct x86_function *func, unsigned xmm, unsigned vec, unsigned chan )
{
   const unsigned member_dady = 2;

   emit_coef( func, xmm, vec, chan, member_dady );
}
316
317 /**
318 * Function call helpers.
319 */
320
/**
 * Emit pushes of the four general-purpose base registers, in the
 * order const, input, output, temp.  emit_pop_gp() undoes this.
 */
static void
emit_push_gp( struct x86_function *func )
{
   x86_push( func, get_const_base() );
   x86_push( func, get_input_base() );
   x86_push( func, get_output_base() );

   /* The temp base must be the final push on non-win32 platforms:
    * the helper routines called afterwards index their pointer
    * argument relative to the temp base.
    */
   x86_push( func, get_temp_base() );
}
341
/**
 * Emit pops of the four general-purpose base registers, mirroring
 * emit_push_gp(): temp, output, input, const.
 */
static void
emit_pop_gp( struct x86_function *func )
{
   /* Last pushed, first popped. */
   x86_pop( func, get_temp_base() );
   x86_pop( func, get_output_base() );
   x86_pop( func, get_input_base() );
   x86_pop( func, get_const_base() );
}
361
362 static void
363 emit_func_call_dst(
364 struct x86_function *func,
365 unsigned xmm_dst,
366 void (*code)() )
367 {
368 sse_movaps(
369 func,
370 get_temp( TEMP_R0, 0 ),
371 make_xmm( xmm_dst ) );
372
373 emit_push_gp(
374 func );
375
376 #ifdef WIN32
377 x86_push(
378 func,
379 get_temp( TEMP_R0, 0 ) );
380 #endif
381
382 x86_call(
383 func,
384 code );
385
386 emit_pop_gp(
387 func );
388
389 sse_movaps(
390 func,
391 make_xmm( xmm_dst ),
392 get_temp( TEMP_R0, 0 ) );
393 }
394
395 static void
396 emit_func_call_dst_src(
397 struct x86_function *func,
398 unsigned xmm_dst,
399 unsigned xmm_src,
400 void (*code)() )
401 {
402 sse_movaps(
403 func,
404 get_temp( TEMP_R0, 1 ),
405 make_xmm( xmm_src ) );
406
407 emit_func_call_dst(
408 func,
409 xmm_dst,
410 code );
411 }
412
413 /**
414 * Low-level instruction translators.
415 */
416
417 static void
418 emit_abs(
419 struct x86_function *func,
420 unsigned xmm )
421 {
422 sse_andps(
423 func,
424 make_xmm( xmm ),
425 get_temp(
426 TGSI_EXEC_TEMP_7FFFFFFF_I,
427 TGSI_EXEC_TEMP_7FFFFFFF_C ) );
428 }
429
430 static void
431 emit_add(
432 struct x86_function *func,
433 unsigned xmm_dst,
434 unsigned xmm_src )
435 {
436 sse_addps(
437 func,
438 make_xmm( xmm_dst ),
439 make_xmm( xmm_src ) );
440 }
441
442 static void XSTDCALL
443 cos4f(
444 float *store )
445 {
446 #ifdef WIN32
447 store[0] = (float) cos( (double) store[0] );
448 store[1] = (float) cos( (double) store[1] );
449 store[2] = (float) cos( (double) store[2] );
450 store[3] = (float) cos( (double) store[3] );
451 #else
452 const unsigned X = TEMP_R0 * 16;
453 store[X + 0] = cosf( store[X + 0] );
454 store[X + 1] = cosf( store[X + 1] );
455 store[X + 2] = cosf( store[X + 2] );
456 store[X + 3] = cosf( store[X + 3] );
457 #endif
458 }
459
460 static void
461 emit_cos(
462 struct x86_function *func,
463 unsigned xmm_dst )
464 {
465 emit_func_call_dst(
466 func,
467 xmm_dst,
468 cos4f );
469 }
470
471 static void XSTDCALL
472 ex24f(
473 float *store )
474 {
475 #ifdef WIN32
476 store[0] = (float) pow( 2.0, (double) store[0] );
477 store[1] = (float) pow( 2.0, (double) store[1] );
478 store[2] = (float) pow( 2.0, (double) store[2] );
479 store[3] = (float) pow( 2.0, (double) store[3] );
480 #else
481 const unsigned X = TEMP_R0 * 16;
482 store[X + 0] = powf( 2.0f, store[X + 0] );
483 store[X + 1] = powf( 2.0f, store[X + 1] );
484 store[X + 2] = powf( 2.0f, store[X + 2] );
485 store[X + 3] = powf( 2.0f, store[X + 3] );
486 #endif
487 }
488
489 static void
490 emit_ex2(
491 struct x86_function *func,
492 unsigned xmm_dst )
493 {
494 emit_func_call_dst(
495 func,
496 xmm_dst,
497 ex24f );
498 }
499
500 static void
501 emit_f2it(
502 struct x86_function *func,
503 unsigned xmm )
504 {
505 sse2_cvttps2dq(
506 func,
507 make_xmm( xmm ),
508 make_xmm( xmm ) );
509 }
510
511 static void XSTDCALL
512 flr4f(
513 float *store )
514 {
515 #ifdef WIN32
516 const unsigned X = 0;
517 #else
518 const unsigned X = TEMP_R0 * 16;
519 #endif
520 store[X + 0] = (float) floor( (double) store[X + 0] );
521 store[X + 1] = (float) floor( (double) store[X + 1] );
522 store[X + 2] = (float) floor( (double) store[X + 2] );
523 store[X + 3] = (float) floor( (double) store[X + 3] );
524 }
525
526 static void
527 emit_flr(
528 struct x86_function *func,
529 unsigned xmm_dst )
530 {
531 emit_func_call_dst(
532 func,
533 xmm_dst,
534 flr4f );
535 }
536
537 static void XSTDCALL
538 frc4f(
539 float *store )
540 {
541 #ifdef WIN32
542 const unsigned X = 0;
543 #else
544 const unsigned X = TEMP_R0 * 16;
545 #endif
546 store[X + 0] -= (float) floor( (double) store[X + 0] );
547 store[X + 1] -= (float) floor( (double) store[X + 1] );
548 store[X + 2] -= (float) floor( (double) store[X + 2] );
549 store[X + 3] -= (float) floor( (double) store[X + 3] );
550 }
551
552 static void
553 emit_frc(
554 struct x86_function *func,
555 unsigned xmm_dst )
556 {
557 emit_func_call_dst(
558 func,
559 xmm_dst,
560 frc4f );
561 }
562
563 static void XSTDCALL
564 lg24f(
565 float *store )
566 {
567 #ifdef WIN32
568 const unsigned X = 0;
569 #else
570 const unsigned X = TEMP_R0 * 16;
571 #endif
572 store[X + 0] = LOG2( store[X + 0] );
573 store[X + 1] = LOG2( store[X + 1] );
574 store[X + 2] = LOG2( store[X + 2] );
575 store[X + 3] = LOG2( store[X + 3] );
576 }
577
578 static void
579 emit_lg2(
580 struct x86_function *func,
581 unsigned xmm_dst )
582 {
583 emit_func_call_dst(
584 func,
585 xmm_dst,
586 lg24f );
587 }
588
589 static void
590 emit_mov(
591 struct x86_function *func,
592 unsigned xmm_dst,
593 unsigned xmm_src )
594 {
595 sse_movups(
596 func,
597 make_xmm( xmm_dst ),
598 make_xmm( xmm_src ) );
599 }
600
601 static void
602 emit_mul (struct x86_function *func,
603 unsigned xmm_dst,
604 unsigned xmm_src)
605 {
606 sse_mulps(
607 func,
608 make_xmm( xmm_dst ),
609 make_xmm( xmm_src ) );
610 }
611
612 static void
613 emit_neg(
614 struct x86_function *func,
615 unsigned xmm )
616 {
617 sse_xorps(
618 func,
619 make_xmm( xmm ),
620 get_temp(
621 TGSI_EXEC_TEMP_80000000_I,
622 TGSI_EXEC_TEMP_80000000_C ) );
623 }
624
625 static void XSTDCALL
626 pow4f(
627 float *store )
628 {
629 #ifdef WIN32
630 store[0] = (float) pow( (double) store[0], (double) store[4] );
631 store[1] = (float) pow( (double) store[1], (double) store[5] );
632 store[2] = (float) pow( (double) store[2], (double) store[6] );
633 store[3] = (float) pow( (double) store[3], (double) store[7] );
634 #else
635 const unsigned X = TEMP_R0 * 16;
636 store[X + 0] = powf( store[X + 0], store[X + 4] );
637 store[X + 1] = powf( store[X + 1], store[X + 5] );
638 store[X + 2] = powf( store[X + 2], store[X + 6] );
639 store[X + 3] = powf( store[X + 3], store[X + 7] );
640 #endif
641 }
642
643 static void
644 emit_pow(
645 struct x86_function *func,
646 unsigned xmm_dst,
647 unsigned xmm_src )
648 {
649 emit_func_call_dst_src(
650 func,
651 xmm_dst,
652 xmm_src,
653 pow4f );
654 }
655
656 static void
657 emit_rcp (
658 struct x86_function *func,
659 unsigned xmm_dst,
660 unsigned xmm_src )
661 {
662 sse2_rcpps(
663 func,
664 make_xmm( xmm_dst ),
665 make_xmm( xmm_src ) );
666 }
667
668 static void
669 emit_rsqrt(
670 struct x86_function *func,
671 unsigned xmm_dst,
672 unsigned xmm_src )
673 {
674 sse_rsqrtps(
675 func,
676 make_xmm( xmm_dst ),
677 make_xmm( xmm_src ) );
678 }
679
680 static void
681 emit_setsign(
682 struct x86_function *func,
683 unsigned xmm )
684 {
685 sse_orps(
686 func,
687 make_xmm( xmm ),
688 get_temp(
689 TGSI_EXEC_TEMP_80000000_I,
690 TGSI_EXEC_TEMP_80000000_C ) );
691 }
692
693 static void XSTDCALL
694 sin4f(
695 float *store )
696 {
697 #ifdef WIN32
698 store[0] = (float) sin( (double) store[0] );
699 store[1] = (float) sin( (double) store[1] );
700 store[2] = (float) sin( (double) store[2] );
701 store[3] = (float) sin( (double) store[3] );
702 #else
703 const unsigned X = TEMP_R0 * 16;
704 store[X + 0] = sinf( store[X + 0] );
705 store[X + 1] = sinf( store[X + 1] );
706 store[X + 2] = sinf( store[X + 2] );
707 store[X + 3] = sinf( store[X + 3] );
708 #endif
709 }
710
711 static void
712 emit_sin (struct x86_function *func,
713 unsigned xmm_dst)
714 {
715 emit_func_call_dst(
716 func,
717 xmm_dst,
718 sin4f );
719 }
720
721 static void
722 emit_sub(
723 struct x86_function *func,
724 unsigned xmm_dst,
725 unsigned xmm_src )
726 {
727 sse_subps(
728 func,
729 make_xmm( xmm_dst ),
730 make_xmm( xmm_src ) );
731 }
732
733 /**
734 * Register fetch.
735 */
736
737 static void
738 emit_fetch(
739 struct x86_function *func,
740 unsigned xmm,
741 const struct tgsi_full_src_register *reg,
742 const unsigned chan_index )
743 {
744 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
745
746 switch( swizzle ) {
747 case TGSI_EXTSWIZZLE_X:
748 case TGSI_EXTSWIZZLE_Y:
749 case TGSI_EXTSWIZZLE_Z:
750 case TGSI_EXTSWIZZLE_W:
751 switch( reg->SrcRegister.File ) {
752 case TGSI_FILE_CONSTANT:
753 emit_const(
754 func,
755 xmm,
756 reg->SrcRegister.Index,
757 swizzle );
758 break;
759
760 case TGSI_FILE_INPUT:
761 emit_inputf(
762 func,
763 xmm,
764 reg->SrcRegister.Index,
765 swizzle );
766 break;
767
768 case TGSI_FILE_TEMPORARY:
769 emit_tempf(
770 func,
771 xmm,
772 reg->SrcRegister.Index,
773 swizzle );
774 break;
775
776 default:
777 assert( 0 );
778 }
779 break;
780
781 case TGSI_EXTSWIZZLE_ZERO:
782 emit_tempf(
783 func,
784 xmm,
785 TGSI_EXEC_TEMP_00000000_I,
786 TGSI_EXEC_TEMP_00000000_C );
787 break;
788
789 case TGSI_EXTSWIZZLE_ONE:
790 emit_tempf(
791 func,
792 xmm,
793 TGSI_EXEC_TEMP_ONE_I,
794 TGSI_EXEC_TEMP_ONE_C );
795 break;
796
797 default:
798 assert( 0 );
799 }
800
801 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
802 case TGSI_UTIL_SIGN_CLEAR:
803 emit_abs( func, xmm );
804 break;
805
806 case TGSI_UTIL_SIGN_SET:
807 emit_setsign( func, xmm );
808 break;
809
810 case TGSI_UTIL_SIGN_TOGGLE:
811 emit_neg( func, xmm );
812 break;
813
814 case TGSI_UTIL_SIGN_KEEP:
815 break;
816 }
817 }
818
/* Fetch channel CHAN of source operand INDEX of INST into register XMM. */
#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
   emit_fetch( (FUNC), (XMM), &(INST).FullSrcRegisters[(INDEX)], (CHAN) )
821
822 /**
823 * Register store.
824 */
825
826 static void
827 emit_store(
828 struct x86_function *func,
829 unsigned xmm,
830 const struct tgsi_full_dst_register *reg,
831 const struct tgsi_full_instruction *inst,
832 unsigned chan_index )
833 {
834 switch( reg->DstRegister.File ) {
835 case TGSI_FILE_OUTPUT:
836 emit_output(
837 func,
838 xmm,
839 reg->DstRegister.Index,
840 chan_index );
841 break;
842
843 case TGSI_FILE_TEMPORARY:
844 emit_temps(
845 func,
846 xmm,
847 reg->DstRegister.Index,
848 chan_index );
849 break;
850
851 case TGSI_FILE_ADDRESS:
852 emit_addrs(
853 func,
854 xmm,
855 reg->DstRegister.Index,
856 chan_index );
857 break;
858
859 default:
860 assert( 0 );
861 }
862
863 switch( inst->Instruction.Saturate ) {
864 case TGSI_SAT_NONE:
865 break;
866
867 case TGSI_SAT_ZERO_ONE:
868 // assert( 0 );
869 break;
870
871 case TGSI_SAT_MINUS_PLUS_ONE:
872 assert( 0 );
873 break;
874 }
875 }
876
/* Store register XMM to channel CHAN of destination operand INDEX of INST. */
#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
   emit_store( (FUNC), (XMM), &(INST).FullDstRegisters[(INDEX)], &(INST), (CHAN) )
879
880 /**
881 * High-level instruction translators.
882 */
883
884 static void
885 emit_kil(
886 struct x86_function *func,
887 const struct tgsi_full_src_register *reg )
888 {
889 unsigned uniquemask;
890 unsigned registers[4];
891 unsigned nextregister = 0;
892 unsigned firstchan = ~0;
893 unsigned chan_index;
894
895 /* This mask stores component bits that were already tested. Note that
896 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
897 * tested. */
898 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
899
900 FOR_EACH_CHANNEL( chan_index ) {
901 unsigned swizzle;
902
903 /* unswizzle channel */
904 swizzle = tgsi_util_get_full_src_register_extswizzle(
905 reg,
906 chan_index );
907
908 /* check if the component has not been already tested */
909 if( !(uniquemask & (1 << swizzle)) ) {
910 uniquemask |= 1 << swizzle;
911
912 /* allocate register */
913 registers[chan_index] = nextregister;
914 emit_fetch(
915 func,
916 nextregister,
917 reg,
918 chan_index );
919 nextregister++;
920
921 /* mark the first channel used */
922 if( firstchan == ~0 ) {
923 firstchan = chan_index;
924 }
925 }
926 }
927
928 x86_push(
929 func,
930 x86_make_reg( file_REG32, reg_AX ) );
931 x86_push(
932 func,
933 x86_make_reg( file_REG32, reg_DX ) );
934
935 FOR_EACH_CHANNEL( chan_index ) {
936 if( uniquemask & (1 << chan_index) ) {
937 sse_cmpps(
938 func,
939 make_xmm( registers[chan_index] ),
940 get_temp(
941 TGSI_EXEC_TEMP_00000000_I,
942 TGSI_EXEC_TEMP_00000000_C ),
943 cc_LessThan );
944
945 if( chan_index == firstchan ) {
946 sse_pmovmskb(
947 func,
948 x86_make_reg( file_REG32, reg_AX ),
949 make_xmm( registers[chan_index] ) );
950 }
951 else {
952 sse_pmovmskb(
953 func,
954 x86_make_reg( file_REG32, reg_DX ),
955 make_xmm( registers[chan_index] ) );
956 x86_or(
957 func,
958 x86_make_reg( file_REG32, reg_AX ),
959 x86_make_reg( file_REG32, reg_DX ) );
960 }
961 }
962 }
963
964 x86_or(
965 func,
966 get_temp(
967 TGSI_EXEC_TEMP_KILMASK_I,
968 TGSI_EXEC_TEMP_KILMASK_C ),
969 x86_make_reg( file_REG32, reg_AX ) );
970
971 x86_pop(
972 func,
973 x86_make_reg( file_REG32, reg_DX ) );
974 x86_pop(
975 func,
976 x86_make_reg( file_REG32, reg_AX ) );
977 }
978
979 static void
980 emit_setcc(
981 struct x86_function *func,
982 struct tgsi_full_instruction *inst,
983 enum sse_cc cc )
984 {
985 unsigned chan_index;
986
987 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
988 FETCH( func, *inst, 0, 0, chan_index );
989 FETCH( func, *inst, 1, 1, chan_index );
990 sse_cmpps(
991 func,
992 make_xmm( 0 ),
993 make_xmm( 1 ),
994 cc );
995 sse_andps(
996 func,
997 make_xmm( 0 ),
998 get_temp(
999 TGSI_EXEC_TEMP_ONE_I,
1000 TGSI_EXEC_TEMP_ONE_C ) );
1001 STORE( func, *inst, 0, 0, chan_index );
1002 }
1003 }
1004
1005 static void
1006 emit_cmp(
1007 struct x86_function *func,
1008 struct tgsi_full_instruction *inst )
1009 {
1010 unsigned chan_index;
1011
1012 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1013 FETCH( func, *inst, 0, 0, chan_index );
1014 FETCH( func, *inst, 1, 1, chan_index );
1015 FETCH( func, *inst, 2, 2, chan_index );
1016 sse_cmpps(
1017 func,
1018 make_xmm( 0 ),
1019 get_temp(
1020 TGSI_EXEC_TEMP_00000000_I,
1021 TGSI_EXEC_TEMP_00000000_C ),
1022 cc_LessThan );
1023 sse_andps(
1024 func,
1025 make_xmm( 1 ),
1026 make_xmm( 0 ) );
1027 sse_andnps(
1028 func,
1029 make_xmm( 0 ),
1030 make_xmm( 2 ) );
1031 sse_orps(
1032 func,
1033 make_xmm( 0 ),
1034 make_xmm( 1 ) );
1035 STORE( func, *inst, 0, 0, chan_index );
1036 }
1037 }
1038
1039 static void
1040 emit_instruction(
1041 struct x86_function *func,
1042 struct tgsi_full_instruction *inst )
1043 {
1044 unsigned chan_index;
1045
1046 switch( inst->Instruction.Opcode ) {
1047 case TGSI_OPCODE_ARL:
1048 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1049 FETCH( func, *inst, 0, 0, chan_index );
1050 emit_f2it( func, 0 );
1051 STORE( func, *inst, 0, 0, chan_index );
1052 }
1053 break;
1054
1055 case TGSI_OPCODE_MOV:
1056 /* TGSI_OPCODE_SWZ */
1057 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1058 FETCH( func, *inst, 0, 0, chan_index );
1059 STORE( func, *inst, 0, 0, chan_index );
1060 }
1061 break;
1062
1063 case TGSI_OPCODE_LIT:
1064 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
1065 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1066 emit_tempf(
1067 func,
1068 0,
1069 TGSI_EXEC_TEMP_ONE_I,
1070 TGSI_EXEC_TEMP_ONE_C);
1071 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
1072 STORE( func, *inst, 0, 0, CHAN_X );
1073 }
1074 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1075 STORE( func, *inst, 0, 0, CHAN_W );
1076 }
1077 }
1078 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
1079 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1080 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1081 FETCH( func, *inst, 0, 0, CHAN_X );
1082 sse_maxps(
1083 func,
1084 make_xmm( 0 ),
1085 get_temp(
1086 TGSI_EXEC_TEMP_00000000_I,
1087 TGSI_EXEC_TEMP_00000000_C ) );
1088 STORE( func, *inst, 0, 0, CHAN_Y );
1089 }
1090 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1091 FETCH( func, *inst, 1, 0, CHAN_Y );
1092 sse_maxps(
1093 func,
1094 make_xmm( 1 ),
1095 get_temp(
1096 TGSI_EXEC_TEMP_00000000_I,
1097 TGSI_EXEC_TEMP_00000000_C ) );
1098 FETCH( func, *inst, 2, 0, CHAN_W );
1099 sse_minps(
1100 func,
1101 make_xmm( 2 ),
1102 get_temp(
1103 TGSI_EXEC_TEMP_128_I,
1104 TGSI_EXEC_TEMP_128_C ) );
1105 sse_maxps(
1106 func,
1107 make_xmm( 2 ),
1108 get_temp(
1109 TGSI_EXEC_TEMP_MINUS_128_I,
1110 TGSI_EXEC_TEMP_MINUS_128_C ) );
1111 emit_pow( func, 1, 2 );
1112 FETCH( func, *inst, 0, 0, CHAN_X );
1113 sse_xorps(
1114 func,
1115 make_xmm( 2 ),
1116 make_xmm( 2 ) );
1117 sse_cmpps(
1118 func,
1119 make_xmm( 2 ),
1120 make_xmm( 0 ),
1121 cc_LessThanEqual );
1122 sse_andps(
1123 func,
1124 make_xmm( 2 ),
1125 make_xmm( 1 ) );
1126 STORE( func, *inst, 2, 0, CHAN_Z );
1127 }
1128 }
1129 break;
1130
1131 case TGSI_OPCODE_RCP:
1132 /* TGSI_OPCODE_RECIP */
1133 FETCH( func, *inst, 0, 0, CHAN_X );
1134 emit_rcp( func, 0, 0 );
1135 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1136 STORE( func, *inst, 0, 0, chan_index );
1137 }
1138 break;
1139
1140 case TGSI_OPCODE_RSQ:
1141 /* TGSI_OPCODE_RECIPSQRT */
1142 FETCH( func, *inst, 0, 0, CHAN_X );
1143 emit_rsqrt( func, 0, 0 );
1144 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1145 STORE( func, *inst, 0, 0, chan_index );
1146 }
1147 break;
1148
1149 case TGSI_OPCODE_EXP:
1150 assert( 0 );
1151 break;
1152
1153 case TGSI_OPCODE_LOG:
1154 assert( 0 );
1155 break;
1156
1157 case TGSI_OPCODE_MUL:
1158 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1159 FETCH( func, *inst, 0, 0, chan_index );
1160 FETCH( func, *inst, 1, 1, chan_index );
1161 emit_mul( func, 0, 1 );
1162 STORE( func, *inst, 0, 0, chan_index );
1163 }
1164 break;
1165
1166 case TGSI_OPCODE_ADD:
1167 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1168 FETCH( func, *inst, 0, 0, chan_index );
1169 FETCH( func, *inst, 1, 1, chan_index );
1170 emit_add( func, 0, 1 );
1171 STORE( func, *inst, 0, 0, chan_index );
1172 }
1173 break;
1174
1175 case TGSI_OPCODE_DP3:
1176 /* TGSI_OPCODE_DOT3 */
1177 FETCH( func, *inst, 0, 0, CHAN_X );
1178 FETCH( func, *inst, 1, 1, CHAN_X );
1179 emit_mul( func, 0, 1 );
1180 FETCH( func, *inst, 1, 0, CHAN_Y );
1181 FETCH( func, *inst, 2, 1, CHAN_Y );
1182 emit_mul( func, 1, 2 );
1183 emit_add( func, 0, 1 );
1184 FETCH( func, *inst, 1, 0, CHAN_Z );
1185 FETCH( func, *inst, 2, 1, CHAN_Z );
1186 emit_mul( func, 1, 2 );
1187 emit_add( func, 0, 1 );
1188 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1189 STORE( func, *inst, 0, 0, chan_index );
1190 }
1191 break;
1192
1193 case TGSI_OPCODE_DP4:
1194 /* TGSI_OPCODE_DOT4 */
1195 FETCH( func, *inst, 0, 0, CHAN_X );
1196 FETCH( func, *inst, 1, 1, CHAN_X );
1197 emit_mul( func, 0, 1 );
1198 FETCH( func, *inst, 1, 0, CHAN_Y );
1199 FETCH( func, *inst, 2, 1, CHAN_Y );
1200 emit_mul( func, 1, 2 );
1201 emit_add( func, 0, 1 );
1202 FETCH( func, *inst, 1, 0, CHAN_Z );
1203 FETCH( func, *inst, 2, 1, CHAN_Z );
1204 emit_mul(func, 1, 2 );
1205 emit_add(func, 0, 1 );
1206 FETCH( func, *inst, 1, 0, CHAN_W );
1207 FETCH( func, *inst, 2, 1, CHAN_W );
1208 emit_mul( func, 1, 2 );
1209 emit_add( func, 0, 1 );
1210 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1211 STORE( func, *inst, 0, 0, chan_index );
1212 }
1213 break;
1214
1215 case TGSI_OPCODE_DST:
1216 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1217 emit_tempf(
1218 func,
1219 0,
1220 TGSI_EXEC_TEMP_ONE_I,
1221 TGSI_EXEC_TEMP_ONE_C );
1222 STORE( func, *inst, 0, 0, CHAN_X );
1223 }
1224 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1225 FETCH( func, *inst, 0, 0, CHAN_Y );
1226 FETCH( func, *inst, 1, 1, CHAN_Y );
1227 emit_mul( func, 0, 1 );
1228 STORE( func, *inst, 0, 0, CHAN_Y );
1229 }
1230 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1231 FETCH( func, *inst, 0, 0, CHAN_Z );
1232 STORE( func, *inst, 0, 0, CHAN_Z );
1233 }
1234 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1235 FETCH( func, *inst, 0, 1, CHAN_W );
1236 STORE( func, *inst, 0, 0, CHAN_W );
1237 }
1238 break;
1239
1240 case TGSI_OPCODE_MIN:
1241 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1242 FETCH( func, *inst, 0, 0, chan_index );
1243 FETCH( func, *inst, 1, 1, chan_index );
1244 sse_minps(
1245 func,
1246 make_xmm( 0 ),
1247 make_xmm( 1 ) );
1248 STORE( func, *inst, 0, 0, chan_index );
1249 }
1250 break;
1251
1252 case TGSI_OPCODE_MAX:
1253 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1254 FETCH( func, *inst, 0, 0, chan_index );
1255 FETCH( func, *inst, 1, 1, chan_index );
1256 sse_maxps(
1257 func,
1258 make_xmm( 0 ),
1259 make_xmm( 1 ) );
1260 STORE( func, *inst, 0, 0, chan_index );
1261 }
1262 break;
1263
1264 case TGSI_OPCODE_SLT:
1265 /* TGSI_OPCODE_SETLT */
1266 emit_setcc( func, inst, cc_LessThan );
1267 break;
1268
1269 case TGSI_OPCODE_SGE:
1270 /* TGSI_OPCODE_SETGE */
1271 emit_setcc( func, inst, cc_NotLessThan );
1272 break;
1273
1274 case TGSI_OPCODE_MAD:
1275 /* TGSI_OPCODE_MADD */
1276 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1277 FETCH( func, *inst, 0, 0, chan_index );
1278 FETCH( func, *inst, 1, 1, chan_index );
1279 FETCH( func, *inst, 2, 2, chan_index );
1280 emit_mul( func, 0, 1 );
1281 emit_add( func, 0, 2 );
1282 STORE( func, *inst, 0, 0, chan_index );
1283 }
1284 break;
1285
1286 case TGSI_OPCODE_SUB:
1287 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1288 FETCH( func, *inst, 0, 0, chan_index );
1289 FETCH( func, *inst, 1, 1, chan_index );
1290 emit_sub( func, 0, 1 );
1291 STORE( func, *inst, 0, 0, chan_index );
1292 }
1293 break;
1294
1295 case TGSI_OPCODE_LERP:
1296 /* TGSI_OPCODE_LRP */
1297 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1298 FETCH( func, *inst, 0, 0, chan_index );
1299 FETCH( func, *inst, 1, 1, chan_index );
1300 FETCH( func, *inst, 2, 2, chan_index );
1301 emit_sub( func, 1, 2 );
1302 emit_mul( func, 0, 1 );
1303 emit_add( func, 0, 2 );
1304 STORE( func, *inst, 0, 0, chan_index );
1305 }
1306 break;
1307
1308 case TGSI_OPCODE_CND:
1309 assert( 0 );
1310 break;
1311
1312 case TGSI_OPCODE_CND0:
1313 assert( 0 );
1314 break;
1315
1316 case TGSI_OPCODE_DOT2ADD:
1317 /* TGSI_OPCODE_DP2A */
1318 assert( 0 );
1319 break;
1320
1321 case TGSI_OPCODE_INDEX:
1322 assert( 0 );
1323 break;
1324
1325 case TGSI_OPCODE_NEGATE:
1326 assert( 0 );
1327 break;
1328
1329 case TGSI_OPCODE_FRAC:
1330 /* TGSI_OPCODE_FRC */
1331 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1332 FETCH( func, *inst, 0, 0, chan_index );
1333 emit_frc( func, 0 );
1334 STORE( func, *inst, 0, 0, chan_index );
1335 }
1336 break;
1337
1338 case TGSI_OPCODE_CLAMP:
1339 assert( 0 );
1340 break;
1341
1342 case TGSI_OPCODE_FLOOR:
1343 /* TGSI_OPCODE_FLR */
1344 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1345 FETCH( func, *inst, 0, 0, chan_index );
1346 emit_flr( func, 0 );
1347 STORE( func, *inst, 0, 0, chan_index );
1348 }
1349 break;
1350
1351 case TGSI_OPCODE_ROUND:
1352 assert( 0 );
1353 break;
1354
1355 case TGSI_OPCODE_EXPBASE2:
1356 /* TGSI_OPCODE_EX2 */
1357 FETCH( func, *inst, 0, 0, CHAN_X );
1358 emit_ex2( func, 0 );
1359 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1360 STORE( func, *inst, 0, 0, chan_index );
1361 }
1362 break;
1363
1364 case TGSI_OPCODE_LOGBASE2:
1365 /* TGSI_OPCODE_LG2 */
1366 FETCH( func, *inst, 0, 0, CHAN_X );
1367 emit_lg2( func, 0 );
1368 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1369 STORE( func, *inst, 0, 0, chan_index );
1370 }
1371 break;
1372
1373 case TGSI_OPCODE_POWER:
1374 /* TGSI_OPCODE_POW */
1375 FETCH( func, *inst, 0, 0, CHAN_X );
1376 FETCH( func, *inst, 1, 1, CHAN_X );
1377 emit_pow( func, 0, 1 );
1378 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1379 STORE( func, *inst, 0, 0, chan_index );
1380 }
1381 break;
1382
1383 case TGSI_OPCODE_CROSSPRODUCT:
1384 /* TGSI_OPCODE_XPD */
1385 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
1386 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1387 FETCH( func, *inst, 1, 1, CHAN_Z );
1388 FETCH( func, *inst, 3, 0, CHAN_Z );
1389 }
1390 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
1391 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1392 FETCH( func, *inst, 0, 0, CHAN_Y );
1393 FETCH( func, *inst, 4, 1, CHAN_Y );
1394 }
1395 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1396 emit_mov( func, 2, 0 );
1397 emit_mul( func, 2, 1 );
1398 emit_mov( func, 5, 3 );
1399 emit_mul( func, 5, 4 );
1400 emit_sub( func, 2, 5 );
1401 STORE( func, *inst, 2, 0, CHAN_X );
1402 }
1403 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
1404 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1405 FETCH( func, *inst, 2, 1, CHAN_X );
1406 FETCH( func, *inst, 5, 0, CHAN_X );
1407 }
1408 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1409 emit_mul( func, 3, 2 );
1410 emit_mul( func, 1, 5 );
1411 emit_sub( func, 3, 1 );
1412 STORE( func, *inst, 3, 0, CHAN_Y );
1413 }
1414 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1415 emit_mul( func, 5, 4 );
1416 emit_mul( func, 0, 2 );
1417 emit_sub( func, 5, 0 );
1418 STORE( func, *inst, 5, 0, CHAN_Z );
1419 }
1420 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1421 FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
1422 STORE( func, *inst, 0, 0, CHAN_W );
1423 }
1424 break;
1425
1426 case TGSI_OPCODE_MULTIPLYMATRIX:
1427 assert( 0 );
1428 break;
1429
1430 case TGSI_OPCODE_ABS:
1431 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1432 FETCH( func, *inst, 0, 0, chan_index );
1433 emit_abs( func, 0) ;
1434
1435 STORE( func, *inst, 0, 0, chan_index );
1436 }
1437 break;
1438
1439 case TGSI_OPCODE_RCC:
1440 assert( 0 );
1441 break;
1442
1443 case TGSI_OPCODE_DPH:
1444 FETCH( func, *inst, 0, 0, CHAN_X );
1445 FETCH( func, *inst, 1, 1, CHAN_X );
1446 emit_mul( func, 0, 1 );
1447 FETCH( func, *inst, 1, 0, CHAN_Y );
1448 FETCH( func, *inst, 2, 1, CHAN_Y );
1449 emit_mul( func, 1, 2 );
1450 emit_add( func, 0, 1 );
1451 FETCH( func, *inst, 1, 0, CHAN_Z );
1452 FETCH( func, *inst, 2, 1, CHAN_Z );
1453 emit_mul( func, 1, 2 );
1454 emit_add( func, 0, 1 );
1455 FETCH( func, *inst, 1, 1, CHAN_W );
1456 emit_add( func, 0, 1 );
1457 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1458 STORE( func, *inst, 0, 0, chan_index );
1459 }
1460 break;
1461
1462 case TGSI_OPCODE_COS:
1463 FETCH( func, *inst, 0, 0, CHAN_X );
1464 emit_cos( func, 0 );
1465 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1466 STORE( func, *inst, 0, 0, chan_index );
1467 }
1468 break;
1469
1470 case TGSI_OPCODE_DDX:
1471 assert( 0 );
1472 break;
1473
1474 case TGSI_OPCODE_DDY:
1475 assert( 0 );
1476 break;
1477
1478 case TGSI_OPCODE_KIL:
1479 emit_kil( func, &inst->FullSrcRegisters[0] );
1480 break;
1481
1482 case TGSI_OPCODE_PK2H:
1483 assert( 0 );
1484 break;
1485
1486 case TGSI_OPCODE_PK2US:
1487 assert( 0 );
1488 break;
1489
1490 case TGSI_OPCODE_PK4B:
1491 assert( 0 );
1492 break;
1493
1494 case TGSI_OPCODE_PK4UB:
1495 assert( 0 );
1496 break;
1497
1498 case TGSI_OPCODE_RFL:
1499 assert( 0 );
1500 break;
1501
1502 case TGSI_OPCODE_SEQ:
1503 assert( 0 );
1504 break;
1505
1506 case TGSI_OPCODE_SFL:
1507 assert( 0 );
1508 break;
1509
1510 case TGSI_OPCODE_SGT:
1511 assert( 0 );
1512 break;
1513
1514 case TGSI_OPCODE_SIN:
1515 FETCH( func, *inst, 0, 0, CHAN_X );
1516 emit_sin( func, 0 );
1517 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1518 STORE( func, *inst, 0, 0, chan_index );
1519 }
1520 break;
1521
1522 case TGSI_OPCODE_SLE:
1523 assert( 0 );
1524 break;
1525
1526 case TGSI_OPCODE_SNE:
1527 assert( 0 );
1528 break;
1529
1530 case TGSI_OPCODE_STR:
1531 assert( 0 );
1532 break;
1533
1534 case TGSI_OPCODE_TEX:
1535 emit_tempf(
1536 func,
1537 0,
1538 TGSI_EXEC_TEMP_ONE_I,
1539 TGSI_EXEC_TEMP_ONE_C );
1540 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1541 STORE( func, *inst, 0, 0, chan_index );
1542 }
1543 break;
1544
1545 case TGSI_OPCODE_TXD:
1546 assert( 0 );
1547 break;
1548
1549 case TGSI_OPCODE_UP2H:
1550 assert( 0 );
1551 break;
1552
1553 case TGSI_OPCODE_UP2US:
1554 assert( 0 );
1555 break;
1556
1557 case TGSI_OPCODE_UP4B:
1558 assert( 0 );
1559 break;
1560
1561 case TGSI_OPCODE_UP4UB:
1562 assert( 0 );
1563 break;
1564
1565 case TGSI_OPCODE_X2D:
1566 assert( 0 );
1567 break;
1568
1569 case TGSI_OPCODE_ARA:
1570 assert( 0 );
1571 break;
1572
1573 case TGSI_OPCODE_ARR:
1574 assert( 0 );
1575 break;
1576
1577 case TGSI_OPCODE_BRA:
1578 assert( 0 );
1579 break;
1580
1581 case TGSI_OPCODE_CAL:
1582 assert( 0 );
1583 break;
1584
1585 case TGSI_OPCODE_RET:
1586 #ifdef WIN32
1587 x86_retw( func, 16 );
1588 #else
1589 x86_ret( func );
1590 #endif
1591 break;
1592
1593 case TGSI_OPCODE_SSG:
1594 assert( 0 );
1595 break;
1596
1597 case TGSI_OPCODE_CMP:
1598 emit_cmp (func, inst);
1599 break;
1600
1601 case TGSI_OPCODE_SCS:
1602 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1603 FETCH( func, *inst, 0, 0, CHAN_X );
1604 emit_cos( func, 0 );
1605 STORE( func, *inst, 0, 0, CHAN_X );
1606 }
1607 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1608 FETCH( func, *inst, 0, 0, CHAN_Y );
1609 emit_sin( func, 0 );
1610 STORE( func, *inst, 0, 0, CHAN_Y );
1611 }
1612 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1613 FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C );
1614 STORE( func, *inst, 0, 0, CHAN_Z );
1615 }
1616 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1617 FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
1618 STORE( func, *inst, 0, 0, CHAN_W );
1619 }
1620 break;
1621
1622 case TGSI_OPCODE_TXB:
1623 assert( 0 );
1624 break;
1625
1626 case TGSI_OPCODE_NRM:
1627 assert( 0 );
1628 break;
1629
1630 case TGSI_OPCODE_DIV:
1631 assert( 0 );
1632 break;
1633
1634 case TGSI_OPCODE_DP2:
1635 assert( 0 );
1636 break;
1637
1638 case TGSI_OPCODE_TXL:
1639 assert( 0 );
1640 break;
1641
1642 case TGSI_OPCODE_BRK:
1643 assert( 0 );
1644 break;
1645
1646 case TGSI_OPCODE_IF:
1647 assert( 0 );
1648 break;
1649
1650 case TGSI_OPCODE_LOOP:
1651 assert( 0 );
1652 break;
1653
1654 case TGSI_OPCODE_REP:
1655 assert( 0 );
1656 break;
1657
1658 case TGSI_OPCODE_ELSE:
1659 assert( 0 );
1660 break;
1661
1662 case TGSI_OPCODE_ENDIF:
1663 assert( 0 );
1664 break;
1665
1666 case TGSI_OPCODE_ENDLOOP:
1667 assert( 0 );
1668 break;
1669
1670 case TGSI_OPCODE_ENDREP:
1671 assert( 0 );
1672 break;
1673
1674 case TGSI_OPCODE_PUSHA:
1675 assert( 0 );
1676 break;
1677
1678 case TGSI_OPCODE_POPA:
1679 assert( 0 );
1680 break;
1681
1682 case TGSI_OPCODE_CEIL:
1683 assert( 0 );
1684 break;
1685
1686 case TGSI_OPCODE_I2F:
1687 assert( 0 );
1688 break;
1689
1690 case TGSI_OPCODE_NOT:
1691 assert( 0 );
1692 break;
1693
1694 case TGSI_OPCODE_TRUNC:
1695 assert( 0 );
1696 break;
1697
1698 case TGSI_OPCODE_SHL:
1699 assert( 0 );
1700 break;
1701
1702 case TGSI_OPCODE_SHR:
1703 assert( 0 );
1704 break;
1705
1706 case TGSI_OPCODE_AND:
1707 assert( 0 );
1708 break;
1709
1710 case TGSI_OPCODE_OR:
1711 assert( 0 );
1712 break;
1713
1714 case TGSI_OPCODE_MOD:
1715 assert( 0 );
1716 break;
1717
1718 case TGSI_OPCODE_XOR:
1719 assert( 0 );
1720 break;
1721
1722 case TGSI_OPCODE_SAD:
1723 assert( 0 );
1724 break;
1725
1726 case TGSI_OPCODE_TXF:
1727 assert( 0 );
1728 break;
1729
1730 case TGSI_OPCODE_TXQ:
1731 assert( 0 );
1732 break;
1733
1734 case TGSI_OPCODE_CONT:
1735 assert( 0 );
1736 break;
1737
1738 case TGSI_OPCODE_EMIT:
1739 assert( 0 );
1740 break;
1741
1742 case TGSI_OPCODE_ENDPRIM:
1743 assert( 0 );
1744 break;
1745
1746 default:
1747 assert( 0 );
1748 }
1749 }
1750
1751 static void
1752 emit_declaration(
1753 struct x86_function *func,
1754 struct tgsi_full_declaration *decl )
1755 {
1756 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1757 unsigned first, last, mask;
1758 unsigned i, j;
1759
1760 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
1761
1762 first = decl->u.DeclarationRange.First;
1763 last = decl->u.DeclarationRange.Last;
1764 mask = decl->Declaration.UsageMask;
1765
1766 /* Do not touch WPOS.xy */
1767 if( first == 0 ) {
1768 mask &= ~TGSI_WRITEMASK_XY;
1769 if( mask == TGSI_WRITEMASK_NONE ) {
1770 first++;
1771 }
1772 }
1773
1774 for( i = first; i <= last; i++ ) {
1775 for( j = 0; j < NUM_CHANNELS; j++ ) {
1776 if( mask & (1 << j) ) {
1777 switch( decl->Interpolation.Interpolate ) {
1778 case TGSI_INTERPOLATE_CONSTANT:
1779 emit_coef_a0( func, 0, i, j );
1780 emit_inputs( func, 0, i, j );
1781 break;
1782
1783 case TGSI_INTERPOLATE_LINEAR:
1784 emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
1785 emit_coef_dadx( func, 1, i, j );
1786 emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
1787 emit_coef_dady( func, 3, i, j );
1788 emit_mul( func, 0, 1 ); /* x * dadx */
1789 emit_coef_a0( func, 4, i, j );
1790 emit_mul( func, 2, 3 ); /* y * dady */
1791 emit_add( func, 0, 4 ); /* x * dadx + a0 */
1792 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
1793 emit_inputs( func, 0, i, j );
1794 break;
1795
1796 case TGSI_INTERPOLATE_PERSPECTIVE:
1797 emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
1798 emit_coef_dadx( func, 1, i, j );
1799 emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
1800 emit_coef_dady( func, 3, i, j );
1801 emit_mul( func, 0, 1 ); /* x * dadx */
1802 emit_inputf( func, 4, 0, TGSI_SWIZZLE_W );
1803 emit_coef_a0( func, 5, i, j );
1804 emit_rcp( func, 4, 4 ); /* 1.0 / w */
1805 emit_mul( func, 2, 3 ); /* y * dady */
1806 emit_add( func, 0, 5 ); /* x * dadx + a0 */
1807 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
1808 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
1809 emit_inputs( func, 0, i, j );
1810 break;
1811
1812 default:
1813 assert( 0 );
1814 }
1815 }
1816 }
1817 }
1818 }
1819 }
1820
1821 unsigned
1822 tgsi_emit_sse2(
1823 struct tgsi_token *tokens,
1824 struct x86_function *func )
1825 {
1826 struct tgsi_parse_context parse;
1827
1828 func->csr = func->store;
1829
1830 x86_mov(
1831 func,
1832 get_input_base(),
1833 get_argument( 0 ) );
1834 x86_mov(
1835 func,
1836 get_output_base(),
1837 get_argument( 1 ) );
1838 x86_mov(
1839 func,
1840 get_const_base(),
1841 get_argument( 2 ) );
1842 x86_mov(
1843 func,
1844 get_temp_base(),
1845 get_argument( 3 ) );
1846
1847 tgsi_parse_init( &parse, tokens );
1848
1849 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1850 tgsi_parse_token( &parse );
1851
1852 switch( parse.FullToken.Token.Type ) {
1853 case TGSI_TOKEN_TYPE_DECLARATION:
1854 break;
1855
1856 case TGSI_TOKEN_TYPE_INSTRUCTION:
1857 emit_instruction(
1858 func,
1859 &parse.FullToken.FullInstruction );
1860 break;
1861
1862 default:
1863 assert( 0 );
1864 }
1865 }
1866
1867 tgsi_parse_free( &parse );
1868
1869 return 1;
1870 }
1871
1872 /**
1873 * Fragment shaders are responsible for interpolating shader inputs. Because on
1874 * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
1875 * output, const, temp and coef), the code is split into two phases --
1876 * DECLARATION and INSTRUCTION phase.
1877 * GP register holding the output argument is aliased with the coeff argument,
1878 * as outputs are not needed in the DECLARATION phase.
1879 */
1880 unsigned
1881 tgsi_emit_sse2_fs(
1882 struct tgsi_token *tokens,
1883 struct x86_function *func )
1884 {
1885 struct tgsi_parse_context parse;
1886 boolean instruction_phase = FALSE;
1887
1888 func->csr = func->store;
1889
1890 /* DECLARATION phase, do not load output argument. */
1891 x86_mov(
1892 func,
1893 get_input_base(),
1894 get_argument( 0 ) );
1895 x86_mov(
1896 func,
1897 get_const_base(),
1898 get_argument( 2 ) );
1899 x86_mov(
1900 func,
1901 get_temp_base(),
1902 get_argument( 3 ) );
1903 x86_mov(
1904 func,
1905 get_coef_base(),
1906 get_argument( 4 ) );
1907
1908 tgsi_parse_init( &parse, tokens );
1909
1910 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1911 tgsi_parse_token( &parse );
1912
1913 switch( parse.FullToken.Token.Type ) {
1914 case TGSI_TOKEN_TYPE_DECLARATION:
1915 emit_declaration(
1916 func,
1917 &parse.FullToken.FullDeclaration );
1918 break;
1919
1920 case TGSI_TOKEN_TYPE_INSTRUCTION:
1921 if( !instruction_phase ) {
1922 /* INSTRUCTION phase, overwrite coeff with output. */
1923 instruction_phase = TRUE;
1924 x86_mov(
1925 func,
1926 get_output_base(),
1927 get_argument( 1 ) );
1928 }
1929 emit_instruction(
1930 func,
1931 &parse.FullToken.FullInstruction );
1932 break;
1933
1934 default:
1935 assert( 0 );
1936 }
1937 }
1938
1939 tgsi_parse_free( &parse );
1940
1941 return 1;
1942 }