tgsi: include more of the register info in debug dumps
[mesa.git] / src / mesa / pipe / tgsi / exec / tgsi_sse2.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "pipe/p_util.h"
29 #include "pipe/p_shader_tokens.h"
30 #include "pipe/tgsi/util/tgsi_parse.h"
31 #include "pipe/tgsi/util/tgsi_util.h"
32 #include "tgsi_exec.h"
33 #include "tgsi_sse2.h"
34
35 #include "x86/rtasm/x86sse.h"
36
37 #if defined(__i386__) || defined(__386__)
38
39 #define DUMP_SSE 0
40
41 #if DUMP_SSE
42
43 static void
44 _print_reg(
45 struct x86_reg reg )
46 {
47 if (reg.mod != mod_REG)
48 debug_printf( "[" );
49
50 switch( reg.file ) {
51 case file_REG32:
52 switch( reg.idx ) {
53 case reg_AX:
54 debug_printf( "EAX" );
55 break;
56 case reg_CX:
57 debug_printf( "ECX" );
58 break;
59 case reg_DX:
60 debug_printf( "EDX" );
61 break;
62 case reg_BX:
63 debug_printf( "EBX" );
64 break;
65 case reg_SP:
66 debug_printf( "ESP" );
67 break;
68 case reg_BP:
69 debug_printf( "EBP" );
70 break;
71 case reg_SI:
72 debug_printf( "ESI" );
73 break;
74 case reg_DI:
75 debug_printf( "EDI" );
76 break;
77 }
78 break;
79 case file_MMX:
80 assert( 0 );
81 break;
82 case file_XMM:
83 debug_printf( "XMM%u", reg.idx );
84 break;
85 case file_x87:
86 assert( 0 );
87 break;
88 }
89
90 if (reg.mod == mod_DISP8 ||
91 reg.mod == mod_DISP32)
92 debug_printf("+%d", reg.disp);
93
94 if (reg.mod != mod_REG)
95 debug_printf( "]" );
96 }
97
/**
 * Pad the dump output after a mnemonic so that operands line up.
 *
 * Mnemonics shorter than the 10-character column are padded up to it.
 * Longer (or exactly 10-character) mnemonics get a single separating space;
 * the unsigned subtraction used previously wrapped around for them and
 * printed a practically endless run of spaces.
 *
 * \param op  mnemonic that has just been printed (NUL-terminated)
 */
static void
_fill(
   const char *op )
{
   const unsigned column = 10;
   const unsigned len = (unsigned) strlen( op );
   unsigned count = (len < column) ? (column - len) : 1;

   while( count-- ) {
      debug_printf( " " );
   }
}
108
/*
 * Dump macros used when DUMP_SSE is enabled.  Each one starts a new output
 * line; the suffix names the operands that follow the mnemonic:
 * I = integer printed with "%u", R = register operand printed by _print_reg().
 */
#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
#define DUMP( OP ) debug_printf( "\n%s", OP )

#define DUMP_I( OP, I ) \
   do { \
      debug_printf( "\n%s", OP ); \
      _fill( OP ); \
      debug_printf( "%u", I ); \
   } while( 0 )

#define DUMP_R( OP, R0 ) \
   do { \
      debug_printf( "\n%s", OP ); \
      _fill( OP ); \
      _print_reg( R0 ); \
   } while( 0 )

#define DUMP_RR( OP, R0, R1 ) \
   do { \
      debug_printf( "\n%s", OP ); \
      _fill( OP ); \
      _print_reg( R0 ); \
      debug_printf( ", " ); \
      _print_reg( R1 ); \
   } while( 0 )

#define DUMP_RRI( OP, R0, R1, I ) \
   do { \
      debug_printf( "\n%s", OP ); \
      _fill( OP ); \
      _print_reg( R0 ); \
      debug_printf( ", " ); \
      _print_reg( R1 ); \
      debug_printf( ", " ); \
      debug_printf( "%u", I ); \
   } while( 0 )
134
135 #else
136
/* DUMP_SSE is disabled: every dump macro expands to nothing. */
#define DUMP_START()
#define DUMP_END()
#define DUMP( OP )
#define DUMP_I( OP, I )
#define DUMP_R( OP, R0 )
#define DUMP_RR( OP, R0, R1 )
#define DUMP_RRI( OP, R0, R1, I )
144
145 #endif
146
/* Iterate CHAN over the four vector channels. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for( CHAN = 0; CHAN < 4; CHAN++ )

/* Non-zero when channel CHAN is set in the write mask of destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Statement prefix: run the following statement only for enabled channels. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop over the channels that destination 0 actually writes. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Scratch temporary used to pass data to the C helper functions. */
#define TEMP_R0 TGSI_EXEC_TEMP_R0
166
167 /**
168 * X86 utility functions.
169 */
170
171 static struct x86_reg
172 make_xmm(
173 unsigned xmm )
174 {
175 return x86_make_reg(
176 file_XMM,
177 (enum x86_reg_name) xmm );
178 }
179
180 /**
181 * X86 register mapping helpers.
182 */
183
184 static struct x86_reg
185 get_const_base( void )
186 {
187 return x86_make_reg(
188 file_REG32,
189 reg_CX );
190 }
191
192 static struct x86_reg
193 get_input_base( void )
194 {
195 return x86_make_reg(
196 file_REG32,
197 reg_AX );
198 }
199
200 static struct x86_reg
201 get_output_base( void )
202 {
203 return x86_make_reg(
204 file_REG32,
205 reg_DX );
206 }
207
208 static struct x86_reg
209 get_temp_base( void )
210 {
211 #ifdef WIN32
212 return x86_make_reg(
213 file_REG32,
214 reg_BX );
215 #else
216 return x86_make_reg(
217 file_REG32,
218 reg_SI );
219 #endif
220 }
221
222 static struct x86_reg
223 get_coef_base( void )
224 {
225 return get_output_base();
226 }
227
228 /**
229 * Data access helpers.
230 */
231
232 static struct x86_reg
233 get_argument(
234 unsigned index )
235 {
236 return x86_make_disp(
237 x86_make_reg( file_REG32, reg_SP ),
238 (index + 1) * 4 );
239 }
240
241 static struct x86_reg
242 get_const(
243 unsigned vec,
244 unsigned chan )
245 {
246 return x86_make_disp(
247 get_const_base(),
248 (vec * 4 + chan) * 4 );
249 }
250
251 static struct x86_reg
252 get_input(
253 unsigned vec,
254 unsigned chan )
255 {
256 return x86_make_disp(
257 get_input_base(),
258 (vec * 4 + chan) * 16 );
259 }
260
261 static struct x86_reg
262 get_output(
263 unsigned vec,
264 unsigned chan )
265 {
266 return x86_make_disp(
267 get_output_base(),
268 (vec * 4 + chan) * 16 );
269 }
270
271 static struct x86_reg
272 get_temp(
273 unsigned vec,
274 unsigned chan )
275 {
276 return x86_make_disp(
277 get_temp_base(),
278 (vec * 4 + chan) * 16 );
279 }
280
281 static struct x86_reg
282 get_coef(
283 unsigned vec,
284 unsigned chan,
285 unsigned member )
286 {
287 return x86_make_disp(
288 get_coef_base(),
289 ((vec * 3 + member) * 4 + chan) * 4 );
290 }
291
292 /**
293 * X86 rtasm wrappers.
294 */
295
296 static void
297 emit_addps(
298 struct x86_function *func,
299 struct x86_reg dst,
300 struct x86_reg src )
301 {
302 DUMP_RR( "ADDPS", dst, src );
303 sse_addps( func, dst, src );
304 }
305
306 static void
307 emit_andnps(
308 struct x86_function *func,
309 struct x86_reg dst,
310 struct x86_reg src )
311 {
312 DUMP_RR( "ANDNPS", dst, src );
313 sse_andnps( func, dst, src );
314 }
315
316 static void
317 emit_andps(
318 struct x86_function *func,
319 struct x86_reg dst,
320 struct x86_reg src )
321 {
322 DUMP_RR( "ANDPS", dst, src );
323 sse_andps( func, dst, src );
324 }
325
/**
 * Dump (when DUMP_SSE is set) and emit a CALL to \p addr.
 *
 * DUMP_I prints its operand with "%u", so the function pointer is converted
 * to an unsigned value first instead of being passed to the variadic
 * debug_printf() with a mismatched type.  This file is only built for
 * 32-bit x86, where a code address fits in an unsigned int.
 *
 * \param func  function being assembled
 * \param addr  address of the C function to call
 */
static void
emit_call(
   struct x86_function *func,
   void (* addr)() )
{
   DUMP_I( "CALL", (unsigned) addr );
   x86_call( func, addr );
}
334
335 static void
336 emit_cmpps(
337 struct x86_function *func,
338 struct x86_reg dst,
339 struct x86_reg src,
340 enum sse_cc cc )
341 {
342 DUMP_RRI( "CMPPS", dst, src, cc );
343 sse_cmpps( func, dst, src, cc );
344 }
345
346 static void
347 emit_cvttps2dq(
348 struct x86_function *func,
349 struct x86_reg dst,
350 struct x86_reg src )
351 {
352 DUMP_RR( "CVTTPS2DQ", dst, src );
353 sse2_cvttps2dq( func, dst, src );
354 }
355
356 static void
357 emit_maxps(
358 struct x86_function *func,
359 struct x86_reg dst,
360 struct x86_reg src )
361 {
362 DUMP_RR( "MAXPS", dst, src );
363 sse_maxps( func, dst, src );
364 }
365
366 static void
367 emit_minps(
368 struct x86_function *func,
369 struct x86_reg dst,
370 struct x86_reg src )
371 {
372 DUMP_RR( "MINPS", dst, src );
373 sse_minps( func, dst, src );
374 }
375
376 static void
377 emit_mov(
378 struct x86_function *func,
379 struct x86_reg dst,
380 struct x86_reg src )
381 {
382 DUMP_RR( "MOV", dst, src );
383 x86_mov( func, dst, src );
384 }
385
386 static void
387 emit_movaps(
388 struct x86_function *func,
389 struct x86_reg dst,
390 struct x86_reg src )
391 {
392 DUMP_RR( "MOVAPS", dst, src );
393 sse_movaps( func, dst, src );
394 }
395
396 static void
397 emit_movss(
398 struct x86_function *func,
399 struct x86_reg dst,
400 struct x86_reg src )
401 {
402 DUMP_RR( "MOVSS", dst, src );
403 sse_movss( func, dst, src );
404 }
405
406 static void
407 emit_movups(
408 struct x86_function *func,
409 struct x86_reg dst,
410 struct x86_reg src )
411 {
412 DUMP_RR( "MOVUPS", dst, src );
413 sse_movups( func, dst, src );
414 }
415
416 static void
417 emit_mulps(
418 struct x86_function *func,
419 struct x86_reg dst,
420 struct x86_reg src )
421 {
422 DUMP_RR( "MULPS", dst, src );
423 sse_mulps( func, dst, src );
424 }
425
426 static void
427 emit_or(
428 struct x86_function *func,
429 struct x86_reg dst,
430 struct x86_reg src )
431 {
432 DUMP_RR( "OR", dst, src );
433 x86_or( func, dst, src );
434 }
435
436 static void
437 emit_orps(
438 struct x86_function *func,
439 struct x86_reg dst,
440 struct x86_reg src )
441 {
442 DUMP_RR( "ORPS", dst, src );
443 sse_orps( func, dst, src );
444 }
445
446 static void
447 emit_pmovmskb(
448 struct x86_function *func,
449 struct x86_reg dst,
450 struct x86_reg src )
451 {
452 DUMP_RR( "PMOVMSKB", dst, src );
453 sse_pmovmskb( func, dst, src );
454 }
455
456 static void
457 emit_pop(
458 struct x86_function *func,
459 struct x86_reg dst )
460 {
461 DUMP_R( "POP", dst );
462 x86_pop( func, dst );
463 }
464
465 static void
466 emit_push(
467 struct x86_function *func,
468 struct x86_reg dst )
469 {
470 DUMP_R( "PUSH", dst );
471 x86_push( func, dst );
472 }
473
474 static void
475 emit_rcpps(
476 struct x86_function *func,
477 struct x86_reg dst,
478 struct x86_reg src )
479 {
480 DUMP_RR( "RCPPS", dst, src );
481 sse2_rcpps( func, dst, src );
482 }
483
#ifdef WIN32
/**
 * WIN32 only: dump (when DUMP_SSE is set) and emit a RET carrying the
 * immediate \p size, via x86_retw().
 */
static void
emit_retw( struct x86_function *func, unsigned size )
{
   DUMP_I( "RET", size );
   x86_retw( func, size );
}
#else
/** Non-WIN32: dump (when DUMP_SSE is set) and emit a plain RET. */
static void
emit_ret( struct x86_function *func )
{
   DUMP( "RET" );
   x86_ret( func );
}
#endif
502
503 static void
504 emit_rsqrtps(
505 struct x86_function *func,
506 struct x86_reg dst,
507 struct x86_reg src )
508 {
509 DUMP_RR( "RSQRTPS", dst, src );
510 sse_rsqrtps( func, dst, src );
511 }
512
513 static void
514 emit_shufps(
515 struct x86_function *func,
516 struct x86_reg dst,
517 struct x86_reg src,
518 unsigned char shuf )
519 {
520 DUMP_RRI( "SHUFPS", dst, src, shuf );
521 sse_shufps( func, dst, src, shuf );
522 }
523
524 static void
525 emit_subps(
526 struct x86_function *func,
527 struct x86_reg dst,
528 struct x86_reg src )
529 {
530 DUMP_RR( "SUBPS", dst, src );
531 sse_subps( func, dst, src );
532 }
533
534 static void
535 emit_xorps(
536 struct x86_function *func,
537 struct x86_reg dst,
538 struct x86_reg src )
539 {
540 DUMP_RR( "XORPS", dst, src );
541 sse_xorps( func, dst, src );
542 }
543
544 /**
545 * Data fetch helpers.
546 */
547
548 static void
549 emit_const(
550 struct x86_function *func,
551 unsigned xmm,
552 unsigned vec,
553 unsigned chan )
554 {
555 emit_movss(
556 func,
557 make_xmm( xmm ),
558 get_const( vec, chan ) );
559 emit_shufps(
560 func,
561 make_xmm( xmm ),
562 make_xmm( xmm ),
563 SHUF( 0, 0, 0, 0 ) );
564 }
565
566 static void
567 emit_inputf(
568 struct x86_function *func,
569 unsigned xmm,
570 unsigned vec,
571 unsigned chan )
572 {
573 emit_movups(
574 func,
575 make_xmm( xmm ),
576 get_input( vec, chan ) );
577 }
578
579 static void
580 emit_output(
581 struct x86_function *func,
582 unsigned xmm,
583 unsigned vec,
584 unsigned chan )
585 {
586 emit_movups(
587 func,
588 get_output( vec, chan ),
589 make_xmm( xmm ) );
590 }
591
592 static void
593 emit_tempf(
594 struct x86_function *func,
595 unsigned xmm,
596 unsigned vec,
597 unsigned chan )
598 {
599 emit_movaps(
600 func,
601 make_xmm( xmm ),
602 get_temp( vec, chan ) );
603 }
604
605 static void
606 emit_coef(
607 struct x86_function *func,
608 unsigned xmm,
609 unsigned vec,
610 unsigned chan,
611 unsigned member )
612 {
613 emit_movss(
614 func,
615 make_xmm( xmm ),
616 get_coef( vec, chan, member ) );
617 emit_shufps(
618 func,
619 make_xmm( xmm ),
620 make_xmm( xmm ),
621 SHUF( 0, 0, 0, 0 ) );
622 }
623
624 /**
625 * Data store helpers.
626 */
627
628 static void
629 emit_inputs(
630 struct x86_function *func,
631 unsigned xmm,
632 unsigned vec,
633 unsigned chan )
634 {
635 emit_movups(
636 func,
637 get_input( vec, chan ),
638 make_xmm( xmm ) );
639 }
640
641 static void
642 emit_temps(
643 struct x86_function *func,
644 unsigned xmm,
645 unsigned vec,
646 unsigned chan )
647 {
648 emit_movaps(
649 func,
650 get_temp( vec, chan ),
651 make_xmm( xmm ) );
652 }
653
654 static void
655 emit_addrs(
656 struct x86_function *func,
657 unsigned xmm,
658 unsigned vec,
659 unsigned chan )
660 {
661 emit_temps(
662 func,
663 xmm,
664 vec + TGSI_EXEC_NUM_TEMPS,
665 chan );
666 }
667
/**
 * Coefficient fetch helpers.
 */
671
/** Load coefficient member 0 (a0) of (\p vec, \p chan) into XMM register \p xmm. */
static void
emit_coef_a0( struct x86_function *func,
              unsigned xmm,
              unsigned vec,
              unsigned chan )
{
   emit_coef( func, xmm, vec, chan, 0 );
}
686
/** Load coefficient member 1 (dadx) of (\p vec, \p chan) into XMM register \p xmm. */
static void
emit_coef_dadx( struct x86_function *func,
                unsigned xmm,
                unsigned vec,
                unsigned chan )
{
   emit_coef( func, xmm, vec, chan, 1 );
}
701
/** Load coefficient member 2 (dady) of (\p vec, \p chan) into XMM register \p xmm. */
static void
emit_coef_dady( struct x86_function *func,
                unsigned xmm,
                unsigned vec,
                unsigned chan )
{
   emit_coef( func, xmm, vec, chan, 2 );
}
716
717 /**
718 * Function call helpers.
719 */
720
/**
 * Save the four base registers (const, input, output, temp) on the stack
 * before calling out to C code.
 */
static void
emit_push_gp( struct x86_function *func )
{
   emit_push( func, get_const_base() );
   emit_push( func, get_input_base() );
   emit_push( func, get_output_base() );

   /* On non-win32 platforms the temp base must be pushed last. */
   emit_push( func, get_temp_base() );
}
741
/**
 * Restore the base registers saved by emit_push_gp(), popping them in the
 * reverse of the push order.
 */
static void
emit_pop_gp( struct x86_function *func )
{
   emit_pop( func, get_temp_base() );
   emit_pop( func, get_output_base() );
   emit_pop( func, get_input_base() );
   emit_pop( func, get_const_base() );
}
761
762 static void
763 emit_func_call_dst(
764 struct x86_function *func,
765 unsigned xmm_dst,
766 void (*code)() )
767 {
768 emit_movaps(
769 func,
770 get_temp( TEMP_R0, 0 ),
771 make_xmm( xmm_dst ) );
772
773 emit_push_gp(
774 func );
775
776 #ifdef WIN32
777 emit_push(
778 func,
779 get_temp( TEMP_R0, 0 ) );
780 #endif
781
782 emit_call(
783 func,
784 code );
785
786 emit_pop_gp(
787 func );
788
789 emit_movaps(
790 func,
791 make_xmm( xmm_dst ),
792 get_temp( TEMP_R0, 0 ) );
793 }
794
795 static void
796 emit_func_call_dst_src(
797 struct x86_function *func,
798 unsigned xmm_dst,
799 unsigned xmm_src,
800 void (*code)() )
801 {
802 emit_movaps(
803 func,
804 get_temp( TEMP_R0, 1 ),
805 make_xmm( xmm_src ) );
806
807 emit_func_call_dst(
808 func,
809 xmm_dst,
810 code );
811 }
812
813 /**
814 * Low-level instruction translators.
815 */
816
817 static void
818 emit_abs(
819 struct x86_function *func,
820 unsigned xmm )
821 {
822 emit_andps(
823 func,
824 make_xmm( xmm ),
825 get_temp(
826 TGSI_EXEC_TEMP_7FFFFFFF_I,
827 TGSI_EXEC_TEMP_7FFFFFFF_C ) );
828 }
829
/** Emit ADDPS between XMM registers \p xmm_dst and \p xmm_src. */
static void
emit_add( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src )
{
   emit_addps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
841
842 static void XSTDCALL
843 cos4f(
844 float *store )
845 {
846 #ifdef WIN32
847 store[0] = (float) cos( (double) store[0] );
848 store[1] = (float) cos( (double) store[1] );
849 store[2] = (float) cos( (double) store[2] );
850 store[3] = (float) cos( (double) store[3] );
851 #else
852 const unsigned X = TEMP_R0 * 16;
853 store[X + 0] = cosf( store[X + 0] );
854 store[X + 1] = cosf( store[X + 1] );
855 store[X + 2] = cosf( store[X + 2] );
856 store[X + 3] = cosf( store[X + 3] );
857 #endif
858 }
859
860 static void
861 emit_cos(
862 struct x86_function *func,
863 unsigned xmm_dst )
864 {
865 emit_func_call_dst(
866 func,
867 xmm_dst,
868 cos4f );
869 }
870
871 static void XSTDCALL
872 ex24f(
873 float *store )
874 {
875 #ifdef WIN32
876 store[0] = (float) pow( 2.0, (double) store[0] );
877 store[1] = (float) pow( 2.0, (double) store[1] );
878 store[2] = (float) pow( 2.0, (double) store[2] );
879 store[3] = (float) pow( 2.0, (double) store[3] );
880 #else
881 const unsigned X = TEMP_R0 * 16;
882 store[X + 0] = powf( 2.0f, store[X + 0] );
883 store[X + 1] = powf( 2.0f, store[X + 1] );
884 store[X + 2] = powf( 2.0f, store[X + 2] );
885 store[X + 3] = powf( 2.0f, store[X + 3] );
886 #endif
887 }
888
889 static void
890 emit_ex2(
891 struct x86_function *func,
892 unsigned xmm_dst )
893 {
894 emit_func_call_dst(
895 func,
896 xmm_dst,
897 ex24f );
898 }
899
900 static void
901 emit_f2it(
902 struct x86_function *func,
903 unsigned xmm )
904 {
905 emit_cvttps2dq(
906 func,
907 make_xmm( xmm ),
908 make_xmm( xmm ) );
909 }
910
911 static void XSTDCALL
912 flr4f(
913 float *store )
914 {
915 #ifdef WIN32
916 const unsigned X = 0;
917 #else
918 const unsigned X = TEMP_R0 * 16;
919 #endif
920 store[X + 0] = (float) floor( (double) store[X + 0] );
921 store[X + 1] = (float) floor( (double) store[X + 1] );
922 store[X + 2] = (float) floor( (double) store[X + 2] );
923 store[X + 3] = (float) floor( (double) store[X + 3] );
924 }
925
926 static void
927 emit_flr(
928 struct x86_function *func,
929 unsigned xmm_dst )
930 {
931 emit_func_call_dst(
932 func,
933 xmm_dst,
934 flr4f );
935 }
936
937 static void XSTDCALL
938 frc4f(
939 float *store )
940 {
941 #ifdef WIN32
942 const unsigned X = 0;
943 #else
944 const unsigned X = TEMP_R0 * 16;
945 #endif
946 store[X + 0] -= (float) floor( (double) store[X + 0] );
947 store[X + 1] -= (float) floor( (double) store[X + 1] );
948 store[X + 2] -= (float) floor( (double) store[X + 2] );
949 store[X + 3] -= (float) floor( (double) store[X + 3] );
950 }
951
952 static void
953 emit_frc(
954 struct x86_function *func,
955 unsigned xmm_dst )
956 {
957 emit_func_call_dst(
958 func,
959 xmm_dst,
960 frc4f );
961 }
962
963 static void XSTDCALL
964 lg24f(
965 float *store )
966 {
967 #ifdef WIN32
968 const unsigned X = 0;
969 #else
970 const unsigned X = TEMP_R0 * 16;
971 #endif
972 store[X + 0] = LOG2( store[X + 0] );
973 store[X + 1] = LOG2( store[X + 1] );
974 store[X + 2] = LOG2( store[X + 2] );
975 store[X + 3] = LOG2( store[X + 3] );
976 }
977
978 static void
979 emit_lg2(
980 struct x86_function *func,
981 unsigned xmm_dst )
982 {
983 emit_func_call_dst(
984 func,
985 xmm_dst,
986 lg24f );
987 }
988
/** Copy XMM register \p xmm_src to \p xmm_dst using MOVUPS. */
static void
emit_MOV( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src )
{
   emit_movups( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
1000
/** Emit MULPS between XMM registers \p xmm_dst and \p xmm_src. */
static void
emit_mul( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src )
{
   emit_mulps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
1011
1012 static void
1013 emit_neg(
1014 struct x86_function *func,
1015 unsigned xmm )
1016 {
1017 emit_xorps(
1018 func,
1019 make_xmm( xmm ),
1020 get_temp(
1021 TGSI_EXEC_TEMP_80000000_I,
1022 TGSI_EXEC_TEMP_80000000_C ) );
1023 }
1024
1025 static void XSTDCALL
1026 pow4f(
1027 float *store )
1028 {
1029 #ifdef WIN32
1030 store[0] = (float) pow( (double) store[0], (double) store[4] );
1031 store[1] = (float) pow( (double) store[1], (double) store[5] );
1032 store[2] = (float) pow( (double) store[2], (double) store[6] );
1033 store[3] = (float) pow( (double) store[3], (double) store[7] );
1034 #else
1035 const unsigned X = TEMP_R0 * 16;
1036 store[X + 0] = powf( store[X + 0], store[X + 4] );
1037 store[X + 1] = powf( store[X + 1], store[X + 5] );
1038 store[X + 2] = powf( store[X + 2], store[X + 6] );
1039 store[X + 3] = powf( store[X + 3], store[X + 7] );
1040 #endif
1041 }
1042
1043 static void
1044 emit_pow(
1045 struct x86_function *func,
1046 unsigned xmm_dst,
1047 unsigned xmm_src )
1048 {
1049 emit_func_call_dst_src(
1050 func,
1051 xmm_dst,
1052 xmm_src,
1053 pow4f );
1054 }
1055
/** Emit RCPPS from XMM register \p xmm_src into \p xmm_dst. */
static void
emit_rcp( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src )
{
   emit_rcpps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
1067
/** Emit RSQRTPS from XMM register \p xmm_src into \p xmm_dst. */
static void
emit_rsqrt( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src )
{
   emit_rsqrtps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
1079
1080 static void
1081 emit_setsign(
1082 struct x86_function *func,
1083 unsigned xmm )
1084 {
1085 emit_orps(
1086 func,
1087 make_xmm( xmm ),
1088 get_temp(
1089 TGSI_EXEC_TEMP_80000000_I,
1090 TGSI_EXEC_TEMP_80000000_C ) );
1091 }
1092
1093 static void XSTDCALL
1094 sin4f(
1095 float *store )
1096 {
1097 #ifdef WIN32
1098 store[0] = (float) sin( (double) store[0] );
1099 store[1] = (float) sin( (double) store[1] );
1100 store[2] = (float) sin( (double) store[2] );
1101 store[3] = (float) sin( (double) store[3] );
1102 #else
1103 const unsigned X = TEMP_R0 * 16;
1104 store[X + 0] = sinf( store[X + 0] );
1105 store[X + 1] = sinf( store[X + 1] );
1106 store[X + 2] = sinf( store[X + 2] );
1107 store[X + 3] = sinf( store[X + 3] );
1108 #endif
1109 }
1110
1111 static void
1112 emit_sin (struct x86_function *func,
1113 unsigned xmm_dst)
1114 {
1115 emit_func_call_dst(
1116 func,
1117 xmm_dst,
1118 sin4f );
1119 }
1120
/** Emit SUBPS between XMM registers \p xmm_dst and \p xmm_src. */
static void
emit_sub( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src )
{
   emit_subps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
1132
1133 /**
1134 * Register fetch.
1135 */
1136
1137 static void
1138 emit_fetch(
1139 struct x86_function *func,
1140 unsigned xmm,
1141 const struct tgsi_full_src_register *reg,
1142 const unsigned chan_index )
1143 {
1144 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1145
1146 switch( swizzle ) {
1147 case TGSI_EXTSWIZZLE_X:
1148 case TGSI_EXTSWIZZLE_Y:
1149 case TGSI_EXTSWIZZLE_Z:
1150 case TGSI_EXTSWIZZLE_W:
1151 switch( reg->SrcRegister.File ) {
1152 case TGSI_FILE_CONSTANT:
1153 emit_const(
1154 func,
1155 xmm,
1156 reg->SrcRegister.Index,
1157 swizzle );
1158 break;
1159
1160 case TGSI_FILE_INPUT:
1161 emit_inputf(
1162 func,
1163 xmm,
1164 reg->SrcRegister.Index,
1165 swizzle );
1166 break;
1167
1168 case TGSI_FILE_TEMPORARY:
1169 emit_tempf(
1170 func,
1171 xmm,
1172 reg->SrcRegister.Index,
1173 swizzle );
1174 break;
1175
1176 default:
1177 assert( 0 );
1178 }
1179 break;
1180
1181 case TGSI_EXTSWIZZLE_ZERO:
1182 emit_tempf(
1183 func,
1184 xmm,
1185 TGSI_EXEC_TEMP_00000000_I,
1186 TGSI_EXEC_TEMP_00000000_C );
1187 break;
1188
1189 case TGSI_EXTSWIZZLE_ONE:
1190 emit_tempf(
1191 func,
1192 xmm,
1193 TGSI_EXEC_TEMP_ONE_I,
1194 TGSI_EXEC_TEMP_ONE_C );
1195 break;
1196
1197 default:
1198 assert( 0 );
1199 }
1200
1201 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
1202 case TGSI_UTIL_SIGN_CLEAR:
1203 emit_abs( func, xmm );
1204 break;
1205
1206 case TGSI_UTIL_SIGN_SET:
1207 emit_setsign( func, xmm );
1208 break;
1209
1210 case TGSI_UTIL_SIGN_TOGGLE:
1211 emit_neg( func, xmm );
1212 break;
1213
1214 case TGSI_UTIL_SIGN_KEEP:
1215 break;
1216 }
1217 }
1218
1219 #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
1220 emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
1221
1222 /**
1223 * Register store.
1224 */
1225
1226 static void
1227 emit_store(
1228 struct x86_function *func,
1229 unsigned xmm,
1230 const struct tgsi_full_dst_register *reg,
1231 const struct tgsi_full_instruction *inst,
1232 unsigned chan_index )
1233 {
1234 switch( reg->DstRegister.File ) {
1235 case TGSI_FILE_OUTPUT:
1236 emit_output(
1237 func,
1238 xmm,
1239 reg->DstRegister.Index,
1240 chan_index );
1241 break;
1242
1243 case TGSI_FILE_TEMPORARY:
1244 emit_temps(
1245 func,
1246 xmm,
1247 reg->DstRegister.Index,
1248 chan_index );
1249 break;
1250
1251 case TGSI_FILE_ADDRESS:
1252 emit_addrs(
1253 func,
1254 xmm,
1255 reg->DstRegister.Index,
1256 chan_index );
1257 break;
1258
1259 default:
1260 assert( 0 );
1261 }
1262
1263 switch( inst->Instruction.Saturate ) {
1264 case TGSI_SAT_NONE:
1265 break;
1266
1267 case TGSI_SAT_ZERO_ONE:
1268 // assert( 0 );
1269 break;
1270
1271 case TGSI_SAT_MINUS_PLUS_ONE:
1272 assert( 0 );
1273 break;
1274 }
1275 }
1276
1277 #define STORE( FUNC, INST, XMM, INDEX, CHAN )\
1278 emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
1279
1280 /**
1281 * High-level instruction translators.
1282 */
1283
1284 static void
1285 emit_kil(
1286 struct x86_function *func,
1287 const struct tgsi_full_src_register *reg )
1288 {
1289 unsigned uniquemask;
1290 unsigned registers[4];
1291 unsigned nextregister = 0;
1292 unsigned firstchan = ~0;
1293 unsigned chan_index;
1294
1295 /* This mask stores component bits that were already tested. Note that
1296 * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1297 * tested. */
1298 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1299
1300 FOR_EACH_CHANNEL( chan_index ) {
1301 unsigned swizzle;
1302
1303 /* unswizzle channel */
1304 swizzle = tgsi_util_get_full_src_register_extswizzle(
1305 reg,
1306 chan_index );
1307
1308 /* check if the component has not been already tested */
1309 if( !(uniquemask & (1 << swizzle)) ) {
1310 uniquemask |= 1 << swizzle;
1311
1312 /* allocate register */
1313 registers[chan_index] = nextregister;
1314 emit_fetch(
1315 func,
1316 nextregister,
1317 reg,
1318 chan_index );
1319 nextregister++;
1320
1321 /* mark the first channel used */
1322 if( firstchan == ~0 ) {
1323 firstchan = chan_index;
1324 }
1325 }
1326 }
1327
1328 emit_push(
1329 func,
1330 x86_make_reg( file_REG32, reg_AX ) );
1331 emit_push(
1332 func,
1333 x86_make_reg( file_REG32, reg_DX ) );
1334
1335 FOR_EACH_CHANNEL( chan_index ) {
1336 if( uniquemask & (1 << chan_index) ) {
1337 emit_cmpps(
1338 func,
1339 make_xmm( registers[chan_index] ),
1340 get_temp(
1341 TGSI_EXEC_TEMP_00000000_I,
1342 TGSI_EXEC_TEMP_00000000_C ),
1343 cc_LessThan );
1344
1345 if( chan_index == firstchan ) {
1346 emit_pmovmskb(
1347 func,
1348 x86_make_reg( file_REG32, reg_AX ),
1349 make_xmm( registers[chan_index] ) );
1350 }
1351 else {
1352 emit_pmovmskb(
1353 func,
1354 x86_make_reg( file_REG32, reg_DX ),
1355 make_xmm( registers[chan_index] ) );
1356 emit_or(
1357 func,
1358 x86_make_reg( file_REG32, reg_AX ),
1359 x86_make_reg( file_REG32, reg_DX ) );
1360 }
1361 }
1362 }
1363
1364 emit_or(
1365 func,
1366 get_temp(
1367 TGSI_EXEC_TEMP_KILMASK_I,
1368 TGSI_EXEC_TEMP_KILMASK_C ),
1369 x86_make_reg( file_REG32, reg_AX ) );
1370
1371 emit_pop(
1372 func,
1373 x86_make_reg( file_REG32, reg_DX ) );
1374 emit_pop(
1375 func,
1376 x86_make_reg( file_REG32, reg_AX ) );
1377 }
1378
1379 static void
1380 emit_setcc(
1381 struct x86_function *func,
1382 struct tgsi_full_instruction *inst,
1383 enum sse_cc cc )
1384 {
1385 unsigned chan_index;
1386
1387 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1388 FETCH( func, *inst, 0, 0, chan_index );
1389 FETCH( func, *inst, 1, 1, chan_index );
1390 emit_cmpps(
1391 func,
1392 make_xmm( 0 ),
1393 make_xmm( 1 ),
1394 cc );
1395 emit_andps(
1396 func,
1397 make_xmm( 0 ),
1398 get_temp(
1399 TGSI_EXEC_TEMP_ONE_I,
1400 TGSI_EXEC_TEMP_ONE_C ) );
1401 STORE( func, *inst, 0, 0, chan_index );
1402 }
1403 }
1404
1405 static void
1406 emit_cmp(
1407 struct x86_function *func,
1408 struct tgsi_full_instruction *inst )
1409 {
1410 unsigned chan_index;
1411
1412 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1413 FETCH( func, *inst, 0, 0, chan_index );
1414 FETCH( func, *inst, 1, 1, chan_index );
1415 FETCH( func, *inst, 2, 2, chan_index );
1416 emit_cmpps(
1417 func,
1418 make_xmm( 0 ),
1419 get_temp(
1420 TGSI_EXEC_TEMP_00000000_I,
1421 TGSI_EXEC_TEMP_00000000_C ),
1422 cc_LessThan );
1423 emit_andps(
1424 func,
1425 make_xmm( 1 ),
1426 make_xmm( 0 ) );
1427 emit_andnps(
1428 func,
1429 make_xmm( 0 ),
1430 make_xmm( 2 ) );
1431 emit_orps(
1432 func,
1433 make_xmm( 0 ),
1434 make_xmm( 1 ) );
1435 STORE( func, *inst, 0, 0, chan_index );
1436 }
1437 }
1438
1439 static void
1440 emit_instruction(
1441 struct x86_function *func,
1442 struct tgsi_full_instruction *inst )
1443 {
1444 unsigned chan_index;
1445
1446 switch( inst->Instruction.Opcode ) {
1447 case TGSI_OPCODE_ARL:
1448 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1449 FETCH( func, *inst, 0, 0, chan_index );
1450 emit_f2it( func, 0 );
1451 STORE( func, *inst, 0, 0, chan_index );
1452 }
1453 break;
1454
1455 case TGSI_OPCODE_MOV:
1456 /* TGSI_OPCODE_SWZ */
1457 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1458 FETCH( func, *inst, 0, 0, chan_index );
1459 STORE( func, *inst, 0, 0, chan_index );
1460 }
1461 break;
1462
1463 case TGSI_OPCODE_LIT:
1464 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
1465 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1466 emit_tempf(
1467 func,
1468 0,
1469 TGSI_EXEC_TEMP_ONE_I,
1470 TGSI_EXEC_TEMP_ONE_C);
1471 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
1472 STORE( func, *inst, 0, 0, CHAN_X );
1473 }
1474 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
1475 STORE( func, *inst, 0, 0, CHAN_W );
1476 }
1477 }
1478 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
1479 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1480 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1481 FETCH( func, *inst, 0, 0, CHAN_X );
1482 emit_maxps(
1483 func,
1484 make_xmm( 0 ),
1485 get_temp(
1486 TGSI_EXEC_TEMP_00000000_I,
1487 TGSI_EXEC_TEMP_00000000_C ) );
1488 STORE( func, *inst, 0, 0, CHAN_Y );
1489 }
1490 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1491 FETCH( func, *inst, 1, 0, CHAN_Y );
1492 emit_maxps(
1493 func,
1494 make_xmm( 1 ),
1495 get_temp(
1496 TGSI_EXEC_TEMP_00000000_I,
1497 TGSI_EXEC_TEMP_00000000_C ) );
1498 FETCH( func, *inst, 2, 0, CHAN_W );
1499 emit_minps(
1500 func,
1501 make_xmm( 2 ),
1502 get_temp(
1503 TGSI_EXEC_TEMP_128_I,
1504 TGSI_EXEC_TEMP_128_C ) );
1505 emit_maxps(
1506 func,
1507 make_xmm( 2 ),
1508 get_temp(
1509 TGSI_EXEC_TEMP_MINUS_128_I,
1510 TGSI_EXEC_TEMP_MINUS_128_C ) );
1511 emit_pow( func, 1, 2 );
1512 FETCH( func, *inst, 0, 0, CHAN_X );
1513 emit_xorps(
1514 func,
1515 make_xmm( 2 ),
1516 make_xmm( 2 ) );
1517 emit_cmpps(
1518 func,
1519 make_xmm( 2 ),
1520 make_xmm( 0 ),
1521 cc_LessThanEqual );
1522 emit_andps(
1523 func,
1524 make_xmm( 2 ),
1525 make_xmm( 1 ) );
1526 STORE( func, *inst, 2, 0, CHAN_Z );
1527 }
1528 }
1529 break;
1530
1531 case TGSI_OPCODE_RCP:
1532 /* TGSI_OPCODE_RECIP */
1533 FETCH( func, *inst, 0, 0, CHAN_X );
1534 emit_rcp( func, 0, 0 );
1535 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1536 STORE( func, *inst, 0, 0, chan_index );
1537 }
1538 break;
1539
1540 case TGSI_OPCODE_RSQ:
1541 /* TGSI_OPCODE_RECIPSQRT */
1542 FETCH( func, *inst, 0, 0, CHAN_X );
1543 emit_rsqrt( func, 0, 0 );
1544 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1545 STORE( func, *inst, 0, 0, chan_index );
1546 }
1547 break;
1548
1549 case TGSI_OPCODE_EXP:
1550 assert( 0 );
1551 break;
1552
1553 case TGSI_OPCODE_LOG:
1554 assert( 0 );
1555 break;
1556
1557 case TGSI_OPCODE_MUL:
1558 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1559 FETCH( func, *inst, 0, 0, chan_index );
1560 FETCH( func, *inst, 1, 1, chan_index );
1561 emit_mul( func, 0, 1 );
1562 STORE( func, *inst, 0, 0, chan_index );
1563 }
1564 break;
1565
1566 case TGSI_OPCODE_ADD:
1567 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1568 FETCH( func, *inst, 0, 0, chan_index );
1569 FETCH( func, *inst, 1, 1, chan_index );
1570 emit_add( func, 0, 1 );
1571 STORE( func, *inst, 0, 0, chan_index );
1572 }
1573 break;
1574
1575 case TGSI_OPCODE_DP3:
1576 /* TGSI_OPCODE_DOT3 */
1577 FETCH( func, *inst, 0, 0, CHAN_X );
1578 FETCH( func, *inst, 1, 1, CHAN_X );
1579 emit_mul( func, 0, 1 );
1580 FETCH( func, *inst, 1, 0, CHAN_Y );
1581 FETCH( func, *inst, 2, 1, CHAN_Y );
1582 emit_mul( func, 1, 2 );
1583 emit_add( func, 0, 1 );
1584 FETCH( func, *inst, 1, 0, CHAN_Z );
1585 FETCH( func, *inst, 2, 1, CHAN_Z );
1586 emit_mul( func, 1, 2 );
1587 emit_add( func, 0, 1 );
1588 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1589 STORE( func, *inst, 0, 0, chan_index );
1590 }
1591 break;
1592
1593 case TGSI_OPCODE_DP4:
1594 /* TGSI_OPCODE_DOT4 */
1595 FETCH( func, *inst, 0, 0, CHAN_X );
1596 FETCH( func, *inst, 1, 1, CHAN_X );
1597 emit_mul( func, 0, 1 );
1598 FETCH( func, *inst, 1, 0, CHAN_Y );
1599 FETCH( func, *inst, 2, 1, CHAN_Y );
1600 emit_mul( func, 1, 2 );
1601 emit_add( func, 0, 1 );
1602 FETCH( func, *inst, 1, 0, CHAN_Z );
1603 FETCH( func, *inst, 2, 1, CHAN_Z );
1604 emit_mul(func, 1, 2 );
1605 emit_add(func, 0, 1 );
1606 FETCH( func, *inst, 1, 0, CHAN_W );
1607 FETCH( func, *inst, 2, 1, CHAN_W );
1608 emit_mul( func, 1, 2 );
1609 emit_add( func, 0, 1 );
1610 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1611 STORE( func, *inst, 0, 0, chan_index );
1612 }
1613 break;
1614
1615 case TGSI_OPCODE_DST:
1616 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1617 emit_tempf(
1618 func,
1619 0,
1620 TGSI_EXEC_TEMP_ONE_I,
1621 TGSI_EXEC_TEMP_ONE_C );
1622 STORE( func, *inst, 0, 0, CHAN_X );
1623 }
1624 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1625 FETCH( func, *inst, 0, 0, CHAN_Y );
1626 FETCH( func, *inst, 1, 1, CHAN_Y );
1627 emit_mul( func, 0, 1 );
1628 STORE( func, *inst, 0, 0, CHAN_Y );
1629 }
1630 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1631 FETCH( func, *inst, 0, 0, CHAN_Z );
1632 STORE( func, *inst, 0, 0, CHAN_Z );
1633 }
1634 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1635 FETCH( func, *inst, 0, 1, CHAN_W );
1636 STORE( func, *inst, 0, 0, CHAN_W );
1637 }
1638 break;
1639
1640 case TGSI_OPCODE_MIN:
1641 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1642 FETCH( func, *inst, 0, 0, chan_index );
1643 FETCH( func, *inst, 1, 1, chan_index );
1644 emit_minps(
1645 func,
1646 make_xmm( 0 ),
1647 make_xmm( 1 ) );
1648 STORE( func, *inst, 0, 0, chan_index );
1649 }
1650 break;
1651
1652 case TGSI_OPCODE_MAX:
1653 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1654 FETCH( func, *inst, 0, 0, chan_index );
1655 FETCH( func, *inst, 1, 1, chan_index );
1656 emit_maxps(
1657 func,
1658 make_xmm( 0 ),
1659 make_xmm( 1 ) );
1660 STORE( func, *inst, 0, 0, chan_index );
1661 }
1662 break;
1663
1664 case TGSI_OPCODE_SLT:
1665 /* TGSI_OPCODE_SETLT */
1666 emit_setcc( func, inst, cc_LessThan );
1667 break;
1668
1669 case TGSI_OPCODE_SGE:
1670 /* TGSI_OPCODE_SETGE */
1671 emit_setcc( func, inst, cc_NotLessThan );
1672 break;
1673
1674 case TGSI_OPCODE_MAD:
1675 /* TGSI_OPCODE_MADD */
1676 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1677 FETCH( func, *inst, 0, 0, chan_index );
1678 FETCH( func, *inst, 1, 1, chan_index );
1679 FETCH( func, *inst, 2, 2, chan_index );
1680 emit_mul( func, 0, 1 );
1681 emit_add( func, 0, 2 );
1682 STORE( func, *inst, 0, 0, chan_index );
1683 }
1684 break;
1685
1686 case TGSI_OPCODE_SUB:
1687 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1688 FETCH( func, *inst, 0, 0, chan_index );
1689 FETCH( func, *inst, 1, 1, chan_index );
1690 emit_sub( func, 0, 1 );
1691 STORE( func, *inst, 0, 0, chan_index );
1692 }
1693 break;
1694
1695 case TGSI_OPCODE_LERP:
1696 /* TGSI_OPCODE_LRP */
1697 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1698 FETCH( func, *inst, 0, 0, chan_index );
1699 FETCH( func, *inst, 1, 1, chan_index );
1700 FETCH( func, *inst, 2, 2, chan_index );
1701 emit_sub( func, 1, 2 );
1702 emit_mul( func, 0, 1 );
1703 emit_add( func, 0, 2 );
1704 STORE( func, *inst, 0, 0, chan_index );
1705 }
1706 break;
1707
1708 case TGSI_OPCODE_CND:
1709 assert( 0 );
1710 break;
1711
1712 case TGSI_OPCODE_CND0:
1713 assert( 0 );
1714 break;
1715
1716 case TGSI_OPCODE_DOT2ADD:
1717 /* TGSI_OPCODE_DP2A */
1718 assert( 0 );
1719 break;
1720
1721 case TGSI_OPCODE_INDEX:
1722 assert( 0 );
1723 break;
1724
1725 case TGSI_OPCODE_NEGATE:
1726 assert( 0 );
1727 break;
1728
1729 case TGSI_OPCODE_FRAC:
1730 /* TGSI_OPCODE_FRC */
1731 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1732 FETCH( func, *inst, 0, 0, chan_index );
1733 emit_frc( func, 0 );
1734 STORE( func, *inst, 0, 0, chan_index );
1735 }
1736 break;
1737
1738 case TGSI_OPCODE_CLAMP:
1739 assert( 0 );
1740 break;
1741
1742 case TGSI_OPCODE_FLOOR:
1743 /* TGSI_OPCODE_FLR */
1744 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1745 FETCH( func, *inst, 0, 0, chan_index );
1746 emit_flr( func, 0 );
1747 STORE( func, *inst, 0, 0, chan_index );
1748 }
1749 break;
1750
1751 case TGSI_OPCODE_ROUND:
1752 assert( 0 );
1753 break;
1754
1755 case TGSI_OPCODE_EXPBASE2:
1756 /* TGSI_OPCODE_EX2 */
1757 FETCH( func, *inst, 0, 0, CHAN_X );
1758 emit_ex2( func, 0 );
1759 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1760 STORE( func, *inst, 0, 0, chan_index );
1761 }
1762 break;
1763
1764 case TGSI_OPCODE_LOGBASE2:
1765 /* TGSI_OPCODE_LG2 */
1766 FETCH( func, *inst, 0, 0, CHAN_X );
1767 emit_lg2( func, 0 );
1768 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1769 STORE( func, *inst, 0, 0, chan_index );
1770 }
1771 break;
1772
1773 case TGSI_OPCODE_POWER:
1774 /* TGSI_OPCODE_POW */
1775 FETCH( func, *inst, 0, 0, CHAN_X );
1776 FETCH( func, *inst, 1, 1, CHAN_X );
1777 emit_pow( func, 0, 1 );
1778 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1779 STORE( func, *inst, 0, 0, chan_index );
1780 }
1781 break;
1782
1783 case TGSI_OPCODE_CROSSPRODUCT:
1784 /* TGSI_OPCODE_XPD */
1785 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
1786 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
1787 FETCH( func, *inst, 1, 1, CHAN_Z );
1788 FETCH( func, *inst, 3, 0, CHAN_Z );
1789 }
1790 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
1791 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1792 FETCH( func, *inst, 0, 0, CHAN_Y );
1793 FETCH( func, *inst, 4, 1, CHAN_Y );
1794 }
1795 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
1796 emit_MOV( func, 2, 0 );
1797 emit_mul( func, 2, 1 );
1798 emit_MOV( func, 5, 3 );
1799 emit_mul( func, 5, 4 );
1800 emit_sub( func, 2, 5 );
1801 STORE( func, *inst, 2, 0, CHAN_X );
1802 }
1803 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
1804 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
1805 FETCH( func, *inst, 2, 1, CHAN_X );
1806 FETCH( func, *inst, 5, 0, CHAN_X );
1807 }
1808 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
1809 emit_mul( func, 3, 2 );
1810 emit_mul( func, 1, 5 );
1811 emit_sub( func, 3, 1 );
1812 STORE( func, *inst, 3, 0, CHAN_Y );
1813 }
1814 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
1815 emit_mul( func, 5, 4 );
1816 emit_mul( func, 0, 2 );
1817 emit_sub( func, 5, 0 );
1818 STORE( func, *inst, 5, 0, CHAN_Z );
1819 }
1820 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
1821 FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
1822 STORE( func, *inst, 0, 0, CHAN_W );
1823 }
1824 break;
1825
1826 case TGSI_OPCODE_MULTIPLYMATRIX:
1827 assert( 0 );
1828 break;
1829
1830 case TGSI_OPCODE_ABS:
1831 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1832 FETCH( func, *inst, 0, 0, chan_index );
1833 emit_abs( func, 0) ;
1834
1835 STORE( func, *inst, 0, 0, chan_index );
1836 }
1837 break;
1838
1839 case TGSI_OPCODE_RCC:
1840 assert( 0 );
1841 break;
1842
1843 case TGSI_OPCODE_DPH:
1844 FETCH( func, *inst, 0, 0, CHAN_X );
1845 FETCH( func, *inst, 1, 1, CHAN_X );
1846 emit_mul( func, 0, 1 );
1847 FETCH( func, *inst, 1, 0, CHAN_Y );
1848 FETCH( func, *inst, 2, 1, CHAN_Y );
1849 emit_mul( func, 1, 2 );
1850 emit_add( func, 0, 1 );
1851 FETCH( func, *inst, 1, 0, CHAN_Z );
1852 FETCH( func, *inst, 2, 1, CHAN_Z );
1853 emit_mul( func, 1, 2 );
1854 emit_add( func, 0, 1 );
1855 FETCH( func, *inst, 1, 1, CHAN_W );
1856 emit_add( func, 0, 1 );
1857 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1858 STORE( func, *inst, 0, 0, chan_index );
1859 }
1860 break;
1861
1862 case TGSI_OPCODE_COS:
1863 FETCH( func, *inst, 0, 0, CHAN_X );
1864 emit_cos( func, 0 );
1865 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1866 STORE( func, *inst, 0, 0, chan_index );
1867 }
1868 break;
1869
1870 case TGSI_OPCODE_DDX:
1871 assert( 0 );
1872 break;
1873
1874 case TGSI_OPCODE_DDY:
1875 assert( 0 );
1876 break;
1877
1878 case TGSI_OPCODE_KIL:
1879 emit_kil( func, &inst->FullSrcRegisters[0] );
1880 break;
1881
1882 case TGSI_OPCODE_PK2H:
1883 assert( 0 );
1884 break;
1885
1886 case TGSI_OPCODE_PK2US:
1887 assert( 0 );
1888 break;
1889
1890 case TGSI_OPCODE_PK4B:
1891 assert( 0 );
1892 break;
1893
1894 case TGSI_OPCODE_PK4UB:
1895 assert( 0 );
1896 break;
1897
1898 case TGSI_OPCODE_RFL:
1899 assert( 0 );
1900 break;
1901
1902 case TGSI_OPCODE_SEQ:
1903 assert( 0 );
1904 break;
1905
1906 case TGSI_OPCODE_SFL:
1907 assert( 0 );
1908 break;
1909
1910 case TGSI_OPCODE_SGT:
1911 assert( 0 );
1912 break;
1913
1914 case TGSI_OPCODE_SIN:
1915 FETCH( func, *inst, 0, 0, CHAN_X );
1916 emit_sin( func, 0 );
1917 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1918 STORE( func, *inst, 0, 0, chan_index );
1919 }
1920 break;
1921
1922 case TGSI_OPCODE_SLE:
1923 assert( 0 );
1924 break;
1925
1926 case TGSI_OPCODE_SNE:
1927 assert( 0 );
1928 break;
1929
1930 case TGSI_OPCODE_STR:
1931 assert( 0 );
1932 break;
1933
1934 case TGSI_OPCODE_TEX:
1935 emit_tempf(
1936 func,
1937 0,
1938 TGSI_EXEC_TEMP_ONE_I,
1939 TGSI_EXEC_TEMP_ONE_C );
1940 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
1941 STORE( func, *inst, 0, 0, chan_index );
1942 }
1943 break;
1944
1945 case TGSI_OPCODE_TXD:
1946 assert( 0 );
1947 break;
1948
1949 case TGSI_OPCODE_UP2H:
1950 assert( 0 );
1951 break;
1952
1953 case TGSI_OPCODE_UP2US:
1954 assert( 0 );
1955 break;
1956
1957 case TGSI_OPCODE_UP4B:
1958 assert( 0 );
1959 break;
1960
1961 case TGSI_OPCODE_UP4UB:
1962 assert( 0 );
1963 break;
1964
1965 case TGSI_OPCODE_X2D:
1966 assert( 0 );
1967 break;
1968
1969 case TGSI_OPCODE_ARA:
1970 assert( 0 );
1971 break;
1972
1973 case TGSI_OPCODE_ARR:
1974 assert( 0 );
1975 break;
1976
1977 case TGSI_OPCODE_BRA:
1978 assert( 0 );
1979 break;
1980
1981 case TGSI_OPCODE_CAL:
1982 assert( 0 );
1983 break;
1984
1985 case TGSI_OPCODE_RET:
1986 case TGSI_OPCODE_END:
1987 #ifdef WIN32
1988 emit_retw( func, 16 );
1989 #else
1990 emit_ret( func );
1991 #endif
1992 break;
1993
1994 case TGSI_OPCODE_SSG:
1995 assert( 0 );
1996 break;
1997
1998 case TGSI_OPCODE_CMP:
1999 emit_cmp (func, inst);
2000 break;
2001
2002 case TGSI_OPCODE_SCS:
2003 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
2004 FETCH( func, *inst, 0, 0, CHAN_X );
2005 emit_cos( func, 0 );
2006 STORE( func, *inst, 0, 0, CHAN_X );
2007 }
2008 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
2009 FETCH( func, *inst, 0, 0, CHAN_Y );
2010 emit_sin( func, 0 );
2011 STORE( func, *inst, 0, 0, CHAN_Y );
2012 }
2013 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
2014 FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C );
2015 STORE( func, *inst, 0, 0, CHAN_Z );
2016 }
2017 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
2018 FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
2019 STORE( func, *inst, 0, 0, CHAN_W );
2020 }
2021 break;
2022
2023 case TGSI_OPCODE_TXB:
2024 assert( 0 );
2025 break;
2026
2027 case TGSI_OPCODE_NRM:
2028 assert( 0 );
2029 break;
2030
2031 case TGSI_OPCODE_DIV:
2032 assert( 0 );
2033 break;
2034
2035 case TGSI_OPCODE_DP2:
2036 assert( 0 );
2037 break;
2038
2039 case TGSI_OPCODE_TXL:
2040 assert( 0 );
2041 break;
2042
2043 case TGSI_OPCODE_BRK:
2044 assert( 0 );
2045 break;
2046
2047 case TGSI_OPCODE_IF:
2048 assert( 0 );
2049 break;
2050
2051 case TGSI_OPCODE_LOOP:
2052 assert( 0 );
2053 break;
2054
2055 case TGSI_OPCODE_REP:
2056 assert( 0 );
2057 break;
2058
2059 case TGSI_OPCODE_ELSE:
2060 assert( 0 );
2061 break;
2062
2063 case TGSI_OPCODE_ENDIF:
2064 assert( 0 );
2065 break;
2066
2067 case TGSI_OPCODE_ENDLOOP:
2068 assert( 0 );
2069 break;
2070
2071 case TGSI_OPCODE_ENDREP:
2072 assert( 0 );
2073 break;
2074
2075 case TGSI_OPCODE_PUSHA:
2076 assert( 0 );
2077 break;
2078
2079 case TGSI_OPCODE_POPA:
2080 assert( 0 );
2081 break;
2082
2083 case TGSI_OPCODE_CEIL:
2084 assert( 0 );
2085 break;
2086
2087 case TGSI_OPCODE_I2F:
2088 assert( 0 );
2089 break;
2090
2091 case TGSI_OPCODE_NOT:
2092 assert( 0 );
2093 break;
2094
2095 case TGSI_OPCODE_TRUNC:
2096 assert( 0 );
2097 break;
2098
2099 case TGSI_OPCODE_SHL:
2100 assert( 0 );
2101 break;
2102
2103 case TGSI_OPCODE_SHR:
2104 assert( 0 );
2105 break;
2106
2107 case TGSI_OPCODE_AND:
2108 assert( 0 );
2109 break;
2110
2111 case TGSI_OPCODE_OR:
2112 assert( 0 );
2113 break;
2114
2115 case TGSI_OPCODE_MOD:
2116 assert( 0 );
2117 break;
2118
2119 case TGSI_OPCODE_XOR:
2120 assert( 0 );
2121 break;
2122
2123 case TGSI_OPCODE_SAD:
2124 assert( 0 );
2125 break;
2126
2127 case TGSI_OPCODE_TXF:
2128 assert( 0 );
2129 break;
2130
2131 case TGSI_OPCODE_TXQ:
2132 assert( 0 );
2133 break;
2134
2135 case TGSI_OPCODE_CONT:
2136 assert( 0 );
2137 break;
2138
2139 case TGSI_OPCODE_EMIT:
2140 assert( 0 );
2141 break;
2142
2143 case TGSI_OPCODE_ENDPRIM:
2144 assert( 0 );
2145 break;
2146
2147 default:
2148 assert( 0 );
2149 }
2150 }
2151
2152 static void
2153 emit_declaration(
2154 struct x86_function *func,
2155 struct tgsi_full_declaration *decl )
2156 {
2157 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
2158 unsigned first, last, mask;
2159 unsigned i, j;
2160
2161 assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
2162
2163 first = decl->u.DeclarationRange.First;
2164 last = decl->u.DeclarationRange.Last;
2165 mask = decl->Declaration.UsageMask;
2166
2167 /* Do not touch WPOS.xy */
2168 if( first == 0 ) {
2169 mask &= ~TGSI_WRITEMASK_XY;
2170 if( mask == TGSI_WRITEMASK_NONE ) {
2171 first++;
2172 }
2173 }
2174
2175 for( i = first; i <= last; i++ ) {
2176 for( j = 0; j < NUM_CHANNELS; j++ ) {
2177 if( mask & (1 << j) ) {
2178 switch( decl->Interpolation.Interpolate ) {
2179 case TGSI_INTERPOLATE_CONSTANT:
2180 emit_coef_a0( func, 0, i, j );
2181 emit_inputs( func, 0, i, j );
2182 break;
2183
2184 case TGSI_INTERPOLATE_LINEAR:
2185 emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
2186 emit_coef_dadx( func, 1, i, j );
2187 emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
2188 emit_coef_dady( func, 3, i, j );
2189 emit_mul( func, 0, 1 ); /* x * dadx */
2190 emit_coef_a0( func, 4, i, j );
2191 emit_mul( func, 2, 3 ); /* y * dady */
2192 emit_add( func, 0, 4 ); /* x * dadx + a0 */
2193 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
2194 emit_inputs( func, 0, i, j );
2195 break;
2196
2197 case TGSI_INTERPOLATE_PERSPECTIVE:
2198 emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
2199 emit_coef_dadx( func, 1, i, j );
2200 emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
2201 emit_coef_dady( func, 3, i, j );
2202 emit_mul( func, 0, 1 ); /* x * dadx */
2203 emit_inputf( func, 4, 0, TGSI_SWIZZLE_W );
2204 emit_coef_a0( func, 5, i, j );
2205 emit_rcp( func, 4, 4 ); /* 1.0 / w */
2206 emit_mul( func, 2, 3 ); /* y * dady */
2207 emit_add( func, 0, 5 ); /* x * dadx + a0 */
2208 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
2209 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
2210 emit_inputs( func, 0, i, j );
2211 break;
2212
2213 default:
2214 assert( 0 );
2215 }
2216 }
2217 }
2218 }
2219 }
2220 }
2221
2222 unsigned
2223 tgsi_emit_sse2(
2224 struct tgsi_token *tokens,
2225 struct x86_function *func )
2226 {
2227 struct tgsi_parse_context parse;
2228
2229 DUMP_START();
2230
2231 func->csr = func->store;
2232
2233 emit_mov(
2234 func,
2235 get_input_base(),
2236 get_argument( 0 ) );
2237 emit_mov(
2238 func,
2239 get_output_base(),
2240 get_argument( 1 ) );
2241 emit_mov(
2242 func,
2243 get_const_base(),
2244 get_argument( 2 ) );
2245 emit_mov(
2246 func,
2247 get_temp_base(),
2248 get_argument( 3 ) );
2249
2250 tgsi_parse_init( &parse, tokens );
2251
2252 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2253 tgsi_parse_token( &parse );
2254
2255 switch( parse.FullToken.Token.Type ) {
2256 case TGSI_TOKEN_TYPE_DECLARATION:
2257 break;
2258
2259 case TGSI_TOKEN_TYPE_INSTRUCTION:
2260 emit_instruction(
2261 func,
2262 &parse.FullToken.FullInstruction );
2263 break;
2264
2265 case TGSI_TOKEN_TYPE_IMMEDIATE:
2266 /* XXX implement this */
2267 return 0;
2268
2269 default:
2270 assert( 0 );
2271 }
2272 }
2273
2274 tgsi_parse_free( &parse );
2275
2276 DUMP_END();
2277
2278 return 1;
2279 }
2280
2281 /**
2282 * Fragment shaders are responsible for interpolating shader inputs. Because on
2283 * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
2284 * output, const, temp and coef), the code is split into two phases --
2285 * DECLARATION and INSTRUCTION phase.
2286 * GP register holding the output argument is aliased with the coeff argument,
2287 * as outputs are not needed in the DECLARATION phase.
2288 */
2289 unsigned
2290 tgsi_emit_sse2_fs(
2291 struct tgsi_token *tokens,
2292 struct x86_function *func )
2293 {
2294 struct tgsi_parse_context parse;
2295 boolean instruction_phase = FALSE;
2296
2297 DUMP_START();
2298
2299 func->csr = func->store;
2300
2301 /* DECLARATION phase, do not load output argument. */
2302 emit_mov(
2303 func,
2304 get_input_base(),
2305 get_argument( 0 ) );
2306 emit_mov(
2307 func,
2308 get_const_base(),
2309 get_argument( 2 ) );
2310 emit_mov(
2311 func,
2312 get_temp_base(),
2313 get_argument( 3 ) );
2314 emit_mov(
2315 func,
2316 get_coef_base(),
2317 get_argument( 4 ) );
2318
2319 tgsi_parse_init( &parse, tokens );
2320
2321 while( !tgsi_parse_end_of_tokens( &parse ) ) {
2322 tgsi_parse_token( &parse );
2323
2324 switch( parse.FullToken.Token.Type ) {
2325 case TGSI_TOKEN_TYPE_DECLARATION:
2326 emit_declaration(
2327 func,
2328 &parse.FullToken.FullDeclaration );
2329 break;
2330
2331 case TGSI_TOKEN_TYPE_INSTRUCTION:
2332 if( !instruction_phase ) {
2333 /* INSTRUCTION phase, overwrite coeff with output. */
2334 instruction_phase = TRUE;
2335 emit_mov(
2336 func,
2337 get_output_base(),
2338 get_argument( 1 ) );
2339 }
2340 emit_instruction(
2341 func,
2342 &parse.FullToken.FullInstruction );
2343 break;
2344
2345 case TGSI_TOKEN_TYPE_IMMEDIATE:
2346 /* XXX implement this */
2347 assert(0);
2348 break;
2349
2350 default:
2351 assert( 0 );
2352 }
2353 }
2354
2355 tgsi_parse_free( &parse );
2356
2357 DUMP_END();
2358
2359 return 1;
2360 }
2361
2362 #endif /* i386 */