1 #include "tgsi_platform.h"
3 #include "x86/rtasm/x86sse.h"
/* Iterate CHAN over the four channel indices 0, 1, 2, 3.
 * Expands to a bare `for` header with no braces, so the statement or
 * block written after the macro invocation becomes the loop body.
 * CHAN must be an already-declared, assignable integer variable. */
5 #define FOR_EACH_CHANNEL( CHAN )\
6 for( CHAN = 0; CHAN < 4; CHAN++ )
/* Non-zero when bit CHAN is set in the write mask of INST's first
 * destination register (FullDstRegisters[0]); zero otherwise.
 * INST is a struct expression (not a pointer) -- callers pass `*inst`.
 * The result is the masked bit itself, not a normalized 0/1 value. */
8 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
9 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
/* Statement-prefix form of IS_DST0_CHANNEL_ENABLED: expands to a bare
 * `if( ... )` header, so the statement or block that follows executes only
 * when channel CHAN of destination 0 is write-enabled.
 * NOTE: because no braces are supplied, an `else` written after the guarded
 * statement would bind to this hidden `if`. */
11 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
12 if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
/* Loop over channels 0..3 and run the following statement or block only for
 * channels enabled in destination 0's write mask. Built by chaining
 * FOR_EACH_CHANNEL with IF_IS_DST0_CHANNEL_ENABLED, i.e. it expands to
 * `for( ... ) if( ... )` with the caller's statement as the `if` body. */
14 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
15 FOR_EACH_CHANNEL( CHAN )\
16 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
/* Short alias for TGSI_EXEC_TEMP_R0. Used in this file as the temp-register
 * index passed to get_temp() and as the base (TEMP_R0 * 16) for indexing
 * `store` in the math helper routines. */
23 #define TEMP_R0 TGSI_EXEC_TEMP_R0
26 * X86 utility functions.
35 (enum x86_reg_name
) xmm
);
39 * X86 register mapping helpers.
43 get_const_base( void )
51 get_input_base( void )
59 get_output_base( void )
77 return get_output_base();
81 * Data access helpers.
89 x86_make_reg( file_REG32
, reg_SP
),
100 (vec
* 4 + chan
) * 4 );
103 static struct x86_reg
108 return x86_make_disp(
110 (vec
* 4 + chan
) * 16 );
113 static struct x86_reg
118 return x86_make_disp(
120 (vec
* 4 + chan
) * 16 );
123 static struct x86_reg
128 return x86_make_disp(
130 (vec
* 4 + chan
) * 16 );
133 static struct x86_reg
139 return x86_make_disp(
141 ((vec
* 3 + member
) * 4 + chan
) * 4 );
145 * Data fetch helpers.
150 struct x86_function
*func
,
158 get_const( vec
, chan
) );
163 SHUF( 0, 0, 0, 0 ) );
168 struct x86_function
*func
,
176 get_input( vec
, chan
) );
181 struct x86_function
*func
,
188 get_output( vec
, chan
),
194 struct x86_function
*func
,
202 get_temp( vec
, chan
) );
207 struct x86_function
*func
,
216 get_coef( vec
, chan
, member
) );
221 SHUF( 0, 0, 0, 0 ) );
225 * Data store helpers.
230 struct x86_function
*func
,
237 get_input( vec
, chan
),
243 struct x86_function
*func
,
250 get_temp( vec
, chan
),
256 struct x86_function
*func
,
264 vec
+ TGSI_EXEC_NUM_TEMPS
,
269 * Coefficient fetch helpers.
274 struct x86_function
*func
,
289 struct x86_function
*func
,
304 struct x86_function
*func
,
318 * Function call helpers.
323 struct x86_function
*func
)
335 /* It is important on non-win32 platforms that temp base is pushed last.
344 struct x86_function
*func
)
346 /* Restore GP registers in reverse order.
364 struct x86_function
*func
,
370 get_temp( TEMP_R0
, 0 ),
371 make_xmm( xmm_dst
) );
379 get_temp( TEMP_R0
, 0 ) );
392 get_temp( TEMP_R0
, 0 ) );
396 emit_func_call_dst_src(
397 struct x86_function
*func
,
404 get_temp( TEMP_R0
, 1 ),
405 make_xmm( xmm_src
) );
414 * Low-level instruction translators.
419 struct x86_function
*func
,
426 TGSI_EXEC_TEMP_7FFFFFFF_I
,
427 TGSI_EXEC_TEMP_7FFFFFFF_C
) );
432 struct x86_function
*func
,
439 make_xmm( xmm_src
) );
447 store
[0] = (float) cos( (double) store
[0] );
448 store
[1] = (float) cos( (double) store
[1] );
449 store
[2] = (float) cos( (double) store
[2] );
450 store
[3] = (float) cos( (double) store
[3] );
452 const unsigned X
= TEMP_R0
* 16;
453 store
[X
+ 0] = cosf( store
[X
+ 0] );
454 store
[X
+ 1] = cosf( store
[X
+ 1] );
455 store
[X
+ 2] = cosf( store
[X
+ 2] );
456 store
[X
+ 3] = cosf( store
[X
+ 3] );
462 struct x86_function
*func
,
476 store
[0] = (float) pow( 2.0, (double) store
[0] );
477 store
[1] = (float) pow( 2.0, (double) store
[1] );
478 store
[2] = (float) pow( 2.0, (double) store
[2] );
479 store
[3] = (float) pow( 2.0, (double) store
[3] );
481 const unsigned X
= TEMP_R0
* 16;
482 store
[X
+ 0] = powf( 2.0f
, store
[X
+ 0] );
483 store
[X
+ 1] = powf( 2.0f
, store
[X
+ 1] );
484 store
[X
+ 2] = powf( 2.0f
, store
[X
+ 2] );
485 store
[X
+ 3] = powf( 2.0f
, store
[X
+ 3] );
491 struct x86_function
*func
,
502 struct x86_function
*func
,
516 const unsigned X
= 0;
518 const unsigned X
= TEMP_R0
* 16;
520 store
[X
+ 0] = (float) floor( (double) store
[X
+ 0] );
521 store
[X
+ 1] = (float) floor( (double) store
[X
+ 1] );
522 store
[X
+ 2] = (float) floor( (double) store
[X
+ 2] );
523 store
[X
+ 3] = (float) floor( (double) store
[X
+ 3] );
528 struct x86_function
*func
,
542 const unsigned X
= 0;
544 const unsigned X
= TEMP_R0
* 16;
546 store
[X
+ 0] -= (float) floor( (double) store
[X
+ 0] );
547 store
[X
+ 1] -= (float) floor( (double) store
[X
+ 1] );
548 store
[X
+ 2] -= (float) floor( (double) store
[X
+ 2] );
549 store
[X
+ 3] -= (float) floor( (double) store
[X
+ 3] );
554 struct x86_function
*func
,
568 const unsigned X
= 0;
570 const unsigned X
= TEMP_R0
* 16;
572 store
[X
+ 0] = LOG2( store
[X
+ 0] );
573 store
[X
+ 1] = LOG2( store
[X
+ 1] );
574 store
[X
+ 2] = LOG2( store
[X
+ 2] );
575 store
[X
+ 3] = LOG2( store
[X
+ 3] );
580 struct x86_function
*func
,
591 struct x86_function
*func
,
598 make_xmm( xmm_src
) );
602 emit_mul (struct x86_function
*func
,
609 make_xmm( xmm_src
) );
614 struct x86_function
*func
,
621 TGSI_EXEC_TEMP_80000000_I
,
622 TGSI_EXEC_TEMP_80000000_C
) );
630 store
[0] = (float) pow( (double) store
[0], (double) store
[4] );
631 store
[1] = (float) pow( (double) store
[1], (double) store
[5] );
632 store
[2] = (float) pow( (double) store
[2], (double) store
[6] );
633 store
[3] = (float) pow( (double) store
[3], (double) store
[7] );
635 const unsigned X
= TEMP_R0
* 16;
636 store
[X
+ 0] = powf( store
[X
+ 0], store
[X
+ 4] );
637 store
[X
+ 1] = powf( store
[X
+ 1], store
[X
+ 5] );
638 store
[X
+ 2] = powf( store
[X
+ 2], store
[X
+ 6] );
639 store
[X
+ 3] = powf( store
[X
+ 3], store
[X
+ 7] );
645 struct x86_function
*func
,
649 emit_func_call_dst_src(
658 struct x86_function
*func
,
665 make_xmm( xmm_src
) );
670 struct x86_function
*func
,
677 make_xmm( xmm_src
) );
682 struct x86_function
*func
,
689 TGSI_EXEC_TEMP_80000000_I
,
690 TGSI_EXEC_TEMP_80000000_C
) );
698 store
[0] = (float) sin( (double) store
[0] );
699 store
[1] = (float) sin( (double) store
[1] );
700 store
[2] = (float) sin( (double) store
[2] );
701 store
[3] = (float) sin( (double) store
[3] );
703 const unsigned X
= TEMP_R0
* 16;
704 store
[X
+ 0] = sinf( store
[X
+ 0] );
705 store
[X
+ 1] = sinf( store
[X
+ 1] );
706 store
[X
+ 2] = sinf( store
[X
+ 2] );
707 store
[X
+ 3] = sinf( store
[X
+ 3] );
712 emit_sin (struct x86_function
*func
,
723 struct x86_function
*func
,
730 make_xmm( xmm_src
) );
739 struct x86_function
*func
,
741 const struct tgsi_full_src_register
*reg
,
742 const unsigned chan_index
)
744 unsigned swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
747 case TGSI_EXTSWIZZLE_X
:
748 case TGSI_EXTSWIZZLE_Y
:
749 case TGSI_EXTSWIZZLE_Z
:
750 case TGSI_EXTSWIZZLE_W
:
751 switch( reg
->SrcRegister
.File
) {
752 case TGSI_FILE_CONSTANT
:
756 reg
->SrcRegister
.Index
,
760 case TGSI_FILE_INPUT
:
764 reg
->SrcRegister
.Index
,
768 case TGSI_FILE_TEMPORARY
:
772 reg
->SrcRegister
.Index
,
781 case TGSI_EXTSWIZZLE_ZERO
:
785 TGSI_EXEC_TEMP_00000000_I
,
786 TGSI_EXEC_TEMP_00000000_C
);
789 case TGSI_EXTSWIZZLE_ONE
:
793 TGSI_EXEC_TEMP_ONE_I
,
794 TGSI_EXEC_TEMP_ONE_C
);
801 switch( tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
) ) {
802 case TGSI_UTIL_SIGN_CLEAR
:
803 emit_abs( func
, xmm
);
806 case TGSI_UTIL_SIGN_SET
:
807 emit_setsign( func
, xmm
);
810 case TGSI_UTIL_SIGN_TOGGLE
:
811 emit_neg( func
, xmm
);
814 case TGSI_UTIL_SIGN_KEEP
:
/* Convenience wrapper around emit_fetch(): fetches channel CHAN of INST's
 * source operand number INDEX, passing XMM through as emit_fetch's second
 * argument. INST must be an lvalue struct (callers pass `*inst`) because
 * the address of its FullSrcRegisters[INDEX] element is taken. */
819 #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
820 emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
828 struct x86_function
*func
,
830 const struct tgsi_full_dst_register
*reg
,
831 const struct tgsi_full_instruction
*inst
,
832 unsigned chan_index
)
834 switch( reg
->DstRegister
.File
) {
835 case TGSI_FILE_OUTPUT
:
839 reg
->DstRegister
.Index
,
843 case TGSI_FILE_TEMPORARY
:
847 reg
->DstRegister
.Index
,
851 case TGSI_FILE_ADDRESS
:
855 reg
->DstRegister
.Index
,
863 switch( inst
->Instruction
.Saturate
) {
867 case TGSI_SAT_ZERO_ONE
:
871 case TGSI_SAT_MINUS_PLUS_ONE
:
/* Convenience wrapper around emit_store(): stores to channel CHAN of INST's
 * destination operand number INDEX, passing XMM through as emit_store's
 * second argument. The instruction itself is also passed (by address) to
 * emit_store. INST must be an lvalue struct (callers pass `*inst`) because
 * its address is taken. */
877 #define STORE( FUNC, INST, XMM, INDEX, CHAN )\
878 emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
881 * High-level instruction translators.
886 struct x86_function
*func
,
887 const struct tgsi_full_src_register
*reg
)
890 unsigned registers
[4];
891 unsigned nextregister
= 0;
892 unsigned firstchan
= ~0;
895 /* This mask stores component bits that were already tested. Note that
896 * we test if the value is less than zero, so 1.0 and 0.0 need not be
898 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
900 FOR_EACH_CHANNEL( chan_index
) {
903 /* unswizzle channel */
904 swizzle
= tgsi_util_get_full_src_register_extswizzle(
908 /* check if the component has not been already tested */
909 if( !(uniquemask
& (1 << swizzle
)) ) {
910 uniquemask
|= 1 << swizzle
;
912 /* allocate register */
913 registers
[chan_index
] = nextregister
;
921 /* mark the first channel used */
922 if( firstchan
== ~0 ) {
923 firstchan
= chan_index
;
930 x86_make_reg( file_REG32
, reg_AX
) );
933 x86_make_reg( file_REG32
, reg_DX
) );
935 FOR_EACH_CHANNEL( chan_index
) {
936 if( uniquemask
& (1 << chan_index
) ) {
939 make_xmm( registers
[chan_index
] ),
941 TGSI_EXEC_TEMP_00000000_I
,
942 TGSI_EXEC_TEMP_00000000_C
),
945 if( chan_index
== firstchan
) {
948 x86_make_reg( file_REG32
, reg_AX
),
949 make_xmm( registers
[chan_index
] ) );
954 x86_make_reg( file_REG32
, reg_DX
),
955 make_xmm( registers
[chan_index
] ) );
958 x86_make_reg( file_REG32
, reg_AX
),
959 x86_make_reg( file_REG32
, reg_DX
) );
967 TGSI_EXEC_TEMP_KILMASK_I
,
968 TGSI_EXEC_TEMP_KILMASK_C
),
969 x86_make_reg( file_REG32
, reg_AX
) );
973 x86_make_reg( file_REG32
, reg_DX
) );
976 x86_make_reg( file_REG32
, reg_AX
) );
981 struct x86_function
*func
,
982 struct tgsi_full_instruction
*inst
,
987 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
988 FETCH( func
, *inst
, 0, 0, chan_index
);
989 FETCH( func
, *inst
, 1, 1, chan_index
);
999 TGSI_EXEC_TEMP_ONE_I
,
1000 TGSI_EXEC_TEMP_ONE_C
) );
1001 STORE( func
, *inst
, 0, 0, chan_index
);
1007 struct x86_function
*func
,
1008 struct tgsi_full_instruction
*inst
)
1010 unsigned chan_index
;
1012 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1013 FETCH( func
, *inst
, 0, 0, chan_index
);
1014 FETCH( func
, *inst
, 1, 1, chan_index
);
1015 FETCH( func
, *inst
, 2, 2, chan_index
);
1020 TGSI_EXEC_TEMP_00000000_I
,
1021 TGSI_EXEC_TEMP_00000000_C
),
1035 STORE( func
, *inst
, 0, 0, chan_index
);
1041 struct x86_function
*func
,
1042 struct tgsi_full_instruction
*inst
)
1044 unsigned chan_index
;
1046 switch( inst
->Instruction
.Opcode
) {
1047 case TGSI_OPCODE_ARL
:
1048 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1049 FETCH( func
, *inst
, 0, 0, chan_index
);
1050 emit_f2it( func
, 0 );
1051 STORE( func
, *inst
, 0, 0, chan_index
);
1055 case TGSI_OPCODE_MOV
:
1056 /* TGSI_OPCODE_SWZ */
1057 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1058 FETCH( func
, *inst
, 0, 0, chan_index
);
1059 STORE( func
, *inst
, 0, 0, chan_index
);
1063 case TGSI_OPCODE_LIT
:
1064 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1065 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
1069 TGSI_EXEC_TEMP_ONE_I
,
1070 TGSI_EXEC_TEMP_ONE_C
);
1071 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
1072 STORE( func
, *inst
, 0, 0, CHAN_X
);
1074 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
1075 STORE( func
, *inst
, 0, 0, CHAN_W
);
1078 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ||
1079 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1080 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1081 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1086 TGSI_EXEC_TEMP_00000000_I
,
1087 TGSI_EXEC_TEMP_00000000_C
) );
1088 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1090 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1091 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1096 TGSI_EXEC_TEMP_00000000_I
,
1097 TGSI_EXEC_TEMP_00000000_C
) );
1098 FETCH( func
, *inst
, 2, 0, CHAN_W
);
1103 TGSI_EXEC_TEMP_128_I
,
1104 TGSI_EXEC_TEMP_128_C
) );
1109 TGSI_EXEC_TEMP_MINUS_128_I
,
1110 TGSI_EXEC_TEMP_MINUS_128_C
) );
1111 emit_pow( func
, 1, 2 );
1112 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1126 STORE( func
, *inst
, 2, 0, CHAN_Z
);
1131 case TGSI_OPCODE_RCP
:
1132 /* TGSI_OPCODE_RECIP */
1133 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1134 emit_rcp( func
, 0, 0 );
1135 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1136 STORE( func
, *inst
, 0, 0, chan_index
);
1140 case TGSI_OPCODE_RSQ
:
1141 /* TGSI_OPCODE_RECIPSQRT */
1142 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1143 emit_rsqrt( func
, 0, 0 );
1144 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1145 STORE( func
, *inst
, 0, 0, chan_index
);
1149 case TGSI_OPCODE_EXP
:
1153 case TGSI_OPCODE_LOG
:
1157 case TGSI_OPCODE_MUL
:
1158 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1159 FETCH( func
, *inst
, 0, 0, chan_index
);
1160 FETCH( func
, *inst
, 1, 1, chan_index
);
1161 emit_mul( func
, 0, 1 );
1162 STORE( func
, *inst
, 0, 0, chan_index
);
1166 case TGSI_OPCODE_ADD
:
1167 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1168 FETCH( func
, *inst
, 0, 0, chan_index
);
1169 FETCH( func
, *inst
, 1, 1, chan_index
);
1170 emit_add( func
, 0, 1 );
1171 STORE( func
, *inst
, 0, 0, chan_index
);
1175 case TGSI_OPCODE_DP3
:
1176 /* TGSI_OPCODE_DOT3 */
1177 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1178 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1179 emit_mul( func
, 0, 1 );
1180 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1181 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1182 emit_mul( func
, 1, 2 );
1183 emit_add( func
, 0, 1 );
1184 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1185 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1186 emit_mul( func
, 1, 2 );
1187 emit_add( func
, 0, 1 );
1188 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1189 STORE( func
, *inst
, 0, 0, chan_index
);
1193 case TGSI_OPCODE_DP4
:
1194 /* TGSI_OPCODE_DOT4 */
1195 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1196 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1197 emit_mul( func
, 0, 1 );
1198 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1199 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1200 emit_mul( func
, 1, 2 );
1201 emit_add( func
, 0, 1 );
1202 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1203 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1204 emit_mul(func
, 1, 2 );
1205 emit_add(func
, 0, 1 );
1206 FETCH( func
, *inst
, 1, 0, CHAN_W
);
1207 FETCH( func
, *inst
, 2, 1, CHAN_W
);
1208 emit_mul( func
, 1, 2 );
1209 emit_add( func
, 0, 1 );
1210 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1211 STORE( func
, *inst
, 0, 0, chan_index
);
1215 case TGSI_OPCODE_DST
:
1216 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1220 TGSI_EXEC_TEMP_ONE_I
,
1221 TGSI_EXEC_TEMP_ONE_C
);
1222 STORE( func
, *inst
, 0, 0, CHAN_X
);
1224 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1225 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1226 FETCH( func
, *inst
, 1, 1, CHAN_Y
);
1227 emit_mul( func
, 0, 1 );
1228 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1230 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1231 FETCH( func
, *inst
, 0, 0, CHAN_Z
);
1232 STORE( func
, *inst
, 0, 0, CHAN_Z
);
1234 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1235 FETCH( func
, *inst
, 0, 1, CHAN_W
);
1236 STORE( func
, *inst
, 0, 0, CHAN_W
);
1240 case TGSI_OPCODE_MIN
:
1241 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1242 FETCH( func
, *inst
, 0, 0, chan_index
);
1243 FETCH( func
, *inst
, 1, 1, chan_index
);
1248 STORE( func
, *inst
, 0, 0, chan_index
);
1252 case TGSI_OPCODE_MAX
:
1253 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1254 FETCH( func
, *inst
, 0, 0, chan_index
);
1255 FETCH( func
, *inst
, 1, 1, chan_index
);
1260 STORE( func
, *inst
, 0, 0, chan_index
);
1264 case TGSI_OPCODE_SLT
:
1265 /* TGSI_OPCODE_SETLT */
1266 emit_setcc( func
, inst
, cc_LessThan
);
1269 case TGSI_OPCODE_SGE
:
1270 /* TGSI_OPCODE_SETGE */
1271 emit_setcc( func
, inst
, cc_NotLessThan
);
1274 case TGSI_OPCODE_MAD
:
1275 /* TGSI_OPCODE_MADD */
1276 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1277 FETCH( func
, *inst
, 0, 0, chan_index
);
1278 FETCH( func
, *inst
, 1, 1, chan_index
);
1279 FETCH( func
, *inst
, 2, 2, chan_index
);
1280 emit_mul( func
, 0, 1 );
1281 emit_add( func
, 0, 2 );
1282 STORE( func
, *inst
, 0, 0, chan_index
);
1286 case TGSI_OPCODE_SUB
:
1287 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1288 FETCH( func
, *inst
, 0, 0, chan_index
);
1289 FETCH( func
, *inst
, 1, 1, chan_index
);
1290 emit_sub( func
, 0, 1 );
1291 STORE( func
, *inst
, 0, 0, chan_index
);
1295 case TGSI_OPCODE_LERP
:
1296 /* TGSI_OPCODE_LRP */
1297 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1298 FETCH( func
, *inst
, 0, 0, chan_index
);
1299 FETCH( func
, *inst
, 1, 1, chan_index
);
1300 FETCH( func
, *inst
, 2, 2, chan_index
);
1301 emit_sub( func
, 1, 2 );
1302 emit_mul( func
, 0, 1 );
1303 emit_add( func
, 0, 2 );
1304 STORE( func
, *inst
, 0, 0, chan_index
);
1308 case TGSI_OPCODE_CND
:
1312 case TGSI_OPCODE_CND0
:
1316 case TGSI_OPCODE_DOT2ADD
:
1317 /* TGSI_OPCODE_DP2A */
1321 case TGSI_OPCODE_INDEX
:
1325 case TGSI_OPCODE_NEGATE
:
1329 case TGSI_OPCODE_FRAC
:
1330 /* TGSI_OPCODE_FRC */
1331 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1332 FETCH( func
, *inst
, 0, 0, chan_index
);
1333 emit_frc( func
, 0 );
1334 STORE( func
, *inst
, 0, 0, chan_index
);
1338 case TGSI_OPCODE_CLAMP
:
1342 case TGSI_OPCODE_FLOOR
:
1343 /* TGSI_OPCODE_FLR */
1344 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1345 FETCH( func
, *inst
, 0, 0, chan_index
);
1346 emit_flr( func
, 0 );
1347 STORE( func
, *inst
, 0, 0, chan_index
);
1351 case TGSI_OPCODE_ROUND
:
1355 case TGSI_OPCODE_EXPBASE2
:
1356 /* TGSI_OPCODE_EX2 */
1357 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1358 emit_ex2( func
, 0 );
1359 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1360 STORE( func
, *inst
, 0, 0, chan_index
);
1364 case TGSI_OPCODE_LOGBASE2
:
1365 /* TGSI_OPCODE_LG2 */
1366 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1367 emit_lg2( func
, 0 );
1368 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1369 STORE( func
, *inst
, 0, 0, chan_index
);
1373 case TGSI_OPCODE_POWER
:
1374 /* TGSI_OPCODE_POW */
1375 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1376 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1377 emit_pow( func
, 0, 1 );
1378 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1379 STORE( func
, *inst
, 0, 0, chan_index
);
1383 case TGSI_OPCODE_CROSSPRODUCT
:
1384 /* TGSI_OPCODE_XPD */
1385 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1386 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1387 FETCH( func
, *inst
, 1, 1, CHAN_Z
);
1388 FETCH( func
, *inst
, 3, 0, CHAN_Z
);
1390 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1391 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1392 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1393 FETCH( func
, *inst
, 4, 1, CHAN_Y
);
1395 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1396 emit_mov( func
, 2, 0 );
1397 emit_mul( func
, 2, 1 );
1398 emit_mov( func
, 5, 3 );
1399 emit_mul( func
, 5, 4 );
1400 emit_sub( func
, 2, 5 );
1401 STORE( func
, *inst
, 2, 0, CHAN_X
);
1403 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ||
1404 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1405 FETCH( func
, *inst
, 2, 1, CHAN_X
);
1406 FETCH( func
, *inst
, 5, 0, CHAN_X
);
1408 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1409 emit_mul( func
, 3, 2 );
1410 emit_mul( func
, 1, 5 );
1411 emit_sub( func
, 3, 1 );
1412 STORE( func
, *inst
, 3, 0, CHAN_Y
);
1414 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1415 emit_mul( func
, 5, 4 );
1416 emit_mul( func
, 0, 2 );
1417 emit_sub( func
, 5, 0 );
1418 STORE( func
, *inst
, 5, 0, CHAN_Z
);
1420 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1421 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_ONE_I
, TGSI_EXEC_TEMP_ONE_C
);
1422 STORE( func
, *inst
, 0, 0, CHAN_W
);
1426 case TGSI_OPCODE_MULTIPLYMATRIX
:
1430 case TGSI_OPCODE_ABS
:
1431 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1432 FETCH( func
, *inst
, 0, 0, chan_index
);
1433 emit_abs( func
, 0) ;
1435 STORE( func
, *inst
, 0, 0, chan_index
);
1439 case TGSI_OPCODE_RCC
:
1443 case TGSI_OPCODE_DPH
:
1444 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1445 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1446 emit_mul( func
, 0, 1 );
1447 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1448 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1449 emit_mul( func
, 1, 2 );
1450 emit_add( func
, 0, 1 );
1451 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1452 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1453 emit_mul( func
, 1, 2 );
1454 emit_add( func
, 0, 1 );
1455 FETCH( func
, *inst
, 1, 1, CHAN_W
);
1456 emit_add( func
, 0, 1 );
1457 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1458 STORE( func
, *inst
, 0, 0, chan_index
);
1462 case TGSI_OPCODE_COS
:
1463 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1464 emit_cos( func
, 0 );
1465 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1466 STORE( func
, *inst
, 0, 0, chan_index
);
1470 case TGSI_OPCODE_DDX
:
1474 case TGSI_OPCODE_DDY
:
1478 case TGSI_OPCODE_KIL
:
1479 emit_kil( func
, &inst
->FullSrcRegisters
[0] );
1482 case TGSI_OPCODE_PK2H
:
1486 case TGSI_OPCODE_PK2US
:
1490 case TGSI_OPCODE_PK4B
:
1494 case TGSI_OPCODE_PK4UB
:
1498 case TGSI_OPCODE_RFL
:
1502 case TGSI_OPCODE_SEQ
:
1506 case TGSI_OPCODE_SFL
:
1510 case TGSI_OPCODE_SGT
:
1514 case TGSI_OPCODE_SIN
:
1515 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1516 emit_sin( func
, 0 );
1517 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1518 STORE( func
, *inst
, 0, 0, chan_index
);
1522 case TGSI_OPCODE_SLE
:
1526 case TGSI_OPCODE_SNE
:
1530 case TGSI_OPCODE_STR
:
1534 case TGSI_OPCODE_TEX
:
1538 TGSI_EXEC_TEMP_ONE_I
,
1539 TGSI_EXEC_TEMP_ONE_C
);
1540 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1541 STORE( func
, *inst
, 0, 0, chan_index
);
1545 case TGSI_OPCODE_TXD
:
1549 case TGSI_OPCODE_UP2H
:
1553 case TGSI_OPCODE_UP2US
:
1557 case TGSI_OPCODE_UP4B
:
1561 case TGSI_OPCODE_UP4UB
:
1565 case TGSI_OPCODE_X2D
:
1569 case TGSI_OPCODE_ARA
:
1573 case TGSI_OPCODE_ARR
:
1577 case TGSI_OPCODE_BRA
:
1581 case TGSI_OPCODE_CAL
:
1585 case TGSI_OPCODE_RET
:
1587 x86_retw( func
, 16 );
1593 case TGSI_OPCODE_SSG
:
1597 case TGSI_OPCODE_CMP
:
1598 emit_cmp (func
, inst
);
1601 case TGSI_OPCODE_SCS
:
1602 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1603 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1604 emit_cos( func
, 0 );
1605 STORE( func
, *inst
, 0, 0, CHAN_X
);
1607 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1608 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1609 emit_sin( func
, 0 );
1610 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1612 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1613 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_00000000_I
, TGSI_EXEC_TEMP_00000000_C
);
1614 STORE( func
, *inst
, 0, 0, CHAN_Z
);
1616 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1617 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_ONE_I
, TGSI_EXEC_TEMP_ONE_C
);
1618 STORE( func
, *inst
, 0, 0, CHAN_W
);
1622 case TGSI_OPCODE_TXB
:
1626 case TGSI_OPCODE_NRM
:
1630 case TGSI_OPCODE_DIV
:
1634 case TGSI_OPCODE_DP2
:
1638 case TGSI_OPCODE_TXL
:
1642 case TGSI_OPCODE_BRK
:
1646 case TGSI_OPCODE_IF
:
1650 case TGSI_OPCODE_LOOP
:
1654 case TGSI_OPCODE_REP
:
1658 case TGSI_OPCODE_ELSE
:
1662 case TGSI_OPCODE_ENDIF
:
1666 case TGSI_OPCODE_ENDLOOP
:
1670 case TGSI_OPCODE_ENDREP
:
1674 case TGSI_OPCODE_PUSHA
:
1678 case TGSI_OPCODE_POPA
:
1682 case TGSI_OPCODE_CEIL
:
1686 case TGSI_OPCODE_I2F
:
1690 case TGSI_OPCODE_NOT
:
1694 case TGSI_OPCODE_TRUNC
:
1698 case TGSI_OPCODE_SHL
:
1702 case TGSI_OPCODE_SHR
:
1706 case TGSI_OPCODE_AND
:
1710 case TGSI_OPCODE_OR
:
1714 case TGSI_OPCODE_MOD
:
1718 case TGSI_OPCODE_XOR
:
1722 case TGSI_OPCODE_SAD
:
1726 case TGSI_OPCODE_TXF
:
1730 case TGSI_OPCODE_TXQ
:
1734 case TGSI_OPCODE_CONT
:
1738 case TGSI_OPCODE_EMIT
:
1742 case TGSI_OPCODE_ENDPRIM
:
1753 struct x86_function
*func
,
1754 struct tgsi_full_declaration
*decl
)
1756 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1757 unsigned first
, last
, mask
;
1760 assert( decl
->Declaration
.Declare
== TGSI_DECLARE_RANGE
);
1762 first
= decl
->u
.DeclarationRange
.First
;
1763 last
= decl
->u
.DeclarationRange
.Last
;
1764 mask
= decl
->Declaration
.UsageMask
;
1766 /* Do not touch WPOS.xy */
1768 mask
&= ~TGSI_WRITEMASK_XY
;
1769 if( mask
== TGSI_WRITEMASK_NONE
) {
1774 for( i
= first
; i
<= last
; i
++ ) {
1775 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
1776 if( mask
& (1 << j
) ) {
1777 switch( decl
->Interpolation
.Interpolate
) {
1778 case TGSI_INTERPOLATE_CONSTANT
:
1779 emit_coef_a0( func
, 0, i
, j
);
1780 emit_inputs( func
, 0, i
, j
);
1783 case TGSI_INTERPOLATE_LINEAR
:
1784 emit_inputf( func
, 0, 0, TGSI_SWIZZLE_X
);
1785 emit_coef_dadx( func
, 1, i
, j
);
1786 emit_inputf( func
, 2, 0, TGSI_SWIZZLE_Y
);
1787 emit_coef_dady( func
, 3, i
, j
);
1788 emit_mul( func
, 0, 1 ); /* x * dadx */
1789 emit_coef_a0( func
, 4, i
, j
);
1790 emit_mul( func
, 2, 3 ); /* y * dady */
1791 emit_add( func
, 0, 4 ); /* x * dadx + a0 */
1792 emit_add( func
, 0, 2 ); /* x * dadx + y * dady + a0 */
1793 emit_inputs( func
, 0, i
, j
);
1796 case TGSI_INTERPOLATE_PERSPECTIVE
:
1797 emit_inputf( func
, 0, 0, TGSI_SWIZZLE_X
);
1798 emit_coef_dadx( func
, 1, i
, j
);
1799 emit_inputf( func
, 2, 0, TGSI_SWIZZLE_Y
);
1800 emit_coef_dady( func
, 3, i
, j
);
1801 emit_mul( func
, 0, 1 ); /* x * dadx */
1802 emit_inputf( func
, 4, 0, TGSI_SWIZZLE_W
);
1803 emit_coef_a0( func
, 5, i
, j
);
1804 emit_rcp( func
, 4, 4 ); /* 1.0 / w */
1805 emit_mul( func
, 2, 3 ); /* y * dady */
1806 emit_add( func
, 0, 5 ); /* x * dadx + a0 */
1807 emit_add( func
, 0, 2 ); /* x * dadx + y * dady + a0 */
1808 emit_mul( func
, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
1809 emit_inputs( func
, 0, i
, j
);
1823 struct tgsi_token
*tokens
,
1824 struct x86_function
*func
)
1826 struct tgsi_parse_context parse
;
1828 func
->csr
= func
->store
;
1833 get_argument( 0 ) );
1837 get_argument( 1 ) );
1841 get_argument( 2 ) );
1845 get_argument( 3 ) );
1847 tgsi_parse_init( &parse
, tokens
);
1849 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1850 tgsi_parse_token( &parse
);
1852 switch( parse
.FullToken
.Token
.Type
) {
1853 case TGSI_TOKEN_TYPE_DECLARATION
:
1856 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1859 &parse
.FullToken
.FullInstruction
);
1867 tgsi_parse_free( &parse
);
1873 * Fragment shaders are responsible for interpolating shader inputs. Because on
1874 * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
1875 * output, const, temp and coef), the code is split into two phases --
1876 * DECLARATION and INSTRUCTION phase.
1877 * GP register holding the output argument is aliased with the coeff argument,
1878 * as outputs are not needed in the DECLARATION phase.
1882 struct tgsi_token
*tokens
,
1883 struct x86_function
*func
)
1885 struct tgsi_parse_context parse
;
1886 boolean instruction_phase
= FALSE
;
1888 func
->csr
= func
->store
;
1890 /* DECLARATION phase, do not load output argument. */
1894 get_argument( 0 ) );
1898 get_argument( 2 ) );
1902 get_argument( 3 ) );
1906 get_argument( 4 ) );
1908 tgsi_parse_init( &parse
, tokens
);
1910 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1911 tgsi_parse_token( &parse
);
1913 switch( parse
.FullToken
.Token
.Type
) {
1914 case TGSI_TOKEN_TYPE_DECLARATION
:
1917 &parse
.FullToken
.FullDeclaration
);
1920 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1921 if( !instruction_phase
) {
1922 /* INSTRUCTION phase, overwrite coeff with output. */
1923 instruction_phase
= TRUE
;
1927 get_argument( 1 ) );
1931 &parse
.FullToken
.FullInstruction
);
1939 tgsi_parse_free( &parse
);