1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "tgsi_platform.h"
29 #include "tgsi_core.h"
30 #include "x86/rtasm/x86sse.h"
32 #if defined(__i386__) || defined(__386__)
75 printf( "XMM%u", reg
.idx
);
87 unsigned count
= 10 - strlen( op
);
/* Print a banner line to stdout marking the beginning of an sse dump. */
94 #define DUMP_START() printf( "\nsse-dump start ----------------" )
/* Print a banner line to stdout marking the end of an sse dump. */
95 #define DUMP_END() printf( "\nsse-dump end ----------------\n" )
/* Print the string OP (an instruction mnemonic at the visible call sites)
 * on a new line. */
96 #define DUMP( OP ) printf( "\n%s", OP )
97 #define DUMP_I( OP, I ) do {\
98 printf( "\n%s", OP );\
100 printf( "%u", I ); } while( 0 )
101 #define DUMP_R( OP, R0 ) do {\
102 printf( "\n%s", OP );\
104 _print_reg( R0 ); } while( 0 )
105 #define DUMP_RR( OP, R0, R1 ) do {\
106 printf( "\n%s", OP );\
110 _print_reg( R1 ); } while( 0 )
111 #define DUMP_RRI( OP, R0, R1, I ) do {\
112 printf( "\n%s", OP );\
118 printf( "%u", I ); } while( 0 )
/* Empty variants of the dump macros: each expands to nothing, so the
 * DUMP_*() calls in the rtasm wrappers compile away.
 * NOTE(review): presumably the branch taken when dumping is disabled -- the
 * enclosing preprocessor conditional is not visible here; confirm. */
125 #define DUMP_I( OP, I )
126 #define DUMP_R( OP, R0 )
127 #define DUMP_RR( OP, R0, R1 )
128 #define DUMP_RRI( OP, R0, R1, I )
/* Loop header that steps CHAN through the four channel indices 0..3.
 * CHAN must be an assignable integer variable declared by the caller; the
 * statement that follows the macro becomes the loop body. */
132 #define FOR_EACH_CHANNEL( CHAN )\
133 for( CHAN = 0; CHAN < 4; CHAN++ )
/* Non-zero when bit CHAN is set in the WriteMask of INST's first
 * destination register (FullDstRegisters[0]); zero otherwise.
 * INST is an instruction object (not a pointer), hence the `.` access. */
135 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
136 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
/* `if` header that guards the following statement on
 * IS_DST0_CHANNEL_ENABLED( INST, CHAN ).
 * The macro expands to a bare `if`, so an `else` written after the guarded
 * statement attaches to this hidden `if`. */
138 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
139 if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
/* Loop header that runs the following statement once for every channel
 * index 0..3 whose bit is set in the write mask of INST's first destination
 * register. Built from FOR_EACH_CHANNEL followed by
 * IF_IS_DST0_CHANNEL_ENABLED, so CHAN must be a caller-declared integer
 * variable and the body is the single statement (or block) that follows. */
141 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
142 FOR_EACH_CHANNEL( CHAN )\
143 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
/* Short local alias for TGSI_EXEC_TEMP_R0. Used in this file as the
 * temporary-register index passed to get_temp() by the function-call
 * helpers and as the base (TEMP_R0 * 16) into `store` in the math
 * callbacks. */
150 #define TEMP_R0 TGSI_EXEC_TEMP_R0
153 * X86 utility functions.
156 static struct x86_reg
162 (enum x86_reg_name
) xmm
);
166 * X86 register mapping helpers.
169 static struct x86_reg
170 get_const_base( void )
177 static struct x86_reg
178 get_input_base( void )
185 static struct x86_reg
186 get_output_base( void )
193 static struct x86_reg
194 get_temp_base( void )
201 static struct x86_reg
202 get_coef_base( void )
204 return get_output_base();
208 * Data access helpers.
211 static struct x86_reg
215 return x86_make_disp(
216 x86_make_reg( file_REG32
, reg_SP
),
220 static struct x86_reg
225 return x86_make_disp(
227 (vec
* 4 + chan
) * 4 );
230 static struct x86_reg
235 return x86_make_disp(
237 (vec
* 4 + chan
) * 16 );
240 static struct x86_reg
245 return x86_make_disp(
247 (vec
* 4 + chan
) * 16 );
250 static struct x86_reg
255 return x86_make_disp(
257 (vec
* 4 + chan
) * 16 );
260 static struct x86_reg
266 return x86_make_disp(
268 ((vec
* 3 + member
) * 4 + chan
) * 4 );
272 * X86 rtasm wrappers.
277 struct x86_function
*func
,
281 DUMP_RR( "ADDPS", dst
, src
);
282 sse_addps( func
, dst
, src
);
287 struct x86_function
*func
,
291 DUMP_RR( "ANDNPS", dst
, src
);
292 sse_andnps( func
, dst
, src
);
297 struct x86_function
*func
,
301 DUMP_RR( "ANDPS", dst
, src
);
302 sse_andps( func
, dst
, src
);
307 struct x86_function
*func
,
310 DUMP_I( "CALL", addr
);
311 x86_call( func
, addr
);
316 struct x86_function
*func
,
321 DUMP_RRI( "CMPPS", dst
, src
, cc
);
322 sse_cmpps( func
, dst
, src
, cc
);
327 struct x86_function
*func
,
331 DUMP_RR( "CVTTPS2DQ", dst
, src
);
332 sse2_cvttps2dq( func
, dst
, src
);
337 struct x86_function
*func
,
341 DUMP_RR( "MAXPS", dst
, src
);
342 sse_maxps( func
, dst
, src
);
347 struct x86_function
*func
,
351 DUMP_RR( "MINPS", dst
, src
);
352 sse_minps( func
, dst
, src
);
357 struct x86_function
*func
,
361 DUMP_RR( "MOV", dst
, src
);
362 x86_mov( func
, dst
, src
);
367 struct x86_function
*func
,
371 DUMP_RR( "MOVAPS", dst
, src
);
372 sse_movaps( func
, dst
, src
);
377 struct x86_function
*func
,
381 DUMP_RR( "MOVSS", dst
, src
);
382 sse_movss( func
, dst
, src
);
387 struct x86_function
*func
,
391 DUMP_RR( "MOVUPS", dst
, src
);
392 sse_movups( func
, dst
, src
);
397 struct x86_function
*func
,
401 DUMP_RR( "MULPS", dst
, src
);
402 sse_mulps( func
, dst
, src
);
407 struct x86_function
*func
,
411 DUMP_RR( "OR", dst
, src
);
412 x86_or( func
, dst
, src
);
417 struct x86_function
*func
,
421 DUMP_RR( "ORPS", dst
, src
);
422 sse_orps( func
, dst
, src
);
427 struct x86_function
*func
,
431 DUMP_RR( "PMOVMSKB", dst
, src
);
432 sse_pmovmskb( func
, dst
, src
);
437 struct x86_function
*func
,
440 DUMP_R( "POP", dst
);
441 x86_pop( func
, dst
);
446 struct x86_function
*func
,
449 DUMP_R( "PUSH", dst
);
450 x86_push( func
, dst
);
455 struct x86_function
*func
,
459 DUMP_RR( "RCPPS", dst
, src
);
460 sse2_rcpps( func
, dst
, src
);
466 struct x86_function
*func
,
469 DUMP_I( "RET", size
);
470 x86_retw( func
, size
);
475 struct x86_function
*func
)
484 struct x86_function
*func
,
488 DUMP_RR( "RSQRTPS", dst
, src
);
489 sse_rsqrtps( func
, dst
, src
);
494 struct x86_function
*func
,
499 DUMP_RRI( "SHUFPS", dst
, src
, shuf
);
500 sse_shufps( func
, dst
, src
, shuf
);
505 struct x86_function
*func
,
509 DUMP_RR( "SUBPS", dst
, src
);
510 sse_subps( func
, dst
, src
);
515 struct x86_function
*func
,
519 DUMP_RR( "XORPS", dst
, src
);
520 sse_xorps( func
, dst
, src
);
524 * Data fetch helpers.
529 struct x86_function
*func
,
537 get_const( vec
, chan
) );
542 SHUF( 0, 0, 0, 0 ) );
547 struct x86_function
*func
,
555 get_input( vec
, chan
) );
560 struct x86_function
*func
,
567 get_output( vec
, chan
),
573 struct x86_function
*func
,
581 get_temp( vec
, chan
) );
586 struct x86_function
*func
,
595 get_coef( vec
, chan
, member
) );
600 SHUF( 0, 0, 0, 0 ) );
604 * Data store helpers.
609 struct x86_function
*func
,
616 get_input( vec
, chan
),
622 struct x86_function
*func
,
629 get_temp( vec
, chan
),
635 struct x86_function
*func
,
643 vec
+ TGSI_EXEC_NUM_TEMPS
,
648 * Coefficient fetch helpers.
653 struct x86_function
*func
,
668 struct x86_function
*func
,
683 struct x86_function
*func
,
697 * Function call helpers.
702 struct x86_function
*func
)
714 /* It is important on non-win32 platforms that temp base is pushed last.
723 struct x86_function
*func
)
725 /* Restore GP registers in reverse order.
743 struct x86_function
*func
,
749 get_temp( TEMP_R0
, 0 ),
750 make_xmm( xmm_dst
) );
758 get_temp( TEMP_R0
, 0 ) );
771 get_temp( TEMP_R0
, 0 ) );
775 emit_func_call_dst_src(
776 struct x86_function
*func
,
783 get_temp( TEMP_R0
, 1 ),
784 make_xmm( xmm_src
) );
793 * Low-level instruction translators.
798 struct x86_function
*func
,
805 TGSI_EXEC_TEMP_7FFFFFFF_I
,
806 TGSI_EXEC_TEMP_7FFFFFFF_C
) );
811 struct x86_function
*func
,
818 make_xmm( xmm_src
) );
826 store
[0] = (float) cos( (double) store
[0] );
827 store
[1] = (float) cos( (double) store
[1] );
828 store
[2] = (float) cos( (double) store
[2] );
829 store
[3] = (float) cos( (double) store
[3] );
831 const unsigned X
= TEMP_R0
* 16;
832 store
[X
+ 0] = cosf( store
[X
+ 0] );
833 store
[X
+ 1] = cosf( store
[X
+ 1] );
834 store
[X
+ 2] = cosf( store
[X
+ 2] );
835 store
[X
+ 3] = cosf( store
[X
+ 3] );
841 struct x86_function
*func
,
855 store
[0] = (float) pow( 2.0, (double) store
[0] );
856 store
[1] = (float) pow( 2.0, (double) store
[1] );
857 store
[2] = (float) pow( 2.0, (double) store
[2] );
858 store
[3] = (float) pow( 2.0, (double) store
[3] );
860 const unsigned X
= TEMP_R0
* 16;
861 store
[X
+ 0] = powf( 2.0f
, store
[X
+ 0] );
862 store
[X
+ 1] = powf( 2.0f
, store
[X
+ 1] );
863 store
[X
+ 2] = powf( 2.0f
, store
[X
+ 2] );
864 store
[X
+ 3] = powf( 2.0f
, store
[X
+ 3] );
870 struct x86_function
*func
,
881 struct x86_function
*func
,
895 const unsigned X
= 0;
897 const unsigned X
= TEMP_R0
* 16;
899 store
[X
+ 0] = (float) floor( (double) store
[X
+ 0] );
900 store
[X
+ 1] = (float) floor( (double) store
[X
+ 1] );
901 store
[X
+ 2] = (float) floor( (double) store
[X
+ 2] );
902 store
[X
+ 3] = (float) floor( (double) store
[X
+ 3] );
907 struct x86_function
*func
,
921 const unsigned X
= 0;
923 const unsigned X
= TEMP_R0
* 16;
925 store
[X
+ 0] -= (float) floor( (double) store
[X
+ 0] );
926 store
[X
+ 1] -= (float) floor( (double) store
[X
+ 1] );
927 store
[X
+ 2] -= (float) floor( (double) store
[X
+ 2] );
928 store
[X
+ 3] -= (float) floor( (double) store
[X
+ 3] );
933 struct x86_function
*func
,
947 const unsigned X
= 0;
949 const unsigned X
= TEMP_R0
* 16;
951 store
[X
+ 0] = LOG2( store
[X
+ 0] );
952 store
[X
+ 1] = LOG2( store
[X
+ 1] );
953 store
[X
+ 2] = LOG2( store
[X
+ 2] );
954 store
[X
+ 3] = LOG2( store
[X
+ 3] );
959 struct x86_function
*func
,
970 struct x86_function
*func
,
977 make_xmm( xmm_src
) );
981 emit_mul (struct x86_function
*func
,
988 make_xmm( xmm_src
) );
993 struct x86_function
*func
,
1000 TGSI_EXEC_TEMP_80000000_I
,
1001 TGSI_EXEC_TEMP_80000000_C
) );
1004 static void XSTDCALL
1009 store
[0] = (float) pow( (double) store
[0], (double) store
[4] );
1010 store
[1] = (float) pow( (double) store
[1], (double) store
[5] );
1011 store
[2] = (float) pow( (double) store
[2], (double) store
[6] );
1012 store
[3] = (float) pow( (double) store
[3], (double) store
[7] );
1014 const unsigned X
= TEMP_R0
* 16;
1015 store
[X
+ 0] = powf( store
[X
+ 0], store
[X
+ 4] );
1016 store
[X
+ 1] = powf( store
[X
+ 1], store
[X
+ 5] );
1017 store
[X
+ 2] = powf( store
[X
+ 2], store
[X
+ 6] );
1018 store
[X
+ 3] = powf( store
[X
+ 3], store
[X
+ 7] );
1024 struct x86_function
*func
,
1028 emit_func_call_dst_src(
1037 struct x86_function
*func
,
1043 make_xmm( xmm_dst
),
1044 make_xmm( xmm_src
) );
1049 struct x86_function
*func
,
1055 make_xmm( xmm_dst
),
1056 make_xmm( xmm_src
) );
1061 struct x86_function
*func
,
1068 TGSI_EXEC_TEMP_80000000_I
,
1069 TGSI_EXEC_TEMP_80000000_C
) );
1072 static void XSTDCALL
1077 store
[0] = (float) sin( (double) store
[0] );
1078 store
[1] = (float) sin( (double) store
[1] );
1079 store
[2] = (float) sin( (double) store
[2] );
1080 store
[3] = (float) sin( (double) store
[3] );
1082 const unsigned X
= TEMP_R0
* 16;
1083 store
[X
+ 0] = sinf( store
[X
+ 0] );
1084 store
[X
+ 1] = sinf( store
[X
+ 1] );
1085 store
[X
+ 2] = sinf( store
[X
+ 2] );
1086 store
[X
+ 3] = sinf( store
[X
+ 3] );
1091 emit_sin (struct x86_function
*func
,
1102 struct x86_function
*func
,
1108 make_xmm( xmm_dst
),
1109 make_xmm( xmm_src
) );
1118 struct x86_function
*func
,
1120 const struct tgsi_full_src_register
*reg
,
1121 const unsigned chan_index
)
1123 unsigned swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1126 case TGSI_EXTSWIZZLE_X
:
1127 case TGSI_EXTSWIZZLE_Y
:
1128 case TGSI_EXTSWIZZLE_Z
:
1129 case TGSI_EXTSWIZZLE_W
:
1130 switch( reg
->SrcRegister
.File
) {
1131 case TGSI_FILE_CONSTANT
:
1135 reg
->SrcRegister
.Index
,
1139 case TGSI_FILE_INPUT
:
1143 reg
->SrcRegister
.Index
,
1147 case TGSI_FILE_TEMPORARY
:
1151 reg
->SrcRegister
.Index
,
1160 case TGSI_EXTSWIZZLE_ZERO
:
1164 TGSI_EXEC_TEMP_00000000_I
,
1165 TGSI_EXEC_TEMP_00000000_C
);
1168 case TGSI_EXTSWIZZLE_ONE
:
1172 TGSI_EXEC_TEMP_ONE_I
,
1173 TGSI_EXEC_TEMP_ONE_C
);
1180 switch( tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
) ) {
1181 case TGSI_UTIL_SIGN_CLEAR
:
1182 emit_abs( func
, xmm
);
1185 case TGSI_UTIL_SIGN_SET
:
1186 emit_setsign( func
, xmm
);
1189 case TGSI_UTIL_SIGN_TOGGLE
:
1190 emit_neg( func
, xmm
);
1193 case TGSI_UTIL_SIGN_KEEP
:
/* Shorthand for emit_fetch(): forwards FUNC, XMM and CHAN unchanged and
 * passes the address of source operand INDEX of INST
 * (INST.FullSrcRegisters[INDEX]).
 * INDEX selects a source-operand slot of the instruction; it is not a
 * temporary-register number.
 * NOTE(review): a few call sites in this file pass TGSI_EXEC_TEMP_*_I /
 * TGSI_EXEC_TEMP_*_C as INDEX / CHAN, which indexes FullSrcRegisters with
 * the temp number -- confirm whether a temp fetch was intended there.
 * XMM is presumably the target XMM register number -- TODO confirm against
 * emit_fetch(). */
1198 #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
1199 emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
1207 struct x86_function
*func
,
1209 const struct tgsi_full_dst_register
*reg
,
1210 const struct tgsi_full_instruction
*inst
,
1211 unsigned chan_index
)
1213 switch( reg
->DstRegister
.File
) {
1214 case TGSI_FILE_OUTPUT
:
1218 reg
->DstRegister
.Index
,
1222 case TGSI_FILE_TEMPORARY
:
1226 reg
->DstRegister
.Index
,
1230 case TGSI_FILE_ADDRESS
:
1234 reg
->DstRegister
.Index
,
1242 switch( inst
->Instruction
.Saturate
) {
1246 case TGSI_SAT_ZERO_ONE
:
1250 case TGSI_SAT_MINUS_PLUS_ONE
:
/* Shorthand for emit_store(): forwards FUNC, XMM and CHAN unchanged and
 * passes the address of destination operand INDEX of INST
 * (INST.FullDstRegisters[INDEX]) together with the address of INST itself.
 * INST appears twice in the expansion, so pass a side-effect-free lvalue. */
1256 #define STORE( FUNC, INST, XMM, INDEX, CHAN )\
1257 emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
1260 * High-level instruction translators.
1265 struct x86_function
*func
,
1266 const struct tgsi_full_src_register
*reg
)
1268 unsigned uniquemask
;
1269 unsigned registers
[4];
1270 unsigned nextregister
= 0;
1271 unsigned firstchan
= ~0;
1272 unsigned chan_index
;
1274 /* This mask stores component bits that were already tested. Note that
1275 * we test if the value is less than zero, so 1.0 and 0.0 need not be
1277 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1279 FOR_EACH_CHANNEL( chan_index
) {
1282 /* unswizzle channel */
1283 swizzle
= tgsi_util_get_full_src_register_extswizzle(
1287 /* check if the component has not already been tested */
1288 if( !(uniquemask
& (1 << swizzle
)) ) {
1289 uniquemask
|= 1 << swizzle
;
1291 /* allocate register */
1292 registers
[chan_index
] = nextregister
;
1300 /* mark the first channel used */
1301 if( firstchan
== ~0 ) {
1302 firstchan
= chan_index
;
1309 x86_make_reg( file_REG32
, reg_AX
) );
1312 x86_make_reg( file_REG32
, reg_DX
) );
1314 FOR_EACH_CHANNEL( chan_index
) {
1315 if( uniquemask
& (1 << chan_index
) ) {
1318 make_xmm( registers
[chan_index
] ),
1320 TGSI_EXEC_TEMP_00000000_I
,
1321 TGSI_EXEC_TEMP_00000000_C
),
1324 if( chan_index
== firstchan
) {
1327 x86_make_reg( file_REG32
, reg_AX
),
1328 make_xmm( registers
[chan_index
] ) );
1333 x86_make_reg( file_REG32
, reg_DX
),
1334 make_xmm( registers
[chan_index
] ) );
1337 x86_make_reg( file_REG32
, reg_AX
),
1338 x86_make_reg( file_REG32
, reg_DX
) );
1346 TGSI_EXEC_TEMP_KILMASK_I
,
1347 TGSI_EXEC_TEMP_KILMASK_C
),
1348 x86_make_reg( file_REG32
, reg_AX
) );
1352 x86_make_reg( file_REG32
, reg_DX
) );
1355 x86_make_reg( file_REG32
, reg_AX
) );
1360 struct x86_function
*func
,
1361 struct tgsi_full_instruction
*inst
,
1364 unsigned chan_index
;
1366 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1367 FETCH( func
, *inst
, 0, 0, chan_index
);
1368 FETCH( func
, *inst
, 1, 1, chan_index
);
1378 TGSI_EXEC_TEMP_ONE_I
,
1379 TGSI_EXEC_TEMP_ONE_C
) );
1380 STORE( func
, *inst
, 0, 0, chan_index
);
1386 struct x86_function
*func
,
1387 struct tgsi_full_instruction
*inst
)
1389 unsigned chan_index
;
1391 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1392 FETCH( func
, *inst
, 0, 0, chan_index
);
1393 FETCH( func
, *inst
, 1, 1, chan_index
);
1394 FETCH( func
, *inst
, 2, 2, chan_index
);
1399 TGSI_EXEC_TEMP_00000000_I
,
1400 TGSI_EXEC_TEMP_00000000_C
),
1414 STORE( func
, *inst
, 0, 0, chan_index
);
1420 struct x86_function
*func
,
1421 struct tgsi_full_instruction
*inst
)
1423 unsigned chan_index
;
1425 switch( inst
->Instruction
.Opcode
) {
1426 case TGSI_OPCODE_ARL
:
1427 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1428 FETCH( func
, *inst
, 0, 0, chan_index
);
1429 emit_f2it( func
, 0 );
1430 STORE( func
, *inst
, 0, 0, chan_index
);
1434 case TGSI_OPCODE_MOV
:
1435 /* TGSI_OPCODE_SWZ */
1436 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1437 FETCH( func
, *inst
, 0, 0, chan_index
);
1438 STORE( func
, *inst
, 0, 0, chan_index
);
1442 case TGSI_OPCODE_LIT
:
1443 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1444 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
1448 TGSI_EXEC_TEMP_ONE_I
,
1449 TGSI_EXEC_TEMP_ONE_C
);
1450 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
1451 STORE( func
, *inst
, 0, 0, CHAN_X
);
1453 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
1454 STORE( func
, *inst
, 0, 0, CHAN_W
);
1457 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ||
1458 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1459 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1460 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1465 TGSI_EXEC_TEMP_00000000_I
,
1466 TGSI_EXEC_TEMP_00000000_C
) );
1467 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1469 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1470 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1475 TGSI_EXEC_TEMP_00000000_I
,
1476 TGSI_EXEC_TEMP_00000000_C
) );
1477 FETCH( func
, *inst
, 2, 0, CHAN_W
);
1482 TGSI_EXEC_TEMP_128_I
,
1483 TGSI_EXEC_TEMP_128_C
) );
1488 TGSI_EXEC_TEMP_MINUS_128_I
,
1489 TGSI_EXEC_TEMP_MINUS_128_C
) );
1490 emit_pow( func
, 1, 2 );
1491 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1505 STORE( func
, *inst
, 2, 0, CHAN_Z
);
1510 case TGSI_OPCODE_RCP
:
1511 /* TGSI_OPCODE_RECIP */
1512 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1513 emit_rcp( func
, 0, 0 );
1514 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1515 STORE( func
, *inst
, 0, 0, chan_index
);
1519 case TGSI_OPCODE_RSQ
:
1520 /* TGSI_OPCODE_RECIPSQRT */
1521 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1522 emit_rsqrt( func
, 0, 0 );
1523 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1524 STORE( func
, *inst
, 0, 0, chan_index
);
1528 case TGSI_OPCODE_EXP
:
1532 case TGSI_OPCODE_LOG
:
1536 case TGSI_OPCODE_MUL
:
1537 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1538 FETCH( func
, *inst
, 0, 0, chan_index
);
1539 FETCH( func
, *inst
, 1, 1, chan_index
);
1540 emit_mul( func
, 0, 1 );
1541 STORE( func
, *inst
, 0, 0, chan_index
);
1545 case TGSI_OPCODE_ADD
:
1546 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1547 FETCH( func
, *inst
, 0, 0, chan_index
);
1548 FETCH( func
, *inst
, 1, 1, chan_index
);
1549 emit_add( func
, 0, 1 );
1550 STORE( func
, *inst
, 0, 0, chan_index
);
1554 case TGSI_OPCODE_DP3
:
1555 /* TGSI_OPCODE_DOT3 */
1556 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1557 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1558 emit_mul( func
, 0, 1 );
1559 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1560 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1561 emit_mul( func
, 1, 2 );
1562 emit_add( func
, 0, 1 );
1563 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1564 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1565 emit_mul( func
, 1, 2 );
1566 emit_add( func
, 0, 1 );
1567 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1568 STORE( func
, *inst
, 0, 0, chan_index
);
1572 case TGSI_OPCODE_DP4
:
1573 /* TGSI_OPCODE_DOT4 */
1574 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1575 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1576 emit_mul( func
, 0, 1 );
1577 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1578 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1579 emit_mul( func
, 1, 2 );
1580 emit_add( func
, 0, 1 );
1581 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1582 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1583 emit_mul(func
, 1, 2 );
1584 emit_add(func
, 0, 1 );
1585 FETCH( func
, *inst
, 1, 0, CHAN_W
);
1586 FETCH( func
, *inst
, 2, 1, CHAN_W
);
1587 emit_mul( func
, 1, 2 );
1588 emit_add( func
, 0, 1 );
1589 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1590 STORE( func
, *inst
, 0, 0, chan_index
);
1594 case TGSI_OPCODE_DST
:
1595 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1599 TGSI_EXEC_TEMP_ONE_I
,
1600 TGSI_EXEC_TEMP_ONE_C
);
1601 STORE( func
, *inst
, 0, 0, CHAN_X
);
1603 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1604 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1605 FETCH( func
, *inst
, 1, 1, CHAN_Y
);
1606 emit_mul( func
, 0, 1 );
1607 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1609 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1610 FETCH( func
, *inst
, 0, 0, CHAN_Z
);
1611 STORE( func
, *inst
, 0, 0, CHAN_Z
);
1613 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1614 FETCH( func
, *inst
, 0, 1, CHAN_W
);
1615 STORE( func
, *inst
, 0, 0, CHAN_W
);
1619 case TGSI_OPCODE_MIN
:
1620 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1621 FETCH( func
, *inst
, 0, 0, chan_index
);
1622 FETCH( func
, *inst
, 1, 1, chan_index
);
1627 STORE( func
, *inst
, 0, 0, chan_index
);
1631 case TGSI_OPCODE_MAX
:
1632 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1633 FETCH( func
, *inst
, 0, 0, chan_index
);
1634 FETCH( func
, *inst
, 1, 1, chan_index
);
1639 STORE( func
, *inst
, 0, 0, chan_index
);
1643 case TGSI_OPCODE_SLT
:
1644 /* TGSI_OPCODE_SETLT */
1645 emit_setcc( func
, inst
, cc_LessThan
);
1648 case TGSI_OPCODE_SGE
:
1649 /* TGSI_OPCODE_SETGE */
1650 emit_setcc( func
, inst
, cc_NotLessThan
);
1653 case TGSI_OPCODE_MAD
:
1654 /* TGSI_OPCODE_MADD */
1655 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1656 FETCH( func
, *inst
, 0, 0, chan_index
);
1657 FETCH( func
, *inst
, 1, 1, chan_index
);
1658 FETCH( func
, *inst
, 2, 2, chan_index
);
1659 emit_mul( func
, 0, 1 );
1660 emit_add( func
, 0, 2 );
1661 STORE( func
, *inst
, 0, 0, chan_index
);
1665 case TGSI_OPCODE_SUB
:
1666 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1667 FETCH( func
, *inst
, 0, 0, chan_index
);
1668 FETCH( func
, *inst
, 1, 1, chan_index
);
1669 emit_sub( func
, 0, 1 );
1670 STORE( func
, *inst
, 0, 0, chan_index
);
1674 case TGSI_OPCODE_LERP
:
1675 /* TGSI_OPCODE_LRP */
1676 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1677 FETCH( func
, *inst
, 0, 0, chan_index
);
1678 FETCH( func
, *inst
, 1, 1, chan_index
);
1679 FETCH( func
, *inst
, 2, 2, chan_index
);
1680 emit_sub( func
, 1, 2 );
1681 emit_mul( func
, 0, 1 );
1682 emit_add( func
, 0, 2 );
1683 STORE( func
, *inst
, 0, 0, chan_index
);
1687 case TGSI_OPCODE_CND
:
1691 case TGSI_OPCODE_CND0
:
1695 case TGSI_OPCODE_DOT2ADD
:
1696 /* TGSI_OPCODE_DP2A */
1700 case TGSI_OPCODE_INDEX
:
1704 case TGSI_OPCODE_NEGATE
:
1708 case TGSI_OPCODE_FRAC
:
1709 /* TGSI_OPCODE_FRC */
1710 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1711 FETCH( func
, *inst
, 0, 0, chan_index
);
1712 emit_frc( func
, 0 );
1713 STORE( func
, *inst
, 0, 0, chan_index
);
1717 case TGSI_OPCODE_CLAMP
:
1721 case TGSI_OPCODE_FLOOR
:
1722 /* TGSI_OPCODE_FLR */
1723 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1724 FETCH( func
, *inst
, 0, 0, chan_index
);
1725 emit_flr( func
, 0 );
1726 STORE( func
, *inst
, 0, 0, chan_index
);
1730 case TGSI_OPCODE_ROUND
:
1734 case TGSI_OPCODE_EXPBASE2
:
1735 /* TGSI_OPCODE_EX2 */
1736 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1737 emit_ex2( func
, 0 );
1738 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1739 STORE( func
, *inst
, 0, 0, chan_index
);
1743 case TGSI_OPCODE_LOGBASE2
:
1744 /* TGSI_OPCODE_LG2 */
1745 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1746 emit_lg2( func
, 0 );
1747 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1748 STORE( func
, *inst
, 0, 0, chan_index
);
1752 case TGSI_OPCODE_POWER
:
1753 /* TGSI_OPCODE_POW */
1754 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1755 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1756 emit_pow( func
, 0, 1 );
1757 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1758 STORE( func
, *inst
, 0, 0, chan_index
);
1762 case TGSI_OPCODE_CROSSPRODUCT
:
1763 /* TGSI_OPCODE_XPD */
1764 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1765 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1766 FETCH( func
, *inst
, 1, 1, CHAN_Z
);
1767 FETCH( func
, *inst
, 3, 0, CHAN_Z
);
1769 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1770 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1771 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1772 FETCH( func
, *inst
, 4, 1, CHAN_Y
);
1774 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1775 emit_MOV( func
, 2, 0 );
1776 emit_mul( func
, 2, 1 );
1777 emit_MOV( func
, 5, 3 );
1778 emit_mul( func
, 5, 4 );
1779 emit_sub( func
, 2, 5 );
1780 STORE( func
, *inst
, 2, 0, CHAN_X
);
1782 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ||
1783 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1784 FETCH( func
, *inst
, 2, 1, CHAN_X
);
1785 FETCH( func
, *inst
, 5, 0, CHAN_X
);
1787 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1788 emit_mul( func
, 3, 2 );
1789 emit_mul( func
, 1, 5 );
1790 emit_sub( func
, 3, 1 );
1791 STORE( func
, *inst
, 3, 0, CHAN_Y
);
1793 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1794 emit_mul( func
, 5, 4 );
1795 emit_mul( func
, 0, 2 );
1796 emit_sub( func
, 5, 0 );
1797 STORE( func
, *inst
, 5, 0, CHAN_Z
);
1799 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1800 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_ONE_I
, TGSI_EXEC_TEMP_ONE_C
);
1801 STORE( func
, *inst
, 0, 0, CHAN_W
);
1805 case TGSI_OPCODE_MULTIPLYMATRIX
:
1809 case TGSI_OPCODE_ABS
:
1810 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1811 FETCH( func
, *inst
, 0, 0, chan_index
);
1812 emit_abs( func
, 0) ;
1814 STORE( func
, *inst
, 0, 0, chan_index
);
1818 case TGSI_OPCODE_RCC
:
1822 case TGSI_OPCODE_DPH
:
1823 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1824 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1825 emit_mul( func
, 0, 1 );
1826 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1827 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1828 emit_mul( func
, 1, 2 );
1829 emit_add( func
, 0, 1 );
1830 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1831 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1832 emit_mul( func
, 1, 2 );
1833 emit_add( func
, 0, 1 );
1834 FETCH( func
, *inst
, 1, 1, CHAN_W
);
1835 emit_add( func
, 0, 1 );
1836 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1837 STORE( func
, *inst
, 0, 0, chan_index
);
1841 case TGSI_OPCODE_COS
:
1842 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1843 emit_cos( func
, 0 );
1844 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1845 STORE( func
, *inst
, 0, 0, chan_index
);
1849 case TGSI_OPCODE_DDX
:
1853 case TGSI_OPCODE_DDY
:
1857 case TGSI_OPCODE_KIL
:
1858 emit_kil( func
, &inst
->FullSrcRegisters
[0] );
1861 case TGSI_OPCODE_PK2H
:
1865 case TGSI_OPCODE_PK2US
:
1869 case TGSI_OPCODE_PK4B
:
1873 case TGSI_OPCODE_PK4UB
:
1877 case TGSI_OPCODE_RFL
:
1881 case TGSI_OPCODE_SEQ
:
1885 case TGSI_OPCODE_SFL
:
1889 case TGSI_OPCODE_SGT
:
1893 case TGSI_OPCODE_SIN
:
1894 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1895 emit_sin( func
, 0 );
1896 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1897 STORE( func
, *inst
, 0, 0, chan_index
);
1901 case TGSI_OPCODE_SLE
:
1905 case TGSI_OPCODE_SNE
:
1909 case TGSI_OPCODE_STR
:
1913 case TGSI_OPCODE_TEX
:
1917 TGSI_EXEC_TEMP_ONE_I
,
1918 TGSI_EXEC_TEMP_ONE_C
);
1919 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1920 STORE( func
, *inst
, 0, 0, chan_index
);
1924 case TGSI_OPCODE_TXD
:
1928 case TGSI_OPCODE_UP2H
:
1932 case TGSI_OPCODE_UP2US
:
1936 case TGSI_OPCODE_UP4B
:
1940 case TGSI_OPCODE_UP4UB
:
1944 case TGSI_OPCODE_X2D
:
1948 case TGSI_OPCODE_ARA
:
1952 case TGSI_OPCODE_ARR
:
1956 case TGSI_OPCODE_BRA
:
1960 case TGSI_OPCODE_CAL
:
1964 case TGSI_OPCODE_RET
:
1966 emit_retw( func
, 16 );
1972 case TGSI_OPCODE_SSG
:
1976 case TGSI_OPCODE_CMP
:
1977 emit_cmp (func
, inst
);
1980 case TGSI_OPCODE_SCS
:
1981 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1982 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1983 emit_cos( func
, 0 );
1984 STORE( func
, *inst
, 0, 0, CHAN_X
);
1986 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1987 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1988 emit_sin( func
, 0 );
1989 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1991 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1992 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_00000000_I
, TGSI_EXEC_TEMP_00000000_C
);
1993 STORE( func
, *inst
, 0, 0, CHAN_Z
);
1995 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1996 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_ONE_I
, TGSI_EXEC_TEMP_ONE_C
);
1997 STORE( func
, *inst
, 0, 0, CHAN_W
);
2001 case TGSI_OPCODE_TXB
:
2005 case TGSI_OPCODE_NRM
:
2009 case TGSI_OPCODE_DIV
:
2013 case TGSI_OPCODE_DP2
:
2017 case TGSI_OPCODE_TXL
:
2021 case TGSI_OPCODE_BRK
:
2025 case TGSI_OPCODE_IF
:
2029 case TGSI_OPCODE_LOOP
:
2033 case TGSI_OPCODE_REP
:
2037 case TGSI_OPCODE_ELSE
:
2041 case TGSI_OPCODE_ENDIF
:
2045 case TGSI_OPCODE_ENDLOOP
:
2049 case TGSI_OPCODE_ENDREP
:
2053 case TGSI_OPCODE_PUSHA
:
2057 case TGSI_OPCODE_POPA
:
2061 case TGSI_OPCODE_CEIL
:
2065 case TGSI_OPCODE_I2F
:
2069 case TGSI_OPCODE_NOT
:
2073 case TGSI_OPCODE_TRUNC
:
2077 case TGSI_OPCODE_SHL
:
2081 case TGSI_OPCODE_SHR
:
2085 case TGSI_OPCODE_AND
:
2089 case TGSI_OPCODE_OR
:
2093 case TGSI_OPCODE_MOD
:
2097 case TGSI_OPCODE_XOR
:
2101 case TGSI_OPCODE_SAD
:
2105 case TGSI_OPCODE_TXF
:
2109 case TGSI_OPCODE_TXQ
:
2113 case TGSI_OPCODE_CONT
:
2117 case TGSI_OPCODE_EMIT
:
2121 case TGSI_OPCODE_ENDPRIM
:
2132 struct x86_function
*func
,
2133 struct tgsi_full_declaration
*decl
)
2135 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
2136 unsigned first
, last
, mask
;
2139 assert( decl
->Declaration
.Declare
== TGSI_DECLARE_RANGE
);
2141 first
= decl
->u
.DeclarationRange
.First
;
2142 last
= decl
->u
.DeclarationRange
.Last
;
2143 mask
= decl
->Declaration
.UsageMask
;
2145 /* Do not touch WPOS.xy */
2147 mask
&= ~TGSI_WRITEMASK_XY
;
2148 if( mask
== TGSI_WRITEMASK_NONE
) {
2153 for( i
= first
; i
<= last
; i
++ ) {
2154 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
2155 if( mask
& (1 << j
) ) {
2156 switch( decl
->Interpolation
.Interpolate
) {
2157 case TGSI_INTERPOLATE_CONSTANT
:
2158 emit_coef_a0( func
, 0, i
, j
);
2159 emit_inputs( func
, 0, i
, j
);
2162 case TGSI_INTERPOLATE_LINEAR
:
2163 emit_inputf( func
, 0, 0, TGSI_SWIZZLE_X
);
2164 emit_coef_dadx( func
, 1, i
, j
);
2165 emit_inputf( func
, 2, 0, TGSI_SWIZZLE_Y
);
2166 emit_coef_dady( func
, 3, i
, j
);
2167 emit_mul( func
, 0, 1 ); /* x * dadx */
2168 emit_coef_a0( func
, 4, i
, j
);
2169 emit_mul( func
, 2, 3 ); /* y * dady */
2170 emit_add( func
, 0, 4 ); /* x * dadx + a0 */
2171 emit_add( func
, 0, 2 ); /* x * dadx + y * dady + a0 */
2172 emit_inputs( func
, 0, i
, j
);
2175 case TGSI_INTERPOLATE_PERSPECTIVE
:
2176 emit_inputf( func
, 0, 0, TGSI_SWIZZLE_X
);
2177 emit_coef_dadx( func
, 1, i
, j
);
2178 emit_inputf( func
, 2, 0, TGSI_SWIZZLE_Y
);
2179 emit_coef_dady( func
, 3, i
, j
);
2180 emit_mul( func
, 0, 1 ); /* x * dadx */
2181 emit_inputf( func
, 4, 0, TGSI_SWIZZLE_W
);
2182 emit_coef_a0( func
, 5, i
, j
);
2183 emit_rcp( func
, 4, 4 ); /* 1.0 / w */
2184 emit_mul( func
, 2, 3 ); /* y * dady */
2185 emit_add( func
, 0, 5 ); /* x * dadx + a0 */
2186 emit_add( func
, 0, 2 ); /* x * dadx + y * dady + a0 */
2187 emit_mul( func
, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
2188 emit_inputs( func
, 0, i
, j
);
2202 struct tgsi_token
*tokens
,
2203 struct x86_function
*func
)
2205 struct tgsi_parse_context parse
;
2209 func
->csr
= func
->store
;
2214 get_argument( 0 ) );
2218 get_argument( 1 ) );
2222 get_argument( 2 ) );
2226 get_argument( 3 ) );
2228 tgsi_parse_init( &parse
, tokens
);
2230 while( !tgsi_parse_end_of_tokens( &parse
) ) {
2231 tgsi_parse_token( &parse
);
2233 switch( parse
.FullToken
.Token
.Type
) {
2234 case TGSI_TOKEN_TYPE_DECLARATION
:
2237 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2240 &parse
.FullToken
.FullInstruction
);
2248 tgsi_parse_free( &parse
);
2256 * Fragment shaders are responsible for interpolating shader inputs. Because on
2257 * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
2258 * output, const, temp and coef), the code is split into two phases --
2259 * DECLARATION and INSTRUCTION phase.
2260 * GP register holding the output argument is aliased with the coeff argument,
2261 * as outputs are not needed in the DECLARATION phase.
2265 struct tgsi_token
*tokens
,
2266 struct x86_function
*func
)
2268 struct tgsi_parse_context parse
;
2269 boolean instruction_phase
= FALSE
;
2273 func
->csr
= func
->store
;
2275 /* DECLARATION phase, do not load output argument. */
2279 get_argument( 0 ) );
2283 get_argument( 2 ) );
2287 get_argument( 3 ) );
2291 get_argument( 4 ) );
2293 tgsi_parse_init( &parse
, tokens
);
2295 while( !tgsi_parse_end_of_tokens( &parse
) ) {
2296 tgsi_parse_token( &parse
);
2298 switch( parse
.FullToken
.Token
.Type
) {
2299 case TGSI_TOKEN_TYPE_DECLARATION
:
2302 &parse
.FullToken
.FullDeclaration
);
2305 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2306 if( !instruction_phase
) {
2307 /* INSTRUCTION phase, overwrite coeff with output. */
2308 instruction_phase
= TRUE
;
2312 get_argument( 1 ) );
2316 &parse
.FullToken
.FullInstruction
);
2324 tgsi_parse_free( &parse
);