1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "pipe/p_util.h"
29 #include "pipe/p_shader_tokens.h"
30 #include "pipe/tgsi/util/tgsi_parse.h"
31 #include "pipe/tgsi/util/tgsi_util.h"
32 #include "tgsi_exec.h"
33 #include "tgsi_sse2.h"
35 #include "x86/rtasm/x86sse.h"
37 #if defined(__i386__) || defined(__386__)
47 if (reg
.mod
!= mod_REG
)
54 debug_printf( "EAX" );
57 debug_printf( "ECX" );
60 debug_printf( "EDX" );
63 debug_printf( "EBX" );
66 debug_printf( "ESP" );
69 debug_printf( "EBP" );
72 debug_printf( "ESI" );
75 debug_printf( "EDI" );
83 debug_printf( "XMM%u", reg
.idx
);
90 if (reg
.mod
== mod_DISP8
||
91 reg
.mod
== mod_DISP32
)
92 debug_printf("+%d", reg
.disp
);
94 if (reg
.mod
!= mod_REG
)
102 unsigned count
= 10 - strlen( op
);
/* Debug-dump helpers built on debug_printf().
 * DUMP_START() prints the banner that opens an sse-dump listing,
 * DUMP_END() prints the matching closing banner, and DUMP( OP ) starts a
 * new line and prints the string OP (no operands).
 */
109 #define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
110 #define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
111 #define DUMP( OP ) debug_printf( "\n%s", OP )
112 #define DUMP_I( OP, I ) do {\
113 debug_printf( "\n%s", OP );\
115 debug_printf( "%u", I ); } while( 0 )
116 #define DUMP_R( OP, R0 ) do {\
117 debug_printf( "\n%s", OP );\
119 _print_reg( R0 ); } while( 0 )
120 #define DUMP_RR( OP, R0, R1 ) do {\
121 debug_printf( "\n%s", OP );\
124 debug_printf( ", " );\
125 _print_reg( R1 ); } while( 0 )
126 #define DUMP_RRI( OP, R0, R1, I ) do {\
127 debug_printf( "\n%s", OP );\
130 debug_printf( ", " );\
132 debug_printf( ", " );\
133 debug_printf( "%u", I ); } while( 0 )
/* Variants of the operand-dumping macros that expand to nothing, so every
 * use of them disappears from the generated code.
 * NOTE(review): presumably the branch selected when dumping is disabled --
 * the enclosing conditional is not visible here; confirm.
 */
140 #define DUMP_I( OP, I )
141 #define DUMP_R( OP, R0 )
142 #define DUMP_RR( OP, R0, R1 )
143 #define DUMP_RRI( OP, R0, R1, I )
/* Loop header that steps CHAN through 0, 1, 2 and 3; the statement that
 * follows the macro is the loop body. CHAN must be an assignable integer
 * variable supplied by the caller.
 */
147 #define FOR_EACH_CHANNEL( CHAN )\
148 for( CHAN = 0; CHAN < 4; CHAN++ )
/* Non-zero when bit CHAN is set in the WriteMask of the first destination
 * register (FullDstRegisters[0]) of instruction INST.
 */
150 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
151 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
/* `if` header that runs the following statement only when channel CHAN is
 * enabled in INST's first destination write mask. It expands to a bare
 * `if`, so an `else` written after the guarded statement binds to it.
 */
153 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
154 if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
/* Combines FOR_EACH_CHANNEL and IF_IS_DST0_CHANNEL_ENABLED: iterates CHAN
 * over 0..3 and executes the following statement only for the channels
 * that are set in INST's first destination write mask.
 */
156 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
157 FOR_EACH_CHANNEL( CHAN )\
158 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
/* Short alias for TGSI_EXEC_TEMP_R0, used below as the vec argument of
 * get_temp() and as a scale factor when indexing the store array.
 */
165 #define TEMP_R0 TGSI_EXEC_TEMP_R0
168 * X86 utility functions.
171 static struct x86_reg
177 (enum x86_reg_name
) xmm
);
181 * X86 register mapping helpers.
184 static struct x86_reg
185 get_const_base( void )
192 static struct x86_reg
193 get_input_base( void )
200 static struct x86_reg
201 get_output_base( void )
208 static struct x86_reg
209 get_temp_base( void )
222 static struct x86_reg
223 get_coef_base( void )
225 return get_output_base();
229 * Data access helpers.
232 static struct x86_reg
236 return x86_make_disp(
237 x86_make_reg( file_REG32
, reg_SP
),
241 static struct x86_reg
246 return x86_make_disp(
248 (vec
* 4 + chan
) * 4 );
251 static struct x86_reg
256 return x86_make_disp(
258 (vec
* 4 + chan
) * 16 );
261 static struct x86_reg
266 return x86_make_disp(
268 (vec
* 4 + chan
) * 16 );
271 static struct x86_reg
276 return x86_make_disp(
278 (vec
* 4 + chan
) * 16 );
281 static struct x86_reg
287 return x86_make_disp(
289 ((vec
* 3 + member
) * 4 + chan
) * 4 );
293 * X86 rtasm wrappers.
298 struct x86_function
*func
,
302 DUMP_RR( "ADDPS", dst
, src
);
303 sse_addps( func
, dst
, src
);
308 struct x86_function
*func
,
312 DUMP_RR( "ANDNPS", dst
, src
);
313 sse_andnps( func
, dst
, src
);
318 struct x86_function
*func
,
322 DUMP_RR( "ANDPS", dst
, src
);
323 sse_andps( func
, dst
, src
);
328 struct x86_function
*func
,
331 struct x86_reg ecx
= x86_make_reg( file_REG32
, reg_CX
);
333 DUMP_I( "CALL", addr
);
334 x86_mov_reg_imm( func
, ecx
, (unsigned long) addr
);
335 x86_call( func
, ecx
);
340 struct x86_function
*func
,
345 DUMP_RRI( "CMPPS", dst
, src
, cc
);
346 sse_cmpps( func
, dst
, src
, cc
);
351 struct x86_function
*func
,
355 DUMP_RR( "CVTTPS2DQ", dst
, src
);
356 sse2_cvttps2dq( func
, dst
, src
);
361 struct x86_function
*func
,
365 DUMP_RR( "MAXPS", dst
, src
);
366 sse_maxps( func
, dst
, src
);
371 struct x86_function
*func
,
375 DUMP_RR( "MINPS", dst
, src
);
376 sse_minps( func
, dst
, src
);
381 struct x86_function
*func
,
385 DUMP_RR( "MOV", dst
, src
);
386 x86_mov( func
, dst
, src
);
391 struct x86_function
*func
,
395 DUMP_RR( "MOVAPS", dst
, src
);
396 sse_movaps( func
, dst
, src
);
401 struct x86_function
*func
,
405 DUMP_RR( "MOVSS", dst
, src
);
406 sse_movss( func
, dst
, src
);
411 struct x86_function
*func
,
415 DUMP_RR( "MOVUPS", dst
, src
);
416 sse_movups( func
, dst
, src
);
421 struct x86_function
*func
,
425 DUMP_RR( "MULPS", dst
, src
);
426 sse_mulps( func
, dst
, src
);
431 struct x86_function
*func
,
435 DUMP_RR( "OR", dst
, src
);
436 x86_or( func
, dst
, src
);
441 struct x86_function
*func
,
445 DUMP_RR( "ORPS", dst
, src
);
446 sse_orps( func
, dst
, src
);
451 struct x86_function
*func
,
455 DUMP_RR( "PMOVMSKB", dst
, src
);
456 sse_pmovmskb( func
, dst
, src
);
461 struct x86_function
*func
,
464 DUMP_R( "POP", dst
);
465 x86_pop( func
, dst
);
470 struct x86_function
*func
,
473 DUMP_R( "PUSH", dst
);
474 x86_push( func
, dst
);
479 struct x86_function
*func
,
483 DUMP_RR( "RCPPS", dst
, src
);
484 sse2_rcpps( func
, dst
, src
);
490 struct x86_function
*func
,
493 DUMP_I( "RET", size
);
494 x86_retw( func
, size
);
499 struct x86_function
*func
)
508 struct x86_function
*func
,
512 DUMP_RR( "RSQRTPS", dst
, src
);
513 sse_rsqrtps( func
, dst
, src
);
518 struct x86_function
*func
,
523 DUMP_RRI( "SHUFPS", dst
, src
, shuf
);
524 sse_shufps( func
, dst
, src
, shuf
);
529 struct x86_function
*func
,
533 DUMP_RR( "SUBPS", dst
, src
);
534 sse_subps( func
, dst
, src
);
539 struct x86_function
*func
,
543 DUMP_RR( "XORPS", dst
, src
);
544 sse_xorps( func
, dst
, src
);
548 * Data fetch helpers.
553 struct x86_function
*func
,
561 get_const( vec
, chan
) );
566 SHUF( 0, 0, 0, 0 ) );
571 struct x86_function
*func
,
579 get_input( vec
, chan
) );
584 struct x86_function
*func
,
591 get_output( vec
, chan
),
597 struct x86_function
*func
,
605 get_temp( vec
, chan
) );
610 struct x86_function
*func
,
619 get_coef( vec
, chan
, member
) );
624 SHUF( 0, 0, 0, 0 ) );
628 * Data store helpers.
633 struct x86_function
*func
,
640 get_input( vec
, chan
),
646 struct x86_function
*func
,
653 get_temp( vec
, chan
),
659 struct x86_function
*func
,
667 vec
+ TGSI_EXEC_NUM_TEMPS
,
672 * Coefficient fetch helpers.
677 struct x86_function
*func
,
692 struct x86_function
*func
,
707 struct x86_function
*func
,
721 * Function call helpers.
726 struct x86_function
*func
)
738 /* It is important on non-win32 platforms that temp base is pushed last.
747 struct x86_function
*func
)
749 /* Restore GP registers in reverse order.
767 struct x86_function
*func
,
773 get_temp( TEMP_R0
, 0 ),
774 make_xmm( xmm_dst
) );
782 get_temp( TEMP_R0
, 0 ) );
795 get_temp( TEMP_R0
, 0 ) );
799 emit_func_call_dst_src(
800 struct x86_function
*func
,
807 get_temp( TEMP_R0
, 1 ),
808 make_xmm( xmm_src
) );
817 * Low-level instruction translators.
822 struct x86_function
*func
,
829 TGSI_EXEC_TEMP_7FFFFFFF_I
,
830 TGSI_EXEC_TEMP_7FFFFFFF_C
) );
835 struct x86_function
*func
,
842 make_xmm( xmm_src
) );
850 store
[0] = (float) cos( (double) store
[0] );
851 store
[1] = (float) cos( (double) store
[1] );
852 store
[2] = (float) cos( (double) store
[2] );
853 store
[3] = (float) cos( (double) store
[3] );
855 const unsigned X
= TEMP_R0
* 16;
856 store
[X
+ 0] = cosf( store
[X
+ 0] );
857 store
[X
+ 1] = cosf( store
[X
+ 1] );
858 store
[X
+ 2] = cosf( store
[X
+ 2] );
859 store
[X
+ 3] = cosf( store
[X
+ 3] );
865 struct x86_function
*func
,
879 store
[0] = (float) pow( 2.0, (double) store
[0] );
880 store
[1] = (float) pow( 2.0, (double) store
[1] );
881 store
[2] = (float) pow( 2.0, (double) store
[2] );
882 store
[3] = (float) pow( 2.0, (double) store
[3] );
884 const unsigned X
= TEMP_R0
* 16;
885 store
[X
+ 0] = powf( 2.0f
, store
[X
+ 0] );
886 store
[X
+ 1] = powf( 2.0f
, store
[X
+ 1] );
887 store
[X
+ 2] = powf( 2.0f
, store
[X
+ 2] );
888 store
[X
+ 3] = powf( 2.0f
, store
[X
+ 3] );
894 struct x86_function
*func
,
905 struct x86_function
*func
,
919 const unsigned X
= 0;
921 const unsigned X
= TEMP_R0
* 16;
923 store
[X
+ 0] = (float) floor( (double) store
[X
+ 0] );
924 store
[X
+ 1] = (float) floor( (double) store
[X
+ 1] );
925 store
[X
+ 2] = (float) floor( (double) store
[X
+ 2] );
926 store
[X
+ 3] = (float) floor( (double) store
[X
+ 3] );
931 struct x86_function
*func
,
945 const unsigned X
= 0;
947 const unsigned X
= TEMP_R0
* 16;
949 store
[X
+ 0] -= (float) floor( (double) store
[X
+ 0] );
950 store
[X
+ 1] -= (float) floor( (double) store
[X
+ 1] );
951 store
[X
+ 2] -= (float) floor( (double) store
[X
+ 2] );
952 store
[X
+ 3] -= (float) floor( (double) store
[X
+ 3] );
957 struct x86_function
*func
,
971 const unsigned X
= 0;
973 const unsigned X
= TEMP_R0
* 16;
975 store
[X
+ 0] = LOG2( store
[X
+ 0] );
976 store
[X
+ 1] = LOG2( store
[X
+ 1] );
977 store
[X
+ 2] = LOG2( store
[X
+ 2] );
978 store
[X
+ 3] = LOG2( store
[X
+ 3] );
983 struct x86_function
*func
,
994 struct x86_function
*func
,
1000 make_xmm( xmm_dst
),
1001 make_xmm( xmm_src
) );
1005 emit_mul (struct x86_function
*func
,
1011 make_xmm( xmm_dst
),
1012 make_xmm( xmm_src
) );
1017 struct x86_function
*func
,
1024 TGSI_EXEC_TEMP_80000000_I
,
1025 TGSI_EXEC_TEMP_80000000_C
) );
1028 static void XSTDCALL
1033 store
[0] = (float) pow( (double) store
[0], (double) store
[4] );
1034 store
[1] = (float) pow( (double) store
[1], (double) store
[5] );
1035 store
[2] = (float) pow( (double) store
[2], (double) store
[6] );
1036 store
[3] = (float) pow( (double) store
[3], (double) store
[7] );
1038 const unsigned X
= TEMP_R0
* 16;
1039 store
[X
+ 0] = powf( store
[X
+ 0], store
[X
+ 4] );
1040 store
[X
+ 1] = powf( store
[X
+ 1], store
[X
+ 5] );
1041 store
[X
+ 2] = powf( store
[X
+ 2], store
[X
+ 6] );
1042 store
[X
+ 3] = powf( store
[X
+ 3], store
[X
+ 7] );
1048 struct x86_function
*func
,
1052 emit_func_call_dst_src(
1061 struct x86_function
*func
,
1067 make_xmm( xmm_dst
),
1068 make_xmm( xmm_src
) );
1073 struct x86_function
*func
,
1079 make_xmm( xmm_dst
),
1080 make_xmm( xmm_src
) );
1085 struct x86_function
*func
,
1092 TGSI_EXEC_TEMP_80000000_I
,
1093 TGSI_EXEC_TEMP_80000000_C
) );
1096 static void XSTDCALL
1101 store
[0] = (float) sin( (double) store
[0] );
1102 store
[1] = (float) sin( (double) store
[1] );
1103 store
[2] = (float) sin( (double) store
[2] );
1104 store
[3] = (float) sin( (double) store
[3] );
1106 const unsigned X
= TEMP_R0
* 16;
1107 store
[X
+ 0] = sinf( store
[X
+ 0] );
1108 store
[X
+ 1] = sinf( store
[X
+ 1] );
1109 store
[X
+ 2] = sinf( store
[X
+ 2] );
1110 store
[X
+ 3] = sinf( store
[X
+ 3] );
1115 emit_sin (struct x86_function
*func
,
1126 struct x86_function
*func
,
1132 make_xmm( xmm_dst
),
1133 make_xmm( xmm_src
) );
1142 struct x86_function
*func
,
1144 const struct tgsi_full_src_register
*reg
,
1145 const unsigned chan_index
)
1147 unsigned swizzle
= tgsi_util_get_full_src_register_extswizzle( reg
, chan_index
);
1150 case TGSI_EXTSWIZZLE_X
:
1151 case TGSI_EXTSWIZZLE_Y
:
1152 case TGSI_EXTSWIZZLE_Z
:
1153 case TGSI_EXTSWIZZLE_W
:
1154 switch( reg
->SrcRegister
.File
) {
1155 case TGSI_FILE_CONSTANT
:
1159 reg
->SrcRegister
.Index
,
1163 case TGSI_FILE_INPUT
:
1167 reg
->SrcRegister
.Index
,
1171 case TGSI_FILE_TEMPORARY
:
1175 reg
->SrcRegister
.Index
,
1184 case TGSI_EXTSWIZZLE_ZERO
:
1188 TGSI_EXEC_TEMP_00000000_I
,
1189 TGSI_EXEC_TEMP_00000000_C
);
1192 case TGSI_EXTSWIZZLE_ONE
:
1196 TGSI_EXEC_TEMP_ONE_I
,
1197 TGSI_EXEC_TEMP_ONE_C
);
1204 switch( tgsi_util_get_full_src_register_sign_mode( reg
, chan_index
) ) {
1205 case TGSI_UTIL_SIGN_CLEAR
:
1206 emit_abs( func
, xmm
);
1209 case TGSI_UTIL_SIGN_SET
:
1210 emit_setsign( func
, xmm
);
1213 case TGSI_UTIL_SIGN_TOGGLE
:
1214 emit_neg( func
, xmm
);
1217 case TGSI_UTIL_SIGN_KEEP
:
/* Convenience wrapper around emit_fetch(): passes the address of source
 * operand number INDEX of instruction INST (FullSrcRegisters[INDEX])
 * together with channel CHAN. XMM is forwarded unchanged as emit_fetch's
 * xmm argument.
 * NOTE(review): XMM is presumably the index of the XMM register that
 * receives the fetched value -- confirm against emit_fetch.
 */
1222 #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
1223 emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
1231 struct x86_function
*func
,
1233 const struct tgsi_full_dst_register
*reg
,
1234 const struct tgsi_full_instruction
*inst
,
1235 unsigned chan_index
)
1237 switch( reg
->DstRegister
.File
) {
1238 case TGSI_FILE_OUTPUT
:
1242 reg
->DstRegister
.Index
,
1246 case TGSI_FILE_TEMPORARY
:
1250 reg
->DstRegister
.Index
,
1254 case TGSI_FILE_ADDRESS
:
1258 reg
->DstRegister
.Index
,
1266 switch( inst
->Instruction
.Saturate
) {
1270 case TGSI_SAT_ZERO_ONE
:
1274 case TGSI_SAT_MINUS_PLUS_ONE
:
/* Convenience wrapper around emit_store(): passes the address of
 * destination operand number INDEX of instruction INST
 * (FullDstRegisters[INDEX]), the address of INST itself, and channel CHAN.
 * XMM is forwarded unchanged as emit_store's xmm argument.
 * NOTE(review): XMM is presumably the index of the XMM register holding
 * the value to be written -- confirm against emit_store.
 */
1280 #define STORE( FUNC, INST, XMM, INDEX, CHAN )\
1281 emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
1284 * High-level instruction translators.
1289 struct x86_function
*func
,
1290 const struct tgsi_full_src_register
*reg
)
1292 unsigned uniquemask
;
1293 unsigned registers
[4];
1294 unsigned nextregister
= 0;
1295 unsigned firstchan
= ~0;
1296 unsigned chan_index
;
1298 /* This mask stores component bits that were already tested. Note that
1299 * we test if the value is less than zero, so 1.0 and 0.0 need not be
1301 uniquemask
= (1 << TGSI_EXTSWIZZLE_ZERO
) | (1 << TGSI_EXTSWIZZLE_ONE
);
1303 FOR_EACH_CHANNEL( chan_index
) {
1306 /* unswizzle channel */
1307 swizzle
= tgsi_util_get_full_src_register_extswizzle(
1311 /* check if the component has not been already tested */
1312 if( !(uniquemask
& (1 << swizzle
)) ) {
1313 uniquemask
|= 1 << swizzle
;
1315 /* allocate register */
1316 registers
[chan_index
] = nextregister
;
1324 /* mark the first channel used */
1325 if( firstchan
== ~0 ) {
1326 firstchan
= chan_index
;
1333 x86_make_reg( file_REG32
, reg_AX
) );
1336 x86_make_reg( file_REG32
, reg_DX
) );
1338 FOR_EACH_CHANNEL( chan_index
) {
1339 if( uniquemask
& (1 << chan_index
) ) {
1342 make_xmm( registers
[chan_index
] ),
1344 TGSI_EXEC_TEMP_00000000_I
,
1345 TGSI_EXEC_TEMP_00000000_C
),
1348 if( chan_index
== firstchan
) {
1351 x86_make_reg( file_REG32
, reg_AX
),
1352 make_xmm( registers
[chan_index
] ) );
1357 x86_make_reg( file_REG32
, reg_DX
),
1358 make_xmm( registers
[chan_index
] ) );
1361 x86_make_reg( file_REG32
, reg_AX
),
1362 x86_make_reg( file_REG32
, reg_DX
) );
1370 TGSI_EXEC_TEMP_KILMASK_I
,
1371 TGSI_EXEC_TEMP_KILMASK_C
),
1372 x86_make_reg( file_REG32
, reg_AX
) );
1376 x86_make_reg( file_REG32
, reg_DX
) );
1379 x86_make_reg( file_REG32
, reg_AX
) );
1384 struct x86_function
*func
,
1385 struct tgsi_full_instruction
*inst
,
1388 unsigned chan_index
;
1390 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1391 FETCH( func
, *inst
, 0, 0, chan_index
);
1392 FETCH( func
, *inst
, 1, 1, chan_index
);
1402 TGSI_EXEC_TEMP_ONE_I
,
1403 TGSI_EXEC_TEMP_ONE_C
) );
1404 STORE( func
, *inst
, 0, 0, chan_index
);
1410 struct x86_function
*func
,
1411 struct tgsi_full_instruction
*inst
)
1413 unsigned chan_index
;
1415 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1416 FETCH( func
, *inst
, 0, 0, chan_index
);
1417 FETCH( func
, *inst
, 1, 1, chan_index
);
1418 FETCH( func
, *inst
, 2, 2, chan_index
);
1423 TGSI_EXEC_TEMP_00000000_I
,
1424 TGSI_EXEC_TEMP_00000000_C
),
1438 STORE( func
, *inst
, 0, 0, chan_index
);
1444 struct x86_function
*func
,
1445 struct tgsi_full_instruction
*inst
)
1447 unsigned chan_index
;
1449 switch( inst
->Instruction
.Opcode
) {
1450 case TGSI_OPCODE_ARL
:
1451 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1452 FETCH( func
, *inst
, 0, 0, chan_index
);
1453 emit_f2it( func
, 0 );
1454 STORE( func
, *inst
, 0, 0, chan_index
);
1458 case TGSI_OPCODE_MOV
:
1459 /* TGSI_OPCODE_SWZ */
1460 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1461 FETCH( func
, *inst
, 0, 0, chan_index
);
1462 STORE( func
, *inst
, 0, 0, chan_index
);
1466 case TGSI_OPCODE_LIT
:
1467 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1468 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
1472 TGSI_EXEC_TEMP_ONE_I
,
1473 TGSI_EXEC_TEMP_ONE_C
);
1474 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ) {
1475 STORE( func
, *inst
, 0, 0, CHAN_X
);
1477 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) ) {
1478 STORE( func
, *inst
, 0, 0, CHAN_W
);
1481 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ||
1482 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1483 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1484 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1489 TGSI_EXEC_TEMP_00000000_I
,
1490 TGSI_EXEC_TEMP_00000000_C
) );
1491 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1493 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1494 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1499 TGSI_EXEC_TEMP_00000000_I
,
1500 TGSI_EXEC_TEMP_00000000_C
) );
1501 FETCH( func
, *inst
, 2, 0, CHAN_W
);
1506 TGSI_EXEC_TEMP_128_I
,
1507 TGSI_EXEC_TEMP_128_C
) );
1512 TGSI_EXEC_TEMP_MINUS_128_I
,
1513 TGSI_EXEC_TEMP_MINUS_128_C
) );
1514 emit_pow( func
, 1, 2 );
1515 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1529 STORE( func
, *inst
, 2, 0, CHAN_Z
);
1534 case TGSI_OPCODE_RCP
:
1535 /* TGSI_OPCODE_RECIP */
1536 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1537 emit_rcp( func
, 0, 0 );
1538 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1539 STORE( func
, *inst
, 0, 0, chan_index
);
1543 case TGSI_OPCODE_RSQ
:
1544 /* TGSI_OPCODE_RECIPSQRT */
1545 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1546 emit_rsqrt( func
, 0, 0 );
1547 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1548 STORE( func
, *inst
, 0, 0, chan_index
);
1552 case TGSI_OPCODE_EXP
:
1556 case TGSI_OPCODE_LOG
:
1560 case TGSI_OPCODE_MUL
:
1561 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1562 FETCH( func
, *inst
, 0, 0, chan_index
);
1563 FETCH( func
, *inst
, 1, 1, chan_index
);
1564 emit_mul( func
, 0, 1 );
1565 STORE( func
, *inst
, 0, 0, chan_index
);
1569 case TGSI_OPCODE_ADD
:
1570 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1571 FETCH( func
, *inst
, 0, 0, chan_index
);
1572 FETCH( func
, *inst
, 1, 1, chan_index
);
1573 emit_add( func
, 0, 1 );
1574 STORE( func
, *inst
, 0, 0, chan_index
);
1578 case TGSI_OPCODE_DP3
:
1579 /* TGSI_OPCODE_DOT3 */
1580 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1581 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1582 emit_mul( func
, 0, 1 );
1583 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1584 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1585 emit_mul( func
, 1, 2 );
1586 emit_add( func
, 0, 1 );
1587 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1588 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1589 emit_mul( func
, 1, 2 );
1590 emit_add( func
, 0, 1 );
1591 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1592 STORE( func
, *inst
, 0, 0, chan_index
);
1596 case TGSI_OPCODE_DP4
:
1597 /* TGSI_OPCODE_DOT4 */
1598 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1599 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1600 emit_mul( func
, 0, 1 );
1601 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1602 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1603 emit_mul( func
, 1, 2 );
1604 emit_add( func
, 0, 1 );
1605 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1606 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1607 emit_mul(func
, 1, 2 );
1608 emit_add(func
, 0, 1 );
1609 FETCH( func
, *inst
, 1, 0, CHAN_W
);
1610 FETCH( func
, *inst
, 2, 1, CHAN_W
);
1611 emit_mul( func
, 1, 2 );
1612 emit_add( func
, 0, 1 );
1613 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1614 STORE( func
, *inst
, 0, 0, chan_index
);
1618 case TGSI_OPCODE_DST
:
1619 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1623 TGSI_EXEC_TEMP_ONE_I
,
1624 TGSI_EXEC_TEMP_ONE_C
);
1625 STORE( func
, *inst
, 0, 0, CHAN_X
);
1627 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1628 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1629 FETCH( func
, *inst
, 1, 1, CHAN_Y
);
1630 emit_mul( func
, 0, 1 );
1631 STORE( func
, *inst
, 0, 0, CHAN_Y
);
1633 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1634 FETCH( func
, *inst
, 0, 0, CHAN_Z
);
1635 STORE( func
, *inst
, 0, 0, CHAN_Z
);
1637 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1638 FETCH( func
, *inst
, 0, 1, CHAN_W
);
1639 STORE( func
, *inst
, 0, 0, CHAN_W
);
1643 case TGSI_OPCODE_MIN
:
1644 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1645 FETCH( func
, *inst
, 0, 0, chan_index
);
1646 FETCH( func
, *inst
, 1, 1, chan_index
);
1651 STORE( func
, *inst
, 0, 0, chan_index
);
1655 case TGSI_OPCODE_MAX
:
1656 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1657 FETCH( func
, *inst
, 0, 0, chan_index
);
1658 FETCH( func
, *inst
, 1, 1, chan_index
);
1663 STORE( func
, *inst
, 0, 0, chan_index
);
1667 case TGSI_OPCODE_SLT
:
1668 /* TGSI_OPCODE_SETLT */
1669 emit_setcc( func
, inst
, cc_LessThan
);
1672 case TGSI_OPCODE_SGE
:
1673 /* TGSI_OPCODE_SETGE */
1674 emit_setcc( func
, inst
, cc_NotLessThan
);
1677 case TGSI_OPCODE_MAD
:
1678 /* TGSI_OPCODE_MADD */
1679 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1680 FETCH( func
, *inst
, 0, 0, chan_index
);
1681 FETCH( func
, *inst
, 1, 1, chan_index
);
1682 FETCH( func
, *inst
, 2, 2, chan_index
);
1683 emit_mul( func
, 0, 1 );
1684 emit_add( func
, 0, 2 );
1685 STORE( func
, *inst
, 0, 0, chan_index
);
1689 case TGSI_OPCODE_SUB
:
1690 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1691 FETCH( func
, *inst
, 0, 0, chan_index
);
1692 FETCH( func
, *inst
, 1, 1, chan_index
);
1693 emit_sub( func
, 0, 1 );
1694 STORE( func
, *inst
, 0, 0, chan_index
);
1698 case TGSI_OPCODE_LERP
:
1699 /* TGSI_OPCODE_LRP */
1700 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1701 FETCH( func
, *inst
, 0, 0, chan_index
);
1702 FETCH( func
, *inst
, 1, 1, chan_index
);
1703 FETCH( func
, *inst
, 2, 2, chan_index
);
1704 emit_sub( func
, 1, 2 );
1705 emit_mul( func
, 0, 1 );
1706 emit_add( func
, 0, 2 );
1707 STORE( func
, *inst
, 0, 0, chan_index
);
1711 case TGSI_OPCODE_CND
:
1715 case TGSI_OPCODE_CND0
:
1719 case TGSI_OPCODE_DOT2ADD
:
1720 /* TGSI_OPCODE_DP2A */
1724 case TGSI_OPCODE_INDEX
:
1728 case TGSI_OPCODE_NEGATE
:
1732 case TGSI_OPCODE_FRAC
:
1733 /* TGSI_OPCODE_FRC */
1734 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1735 FETCH( func
, *inst
, 0, 0, chan_index
);
1736 emit_frc( func
, 0 );
1737 STORE( func
, *inst
, 0, 0, chan_index
);
1741 case TGSI_OPCODE_CLAMP
:
1745 case TGSI_OPCODE_FLOOR
:
1746 /* TGSI_OPCODE_FLR */
1747 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1748 FETCH( func
, *inst
, 0, 0, chan_index
);
1749 emit_flr( func
, 0 );
1750 STORE( func
, *inst
, 0, 0, chan_index
);
1754 case TGSI_OPCODE_ROUND
:
1758 case TGSI_OPCODE_EXPBASE2
:
1759 /* TGSI_OPCODE_EX2 */
1760 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1761 emit_ex2( func
, 0 );
1762 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1763 STORE( func
, *inst
, 0, 0, chan_index
);
1767 case TGSI_OPCODE_LOGBASE2
:
1768 /* TGSI_OPCODE_LG2 */
1769 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1770 emit_lg2( func
, 0 );
1771 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1772 STORE( func
, *inst
, 0, 0, chan_index
);
1776 case TGSI_OPCODE_POWER
:
1777 /* TGSI_OPCODE_POW */
1778 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1779 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1780 emit_pow( func
, 0, 1 );
1781 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1782 STORE( func
, *inst
, 0, 0, chan_index
);
1786 case TGSI_OPCODE_CROSSPRODUCT
:
1787 /* TGSI_OPCODE_XPD */
1788 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1789 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ) {
1790 FETCH( func
, *inst
, 1, 1, CHAN_Z
);
1791 FETCH( func
, *inst
, 3, 0, CHAN_Z
);
1793 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) ||
1794 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1795 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
1796 FETCH( func
, *inst
, 4, 1, CHAN_Y
);
1798 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
1799 emit_MOV( func
, 2, 0 );
1800 emit_mul( func
, 2, 1 );
1801 emit_MOV( func
, 5, 3 );
1802 emit_mul( func
, 5, 4 );
1803 emit_sub( func
, 2, 5 );
1804 STORE( func
, *inst
, 2, 0, CHAN_X
);
1806 if( IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) ||
1807 IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) ) {
1808 FETCH( func
, *inst
, 2, 1, CHAN_X
);
1809 FETCH( func
, *inst
, 5, 0, CHAN_X
);
1811 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
1812 emit_mul( func
, 3, 2 );
1813 emit_mul( func
, 1, 5 );
1814 emit_sub( func
, 3, 1 );
1815 STORE( func
, *inst
, 3, 0, CHAN_Y
);
1817 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
1818 emit_mul( func
, 5, 4 );
1819 emit_mul( func
, 0, 2 );
1820 emit_sub( func
, 5, 0 );
1821 STORE( func
, *inst
, 5, 0, CHAN_Z
);
1823 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
1824 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_ONE_I
, TGSI_EXEC_TEMP_ONE_C
);
1825 STORE( func
, *inst
, 0, 0, CHAN_W
);
1829 case TGSI_OPCODE_MULTIPLYMATRIX
:
1833 case TGSI_OPCODE_ABS
:
1834 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1835 FETCH( func
, *inst
, 0, 0, chan_index
);
1836 emit_abs( func
, 0) ;
1838 STORE( func
, *inst
, 0, 0, chan_index
);
1842 case TGSI_OPCODE_RCC
:
1846 case TGSI_OPCODE_DPH
:
1847 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1848 FETCH( func
, *inst
, 1, 1, CHAN_X
);
1849 emit_mul( func
, 0, 1 );
1850 FETCH( func
, *inst
, 1, 0, CHAN_Y
);
1851 FETCH( func
, *inst
, 2, 1, CHAN_Y
);
1852 emit_mul( func
, 1, 2 );
1853 emit_add( func
, 0, 1 );
1854 FETCH( func
, *inst
, 1, 0, CHAN_Z
);
1855 FETCH( func
, *inst
, 2, 1, CHAN_Z
);
1856 emit_mul( func
, 1, 2 );
1857 emit_add( func
, 0, 1 );
1858 FETCH( func
, *inst
, 1, 1, CHAN_W
);
1859 emit_add( func
, 0, 1 );
1860 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1861 STORE( func
, *inst
, 0, 0, chan_index
);
1865 case TGSI_OPCODE_COS
:
1866 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1867 emit_cos( func
, 0 );
1868 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1869 STORE( func
, *inst
, 0, 0, chan_index
);
1873 case TGSI_OPCODE_DDX
:
1877 case TGSI_OPCODE_DDY
:
1881 case TGSI_OPCODE_KIL
:
1882 emit_kil( func
, &inst
->FullSrcRegisters
[0] );
1885 case TGSI_OPCODE_PK2H
:
1889 case TGSI_OPCODE_PK2US
:
1893 case TGSI_OPCODE_PK4B
:
1897 case TGSI_OPCODE_PK4UB
:
1901 case TGSI_OPCODE_RFL
:
1905 case TGSI_OPCODE_SEQ
:
1909 case TGSI_OPCODE_SFL
:
1913 case TGSI_OPCODE_SGT
:
1917 case TGSI_OPCODE_SIN
:
1918 FETCH( func
, *inst
, 0, 0, CHAN_X
);
1919 emit_sin( func
, 0 );
1920 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1921 STORE( func
, *inst
, 0, 0, chan_index
);
1925 case TGSI_OPCODE_SLE
:
1929 case TGSI_OPCODE_SNE
:
1933 case TGSI_OPCODE_STR
:
1937 case TGSI_OPCODE_TEX
:
1939 /* Disable dummy texture code:
1944 TGSI_EXEC_TEMP_ONE_I
,
1945 TGSI_EXEC_TEMP_ONE_C
);
1946 FOR_EACH_DST0_ENABLED_CHANNEL( *inst
, chan_index
) {
1947 STORE( func
, *inst
, 0, 0, chan_index
);
1955 case TGSI_OPCODE_TXD
:
1959 case TGSI_OPCODE_UP2H
:
1963 case TGSI_OPCODE_UP2US
:
1967 case TGSI_OPCODE_UP4B
:
1971 case TGSI_OPCODE_UP4UB
:
1975 case TGSI_OPCODE_X2D
:
1979 case TGSI_OPCODE_ARA
:
1983 case TGSI_OPCODE_ARR
:
1987 case TGSI_OPCODE_BRA
:
1991 case TGSI_OPCODE_CAL
:
1995 case TGSI_OPCODE_RET
:
1996 case TGSI_OPCODE_END
:
1998 emit_retw( func
, 16 );
2004 case TGSI_OPCODE_SSG
:
2008 case TGSI_OPCODE_CMP
:
2009 emit_cmp (func
, inst
);
2012 case TGSI_OPCODE_SCS
:
2013 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_X
) {
2014 FETCH( func
, *inst
, 0, 0, CHAN_X
);
2015 emit_cos( func
, 0 );
2016 STORE( func
, *inst
, 0, 0, CHAN_X
);
2018 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Y
) {
2019 FETCH( func
, *inst
, 0, 0, CHAN_Y
);
2020 emit_sin( func
, 0 );
2021 STORE( func
, *inst
, 0, 0, CHAN_Y
);
2023 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_Z
) {
2024 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_00000000_I
, TGSI_EXEC_TEMP_00000000_C
);
2025 STORE( func
, *inst
, 0, 0, CHAN_Z
);
2027 IF_IS_DST0_CHANNEL_ENABLED( *inst
, CHAN_W
) {
2028 FETCH( func
, *inst
, 0, TGSI_EXEC_TEMP_ONE_I
, TGSI_EXEC_TEMP_ONE_C
);
2029 STORE( func
, *inst
, 0, 0, CHAN_W
);
2033 case TGSI_OPCODE_TXB
:
2037 case TGSI_OPCODE_NRM
:
2041 case TGSI_OPCODE_DIV
:
2045 case TGSI_OPCODE_DP2
:
2049 case TGSI_OPCODE_TXL
:
2053 case TGSI_OPCODE_BRK
:
2057 case TGSI_OPCODE_IF
:
2061 case TGSI_OPCODE_LOOP
:
2065 case TGSI_OPCODE_REP
:
2069 case TGSI_OPCODE_ELSE
:
2073 case TGSI_OPCODE_ENDIF
:
2077 case TGSI_OPCODE_ENDLOOP
:
2081 case TGSI_OPCODE_ENDREP
:
2085 case TGSI_OPCODE_PUSHA
:
2089 case TGSI_OPCODE_POPA
:
2093 case TGSI_OPCODE_CEIL
:
2097 case TGSI_OPCODE_I2F
:
2101 case TGSI_OPCODE_NOT
:
2105 case TGSI_OPCODE_TRUNC
:
2109 case TGSI_OPCODE_SHL
:
2113 case TGSI_OPCODE_SHR
:
2117 case TGSI_OPCODE_AND
:
2121 case TGSI_OPCODE_OR
:
2125 case TGSI_OPCODE_MOD
:
2129 case TGSI_OPCODE_XOR
:
2133 case TGSI_OPCODE_SAD
:
2137 case TGSI_OPCODE_TXF
:
2141 case TGSI_OPCODE_TXQ
:
2145 case TGSI_OPCODE_CONT
:
2149 case TGSI_OPCODE_EMIT
:
2153 case TGSI_OPCODE_ENDPRIM
:
2166 struct x86_function
*func
,
2167 struct tgsi_full_declaration
*decl
)
2169 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
2170 unsigned first
, last
, mask
;
2173 assert( decl
->Declaration
.Declare
== TGSI_DECLARE_RANGE
);
2175 first
= decl
->u
.DeclarationRange
.First
;
2176 last
= decl
->u
.DeclarationRange
.Last
;
2177 mask
= decl
->Declaration
.UsageMask
;
2179 for( i
= first
; i
<= last
; i
++ ) {
2180 for( j
= 0; j
< NUM_CHANNELS
; j
++ ) {
2181 if( mask
& (1 << j
) ) {
2182 switch( decl
->Interpolation
.Interpolate
) {
2183 case TGSI_INTERPOLATE_CONSTANT
:
2184 emit_coef_a0( func
, 0, i
, j
);
2185 emit_inputs( func
, 0, i
, j
);
2188 case TGSI_INTERPOLATE_LINEAR
:
2189 emit_inputf( func
, 0, 0, TGSI_SWIZZLE_X
);
2190 emit_coef_dadx( func
, 1, i
, j
);
2191 emit_inputf( func
, 2, 0, TGSI_SWIZZLE_Y
);
2192 emit_coef_dady( func
, 3, i
, j
);
2193 emit_mul( func
, 0, 1 ); /* x * dadx */
2194 emit_coef_a0( func
, 4, i
, j
);
2195 emit_mul( func
, 2, 3 ); /* y * dady */
2196 emit_add( func
, 0, 4 ); /* x * dadx + a0 */
2197 emit_add( func
, 0, 2 ); /* x * dadx + y * dady + a0 */
2198 emit_inputs( func
, 0, i
, j
);
2201 case TGSI_INTERPOLATE_PERSPECTIVE
:
2202 emit_inputf( func
, 0, 0, TGSI_SWIZZLE_X
);
2203 emit_coef_dadx( func
, 1, i
, j
);
2204 emit_inputf( func
, 2, 0, TGSI_SWIZZLE_Y
);
2205 emit_coef_dady( func
, 3, i
, j
);
2206 emit_mul( func
, 0, 1 ); /* x * dadx */
2207 emit_inputf( func
, 4, 0, TGSI_SWIZZLE_W
);
2208 emit_coef_a0( func
, 5, i
, j
);
2209 emit_rcp( func
, 4, 4 ); /* 1.0 / w */
2210 emit_mul( func
, 2, 3 ); /* y * dady */
2211 emit_add( func
, 0, 5 ); /* x * dadx + a0 */
2212 emit_add( func
, 0, 2 ); /* x * dadx + y * dady + a0 */
2213 emit_mul( func
, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
2214 emit_inputs( func
, 0, i
, j
);
2229 struct tgsi_token
*tokens
,
2230 struct x86_function
*func
)
2232 struct tgsi_parse_context parse
;
2237 func
->csr
= func
->store
;
2242 get_argument( 0 ) );
2246 get_argument( 1 ) );
2250 get_argument( 2 ) );
2254 get_argument( 3 ) );
2256 tgsi_parse_init( &parse
, tokens
);
2258 while( !tgsi_parse_end_of_tokens( &parse
) && ok
) {
2259 tgsi_parse_token( &parse
);
2261 switch( parse
.FullToken
.Token
.Type
) {
2262 case TGSI_TOKEN_TYPE_DECLARATION
:
2265 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2266 ok
= emit_instruction(
2268 &parse
.FullToken
.FullInstruction
);
2271 debug_printf("failed to translate tgsi opcode %d\n",
2272 parse
.FullToken
.FullInstruction
.Instruction
.Opcode
);
2276 case TGSI_TOKEN_TYPE_IMMEDIATE
:
2277 /* XXX implement this */
2279 debug_printf("failed to emit immediate value\n");
2289 tgsi_parse_free( &parse
);
2297 * Fragment shaders are responsible for interpolating shader inputs. Because on
2298 * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
2299 * output, const, temp and coef), the code is split into two phases --
2300 * DECLARATION and INSTRUCTION phase.
2301 * GP register holding the output argument is aliased with the coeff argument,
2302 * as outputs are not needed in the DECLARATION phase.
2306 struct tgsi_token
*tokens
,
2307 struct x86_function
*func
)
2309 struct tgsi_parse_context parse
;
2310 boolean instruction_phase
= FALSE
;
2314 func
->csr
= func
->store
;
2316 /* DECLARATION phase, do not load output argument. */
2320 get_argument( 0 ) );
2324 get_argument( 2 ) );
2328 get_argument( 3 ) );
2332 get_argument( 4 ) );
2334 tgsi_parse_init( &parse
, tokens
);
2336 while( !tgsi_parse_end_of_tokens( &parse
) ) {
2337 tgsi_parse_token( &parse
);
2339 switch( parse
.FullToken
.Token
.Type
) {
2340 case TGSI_TOKEN_TYPE_DECLARATION
:
2343 &parse
.FullToken
.FullDeclaration
);
2346 case TGSI_TOKEN_TYPE_INSTRUCTION
:
2347 if( !instruction_phase
) {
2348 /* INSTRUCTION phase, overwrite coeff with output. */
2349 instruction_phase
= TRUE
;
2353 get_argument( 1 ) );
2357 &parse
.FullToken
.FullInstruction
);
2360 case TGSI_TOKEN_TYPE_IMMEDIATE
:
2361 /* XXX implement this */
2370 tgsi_parse_free( &parse
);