X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Frtasm%2Frtasm_x86sse.c;h=1acf3c373ebee1700b74463c834d1363a2098e3d;hb=cd63e35603568c168e8aebbc47ca6ea308f2ccfa;hp=4e036d9032c8514f69620ab53f444bc63ab826fc;hpb=a3dbd412df99c7d19b1f81b3b9ec7d5c8a09d069;p=mesa.git diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4e036d9032c..1acf3c373eb 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -21,11 +21,13 @@ * **************************************************************************/ -#if defined(__i386__) || defined(__386__) || defined(i386) +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_pointer.h" +#include "util/u_debug.h" +#include "util/u_pointer.h" #include "rtasm_execmem.h" #include "rtasm_x86sse.h" @@ -36,11 +38,8 @@ #define DUMP_SSE 0 -#if DUMP_SSE -static void -_print_reg( - struct x86_reg reg ) +void x86_print_reg( struct x86_reg reg ) { if (reg.mod != mod_REG) debug_printf( "[" ); @@ -77,6 +76,7 @@ _print_reg( debug_printf( "]" ); } +#if DUMP_SSE #define DUMP_START() debug_printf( "\n" ) #define DUMP_END() debug_printf( "\n" ) @@ -87,7 +87,7 @@ _print_reg( foo++; \ if (*foo) \ foo++; \ - debug_printf( "\n% 15s ", foo ); \ + debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \ } while (0) #define DUMP_I( I ) do { \ @@ -97,27 +97,27 @@ _print_reg( #define DUMP_R( R0 ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ } while( 0 ) #define DUMP_RR( R0, R1 ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", " ); \ - _print_reg( R1 ); \ + x86_print_reg( R1 ); \ } while( 0 ) #define DUMP_RI( R0, I ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", %u", I ); \ } while( 0 ) #define DUMP_RRI( R0, R1, I ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", " ); \ - _print_reg( R1 ); \ + x86_print_reg( R1 ); \ debug_printf( ", %u", I ); \ } while( 0 ) @@ -220,6 +220,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1 /* Build a modRM byte + possible displacement. No treatment of SIB * indexing. BZZT - no way to encode an absolute address. + * + * This is the "/r" field in the x86 manuals... */ static void emit_modrm( struct x86_function *p, struct x86_reg reg, @@ -238,7 +240,8 @@ static void emit_modrm( struct x86_function *p, /* Oh-oh we've stumbled into the SIB thing. */ if (regmem.file == file_REG32 && - regmem.idx == reg_SP) { + regmem.idx == reg_SP && + regmem.mod != mod_REG) { emit_1ub(p, 0x24); /* simplistic! */ } @@ -258,7 +261,8 @@ static void emit_modrm( struct x86_function *p, } } - +/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. + */ static void emit_modrm_noreg( struct x86_function *p, unsigned op, struct x86_reg regmem ) @@ -327,7 +331,7 @@ struct x86_reg x86_make_disp( struct x86_reg reg, else reg.disp += disp; - if (reg.disp == 0) + if (reg.disp == 0 && reg.idx != reg_BP) reg.mod = mod_INDIRECT; else if (reg.disp <= 127 && reg.disp >= -128) reg.mod = mod_DISP8; @@ -367,8 +371,11 @@ void x86_jcc( struct x86_function *p, DUMP_I(cc); if (offset < 0) { - int amt = p->csr - p->store; - assert(amt > -offset); + /*assert(p->csr - p->store > -offset);*/ + if (p->csr - p->store <= -offset) { + /* probably out of memory (using the error_overflow buffer) */ + return; + } } if (offset <= 127 && offset >= -128) { @@ -433,18 +440,73 @@ void x86_call( struct x86_function *p, struct x86_reg reg) } -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); } +/** + * Immediate group 1 instructions. + */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) +{ + assert(dst.file == file_REG32); + assert(dst.mod == mod_REG); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); +} + + void x86_push( struct x86_function *p, struct x86_reg reg ) { @@ -461,6 +523,17 @@ void x86_push( struct x86_function *p, p->stack_offset += 4; } +void x86_push_imm32( struct x86_function *p, + int imm32 ) +{ + DUMP_I( imm32 ); + emit_1ub(p, 0x68); + emit_1i(p, imm32); + + p->stack_offset += 4; +} + + void x86_pop( struct x86_function *p, struct x86_reg reg ) { @@ -606,6 +679,44 @@ void x86_and( struct x86_function *p, * SSE instructions */ +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 0, ptr); +} + +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 1, ptr); +} + +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 2, ptr); +} + +void sse_movntps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + + assert(dst.mod != mod_REG); + assert(src.mod == mod_REG); + emit_2ub(p, 0x0f, 0x2b); + emit_modrm(p, src, dst); +} + + + void sse_movss( struct x86_function *p, struct x86_reg dst, @@ -865,7 +976,7 @@ void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, - unsigned char cc) + enum sse_cc cc) { DUMP_RRI( dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); @@ -882,6 +993,15 @@ void sse_pmovmskb( struct x86_function *p, emit_modrm(p, dst, src); } +void sse_movmskps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x50); + emit_modrm(p, dst, src); +} + /*********************************************************************** * SSE2 instructions */ @@ -988,6 +1108,24 @@ void sse2_movd( struct x86_function *p, /*********************************************************************** * x87 instructions */ +static void note_x87_pop( struct x86_function *p ) +{ + p->x87_stack--; + assert(p->x87_stack >= 0); +} + +static void note_x87_push( struct x86_function *p ) +{ + p->x87_stack++; + assert(p->x87_stack <= 7); +} + +void x87_assert_stack_empty( struct x86_function *p ) +{ + assert (p->x87_stack == 0); +} + + void x87_fist( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -1000,6 +1138,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst ) DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); + note_x87_pop(p); } void x87_fild( struct x86_function *p, struct x86_reg arg ) @@ -1007,12 +1146,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg ) DUMP_R( arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); + note_x87_push(p); } void x87_fldz( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xee); + note_x87_push(p); } @@ -1029,18 +1170,21 @@ void x87_fld1( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xe8); + note_x87_push(p); } void x87_fldl2e( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xea); + note_x87_push(p); } void x87_fldln2( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xed); + note_x87_push(p); } void x87_fwait( struct x86_function *p ) @@ -1061,6 +1205,49 @@ void x87_fclex( struct x86_function *p ) x87_fnclex(p); } +void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc0+arg.idx); +} + +void x87_fcmove( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc8+arg.idx); +} + +void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xd0+arg.idx); +} + +void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc0+arg.idx); +} + +void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc8+arg.idx); +} + +void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xd0+arg.idx); +} + + static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, unsigned char dst0ub0, @@ -1148,6 +1335,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); + note_x87_pop(p); } void x87_fsubp( struct x86_function *p, struct x86_reg dst ) @@ -1156,6 +1344,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe8+dst.idx); + note_x87_pop(p); } void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) @@ -1164,6 +1353,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); + note_x87_pop(p); } void x87_faddp( struct x86_function *p, struct x86_reg dst ) @@ -1172,6 +1362,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); + note_x87_pop(p); } void x87_fdivp( struct x86_function *p, struct x86_reg dst ) @@ -1180,6 +1371,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); + note_x87_pop(p); } void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) @@ -1188,6 +1380,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); + note_x87_pop(p); +} + +void x87_ftst( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe4); } void x87_fucom( struct x86_function *p, struct x86_reg arg ) @@ -1202,12 +1401,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg ) DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); + note_x87_pop(p); } void x87_fucompp( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xda, 0xe9); + note_x87_pop(p); /* pop twice */ + note_x87_pop(p); /* pop twice */ } void x87_fxch( struct x86_function *p, struct x86_reg arg ) @@ -1289,6 +1491,7 @@ void x87_fyl2x( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xf1); + note_x87_pop(p); } /* st1 = st1 * log2(st0 + 1.0); @@ -1300,6 +1503,7 @@ void x87_fyl2xp1( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xf9); + note_x87_pop(p); } @@ -1312,6 +1516,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) emit_1ub(p, 0xd9); emit_modrm_noreg(p, 0, arg); } + note_x87_push(p); } void x87_fst( struct x86_function *p, struct x86_reg dst ) @@ -1334,8 +1539,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) emit_1ub(p, 0xd9); emit_modrm_noreg(p, 3, dst); } + note_x87_pop(p); +} + +void x87_fpop( struct x86_function *p ) +{ + x87_fstp( p, x86_make_reg( file_x87, 0 )); } + void x87_fcom( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -1347,6 +1559,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) } } + void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -1356,6 +1569,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) emit_1ub(p, 0xd8); emit_modrm_noreg(p, 3, dst); } + note_x87_pop(p); +} + +void x87_fcomi( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); +} + +void x87_fcomip( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); + note_x87_pop(p); } @@ -1374,6 +1601,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) } +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_REG32); + + emit_1ub(p, 0x9b); /* WAIT -- needed? */ + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 7, dst); +} + + /*********************************************************************** @@ -1442,6 +1680,21 @@ void mmx_movq( struct x86_function *p, */ +void x86_cdecl_caller_push_regs( struct x86_function *p ) +{ + x86_push(p, x86_make_reg(file_REG32, reg_AX)); + x86_push(p, x86_make_reg(file_REG32, reg_CX)); + x86_push(p, x86_make_reg(file_REG32, reg_DX)); +} + +void x86_cdecl_caller_pop_regs( struct x86_function *p ) +{ + x86_pop(p, x86_make_reg(file_REG32, reg_DX)); + x86_pop(p, x86_make_reg(file_REG32, reg_CX)); + x86_pop(p, x86_make_reg(file_REG32, reg_AX)); +} + + /* Retreive a reference to one of the function arguments, taking into * account any push/pop activity: */