1 /**************************************************************************
3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 **************************************************************************/
24 #ifndef _RTASM_X86SSE_H_
25 #define _RTASM_X86SSE_H_
27 #if defined(__i386__) || defined(__386__) || defined(i386)
29 /* It is up to the caller to ensure that instructions issued are
30 * suitable for the host cpu. There are no checks made in this module
31 * for mmx/sse/sse2 support on the cpu.
36 unsigned mod
:2; /* mod_REG if this is just a register */
37 int disp
:24; /* only +/- 23bits of offset - should be enough... */
44 unsigned stack_offset
;
46 unsigned char error_overflow
[4];
57 /* Values for mod field of modr/m byte
80 cc_NO
, /* not overflow */
81 cc_NAE
, /* not above or equal / carry */
82 cc_AE
, /* above or equal / not carry */
83 cc_E
, /* equal / zero */
84 cc_NE
/* not equal / not zero */
/* Begin/end/retrieve function creation:
 *
 * Each entry point below takes the x86_function being worked on as p.
 * x86_init_func_size() additionally takes a code_size; x86_get_func()
 * returns a pointer to a function taking no arguments and returning void.
 */
void x86_init_func(struct x86_function *p);
void x86_init_func_size(struct x86_function *p, unsigned code_size);
void x86_release_func(struct x86_function *p);
void (*x86_get_func(struct x86_function *p))(void);
/* Create and manipulate registers and regmem values:
 *
 * x86_make_reg() builds an x86_reg value from a register file and a
 * register name within that file.
 */
struct x86_reg x86_make_reg(enum x86_reg_file file, enum x86_reg_name idx);
117 struct x86_reg
x86_make_disp( struct x86_reg reg
,
/* Both helpers take an x86_reg by value and hand back a new x86_reg.
 * NOTE(review): presumably x86_deref() yields the memory operand addressed
 * by reg and x86_get_base_reg() the plain register underneath a regmem
 * value -- confirm against the implementation.
 */
struct x86_reg x86_deref(struct x86_reg reg);
struct x86_reg x86_get_base_reg(struct x86_reg reg);
/* Labels, jumps and fixup:
 *
 * Labels are passed around as unsigned char pointers.
 */
unsigned char *x86_get_label(struct x86_function *p);
129 void x86_jcc( struct x86_function
*p
,
131 unsigned char *label
);
133 unsigned char *x86_jcc_forward( struct x86_function
*p
,
/* Forward jump/call helpers return an unsigned char pointer; the same
 * pointer type is what x86_fixup_fwd_jump() accepts as its fixup argument.
 */
unsigned char *x86_jmp_forward(struct x86_function *p);
unsigned char *x86_call_forward(struct x86_function *p);
void x86_fixup_fwd_jump(struct x86_function *p, unsigned char *fixup);

/* Jump to a label given as an unsigned char pointer. */
void x86_jmp(struct x86_function *p, unsigned char *label);

/* Disabled alternative signature, kept for reference:
 * void x86_call( struct x86_function *p, void (*label)() );
 */
void x86_call(struct x86_function *p, struct x86_reg reg);
/*
 * Temporary. As I need immediate operands, and don't want to mess with the
 * codegen, I load the immediate into general purpose register and use it.
 */
void x86_mov_reg_imm(struct x86_function *p, struct x86_reg dst, int imm);
/* Macro for sse_shufps() and sse2_pshufd():
 *
 * SHUF packs four 2-bit selectors into a single value: _x occupies
 * bits 0-1, _y bits 2-3, _z bits 4-5 and _w bits 6-7.
 * GET_SHUF pulls selector number idx (0..3) back out of such a value.
 * SHUF_NOOP is SHUF(0,1,2,3), i.e. selector i holds the value i.
 *
 * SHUF_NOOP used to expand to RSW(0,1,2,3); no RSW macro is defined in
 * this header, so any use of SHUF_NOOP failed to compile.
 */
#define SHUF(_x,_y,_z,_w)   (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP           SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)  (((swz) >> ((idx)*2)) & 0x3)
/* mmx_* prototypes.
 * NOTE(review): presumably each one emits the same-named MMX instruction
 * into p, with dst/src as its operands -- confirm against the
 * implementation.  No CPU capability check is made by this module.
 */
void mmx_emms(struct x86_function *p);
void mmx_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_movq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/* sse2_* prototypes.
 * NOTE(review): presumably each one emits the same-named instruction into
 * p -- confirm against the implementation.  sse2_pshufd() takes its
 * selector byte as shuf; see the SHUF() macro in this header.
 */
void sse2_cvtps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvttps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvtdq2ps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packsswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_pshufd(struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                 unsigned char shuf);
void sse2_rcpps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_rcpss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/* sse_* prototypes, first group (add, convert, divide, and-not, and).
 * All share the (p, dst, src) parameter shape.
 * NOTE(review): presumably each one emits the same-named SSE instruction
 * into p -- confirm against the implementation.
 */
void sse_addps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_addss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_cvtps2pi(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_divss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andnps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
185 void sse_cmpps( struct x86_function
*p
, struct x86_reg dst
, struct x86_reg src
,
/* Remaining sse_* prototypes, plus sse2_punpcklbw which is declared
 * alongside them.  sse_shufps() takes its selector byte as shuf; see the
 * SHUF() macro in this header.
 * NOTE(review): presumably each one emits the same-named instruction into
 * p -- confirm against the implementation.
 */
void sse_maxps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_maxss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_minps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movaps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movups(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_orps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_xorps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_subps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_shufps(struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                unsigned char shuf);
void sse_unpckhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_unpcklps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_pmovmskb(struct x86_function *p, struct x86_reg dest, struct x86_reg src);
void sse2_punpcklbw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/* x86_* prototypes for the general-purpose instruction set.
 * Two-operand forms take (p, dst, src); single-operand forms take
 * (p, reg) or (p, src); x86_ret() and x86_sahf() take only p, and
 * x86_retw() takes a 16-bit immediate.
 * NOTE(review): presumably each one emits the same-named instruction into
 * p -- confirm against the implementation.
 */
void x86_add(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_and(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_cmp(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_dec(struct x86_function *p, struct x86_reg reg);
void x86_inc(struct x86_function *p, struct x86_reg reg);
void x86_lea(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mul(struct x86_function *p, struct x86_reg src);
void x86_imul(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_or(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_pop(struct x86_function *p, struct x86_reg reg);
void x86_push(struct x86_function *p, struct x86_reg reg);
void x86_ret(struct x86_function *p);
void x86_retw(struct x86_function *p, unsigned short imm);
void x86_sub(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_test(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_xor(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_sahf(struct x86_function *p);
/* x87_* prototypes.
 * Forms without an explicit operand take only p; the others take one
 * x86_reg (dst or arg) or a (dst, arg) pair.
 * NOTE(review): presumably each one emits the same-named x87 instruction
 * into p -- confirm against the implementation.
 */
void x87_f2xm1(struct x86_function *p);
void x87_fabs(struct x86_function *p);
void x87_fadd(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_faddp(struct x86_function *p, struct x86_reg dst);
void x87_fchs(struct x86_function *p);
void x87_fclex(struct x86_function *p);
void x87_fcom(struct x86_function *p, struct x86_reg dst);
void x87_fcomp(struct x86_function *p, struct x86_reg dst);
void x87_fcos(struct x86_function *p);
void x87_fdiv(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivp(struct x86_function *p, struct x86_reg dst);
void x87_fdivr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivrp(struct x86_function *p, struct x86_reg dst);
void x87_fild(struct x86_function *p, struct x86_reg arg);
void x87_fist(struct x86_function *p, struct x86_reg dst);
void x87_fistp(struct x86_function *p, struct x86_reg dst);
void x87_fld(struct x86_function *p, struct x86_reg arg);
void x87_fld1(struct x86_function *p);
void x87_fldcw(struct x86_function *p, struct x86_reg arg);
void x87_fldl2e(struct x86_function *p);
void x87_fldln2(struct x86_function *p);
void x87_fldz(struct x86_function *p);
void x87_fmul(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fmulp(struct x86_function *p, struct x86_reg dst);
void x87_fnclex(struct x86_function *p);
void x87_fprndint(struct x86_function *p);
void x87_fscale(struct x86_function *p);
void x87_fsin(struct x86_function *p);
void x87_fsincos(struct x86_function *p);
void x87_fsqrt(struct x86_function *p);
void x87_fst(struct x86_function *p, struct x86_reg dst);
void x87_fstp(struct x86_function *p, struct x86_reg dst);
void x87_fsub(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubp(struct x86_function *p, struct x86_reg dst);
void x87_fsubr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubrp(struct x86_function *p, struct x86_reg dst);
void x87_fxch(struct x86_function *p, struct x86_reg dst);
void x87_fxtract(struct x86_function *p);
void x87_fyl2x(struct x86_function *p);
void x87_fyl2xp1(struct x86_function *p);
void x87_fwait(struct x86_function *p);
void x87_fnstsw(struct x86_function *p, struct x86_reg dst);
void x87_fucompp(struct x86_function *p);
void x87_fucomp(struct x86_function *p, struct x86_reg arg);
void x87_fucom(struct x86_function *p, struct x86_reg arg);
/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity.  Note - doesn't track explicit
 * manipulation of ESP by other instructions.
 */
struct x86_reg x86_fn_arg(struct x86_function *p, unsigned arg);