Merge branch '7.8'
[mesa.git] / src / gallium / auxiliary / rtasm / rtasm_x86sse.h
1 /**************************************************************************
2 *
3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 **************************************************************************/
23
24 #ifndef _RTASM_X86SSE_H_
25 #define _RTASM_X86SSE_H_
26
27 #include "pipe/p_config.h"
28
29 #if defined(PIPE_ARCH_X86)
30
31 /* It is up to the caller to ensure that instructions issued are
32 * suitable for the host cpu. There are no checks made in this module
33 * for mmx/sse/sse2 support on the cpu.
34 */
/*
 * Packed description of a single operand.  All four bit-fields add up
 * to 32 bits.
 *
 * NOTE(review): 'file', 'mod' and 'idx' presumably hold values of
 * enum x86_reg_file, enum x86_reg_mod and enum x86_reg_name
 * respectively - confirm against the implementation file.
 */
struct x86_reg {
   unsigned file : 3;
   unsigned idx  : 3;
   unsigned mod  : 2;   /* mod_REG if this is just a register */
   int      disp : 24;  /* only +/- 23bits of offset - should be enough... */
};
41
/*
 * State of one function under construction.
 *
 * NOTE(review): 'store' looks like the start of the code buffer, 'csr'
 * the current write position and 'size' the buffer capacity - confirm
 * against the implementation file.
 */
struct x86_function {
   unsigned       size;
   unsigned char *store;
   unsigned char *csr;

   /* The three bit-fields below add up to 32 bits. */
   unsigned stack_offset : 16;
   unsigned need_emms    : 8;
   int      x87_stack    : 8;

   unsigned char error_overflow[4];
};
53
/* Register files an operand can belong to. */
enum x86_reg_file {
   file_REG32 = 0,
   file_MMX   = 1,
   file_XMM   = 2,
   file_x87   = 3
};
60
/*
 * Values for the mod field of the modr/m byte.
 */
enum x86_reg_mod {
   mod_INDIRECT = 0,
   mod_DISP8    = 1,
   mod_DISP32   = 2,
   mod_REG      = 3
};
69
/* Register names; the values run from 0 (reg_AX) to 7 (reg_DI). */
enum x86_reg_name {
   reg_AX = 0,
   reg_CX = 1,
   reg_DX = 2,
   reg_BX = 3,
   reg_SP = 4,
   reg_BP = 5,
   reg_SI = 6,
   reg_DI = 7
};
80
81
/* Condition codes accepted by x86_jcc() and x86_jcc_forward(). */
enum x86_cc {
   cc_O   = 0,   /* overflow */
   cc_NO  = 1,   /* not overflow */
   cc_NAE = 2,   /* not above or equal / carry */
   cc_AE  = 3,   /* above or equal / not carry */
   cc_E   = 4,   /* equal / zero */
   cc_NE  = 5    /* not equal / not zero */
};
90
/* Comparison predicates accepted by sse_cmpps(). */
enum sse_cc {
   cc_Equal            = 0,
   cc_LessThan         = 1,
   cc_LessThanEqual    = 2,
   cc_Unordered        = 3,
   cc_NotEqual         = 4,
   cc_NotLessThan      = 5,
   cc_NotLessThanEqual = 6,
   cc_Ordered          = 7
};
101
/* Zero / not-zero spellings of the equal / not-equal condition codes. */
#define cc_Z    cc_E
#define cc_NZ   cc_NE
104
/*
 * Function creation: begin, end and retrieve.
 */
void x86_init_func(struct x86_function *p);
void x86_init_func_size(struct x86_function *p, unsigned code_size);
void x86_release_func(struct x86_function *p);

/* Returns a pointer to a function taking no arguments and returning void. */
void (*x86_get_func(struct x86_function *p))(void);
113
/*
 * Debugging aid.
 */
void x86_print_reg(struct x86_reg reg);
117
118
/*
 * Constructors and helpers for register and regmem operand values.
 */
struct x86_reg x86_make_reg(enum x86_reg_file file, enum x86_reg_name idx);
struct x86_reg x86_make_disp(struct x86_reg reg, int disp);
struct x86_reg x86_deref(struct x86_reg reg);
struct x86_reg x86_get_base_reg(struct x86_reg reg);
130
131
/*
 * Labels, jumps and fixups.
 *
 * NOTE(review): the int returned by the *_forward() functions is
 * presumably the 'fixup' value later passed to x86_fixup_fwd_jump() -
 * confirm against the implementation file.
 */
int  x86_get_label(struct x86_function *p);
void x86_jcc(struct x86_function *p, enum x86_cc cc, int label);
int  x86_jcc_forward(struct x86_function *p, enum x86_cc cc);
int  x86_jmp_forward(struct x86_function *p);
int  x86_call_forward(struct x86_function *p);
void x86_fixup_fwd_jump(struct x86_function *p, int fixup);
void x86_jmp(struct x86_function *p, int label);

/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call(struct x86_function *p, struct x86_reg reg);
154
/*
 * Operations taking a destination operand and an integer immediate.
 */
void x86_mov_reg_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_add_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_or_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_and_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_sub_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_xor_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_cmp_imm(struct x86_function *p, struct x86_reg dst, int imm);
162
163
/* Macros for the 'shuf' argument of sse_shufps() and sse2_pshufd():
 *
 * SHUF(x,y,z,w)       packs four 2-bit selectors into one byte, with x in
 *                     bits 0-1, y in bits 2-3, z in bits 4-5 and w in
 *                     bits 6-7.
 * SHUF_NOOP           the identity selection (0,1,2,3), i.e. 0xE4.
 * GET_SHUF(swz, idx)  extracts the 2-bit selector number idx (0..3) from
 *                     a packed value.
 */
#define SHUF(_x,_y,_z,_w)   (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
/* Built from SHUF(); the previous expansion named an RSW() macro that
 * this header does not define. */
#define SHUF_NOOP           SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)  (((swz) >> ((idx)*2)) & 0x3)
169
/*
 * MMX instructions.
 */
void mmx_emms(struct x86_function *p);
void mmx_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_movq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
175
/*
 * Functions carrying the sse2_ prefix.  The 'shuf' argument of
 * sse2_pshufd() can be built with the SHUF() macro.
 */
void sse2_cvtps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvttps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvtdq2ps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packsswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_pshufd(struct x86_function *p,
                 struct x86_reg dest,
                 struct x86_reg arg0,
                 unsigned char shuf);
void sse2_rcpps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_rcpss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
187
188
/*
 * Functions carrying the sse_ prefix (sse2_punpcklbw is also declared
 * in this group).  The 'shuf' argument of sse_shufps() can be built
 * with the SHUF() macro.
 */
void sse_prefetchnta(struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0(struct x86_function *p, struct x86_reg ptr);
void sse_prefetch1(struct x86_function *p, struct x86_reg ptr);

void sse_movntps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);

void sse_addps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_addss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_cvtps2pi(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_divss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andnps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_cmpps(struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src,
               enum sse_cc cc);
void sse_maxps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_maxss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_minps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movaps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movups(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_orps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_xorps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_subps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_shufps(struct x86_function *p,
                struct x86_reg dest,
                struct x86_reg arg0,
                unsigned char shuf);
void sse_unpckhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_unpcklps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_pmovmskb(struct x86_function *p, struct x86_reg dest, struct x86_reg src);
void sse2_punpcklbw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movmskps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
227
/*
 * Functions carrying the x86_ prefix that take register/regmem operands.
 */
void x86_add(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_and(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_cmp(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_dec(struct x86_function *p, struct x86_reg reg);
void x86_inc(struct x86_function *p, struct x86_reg reg);
void x86_lea(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mul(struct x86_function *p, struct x86_reg src);
void x86_imul(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_or(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_pop(struct x86_function *p, struct x86_reg reg);
void x86_push(struct x86_function *p, struct x86_reg reg);
void x86_push_imm32(struct x86_function *p, int imm);
void x86_ret(struct x86_function *p);
void x86_retw(struct x86_function *p, unsigned short imm);
void x86_sub(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_test(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_xor(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_sahf(struct x86_function *p);
void x86_div(struct x86_function *p, struct x86_reg src);
248
249
/*
 * Paired cdecl caller push/pop helpers, followed by the x87 stack check.
 */
void x86_cdecl_caller_push_regs(struct x86_function *p);
void x86_cdecl_caller_pop_regs(struct x86_function *p);

void x87_assert_stack_empty(struct x86_function *p);
254
/*
 * Functions carrying the x87_ prefix.
 */
void x87_f2xm1(struct x86_function *p);
void x87_fabs(struct x86_function *p);
void x87_fadd(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_faddp(struct x86_function *p, struct x86_reg dst);
void x87_fchs(struct x86_function *p);
void x87_fclex(struct x86_function *p);
void x87_fcmovb(struct x86_function *p, struct x86_reg src);
void x87_fcmovbe(struct x86_function *p, struct x86_reg src);
void x87_fcmove(struct x86_function *p, struct x86_reg src);
void x87_fcmovnb(struct x86_function *p, struct x86_reg src);
void x87_fcmovnbe(struct x86_function *p, struct x86_reg src);
void x87_fcmovne(struct x86_function *p, struct x86_reg src);
void x87_fcom(struct x86_function *p, struct x86_reg dst);
void x87_fcomi(struct x86_function *p, struct x86_reg dst);
void x87_fcomip(struct x86_function *p, struct x86_reg dst);
void x87_fcomp(struct x86_function *p, struct x86_reg dst);
void x87_fcos(struct x86_function *p);
void x87_fdiv(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivp(struct x86_function *p, struct x86_reg dst);
void x87_fdivr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivrp(struct x86_function *p, struct x86_reg dst);
void x87_fild(struct x86_function *p, struct x86_reg arg);
void x87_fist(struct x86_function *p, struct x86_reg dst);
void x87_fistp(struct x86_function *p, struct x86_reg dst);
void x87_fld(struct x86_function *p, struct x86_reg arg);
void x87_fld1(struct x86_function *p);
void x87_fldcw(struct x86_function *p, struct x86_reg arg);
void x87_fldl2e(struct x86_function *p);
void x87_fldln2(struct x86_function *p);
void x87_fldz(struct x86_function *p);
void x87_fmul(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fmulp(struct x86_function *p, struct x86_reg dst);
void x87_fnclex(struct x86_function *p);
void x87_fprndint(struct x86_function *p);
void x87_fpop(struct x86_function *p);
void x87_fscale(struct x86_function *p);
void x87_fsin(struct x86_function *p);
void x87_fsincos(struct x86_function *p);
void x87_fsqrt(struct x86_function *p);
void x87_fst(struct x86_function *p, struct x86_reg dst);
void x87_fstp(struct x86_function *p, struct x86_reg dst);
void x87_fsub(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubp(struct x86_function *p, struct x86_reg dst);
void x87_fsubr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubrp(struct x86_function *p, struct x86_reg dst);
void x87_ftst(struct x86_function *p);
void x87_fxch(struct x86_function *p, struct x86_reg dst);
void x87_fxtract(struct x86_function *p);
void x87_fyl2x(struct x86_function *p);
void x87_fyl2xp1(struct x86_function *p);
void x87_fwait(struct x86_function *p);
void x87_fnstcw(struct x86_function *p, struct x86_reg dst);
void x87_fnstsw(struct x86_function *p, struct x86_reg dst);
void x87_fucompp(struct x86_function *p);
void x87_fucomp(struct x86_function *p, struct x86_reg arg);
void x87_fucom(struct x86_function *p, struct x86_reg arg);
311
312
313
/*
 * Returns a reference to one of the function's arguments, adjusted for
 * any push/pop activity so far.
 *
 * Note: explicit manipulation of ESP by other instructions is not
 * tracked.
 */
struct x86_reg x86_fn_arg(struct x86_function *p, unsigned arg);
319
320 #endif
321 #endif