gallium: Generate SSE code to swizzle and unswizzle vs inputs and outputs.
[mesa.git] / src / gallium / auxiliary / rtasm / rtasm_x86sse.h
1 /**************************************************************************
2 *
3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 **************************************************************************/
23
24 #ifndef _RTASM_X86SSE_H_
25 #define _RTASM_X86SSE_H_
26
27 #if defined(__i386__) || defined(__386__) || defined(i386)
28
29 /* It is up to the caller to ensure that instructions issued are
30 * suitable for the host cpu. There are no checks made in this module
31 * for mmx/sse/sse2 support on the cpu.
32 */
/**
 * Description of one instruction operand, stored as bit-fields that
 * total 32 bits (3 + 3 + 2 + 24).
 */
struct x86_reg {
   unsigned file:3;     /* register file; presumably an enum x86_reg_file value */
   unsigned idx:3;      /* register index; presumably an enum x86_reg_name value */
   unsigned mod:2;      /* mod_REG if this is just a register */
   /* Explicitly signed: whether a plain 'int' bit-field is signed is
    * implementation-defined, and displacements may be negative.
    */
   signed int disp:24;  /* only +/- 23bits of offset - should be enough... */
};
39
/**
 * State of one function being assembled; passed as the first argument
 * to every routine declared in this header.
 *
 * NOTE(review): the per-field descriptions below are inferred from the
 * field names only - confirm against the implementation file.
 */
struct x86_function {
   unsigned size;                     /* presumably the capacity of 'store', in bytes */
   unsigned char *store;              /* presumably the start of the code buffer */
   unsigned char *csr;                /* presumably the current write position in 'store' */
   unsigned stack_offset;             /* presumably tracks push/pop activity for x86_fn_arg() */
   int need_emms;                     /* presumably non-zero once MMX code has been issued */
   unsigned char error_overflow[4];   /* presumably scratch space used when 'store' overflows */
   const char *fn;                    /* presumably a name used for diagnostics */
};
49
/* Register files an operand can belong to.  Values run 0..3 in
 * declaration order, so they fit the 3-bit 'file' field of
 * struct x86_reg.
 */
enum x86_reg_file {
   file_REG32,   /* 0 */
   file_MMX,     /* 1 */
   file_XMM,     /* 2 */
   file_x87      /* 3 */
};
56
/* Values for mod field of modr/m byte.
 *
 * Values run 0..3 in declaration order, matching the 2-bit width of
 * the 'mod' field in struct x86_reg; do not reorder.
 */
enum x86_reg_mod {
   mod_INDIRECT,   /* 0 */
   mod_DISP8,      /* 1 */
   mod_DISP32,     /* 2 */
   mod_REG         /* 3 */
};
65
/* Register indices.  Values run 0..7 in declaration order, which
 * coincides with the x86 hardware register numbering
 * (AX=0 ... DI=7) and fits the 3-bit 'idx' field of struct x86_reg;
 * do not reorder.
 */
enum x86_reg_name {
   reg_AX,   /* 0 */
   reg_CX,   /* 1 */
   reg_DX,   /* 2 */
   reg_BX,   /* 3 */
   reg_SP,   /* 4 */
   reg_BP,   /* 5 */
   reg_SI,   /* 6 */
   reg_DI    /* 7 */
};
76
77
/* Condition codes accepted by x86_jcc() and x86_jcc_forward().
 * Values run 0..5 in declaration order, which coincides with the x86
 * condition-code numbering; do not reorder.
 */
enum x86_cc {
   cc_O,     /* overflow */
   cc_NO,    /* not overflow */
   cc_NAE,   /* not above or equal / carry */
   cc_AE,    /* above or equal / not carry */
   cc_E,     /* equal / zero */
   cc_NE     /* not equal / not zero */
};
86
/* Comparison predicates; presumably the values passed as the 'cc'
 * argument of sse_cmpps().  Values run 0..7 in declaration order,
 * which coincides with the CMPPS immediate predicate numbering;
 * do not reorder.
 */
enum sse_cc {
   cc_Equal,              /* 0 */
   cc_LessThan,           /* 1 */
   cc_LessThanEqual,      /* 2 */
   cc_Unordered,          /* 3 */
   cc_NotEqual,           /* 4 */
   cc_NotLessThan,        /* 5 */
   cc_NotLessThanEqual,   /* 6 */
   cc_Ordered             /* 7 */
};
97
/* Aliases: "zero" / "not zero" are the same conditions as
 * "equal" / "not equal" in enum x86_cc.
 */
#define cc_Z  cc_E
#define cc_NZ cc_NE
100
/* Begin/end/retrieve function creation:
 *
 * x86_init_func_size() additionally takes a code size in its
 * 'code_size' argument.  x86_get_func() returns a pointer of type
 * void (*)(void); callers presumably cast it to the real signature of
 * the generated code.
 */
void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
void (*x86_get_func( struct x86_function *p ))( void );
109
110
111
/* Create and manipulate registers and regmem values:
 *
 * Every routine here returns a struct x86_reg by value.  Apart from
 * x86_make_reg(), each one also takes a struct x86_reg by value, so
 * the caller's copy cannot be modified.
 */
struct x86_reg x86_make_reg( enum x86_reg_file file,
                             enum x86_reg_name idx );

struct x86_reg x86_make_disp( struct x86_reg reg,
                              int disp );

struct x86_reg x86_deref( struct x86_reg reg );

struct x86_reg x86_get_base_reg( struct x86_reg reg );
123
124
/* Labels, jumps and fixup:
 *
 * Labels are plain 'unsigned char *' values.  x86_jcc() and x86_jmp()
 * take an existing label.  The *_forward variants take no label and
 * instead return an 'unsigned char *', presumably to be handed to
 * x86_fixup_fwd_jump() once the target position is reached.
 */
unsigned char *x86_get_label( struct x86_function *p );

void x86_jcc( struct x86_function *p,
              enum x86_cc cc,
              unsigned char *label );

unsigned char *x86_jcc_forward( struct x86_function *p,
                                enum x86_cc cc );

unsigned char *x86_jmp_forward( struct x86_function *p );

unsigned char *x86_call_forward( struct x86_function *p );

void x86_fixup_fwd_jump( struct x86_function *p,
                         unsigned char *fixup );

void x86_jmp( struct x86_function *p, unsigned char *label );

/* The call target is given as a register operand.  The earlier
 * function-pointer form is kept below for reference:
 */
/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg );
147
/* michal:
 * Temporary. As I need immediate operands, and don't want to mess with
 * the codegen, I load the immediate into a general purpose register and
 * use it.
 */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
153
154
/* Macros for sse_shufps() and sse2_pshufd():
 *
 * SHUF() packs four 2-bit lane selectors into one byte, with _x in
 * bits 0-1 and _w in bits 6-7.  SHUF_NOOP is the identity selection
 * (0,1,2,3).  GET_SHUF() extracts the 2-bit selector at position
 * 'idx' (0..3) from a packed value.
 */
#define SHUF(_x,_y,_z,_w)  (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP          SHUF(0,1,2,3)
#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
160
/* MMX routines.  As noted at the top of this header, no check is made
 * that the host cpu supports MMX; that is up to the caller.
 */
void mmx_emms( struct x86_function *p );
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
166
/* SSE2 routines.  As noted at the top of this header, no check is made
 * that the host cpu supports SSE2; that is up to the caller.
 *
 * sse2_pshufd() takes a shuffle byte; see the SHUF() macro.
 *
 * NOTE(review): if sse2_rcpps()/sse2_rcpss() emit RCPPS/RCPSS, those
 * are SSE rather than SSE2 instructions; the sse2_ prefix is kept so
 * existing callers keep working.
 */
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
178
/* SSE routines.  As noted at the top of this header, no check is made
 * that the host cpu supports SSE; that is up to the caller.
 *
 * sse_cmpps() takes a comparison code as an unsigned char (presumably
 * an enum sse_cc value); sse_shufps() takes a shuffle byte, see the
 * SHUF() macro.
 */
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
                unsigned char cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                 unsigned char shuf );
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
210
/* General-purpose integer routines.  Two-operand forms take
 * (dst, src); x86_dec/x86_inc/x86_pop/x86_push take a single 'reg'
 * operand and x86_mul takes a single 'src' operand.  x86_retw() takes
 * a 16-bit immediate.
 */
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_ret( struct x86_function *p );
void x86_retw( struct x86_function *p, unsigned short imm );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
229
/* x87 floating-point routines.  Routines taking no operand receive
 * only the function being built; the others take one operand
 * ('dst' or 'arg') or a (dst, arg) pair.
 */
void x87_f2xm1( struct x86_function *p );
void x87_fabs( struct x86_function *p );
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_faddp( struct x86_function *p, struct x86_reg dst );
void x87_fchs( struct x86_function *p );
void x87_fclex( struct x86_function *p );
void x87_fcom( struct x86_function *p, struct x86_reg dst );
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
void x87_fcos( struct x86_function *p );
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivp( struct x86_function *p, struct x86_reg dst );
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
void x87_fild( struct x86_function *p, struct x86_reg arg );
void x87_fist( struct x86_function *p, struct x86_reg dst );
void x87_fistp( struct x86_function *p, struct x86_reg dst );
void x87_fld( struct x86_function *p, struct x86_reg arg );
void x87_fld1( struct x86_function *p );
void x87_fldcw( struct x86_function *p, struct x86_reg arg );
void x87_fldl2e( struct x86_function *p );
void x87_fldln2( struct x86_function *p );
void x87_fldz( struct x86_function *p );
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
void x87_fnclex( struct x86_function *p );
void x87_fprndint( struct x86_function *p );
void x87_fscale( struct x86_function *p );
void x87_fsin( struct x86_function *p );
void x87_fsincos( struct x86_function *p );
void x87_fsqrt( struct x86_function *p );
void x87_fst( struct x86_function *p, struct x86_reg dst );
void x87_fstp( struct x86_function *p, struct x86_reg dst );
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
void x87_fxch( struct x86_function *p, struct x86_reg dst );
void x87_fxtract( struct x86_function *p );
void x87_fyl2x( struct x86_function *p );
void x87_fyl2xp1( struct x86_function *p );
void x87_fwait( struct x86_function *p );
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
void x87_fucompp( struct x86_function *p );
void x87_fucomp( struct x86_function *p, struct x86_reg arg );
void x87_fucom( struct x86_function *p, struct x86_reg arg );
275
276
277
/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity.  Note - doesn't track explicit
 * manipulation of ESP by other instructions.
 *
 * 'arg' selects the argument.
 * NOTE(review): whether 'arg' counts from 0 or 1 is not visible in this
 * header - confirm against the implementation.
 */
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
283
284 #endif
285 #endif