1 #if defined(USE_X86_ASM)
8 /* Emit bytes to the instruction stream:
10 static void emit_1b( struct x86_function
*p
, GLbyte b0
)
12 *(GLbyte
*)(p
->csr
++) = b0
;
15 static void emit_1i( struct x86_function
*p
, GLint i0
)
17 *(GLint
*)(p
->csr
) = i0
;
21 static void disassem( struct x86_function
*p
, const char *fn
)
24 if (fn
&& fn
!= p
->fn
) {
25 _mesa_printf("0x%x: %s\n", p
->csr
, fn
);
31 static void emit_1ub_fn( struct x86_function
*p
, GLubyte b0
, const char *fn
)
37 static void emit_2ub_fn( struct x86_function
*p
, GLubyte b0
, GLubyte b1
, const char *fn
)
44 static void emit_3ub_fn( struct x86_function
*p
, GLubyte b0
, GLubyte b1
, GLubyte b2
, const char *fn
)
/* Convenience wrappers that tag each emission with the calling
 * function's name for the disassembly trace.  __FUNCTION__ is the
 * GCC spelling of C99 __func__.
 */
#define emit_1ub(p, b0)         emit_1ub_fn(p, b0, __FUNCTION__)
#define emit_2ub(p, b0, b1)     emit_2ub_fn(p, b0, b1, __FUNCTION__)
#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__)
58 /* Build a modRM byte + possible displacement. No treatment of SIB
59 * indexing. BZZT - no way to encode an absolute address.
61 static void emit_modrm( struct x86_function
*p
,
63 struct x86_reg regmem
)
67 assert(reg
.mod
== mod_REG
);
69 val
|= regmem
.mod
<< 6; /* mod field */
70 val
|= reg
.idx
<< 3; /* reg field */
71 val
|= regmem
.idx
; /* r/m field */
73 emit_1ub_fn(p
, val
, 0);
75 /* Oh-oh we've stumbled into the SIB thing.
77 if (regmem
.file
== file_REG32
&&
78 regmem
.idx
== reg_SP
) {
79 emit_1ub_fn(p
, 0x24, 0); /* simplistic! */
87 emit_1b(p
, regmem
.disp
);
90 emit_1i(p
, regmem
.disp
);
98 /* Many x86 instructions have two opcodes to cope with the situations
99 * where the destination is a register or memory reference
100 * respectively. This function selects the correct opcode based on
101 * the arguments presented.
103 static void emit_op_modrm( struct x86_function
*p
,
104 GLubyte op_dst_is_reg
,
105 GLubyte op_dst_is_mem
,
111 emit_1ub_fn(p
, op_dst_is_reg
, 0);
112 emit_modrm(p
, dst
, src
);
117 assert(src
.mod
== mod_REG
);
118 emit_1ub_fn(p
, op_dst_is_mem
, 0);
119 emit_modrm(p
, src
, dst
);
133 /* Create and manipulate registers and regmem values:
135 struct x86_reg
x86_make_reg( GLuint file
,
148 struct x86_reg
x86_make_disp( struct x86_reg reg
,
151 assert(reg
.file
== file_REG32
);
153 if (reg
.mod
== mod_REG
)
159 reg
.mod
= mod_INDIRECT
;
160 else if (reg
.disp
<= 127 && reg
.disp
>= -128)
163 reg
.mod
= mod_DISP32
;
168 struct x86_reg
x86_deref( struct x86_reg reg
)
170 return x86_make_disp(reg
, 0);
173 struct x86_reg
x86_get_base_reg( struct x86_reg reg
)
175 return x86_make_reg( reg
.file
, reg
.idx
);
180 /* Labels, jumps and fixup:
182 GLubyte
*x86_get_label( struct x86_function
*p
)
187 void x86_jcc( struct x86_function
*p
,
191 GLint offset
= label
- (x86_get_label(p
) + 2);
193 if (offset
<= 127 && offset
>= -128) {
194 emit_1ub(p
, 0x70 + cc
);
195 emit_1b(p
, (GLbyte
) offset
);
198 offset
= label
- (x86_get_label(p
) + 6);
199 emit_2ub(p
, 0x0f, 0x80 + cc
);
204 /* Always use a 32bit offset for forward jumps:
206 GLubyte
*x86_jcc_forward( struct x86_function
*p
,
209 emit_2ub(p
, 0x0f, 0x80 + cc
);
211 return x86_get_label(p
);
214 /* Fixup offset from forward jump:
216 void x86_fixup_fwd_jump( struct x86_function
*p
,
219 *(int *)(fixup
- 4) = x86_get_label(p
) - fixup
;
222 void x86_push( struct x86_function
*p
,
225 assert(reg
.mod
== mod_REG
);
226 emit_1ub(p
, 0x50 + reg
.idx
);
227 p
->stack_offset
+= 4;
230 void x86_pop( struct x86_function
*p
,
233 assert(reg
.mod
== mod_REG
);
234 emit_1ub(p
, 0x58 + reg
.idx
);
235 p
->stack_offset
-= 4;
238 void x86_inc( struct x86_function
*p
,
241 assert(reg
.mod
== mod_REG
);
242 emit_1ub(p
, 0x40 + reg
.idx
);
245 void x86_dec( struct x86_function
*p
,
248 assert(reg
.mod
== mod_REG
);
249 emit_1ub(p
, 0x48 + reg
.idx
);
/* Near return. */
void x86_ret( struct x86_function *p )
{
   emit_1ub(p, 0xc3);           /* ret opcode */
}
257 void mmx_emms( struct x86_function
*p
)
259 assert(p
->need_emms
);
260 emit_2ub(p
, 0x0f, 0x77);
267 void x86_mov( struct x86_function
*p
,
271 emit_op_modrm( p
, 0x8b, 0x89, dst
, src
);
274 void x86_xor( struct x86_function
*p
,
278 emit_op_modrm( p
, 0x33, 0x31, dst
, src
);
281 void x86_cmp( struct x86_function
*p
,
285 emit_op_modrm( p
, 0x3b, 0x39, dst
, src
);
288 void sse2_movd( struct x86_function
*p
,
292 emit_2ub(p
, 0x66, X86_TWOB
);
293 emit_op_modrm( p
, 0x6e, 0x7e, dst
, src
);
296 void mmx_movd( struct x86_function
*p
,
301 emit_1ub(p
, X86_TWOB
);
302 emit_op_modrm( p
, 0x6e, 0x7e, dst
, src
);
305 void mmx_movq( struct x86_function
*p
,
310 emit_1ub(p
, X86_TWOB
);
311 emit_op_modrm( p
, 0x6f, 0x7f, dst
, src
);
315 void sse_movss( struct x86_function
*p
,
319 emit_2ub(p
, 0xF3, X86_TWOB
);
320 emit_op_modrm( p
, 0x10, 0x11, dst
, src
);
323 void sse_movaps( struct x86_function
*p
,
327 emit_1ub(p
, X86_TWOB
);
328 emit_op_modrm( p
, 0x28, 0x29, dst
, src
);
331 void sse_movups( struct x86_function
*p
,
335 emit_1ub(p
, X86_TWOB
);
336 emit_op_modrm( p
, 0x10, 0x11, dst
, src
);
339 void sse_movhps( struct x86_function
*p
,
343 assert(dst
.mod
!= mod_REG
|| src
.mod
!= mod_REG
);
344 emit_1ub(p
, X86_TWOB
);
345 emit_op_modrm( p
, 0x16, 0x17, dst
, src
); /* cf movlhps */
348 void sse_movlps( struct x86_function
*p
,
352 assert(dst
.mod
!= mod_REG
|| src
.mod
!= mod_REG
);
353 emit_1ub(p
, X86_TWOB
);
354 emit_op_modrm( p
, 0x12, 0x13, dst
, src
); /* cf movhlps */
357 /* SSE operations often only have one format, with dest constrained to
360 void sse_maxps( struct x86_function
*p
,
364 emit_2ub(p
, X86_TWOB
, 0x5F);
365 emit_modrm( p
, dst
, src
);
368 void sse_divss( struct x86_function
*p
,
372 emit_3ub(p
, 0xF3, X86_TWOB
, 0x5E);
373 emit_modrm( p
, dst
, src
);
376 void sse_minps( struct x86_function
*p
,
380 emit_2ub(p
, X86_TWOB
, 0x5D);
381 emit_modrm( p
, dst
, src
);
384 void sse_subps( struct x86_function
*p
,
388 emit_2ub(p
, X86_TWOB
, 0x5C);
389 emit_modrm( p
, dst
, src
);
392 void sse_mulps( struct x86_function
*p
,
396 emit_2ub(p
, X86_TWOB
, 0x59);
397 emit_modrm( p
, dst
, src
);
400 void sse_addps( struct x86_function
*p
,
404 emit_2ub(p
, X86_TWOB
, 0x58);
405 emit_modrm( p
, dst
, src
);
408 void sse_addss( struct x86_function
*p
,
412 emit_3ub(p
, 0xF3, X86_TWOB
, 0x58);
413 emit_modrm( p
, dst
, src
);
416 void sse_andps( struct x86_function
*p
,
420 emit_2ub(p
, X86_TWOB
, 0x54);
421 emit_modrm( p
, dst
, src
);
424 void sse2_rcpss( struct x86_function
*p
,
428 emit_3ub(p
, 0xF3, X86_TWOB
, 0x53);
429 emit_modrm( p
, dst
, src
);
432 void sse_rsqrtss( struct x86_function
*p
,
436 emit_3ub(p
, 0xF3, X86_TWOB
, 0x52);
437 emit_modrm( p
, dst
, src
);
441 void sse_movhlps( struct x86_function
*p
,
445 assert(dst
.mod
== mod_REG
&& src
.mod
== mod_REG
);
446 emit_2ub(p
, X86_TWOB
, 0x12);
447 emit_modrm( p
, dst
, src
);
450 void sse_movlhps( struct x86_function
*p
,
454 assert(dst
.mod
== mod_REG
&& src
.mod
== mod_REG
);
455 emit_2ub(p
, X86_TWOB
, 0x16);
456 emit_modrm( p
, dst
, src
);
459 void sse2_cvtps2dq( struct x86_function
*p
,
463 emit_3ub(p
, 0x66, X86_TWOB
, 0x5B);
464 emit_modrm( p
, dst
, src
);
467 void sse2_packssdw( struct x86_function
*p
,
471 emit_3ub(p
, 0x66, X86_TWOB
, 0x6B);
472 emit_modrm( p
, dst
, src
);
475 void sse2_packsswb( struct x86_function
*p
,
479 emit_3ub(p
, 0x66, X86_TWOB
, 0x63);
480 emit_modrm( p
, dst
, src
);
483 void sse2_packuswb( struct x86_function
*p
,
487 emit_3ub(p
, 0x66, X86_TWOB
, 0x67);
488 emit_modrm( p
, dst
, src
);
491 void sse_cvtps2pi( struct x86_function
*p
,
495 assert(dst
.file
== file_MMX
&&
496 (src
.file
== file_XMM
|| src
.mod
!= mod_REG
));
500 emit_2ub(p
, X86_TWOB
, 0x2d);
501 emit_modrm( p
, dst
, src
);
504 void mmx_packssdw( struct x86_function
*p
,
508 assert(dst
.file
== file_MMX
&&
509 (src
.file
== file_MMX
|| src
.mod
!= mod_REG
));
513 emit_2ub(p
, X86_TWOB
, 0x6b);
514 emit_modrm( p
, dst
, src
);
517 void mmx_packuswb( struct x86_function
*p
,
521 assert(dst
.file
== file_MMX
&&
522 (src
.file
== file_MMX
|| src
.mod
!= mod_REG
));
526 emit_2ub(p
, X86_TWOB
, 0x67);
527 emit_modrm( p
, dst
, src
);
531 /* Load effective address:
533 void x86_lea( struct x86_function
*p
,
538 emit_modrm( p
, dst
, src
);
541 void x86_test( struct x86_function
*p
,
546 emit_modrm( p
, dst
, src
);
551 * Perform a reduced swizzle:
553 void sse2_pshufd( struct x86_function
*p
,
558 emit_3ub(p
, 0x66, X86_TWOB
, 0x70);
559 emit_modrm(p
, dest
, arg0
);
564 /* Shufps can also be used to implement a reduced swizzle when dest ==
567 void sse_shufps( struct x86_function
*p
,
572 emit_2ub(p
, X86_TWOB
, 0xC6);
573 emit_modrm(p
, dest
, arg0
);
577 void sse_cmpps( struct x86_function
*p
,
582 emit_2ub(p
, X86_TWOB
, 0xC2);
583 emit_modrm(p
, dest
, arg0
);
588 /* Retreive a reference to one of the function arguments, taking into
589 * account any push/pop activity:
591 struct x86_reg
x86_fn_arg( struct x86_function
*p
,
594 return x86_make_disp(x86_make_reg(file_REG32
, reg_SP
),
595 p
->stack_offset
+ arg
* 4); /* ??? */
599 void x86_init_func( struct x86_function
*p
)
601 p
->store
= malloc(1024);
605 void x86_release_func( struct x86_function
*p
)
611 void (*x86_get_func( struct x86_function
*p
))(void)
614 _mesa_printf("disassemble %p %p\n", p
->store
, p
->csr
);
615 return (void (*)())p
->store
;
620 void x86sse_dummy( void )