gallium: replace assertion with conditional/recovery code
[mesa.git] / src / gallium / auxiliary / rtasm / rtasm_x86sse.c
1 /**************************************************************************
2 *
3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 **************************************************************************/
23
24 #include "pipe/p_config.h"
25
26 #if defined(PIPE_ARCH_X86)
27
28 #include "pipe/p_compiler.h"
29 #include "pipe/p_debug.h"
30 #include "util/u_pointer.h"
31
32 #include "rtasm_execmem.h"
33 #include "rtasm_x86sse.h"
34
/* NOTE(review): DISASSEM is not referenced in the visible part of this file. */
#define DISASSEM 0
/* 0x0f escape byte that introduces the two-byte opcodes emitted below. */
#define X86_TWOB 0x0f


/* Set to 1 to make the DUMP*() macros trace each emitted instruction. */
#define DUMP_SSE 0
40
41
42 void x86_print_reg( struct x86_reg reg )
43 {
44 if (reg.mod != mod_REG)
45 debug_printf( "[" );
46
47 switch( reg.file ) {
48 case file_REG32:
49 switch( reg.idx ) {
50 case reg_AX: debug_printf( "EAX" ); break;
51 case reg_CX: debug_printf( "ECX" ); break;
52 case reg_DX: debug_printf( "EDX" ); break;
53 case reg_BX: debug_printf( "EBX" ); break;
54 case reg_SP: debug_printf( "ESP" ); break;
55 case reg_BP: debug_printf( "EBP" ); break;
56 case reg_SI: debug_printf( "ESI" ); break;
57 case reg_DI: debug_printf( "EDI" ); break;
58 }
59 break;
60 case file_MMX:
61 debug_printf( "MMX%u", reg.idx );
62 break;
63 case file_XMM:
64 debug_printf( "XMM%u", reg.idx );
65 break;
66 case file_x87:
67 debug_printf( "fp%u", reg.idx );
68 break;
69 }
70
71 if (reg.mod == mod_DISP8 ||
72 reg.mod == mod_DISP32)
73 debug_printf("+%d", reg.disp);
74
75 if (reg.mod != mod_REG)
76 debug_printf( "]" );
77 }
78
#if DUMP_SSE

/* Tracing helpers.  They rely on a variable named 'p' (the
 * struct x86_function being assembled) being in scope at the point of
 * use, and print via debug_printf().
 */
#define DUMP_START() debug_printf( "\n" )
#define DUMP_END() debug_printf( "\n" )

/* Print the current code offset followed by the name of the calling
 * function with everything up to and including its first '_' removed.
 */
#define DUMP() do {                                                   \
   const char *dump_name = __FUNCTION__;                              \
   for (; *dump_name; dump_name++) {                                  \
      if (*dump_name == '_') {                                        \
         dump_name++;                                                 \
         break;                                                       \
      }                                                               \
   }                                                                  \
   debug_printf( "\n% 4x% 15s ", p->csr - p->store, dump_name );       \
} while (0)

/* DUMP() followed by an immediate. */
#define DUMP_I( I ) do {                        \
   DUMP();                                      \
   debug_printf( "%u", I );                     \
} while( 0 )

/* DUMP() followed by one operand. */
#define DUMP_R( R0 ) do {                       \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
} while( 0 )

/* DUMP() followed by two operands. */
#define DUMP_RR( R0, R1 ) do {                  \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
   debug_printf( ", " );                        \
   x86_print_reg( R1 );                         \
} while( 0 )

/* DUMP() followed by one operand and an immediate. */
#define DUMP_RI( R0, I ) do {                   \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
   debug_printf( ", %u", I );                   \
} while( 0 )

/* DUMP() followed by two operands and an immediate. */
#define DUMP_RRI( R0, R1, I ) do {              \
   DUMP();                                      \
   x86_print_reg( R0 );                         \
   debug_printf( ", " );                        \
   x86_print_reg( R1 );                         \
   debug_printf( ", %u", I );                   \
} while( 0 )

#else

/* Tracing disabled: every helper expands to nothing. */
#define DUMP_START()
#define DUMP_END()
#define DUMP( )
#define DUMP_I( I )
#define DUMP_R( R0 )
#define DUMP_RR( R0, R1 )
#define DUMP_RI( R0, I )
#define DUMP_RRI( R0, R1, I )

#endif
136
137
138 static void do_realloc( struct x86_function *p )
139 {
140 if (p->store == p->error_overflow) {
141 p->csr = p->store;
142 }
143 else if (p->size == 0) {
144 p->size = 1024;
145 p->store = rtasm_exec_malloc(p->size);
146 p->csr = p->store;
147 }
148 else {
149 uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store );
150 unsigned char *tmp = p->store;
151 p->size *= 2;
152 p->store = rtasm_exec_malloc(p->size);
153
154 if (p->store) {
155 memcpy(p->store, tmp, used);
156 p->csr = p->store + used;
157 }
158 else {
159 p->csr = p->store;
160 }
161
162 rtasm_exec_free(tmp);
163 }
164
165 if (p->store == NULL) {
166 p->store = p->csr = p->error_overflow;
167 p->size = sizeof(p->error_overflow);
168 }
169 }
170
171 /* Emit bytes to the instruction stream:
172 */
173 static unsigned char *reserve( struct x86_function *p, int bytes )
174 {
175 if (p->csr + bytes - p->store > (int) p->size)
176 do_realloc(p);
177
178 {
179 unsigned char *csr = p->csr;
180 p->csr += bytes;
181 return csr;
182 }
183 }
184
185
186
/* Append one (plain char) byte to the instruction stream. */
static void emit_1b( struct x86_function *p, char b0 )
{
   char *dst = (char *) reserve(p, 1);
   dst[0] = b0;
}
192
/* Append one int, in host byte order, to the instruction stream. */
static void emit_1i( struct x86_function *p, int i0 )
{
   int *dst = (int *) reserve(p, sizeof(i0));
   dst[0] = i0;
}
198
/* Append one unsigned byte to the instruction stream. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *dst = reserve(p, 1);
   dst[0] = b0;
}
204
/* Append two unsigned bytes, b0 first, to the instruction stream. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *dst = reserve(p, 2);
   dst[0] = b0;
   dst[1] = b1;
}
211
/* Append three unsigned bytes, in argument order, to the instruction
 * stream.
 */
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *dst = reserve(p, 3);
   dst[0] = b0;
   dst[1] = b1;
   dst[2] = b2;
}
219
220
221 /* Build a modRM byte + possible displacement. No treatment of SIB
222 * indexing. BZZT - no way to encode an absolute address.
223 *
224 * This is the "/r" field in the x86 manuals...
225 */
226 static void emit_modrm( struct x86_function *p,
227 struct x86_reg reg,
228 struct x86_reg regmem )
229 {
230 unsigned char val = 0;
231
232 assert(reg.mod == mod_REG);
233
234 val |= regmem.mod << 6; /* mod field */
235 val |= reg.idx << 3; /* reg field */
236 val |= regmem.idx; /* r/m field */
237
238 emit_1ub(p, val);
239
240 /* Oh-oh we've stumbled into the SIB thing.
241 */
242 if (regmem.file == file_REG32 &&
243 regmem.idx == reg_SP) {
244 emit_1ub(p, 0x24); /* simplistic! */
245 }
246
247 switch (regmem.mod) {
248 case mod_REG:
249 case mod_INDIRECT:
250 break;
251 case mod_DISP8:
252 emit_1b(p, (char) regmem.disp);
253 break;
254 case mod_DISP32:
255 emit_1i(p, regmem.disp);
256 break;
257 default:
258 assert(0);
259 break;
260 }
261 }
262
263 /* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes.
264 */
265 static void emit_modrm_noreg( struct x86_function *p,
266 unsigned op,
267 struct x86_reg regmem )
268 {
269 struct x86_reg dummy = x86_make_reg(file_REG32, op);
270 emit_modrm(p, dummy, regmem);
271 }
272
273 /* Many x86 instructions have two opcodes to cope with the situations
274 * where the destination is a register or memory reference
275 * respectively. This function selects the correct opcode based on
276 * the arguments presented.
277 */
278 static void emit_op_modrm( struct x86_function *p,
279 unsigned char op_dst_is_reg,
280 unsigned char op_dst_is_mem,
281 struct x86_reg dst,
282 struct x86_reg src )
283 {
284 switch (dst.mod) {
285 case mod_REG:
286 emit_1ub(p, op_dst_is_reg);
287 emit_modrm(p, dst, src);
288 break;
289 case mod_INDIRECT:
290 case mod_DISP32:
291 case mod_DISP8:
292 assert(src.mod == mod_REG);
293 emit_1ub(p, op_dst_is_mem);
294 emit_modrm(p, src, dst);
295 break;
296 default:
297 assert(0);
298 break;
299 }
300 }
301
302
303
304
305
306
307
308 /* Create and manipulate registers and regmem values:
309 */
310 struct x86_reg x86_make_reg( enum x86_reg_file file,
311 enum x86_reg_name idx )
312 {
313 struct x86_reg reg;
314
315 reg.file = file;
316 reg.idx = idx;
317 reg.mod = mod_REG;
318 reg.disp = 0;
319
320 return reg;
321 }
322
323 struct x86_reg x86_make_disp( struct x86_reg reg,
324 int disp )
325 {
326 assert(reg.file == file_REG32);
327
328 if (reg.mod == mod_REG)
329 reg.disp = disp;
330 else
331 reg.disp += disp;
332
333 if (reg.disp == 0 && reg.idx != reg_BP)
334 reg.mod = mod_INDIRECT;
335 else if (reg.disp <= 127 && reg.disp >= -128)
336 reg.mod = mod_DISP8;
337 else
338 reg.mod = mod_DISP32;
339
340 return reg;
341 }
342
343 struct x86_reg x86_deref( struct x86_reg reg )
344 {
345 return x86_make_disp(reg, 0);
346 }
347
348 struct x86_reg x86_get_base_reg( struct x86_reg reg )
349 {
350 return x86_make_reg( reg.file, reg.idx );
351 }
352
353 int x86_get_label( struct x86_function *p )
354 {
355 return p->csr - p->store;
356 }
357
358
359
360 /***********************************************************************
361 * x86 instructions
362 */
363
364
365 void x86_jcc( struct x86_function *p,
366 enum x86_cc cc,
367 int label )
368 {
369 int offset = label - (x86_get_label(p) + 2);
370 DUMP_I(cc);
371
372 if (offset < 0) {
373 /*assert(p->csr - p->store > -offset);*/
374 if (p->csr - p->store <= -offset) {
375 /* probably out of memory (using the error_overflow buffer) */
376 return;
377 }
378 }
379
380 if (offset <= 127 && offset >= -128) {
381 emit_1ub(p, 0x70 + cc);
382 emit_1b(p, (char) offset);
383 }
384 else {
385 offset = label - (x86_get_label(p) + 6);
386 emit_2ub(p, 0x0f, 0x80 + cc);
387 emit_1i(p, offset);
388 }
389 }
390
391 /* Always use a 32bit offset for forward jumps:
392 */
393 int x86_jcc_forward( struct x86_function *p,
394 enum x86_cc cc )
395 {
396 DUMP_I(cc);
397 emit_2ub(p, 0x0f, 0x80 + cc);
398 emit_1i(p, 0);
399 return x86_get_label(p);
400 }
401
/* Emit JMP rel32 (E9) with a zero placeholder offset and return the
 * position just after it, for later patching.
 */
int x86_jmp_forward( struct x86_function *p )
{
   DUMP();
   emit_1ub(p, 0xe9);
   emit_1i(p, 0);
   return x86_get_label(p);
}
409
/* Emit CALL rel32 (E8) with a zero placeholder offset and return the
 * position just after it, for later patching.
 */
int x86_call_forward( struct x86_function *p )
{
   DUMP();
   emit_1ub(p, 0xe8);
   emit_1i(p, 0);
   return x86_get_label(p);
}
418
419 /* Fixup offset from forward jump:
420 */
421 void x86_fixup_fwd_jump( struct x86_function *p,
422 int fixup )
423 {
424 *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup;
425 }
426
/* Emit JMP rel32 (E9) to an already known 'label'. */
void x86_jmp( struct x86_function *p, int label)
{
   int rel;

   DUMP_I( label );
   emit_1ub(p, 0xe9);
   /* relative to the end of the 4-byte offset that follows the opcode */
   rel = label - x86_get_label(p) - 4;
   emit_1i(p, rel);
}
433
434 void x86_call( struct x86_function *p, struct x86_reg reg)
435 {
436 DUMP_R( reg );
437 emit_1ub(p, 0xff);
438 emit_modrm_noreg(p, 2, reg);
439 }
440
441
442 /* michal:
443 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
444 * I load the immediate into general purpose register and use it.
445 */
446 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
447 {
448 DUMP_RI( dst, imm );
449 assert(dst.mod == mod_REG);
450 emit_1ub(p, 0xb8 + dst.idx);
451 emit_1i(p, imm);
452 }
453
454 void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm )
455 {
456 DUMP_RI( dst, imm );
457 assert(dst.mod == mod_REG);
458 emit_1ub(p, 0x80);
459 emit_modrm_noreg(p, 0, dst);
460 emit_1ub(p, imm);
461 }
462
463
464 void x86_push( struct x86_function *p,
465 struct x86_reg reg )
466 {
467 DUMP_R( reg );
468 if (reg.mod == mod_REG)
469 emit_1ub(p, 0x50 + reg.idx);
470 else
471 {
472 emit_1ub(p, 0xff);
473 emit_modrm_noreg(p, 6, reg);
474 }
475
476
477 p->stack_offset += 4;
478 }
479
480 void x86_push_imm32( struct x86_function *p,
481 int imm32 )
482 {
483 DUMP_I( imm32 );
484 emit_1ub(p, 0x68);
485 emit_1i(p, imm32);
486
487 p->stack_offset += 4;
488 }
489
490
491 void x86_pop( struct x86_function *p,
492 struct x86_reg reg )
493 {
494 DUMP_R( reg );
495 assert(reg.mod == mod_REG);
496 emit_1ub(p, 0x58 + reg.idx);
497 p->stack_offset -= 4;
498 }
499
500 void x86_inc( struct x86_function *p,
501 struct x86_reg reg )
502 {
503 DUMP_R( reg );
504 assert(reg.mod == mod_REG);
505 emit_1ub(p, 0x40 + reg.idx);
506 }
507
508 void x86_dec( struct x86_function *p,
509 struct x86_reg reg )
510 {
511 DUMP_R( reg );
512 assert(reg.mod == mod_REG);
513 emit_1ub(p, 0x48 + reg.idx);
514 }
515
516 void x86_ret( struct x86_function *p )
517 {
518 DUMP();
519 assert(p->stack_offset == 0);
520 emit_1ub(p, 0xc3);
521 }
522
/* Emit RET imm16 (C2 iw); the immediate is written low byte first. */
void x86_retw( struct x86_function *p, unsigned short imm )
{
   DUMP();
   emit_3ub(p,
            0xc2,
            imm & 0xff,
            (imm >> 8) & 0xff);
}
528
/* Emit SAHF (9E). */
void x86_sahf( struct x86_function *p )
{
   DUMP();
   emit_1ub(p, 0x9e);
}
534
535 void x86_mov( struct x86_function *p,
536 struct x86_reg dst,
537 struct x86_reg src )
538 {
539 DUMP_RR( dst, src );
540 emit_op_modrm( p, 0x8b, 0x89, dst, src );
541 }
542
543 void x86_xor( struct x86_function *p,
544 struct x86_reg dst,
545 struct x86_reg src )
546 {
547 DUMP_RR( dst, src );
548 emit_op_modrm( p, 0x33, 0x31, dst, src );
549 }
550
551 void x86_cmp( struct x86_function *p,
552 struct x86_reg dst,
553 struct x86_reg src )
554 {
555 DUMP_RR( dst, src );
556 emit_op_modrm( p, 0x3b, 0x39, dst, src );
557 }
558
559 void x86_lea( struct x86_function *p,
560 struct x86_reg dst,
561 struct x86_reg src )
562 {
563 DUMP_RR( dst, src );
564 emit_1ub(p, 0x8d);
565 emit_modrm( p, dst, src );
566 }
567
568 void x86_test( struct x86_function *p,
569 struct x86_reg dst,
570 struct x86_reg src )
571 {
572 DUMP_RR( dst, src );
573 emit_1ub(p, 0x85);
574 emit_modrm( p, dst, src );
575 }
576
577 void x86_add( struct x86_function *p,
578 struct x86_reg dst,
579 struct x86_reg src )
580 {
581 DUMP_RR( dst, src );
582 emit_op_modrm(p, 0x03, 0x01, dst, src );
583 }
584
585 /* Calculate EAX * src, results in EDX:EAX.
586 */
587 void x86_mul( struct x86_function *p,
588 struct x86_reg src )
589 {
590 DUMP_R( src );
591 emit_1ub(p, 0xf7);
592 emit_modrm_noreg(p, 4, src );
593 }
594
595
596 void x86_imul( struct x86_function *p,
597 struct x86_reg dst,
598 struct x86_reg src )
599 {
600 DUMP_RR( dst, src );
601 emit_2ub(p, X86_TWOB, 0xAF);
602 emit_modrm(p, dst, src);
603 }
604
605
606 void x86_sub( struct x86_function *p,
607 struct x86_reg dst,
608 struct x86_reg src )
609 {
610 DUMP_RR( dst, src );
611 emit_op_modrm(p, 0x2b, 0x29, dst, src );
612 }
613
614 void x86_or( struct x86_function *p,
615 struct x86_reg dst,
616 struct x86_reg src )
617 {
618 DUMP_RR( dst, src );
619 emit_op_modrm( p, 0x0b, 0x09, dst, src );
620 }
621
622 void x86_and( struct x86_function *p,
623 struct x86_reg dst,
624 struct x86_reg src )
625 {
626 DUMP_RR( dst, src );
627 emit_op_modrm( p, 0x23, 0x21, dst, src );
628 }
629
630
631
632 /***********************************************************************
633 * SSE instructions
634 */
635
636 void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr)
637 {
638 DUMP_R( ptr );
639 assert(ptr.mod != mod_REG);
640 emit_2ub(p, 0x0f, 0x18);
641 emit_modrm_noreg(p, 0, ptr);
642 }
643
644 void sse_prefetch0( struct x86_function *p, struct x86_reg ptr)
645 {
646 DUMP_R( ptr );
647 assert(ptr.mod != mod_REG);
648 emit_2ub(p, 0x0f, 0x18);
649 emit_modrm_noreg(p, 1, ptr);
650 }
651
652 void sse_prefetch1( struct x86_function *p, struct x86_reg ptr)
653 {
654 DUMP_R( ptr );
655 assert(ptr.mod != mod_REG);
656 emit_2ub(p, 0x0f, 0x18);
657 emit_modrm_noreg(p, 2, ptr);
658 }
659
660 void sse_movntps( struct x86_function *p,
661 struct x86_reg dst,
662 struct x86_reg src)
663 {
664 DUMP_RR( dst, src );
665
666 assert(dst.mod != mod_REG);
667 assert(src.mod == mod_REG);
668 emit_2ub(p, 0x0f, 0x2b);
669 emit_modrm(p, src, dst);
670 }
671
672
673
674
675 void sse_movss( struct x86_function *p,
676 struct x86_reg dst,
677 struct x86_reg src )
678 {
679 DUMP_RR( dst, src );
680 emit_2ub(p, 0xF3, X86_TWOB);
681 emit_op_modrm( p, 0x10, 0x11, dst, src );
682 }
683
684 void sse_movaps( struct x86_function *p,
685 struct x86_reg dst,
686 struct x86_reg src )
687 {
688 DUMP_RR( dst, src );
689 emit_1ub(p, X86_TWOB);
690 emit_op_modrm( p, 0x28, 0x29, dst, src );
691 }
692
693 void sse_movups( struct x86_function *p,
694 struct x86_reg dst,
695 struct x86_reg src )
696 {
697 DUMP_RR( dst, src );
698 emit_1ub(p, X86_TWOB);
699 emit_op_modrm( p, 0x10, 0x11, dst, src );
700 }
701
702 void sse_movhps( struct x86_function *p,
703 struct x86_reg dst,
704 struct x86_reg src )
705 {
706 DUMP_RR( dst, src );
707 assert(dst.mod != mod_REG || src.mod != mod_REG);
708 emit_1ub(p, X86_TWOB);
709 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
710 }
711
712 void sse_movlps( struct x86_function *p,
713 struct x86_reg dst,
714 struct x86_reg src )
715 {
716 DUMP_RR( dst, src );
717 assert(dst.mod != mod_REG || src.mod != mod_REG);
718 emit_1ub(p, X86_TWOB);
719 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
720 }
721
722 void sse_maxps( struct x86_function *p,
723 struct x86_reg dst,
724 struct x86_reg src )
725 {
726 DUMP_RR( dst, src );
727 emit_2ub(p, X86_TWOB, 0x5F);
728 emit_modrm( p, dst, src );
729 }
730
731 void sse_maxss( struct x86_function *p,
732 struct x86_reg dst,
733 struct x86_reg src )
734 {
735 DUMP_RR( dst, src );
736 emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
737 emit_modrm( p, dst, src );
738 }
739
740 void sse_divss( struct x86_function *p,
741 struct x86_reg dst,
742 struct x86_reg src )
743 {
744 DUMP_RR( dst, src );
745 emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
746 emit_modrm( p, dst, src );
747 }
748
749 void sse_minps( struct x86_function *p,
750 struct x86_reg dst,
751 struct x86_reg src )
752 {
753 DUMP_RR( dst, src );
754 emit_2ub(p, X86_TWOB, 0x5D);
755 emit_modrm( p, dst, src );
756 }
757
758 void sse_subps( struct x86_function *p,
759 struct x86_reg dst,
760 struct x86_reg src )
761 {
762 DUMP_RR( dst, src );
763 emit_2ub(p, X86_TWOB, 0x5C);
764 emit_modrm( p, dst, src );
765 }
766
767 void sse_mulps( struct x86_function *p,
768 struct x86_reg dst,
769 struct x86_reg src )
770 {
771 DUMP_RR( dst, src );
772 emit_2ub(p, X86_TWOB, 0x59);
773 emit_modrm( p, dst, src );
774 }
775
776 void sse_mulss( struct x86_function *p,
777 struct x86_reg dst,
778 struct x86_reg src )
779 {
780 DUMP_RR( dst, src );
781 emit_3ub(p, 0xF3, X86_TWOB, 0x59);
782 emit_modrm( p, dst, src );
783 }
784
785 void sse_addps( struct x86_function *p,
786 struct x86_reg dst,
787 struct x86_reg src )
788 {
789 DUMP_RR( dst, src );
790 emit_2ub(p, X86_TWOB, 0x58);
791 emit_modrm( p, dst, src );
792 }
793
794 void sse_addss( struct x86_function *p,
795 struct x86_reg dst,
796 struct x86_reg src )
797 {
798 DUMP_RR( dst, src );
799 emit_3ub(p, 0xF3, X86_TWOB, 0x58);
800 emit_modrm( p, dst, src );
801 }
802
803 void sse_andnps( struct x86_function *p,
804 struct x86_reg dst,
805 struct x86_reg src )
806 {
807 DUMP_RR( dst, src );
808 emit_2ub(p, X86_TWOB, 0x55);
809 emit_modrm( p, dst, src );
810 }
811
812 void sse_andps( struct x86_function *p,
813 struct x86_reg dst,
814 struct x86_reg src )
815 {
816 DUMP_RR( dst, src );
817 emit_2ub(p, X86_TWOB, 0x54);
818 emit_modrm( p, dst, src );
819 }
820
821 void sse_rsqrtps( struct x86_function *p,
822 struct x86_reg dst,
823 struct x86_reg src )
824 {
825 DUMP_RR( dst, src );
826 emit_2ub(p, X86_TWOB, 0x52);
827 emit_modrm( p, dst, src );
828 }
829
830 void sse_rsqrtss( struct x86_function *p,
831 struct x86_reg dst,
832 struct x86_reg src )
833 {
834 DUMP_RR( dst, src );
835 emit_3ub(p, 0xF3, X86_TWOB, 0x52);
836 emit_modrm( p, dst, src );
837
838 }
839
840 void sse_movhlps( struct x86_function *p,
841 struct x86_reg dst,
842 struct x86_reg src )
843 {
844 DUMP_RR( dst, src );
845 assert(dst.mod == mod_REG && src.mod == mod_REG);
846 emit_2ub(p, X86_TWOB, 0x12);
847 emit_modrm( p, dst, src );
848 }
849
850 void sse_movlhps( struct x86_function *p,
851 struct x86_reg dst,
852 struct x86_reg src )
853 {
854 DUMP_RR( dst, src );
855 assert(dst.mod == mod_REG && src.mod == mod_REG);
856 emit_2ub(p, X86_TWOB, 0x16);
857 emit_modrm( p, dst, src );
858 }
859
860 void sse_orps( struct x86_function *p,
861 struct x86_reg dst,
862 struct x86_reg src )
863 {
864 DUMP_RR( dst, src );
865 emit_2ub(p, X86_TWOB, 0x56);
866 emit_modrm( p, dst, src );
867 }
868
869 void sse_xorps( struct x86_function *p,
870 struct x86_reg dst,
871 struct x86_reg src )
872 {
873 DUMP_RR( dst, src );
874 emit_2ub(p, X86_TWOB, 0x57);
875 emit_modrm( p, dst, src );
876 }
877
878 void sse_cvtps2pi( struct x86_function *p,
879 struct x86_reg dst,
880 struct x86_reg src )
881 {
882 DUMP_RR( dst, src );
883 assert(dst.file == file_MMX &&
884 (src.file == file_XMM || src.mod != mod_REG));
885
886 p->need_emms = 1;
887
888 emit_2ub(p, X86_TWOB, 0x2d);
889 emit_modrm( p, dst, src );
890 }
891
892 void sse2_cvtdq2ps( struct x86_function *p,
893 struct x86_reg dst,
894 struct x86_reg src )
895 {
896 DUMP_RR( dst, src );
897 emit_2ub(p, X86_TWOB, 0x5b);
898 emit_modrm( p, dst, src );
899 }
900
901
902 /* Shufps can also be used to implement a reduced swizzle when dest ==
903 * arg0.
904 */
905 void sse_shufps( struct x86_function *p,
906 struct x86_reg dst,
907 struct x86_reg src,
908 unsigned char shuf)
909 {
910 DUMP_RRI( dst, src, shuf );
911 emit_2ub(p, X86_TWOB, 0xC6);
912 emit_modrm(p, dst, src);
913 emit_1ub(p, shuf);
914 }
915
916 void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
917 {
918 DUMP_RR( dst, src );
919 emit_2ub( p, X86_TWOB, 0x15 );
920 emit_modrm( p, dst, src );
921 }
922
923 void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
924 {
925 DUMP_RR( dst, src );
926 emit_2ub( p, X86_TWOB, 0x14 );
927 emit_modrm( p, dst, src );
928 }
929
930 void sse_cmpps( struct x86_function *p,
931 struct x86_reg dst,
932 struct x86_reg src,
933 enum sse_cc cc)
934 {
935 DUMP_RRI( dst, src, cc );
936 emit_2ub(p, X86_TWOB, 0xC2);
937 emit_modrm(p, dst, src);
938 emit_1ub(p, cc);
939 }
940
941 void sse_pmovmskb( struct x86_function *p,
942 struct x86_reg dst,
943 struct x86_reg src)
944 {
945 DUMP_RR( dst, src );
946 emit_3ub(p, 0x66, X86_TWOB, 0xD7);
947 emit_modrm(p, dst, src);
948 }
949
950 /***********************************************************************
951 * SSE2 instructions
952 */
953
954 /**
955 * Perform a reduced swizzle:
956 */
957 void sse2_pshufd( struct x86_function *p,
958 struct x86_reg dst,
959 struct x86_reg src,
960 unsigned char shuf)
961 {
962 DUMP_RRI( dst, src, shuf );
963 emit_3ub(p, 0x66, X86_TWOB, 0x70);
964 emit_modrm(p, dst, src);
965 emit_1ub(p, shuf);
966 }
967
968 void sse2_cvttps2dq( struct x86_function *p,
969 struct x86_reg dst,
970 struct x86_reg src )
971 {
972 DUMP_RR( dst, src );
973 emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
974 emit_modrm( p, dst, src );
975 }
976
977 void sse2_cvtps2dq( struct x86_function *p,
978 struct x86_reg dst,
979 struct x86_reg src )
980 {
981 DUMP_RR( dst, src );
982 emit_3ub(p, 0x66, X86_TWOB, 0x5B);
983 emit_modrm( p, dst, src );
984 }
985
986 void sse2_packssdw( struct x86_function *p,
987 struct x86_reg dst,
988 struct x86_reg src )
989 {
990 DUMP_RR( dst, src );
991 emit_3ub(p, 0x66, X86_TWOB, 0x6B);
992 emit_modrm( p, dst, src );
993 }
994
995 void sse2_packsswb( struct x86_function *p,
996 struct x86_reg dst,
997 struct x86_reg src )
998 {
999 DUMP_RR( dst, src );
1000 emit_3ub(p, 0x66, X86_TWOB, 0x63);
1001 emit_modrm( p, dst, src );
1002 }
1003
1004 void sse2_packuswb( struct x86_function *p,
1005 struct x86_reg dst,
1006 struct x86_reg src )
1007 {
1008 DUMP_RR( dst, src );
1009 emit_3ub(p, 0x66, X86_TWOB, 0x67);
1010 emit_modrm( p, dst, src );
1011 }
1012
1013 void sse2_punpcklbw( struct x86_function *p,
1014 struct x86_reg dst,
1015 struct x86_reg src )
1016 {
1017 DUMP_RR( dst, src );
1018 emit_3ub(p, 0x66, X86_TWOB, 0x60);
1019 emit_modrm( p, dst, src );
1020 }
1021
1022
1023 void sse2_rcpps( struct x86_function *p,
1024 struct x86_reg dst,
1025 struct x86_reg src )
1026 {
1027 DUMP_RR( dst, src );
1028 emit_2ub(p, X86_TWOB, 0x53);
1029 emit_modrm( p, dst, src );
1030 }
1031
1032 void sse2_rcpss( struct x86_function *p,
1033 struct x86_reg dst,
1034 struct x86_reg src )
1035 {
1036 DUMP_RR( dst, src );
1037 emit_3ub(p, 0xF3, X86_TWOB, 0x53);
1038 emit_modrm( p, dst, src );
1039 }
1040
1041 void sse2_movd( struct x86_function *p,
1042 struct x86_reg dst,
1043 struct x86_reg src )
1044 {
1045 DUMP_RR( dst, src );
1046 emit_2ub(p, 0x66, X86_TWOB);
1047 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1048 }
1049
1050
1051
1052
1053 /***********************************************************************
1054 * x87 instructions
1055 */
1056 static void note_x87_pop( struct x86_function *p )
1057 {
1058 p->x87_stack--;
1059 assert(p->x87_stack >= 0);
1060 }
1061
1062 static void note_x87_push( struct x86_function *p )
1063 {
1064 p->x87_stack++;
1065 assert(p->x87_stack <= 7);
1066 }
1067
1068 void x87_assert_stack_empty( struct x86_function *p )
1069 {
1070 assert (p->x87_stack == 0);
1071 }
1072
1073
1074 void x87_fist( struct x86_function *p, struct x86_reg dst )
1075 {
1076 DUMP_R( dst );
1077 emit_1ub(p, 0xdb);
1078 emit_modrm_noreg(p, 2, dst);
1079 }
1080
1081 void x87_fistp( struct x86_function *p, struct x86_reg dst )
1082 {
1083 DUMP_R( dst );
1084 emit_1ub(p, 0xdb);
1085 emit_modrm_noreg(p, 3, dst);
1086 note_x87_pop(p);
1087 }
1088
1089 void x87_fild( struct x86_function *p, struct x86_reg arg )
1090 {
1091 DUMP_R( arg );
1092 emit_1ub(p, 0xdf);
1093 emit_modrm_noreg(p, 0, arg);
1094 note_x87_push(p);
1095 }
1096
/* Emit FLDZ (D9 EE) and record the x87 stack push. */
void x87_fldz( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xee);
   note_x87_push( p );
}
1103
1104
1105 void x87_fldcw( struct x86_function *p, struct x86_reg arg )
1106 {
1107 DUMP_R( arg );
1108 assert(arg.file == file_REG32);
1109 assert(arg.mod != mod_REG);
1110 emit_1ub(p, 0xd9);
1111 emit_modrm_noreg(p, 5, arg);
1112 }
1113
/* Emit FLD1 (D9 E8) and record the x87 stack push. */
void x87_fld1( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe8);
   note_x87_push( p );
}
1120
/* Emit FLDL2E (D9 EA) and record the x87 stack push. */
void x87_fldl2e( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xea);
   note_x87_push( p );
}
1127
/* Emit FLDLN2 (D9 ED) and record the x87 stack push. */
void x87_fldln2( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xed);
   note_x87_push( p );
}
1134
/* Emit FWAIT (9B). */
void x87_fwait( struct x86_function *p )
{
   DUMP();
   emit_1ub( p, 0x9b );
}
1140
/* Emit FNCLEX (DB E2). */
void x87_fnclex( struct x86_function *p )
{
   DUMP();
   emit_2ub( p, 0xdb, 0xe2 );
}
1146
/* Emit FCLEX as the pair FWAIT + FNCLEX. */
void x87_fclex( struct x86_function *p )
{
   x87_fwait( p );
   x87_fnclex( p );
}
1152
1153 void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
1154 {
1155 DUMP_R( arg );
1156 assert(arg.file == file_x87);
1157 emit_2ub(p, 0xda, 0xc0+arg.idx);
1158 }
1159
1160 void x87_fcmove( struct x86_function *p, struct x86_reg arg )
1161 {
1162 DUMP_R( arg );
1163 assert(arg.file == file_x87);
1164 emit_2ub(p, 0xda, 0xc8+arg.idx);
1165 }
1166
1167 void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
1168 {
1169 DUMP_R( arg );
1170 assert(arg.file == file_x87);
1171 emit_2ub(p, 0xda, 0xd0+arg.idx);
1172 }
1173
1174 void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
1175 {
1176 DUMP_R( arg );
1177 assert(arg.file == file_x87);
1178 emit_2ub(p, 0xdb, 0xc0+arg.idx);
1179 }
1180
1181 void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
1182 {
1183 DUMP_R( arg );
1184 assert(arg.file == file_x87);
1185 emit_2ub(p, 0xdb, 0xc8+arg.idx);
1186 }
1187
1188 void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
1189 {
1190 DUMP_R( arg );
1191 assert(arg.file == file_x87);
1192 emit_2ub(p, 0xdb, 0xd0+arg.idx);
1193 }
1194
1195
1196
1197 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
1198 unsigned char dst0ub0,
1199 unsigned char dst0ub1,
1200 unsigned char arg0ub0,
1201 unsigned char arg0ub1,
1202 unsigned char argmem_noreg)
1203 {
1204 assert(dst.file == file_x87);
1205
1206 if (arg.file == file_x87) {
1207 if (dst.idx == 0)
1208 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
1209 else if (arg.idx == 0)
1210 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
1211 else
1212 assert(0);
1213 }
1214 else if (dst.idx == 0) {
1215 assert(arg.file == file_REG32);
1216 emit_1ub(p, 0xd8);
1217 emit_modrm_noreg(p, argmem_noreg, arg);
1218 }
1219 else
1220 assert(0);
1221 }
1222
1223 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1224 {
1225 DUMP_RR( dst, src );
1226 x87_arith_op(p, dst, src,
1227 0xd8, 0xc8,
1228 0xdc, 0xc8,
1229 4);
1230 }
1231
1232 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1233 {
1234 DUMP_RR( dst, src );
1235 x87_arith_op(p, dst, src,
1236 0xd8, 0xe0,
1237 0xdc, 0xe8,
1238 4);
1239 }
1240
1241 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1242 {
1243 DUMP_RR( dst, src );
1244 x87_arith_op(p, dst, src,
1245 0xd8, 0xe8,
1246 0xdc, 0xe0,
1247 5);
1248 }
1249
1250 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1251 {
1252 DUMP_RR( dst, src );
1253 x87_arith_op(p, dst, src,
1254 0xd8, 0xc0,
1255 0xdc, 0xc0,
1256 0);
1257 }
1258
1259 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1260 {
1261 DUMP_RR( dst, src );
1262 x87_arith_op(p, dst, src,
1263 0xd8, 0xf0,
1264 0xdc, 0xf8,
1265 6);
1266 }
1267
1268 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
1269 {
1270 DUMP_RR( dst, src );
1271 x87_arith_op(p, dst, src,
1272 0xd8, 0xf8,
1273 0xdc, 0xf0,
1274 7);
1275 }
1276
1277 void x87_fmulp( struct x86_function *p, struct x86_reg dst )
1278 {
1279 DUMP_R( dst );
1280 assert(dst.file == file_x87);
1281 assert(dst.idx >= 1);
1282 emit_2ub(p, 0xde, 0xc8+dst.idx);
1283 note_x87_pop(p);
1284 }
1285
1286 void x87_fsubp( struct x86_function *p, struct x86_reg dst )
1287 {
1288 DUMP_R( dst );
1289 assert(dst.file == file_x87);
1290 assert(dst.idx >= 1);
1291 emit_2ub(p, 0xde, 0xe8+dst.idx);
1292 note_x87_pop(p);
1293 }
1294
1295 void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
1296 {
1297 DUMP_R( dst );
1298 assert(dst.file == file_x87);
1299 assert(dst.idx >= 1);
1300 emit_2ub(p, 0xde, 0xe0+dst.idx);
1301 note_x87_pop(p);
1302 }
1303
1304 void x87_faddp( struct x86_function *p, struct x86_reg dst )
1305 {
1306 DUMP_R( dst );
1307 assert(dst.file == file_x87);
1308 assert(dst.idx >= 1);
1309 emit_2ub(p, 0xde, 0xc0+dst.idx);
1310 note_x87_pop(p);
1311 }
1312
1313 void x87_fdivp( struct x86_function *p, struct x86_reg dst )
1314 {
1315 DUMP_R( dst );
1316 assert(dst.file == file_x87);
1317 assert(dst.idx >= 1);
1318 emit_2ub(p, 0xde, 0xf8+dst.idx);
1319 note_x87_pop(p);
1320 }
1321
1322 void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
1323 {
1324 DUMP_R( dst );
1325 assert(dst.file == file_x87);
1326 assert(dst.idx >= 1);
1327 emit_2ub(p, 0xde, 0xf0+dst.idx);
1328 note_x87_pop(p);
1329 }
1330
/* Emit FTST (D9 E4). */
void x87_ftst( struct x86_function *p )
{
   DUMP();
   emit_2ub( p, 0xd9, 0xe4 );
}
1336
1337 void x87_fucom( struct x86_function *p, struct x86_reg arg )
1338 {
1339 DUMP_R( arg );
1340 assert(arg.file == file_x87);
1341 emit_2ub(p, 0xdd, 0xe0+arg.idx);
1342 }
1343
1344 void x87_fucomp( struct x86_function *p, struct x86_reg arg )
1345 {
1346 DUMP_R( arg );
1347 assert(arg.file == file_x87);
1348 emit_2ub(p, 0xdd, 0xe8+arg.idx);
1349 note_x87_pop(p);
1350 }
1351
/* Emit FUCOMPP (DA E9) and record both x87 stack pops. */
void x87_fucompp( struct x86_function *p )
{
   DUMP();
   emit_2ub( p, 0xda, 0xe9 );
   /* the instruction pops twice */
   note_x87_pop( p );
   note_x87_pop( p );
}
1359
1360 void x87_fxch( struct x86_function *p, struct x86_reg arg )
1361 {
1362 DUMP_R( arg );
1363 assert(arg.file == file_x87);
1364 emit_2ub(p, 0xd9, 0xc8+arg.idx);
1365 }
1366
/* Emit FABS (D9 E1). */
void x87_fabs( struct x86_function *p )
{
   DUMP();
   emit_2ub( p, 0xd9, 0xe1 );
}
1372
/* Emit FCHS (D9 E0). */
void x87_fchs( struct x86_function *p )
{
   DUMP();
   emit_2ub( p, 0xd9, 0xe0 );
}
1378
/* Emit FCOS (D9 FF). */
void x87_fcos( struct x86_function *p )
{
   DUMP();
   emit_2ub( p, 0xd9, 0xff );
}
1384
1385
/* Emit FRNDINT (D9 FC). */
void x87_fprndint( struct x86_function *p )
{
   DUMP();
   emit_2ub( p, 0xd9, 0xfc );
}
1391
/**
 * Emit FSCALE (bytes D9 FD).
 *
 * \param p  function being assembled
 */
void x87_fscale( struct x86_function *p )
{
   static const unsigned char fscale_bytes[2] = { 0xd9, 0xfd };

   DUMP();
   emit_2ub(p, fscale_bytes[0], fscale_bytes[1]);
}
1397
/**
 * Emit FSIN (bytes D9 FE).
 *
 * \param p  function being assembled
 */
void x87_fsin( struct x86_function *p )
{
   static const unsigned char fsin_bytes[2] = { 0xd9, 0xfe };

   DUMP();
   emit_2ub(p, fsin_bytes[0], fsin_bytes[1]);
}
1403
/**
 * Emit FSINCOS (bytes D9 FB).
 *
 * No x87 push/pop is recorded by this function.
 * NOTE(review): per the Intel manual FSINCOS pushes one value onto the
 * register stack; confirm whether callers account for that themselves.
 *
 * \param p  function being assembled
 */
void x87_fsincos( struct x86_function *p )
{
   static const unsigned char fsincos_bytes[2] = { 0xd9, 0xfb };

   DUMP();
   emit_2ub(p, fsincos_bytes[0], fsincos_bytes[1]);
}
1409
/**
 * Emit FSQRT (bytes D9 FA).
 *
 * \param p  function being assembled
 */
void x87_fsqrt( struct x86_function *p )
{
   static const unsigned char fsqrt_bytes[2] = { 0xd9, 0xfa };

   DUMP();
   emit_2ub(p, fsqrt_bytes[0], fsqrt_bytes[1]);
}
1415
/**
 * Emit FXTRACT (bytes D9 F4).
 *
 * No x87 push/pop is recorded by this function.
 * NOTE(review): per the Intel manual FXTRACT pushes one value onto the
 * register stack; confirm whether callers account for that themselves.
 *
 * \param p  function being assembled
 */
void x87_fxtract( struct x86_function *p )
{
   static const unsigned char fxtract_bytes[2] = { 0xd9, 0xf4 };

   DUMP();
   emit_2ub(p, fxtract_bytes[0], fxtract_bytes[1]);
}
1421
/**
 * Emit F2XM1 (bytes D9 F0):  st0 = (2^st0) - 1
 *
 * Restriction on the runtime operand: -1.0 <= st0 <= 1.0
 *
 * \param p  function being assembled
 */
void x87_f2xm1( struct x86_function *p )
{
   static const unsigned char f2xm1_bytes[2] = { 0xd9, 0xf0 };

   DUMP();
   emit_2ub(p, f2xm1_bytes[0], f2xm1_bytes[1]);
}
1431
/**
 * Emit FYL2X (bytes D9 F1) and record one x87 stack pop.
 *
 *    st1 = st1 * log2(st0);
 *    pop_stack;
 *
 * \param p  function being assembled
 */
void x87_fyl2x( struct x86_function *p )
{
   static const unsigned char fyl2x_bytes[2] = { 0xd9, 0xf1 };

   DUMP();
   emit_2ub(p, fyl2x_bytes[0], fyl2x_bytes[1]);
   note_x87_pop(p);
}
1441
/**
 * Emit FYL2XP1 (bytes D9 F9) and record one x87 stack pop.
 *
 *    st1 = st1 * log2(st0 + 1.0);
 *    pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 *
 * \param p  function being assembled
 */
void x87_fyl2xp1( struct x86_function *p )
{
   static const unsigned char fyl2xp1_bytes[2] = { 0xd9, 0xf9 };

   DUMP();
   emit_2ub(p, fyl2xp1_bytes[0], fyl2xp1_bytes[1]);
   note_x87_pop(p);
}
1453
1454
1455 void x87_fld( struct x86_function *p, struct x86_reg arg )
1456 {
1457 DUMP_R( arg );
1458 if (arg.file == file_x87)
1459 emit_2ub(p, 0xd9, 0xc0 + arg.idx);
1460 else {
1461 emit_1ub(p, 0xd9);
1462 emit_modrm_noreg(p, 0, arg);
1463 }
1464 note_x87_push(p);
1465 }
1466
1467 void x87_fst( struct x86_function *p, struct x86_reg dst )
1468 {
1469 DUMP_R( dst );
1470 if (dst.file == file_x87)
1471 emit_2ub(p, 0xdd, 0xd0 + dst.idx);
1472 else {
1473 emit_1ub(p, 0xd9);
1474 emit_modrm_noreg(p, 2, dst);
1475 }
1476 }
1477
1478 void x87_fstp( struct x86_function *p, struct x86_reg dst )
1479 {
1480 DUMP_R( dst );
1481 if (dst.file == file_x87)
1482 emit_2ub(p, 0xdd, 0xd8 + dst.idx);
1483 else {
1484 emit_1ub(p, 0xd9);
1485 emit_modrm_noreg(p, 3, dst);
1486 }
1487 note_x87_pop(p);
1488 }
1489
1490 void x87_fpop( struct x86_function *p )
1491 {
1492 x87_fstp( p, x86_make_reg( file_x87, 0 ));
1493 }
1494
1495
1496 void x87_fcom( struct x86_function *p, struct x86_reg dst )
1497 {
1498 DUMP_R( dst );
1499 if (dst.file == file_x87)
1500 emit_2ub(p, 0xd8, 0xd0 + dst.idx);
1501 else {
1502 emit_1ub(p, 0xd8);
1503 emit_modrm_noreg(p, 2, dst);
1504 }
1505 }
1506
1507
1508 void x87_fcomp( struct x86_function *p, struct x86_reg dst )
1509 {
1510 DUMP_R( dst );
1511 if (dst.file == file_x87)
1512 emit_2ub(p, 0xd8, 0xd8 + dst.idx);
1513 else {
1514 emit_1ub(p, 0xd8);
1515 emit_modrm_noreg(p, 3, dst);
1516 }
1517 note_x87_pop(p);
1518 }
1519
1520 void x87_fcomi( struct x86_function *p, struct x86_reg arg )
1521 {
1522 DUMP_R( arg );
1523 emit_2ub(p, 0xdb, 0xf0+arg.idx);
1524 }
1525
1526 void x87_fcomip( struct x86_function *p, struct x86_reg arg )
1527 {
1528 DUMP_R( arg );
1529 emit_2ub(p, 0xdb, 0xf0+arg.idx);
1530 note_x87_pop(p);
1531 }
1532
1533
1534 void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
1535 {
1536 DUMP_R( dst );
1537 assert(dst.file == file_REG32);
1538
1539 if (dst.idx == reg_AX &&
1540 dst.mod == mod_REG)
1541 emit_2ub(p, 0xdf, 0xe0);
1542 else {
1543 emit_1ub(p, 0xdd);
1544 emit_modrm_noreg(p, 7, dst);
1545 }
1546 }
1547
1548
1549 void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
1550 {
1551 DUMP_R( dst );
1552 assert(dst.file == file_REG32);
1553
1554 emit_1ub(p, 0x9b); /* WAIT -- needed? */
1555 emit_1ub(p, 0xd9);
1556 emit_modrm_noreg(p, 7, dst);
1557 }
1558
1559
1560
1561
1562 /***********************************************************************
1563 * MMX instructions
1564 */
1565
1566 void mmx_emms( struct x86_function *p )
1567 {
1568 DUMP();
1569 assert(p->need_emms);
1570 emit_2ub(p, 0x0f, 0x77);
1571 p->need_emms = 0;
1572 }
1573
1574 void mmx_packssdw( struct x86_function *p,
1575 struct x86_reg dst,
1576 struct x86_reg src )
1577 {
1578 DUMP_RR( dst, src );
1579 assert(dst.file == file_MMX &&
1580 (src.file == file_MMX || src.mod != mod_REG));
1581
1582 p->need_emms = 1;
1583
1584 emit_2ub(p, X86_TWOB, 0x6b);
1585 emit_modrm( p, dst, src );
1586 }
1587
1588 void mmx_packuswb( struct x86_function *p,
1589 struct x86_reg dst,
1590 struct x86_reg src )
1591 {
1592 DUMP_RR( dst, src );
1593 assert(dst.file == file_MMX &&
1594 (src.file == file_MMX || src.mod != mod_REG));
1595
1596 p->need_emms = 1;
1597
1598 emit_2ub(p, X86_TWOB, 0x67);
1599 emit_modrm( p, dst, src );
1600 }
1601
1602 void mmx_movd( struct x86_function *p,
1603 struct x86_reg dst,
1604 struct x86_reg src )
1605 {
1606 DUMP_RR( dst, src );
1607 p->need_emms = 1;
1608 emit_1ub(p, X86_TWOB);
1609 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1610 }
1611
1612 void mmx_movq( struct x86_function *p,
1613 struct x86_reg dst,
1614 struct x86_reg src )
1615 {
1616 DUMP_RR( dst, src );
1617 p->need_emms = 1;
1618 emit_1ub(p, X86_TWOB);
1619 emit_op_modrm( p, 0x6f, 0x7f, dst, src );
1620 }
1621
1622
1623 /***********************************************************************
1624 * Helper functions
1625 */
1626
1627
1628 void x86_cdecl_caller_push_regs( struct x86_function *p )
1629 {
1630 x86_push(p, x86_make_reg(file_REG32, reg_AX));
1631 x86_push(p, x86_make_reg(file_REG32, reg_CX));
1632 x86_push(p, x86_make_reg(file_REG32, reg_DX));
1633 }
1634
1635 void x86_cdecl_caller_pop_regs( struct x86_function *p )
1636 {
1637 x86_pop(p, x86_make_reg(file_REG32, reg_DX));
1638 x86_pop(p, x86_make_reg(file_REG32, reg_CX));
1639 x86_pop(p, x86_make_reg(file_REG32, reg_AX));
1640 }
1641
1642
1643 /* Retreive a reference to one of the function arguments, taking into
1644 * account any push/pop activity:
1645 */
1646 struct x86_reg x86_fn_arg( struct x86_function *p,
1647 unsigned arg )
1648 {
1649 return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
1650 p->stack_offset + arg * 4); /* ??? */
1651 }
1652
1653
1654 void x86_init_func( struct x86_function *p )
1655 {
1656 p->size = 0;
1657 p->store = NULL;
1658 p->csr = p->store;
1659 DUMP_START();
1660 }
1661
1662 void x86_init_func_size( struct x86_function *p, unsigned code_size )
1663 {
1664 p->size = code_size;
1665 p->store = rtasm_exec_malloc(code_size);
1666 if (p->store == NULL) {
1667 p->store = p->error_overflow;
1668 }
1669 p->csr = p->store;
1670 DUMP_START();
1671 }
1672
1673 void x86_release_func( struct x86_function *p )
1674 {
1675 if (p->store && p->store != p->error_overflow)
1676 rtasm_exec_free(p->store);
1677
1678 p->store = NULL;
1679 p->csr = NULL;
1680 p->size = 0;
1681 }
1682
1683
1684 void (*x86_get_func( struct x86_function *p ))(void)
1685 {
1686 DUMP_END();
1687 if (DISASSEM && p->store)
1688 debug_printf("disassemble %p %p\n", p->store, p->csr);
1689
1690 if (p->store == p->error_overflow)
1691 return (void (*)(void)) NULL;
1692 else
1693 return (void (*)(void)) p->store;
1694 }
1695
1696 #else
1697
/**
 * Placeholder compiled on the #else branch of the PIPE_ARCH_X86 guard,
 * where none of the x86 emitters are built.  Intentionally does nothing.
 */
void x86sse_dummy( void )
{
   /* intentionally empty */
}
1701
1702 #endif