Merge branch 'r300-vertprog-branch' of ssh://people.freedesktop.org/~z3ro/mesa
[mesa.git] / src / mesa / x86 / rtasm / x86sse.c
1 #if defined(__i386__) || defined(__386__)
2
3 #include "imports.h"
4 #include "x86sse.h"
5
6 #define DISASSEM 0
7 #define X86_TWOB 0x0f
8
/**
 * Reinterpret a function pointer as a byte pointer by round-tripping it
 * through an unsigned long.
 */
static unsigned char *cptr( void (*label)() )
{
   unsigned long address = (unsigned long)label;

   return (unsigned char *)address;
}
13
14
15 static void do_realloc( struct x86_function *p )
16 {
17 if (p->size == 0) {
18 p->size = 1024;
19 p->store = _mesa_exec_malloc(p->size);
20 p->csr = p->store;
21 }
22 else {
23 unsigned used = p->csr - p->store;
24 unsigned char *tmp = p->store;
25 p->size *= 2;
26 p->store = _mesa_exec_malloc(p->size);
27 memcpy(p->store, tmp, used);
28 p->csr = p->store + used;
29 _mesa_exec_free(tmp);
30 }
31 }
32
33 /* Emit bytes to the instruction stream:
34 */
35 static unsigned char *reserve( struct x86_function *p, int bytes )
36 {
37 if (p->csr + bytes - p->store > p->size)
38 do_realloc(p);
39
40 {
41 unsigned char *csr = p->csr;
42 p->csr += bytes;
43 return csr;
44 }
45 }
46
47
48
/**
 * Append one (plain char) byte to the instruction stream.
 */
static void emit_1b( struct x86_function *p, char b0 )
{
   char *slot = (char *)reserve(p, 1);

   slot[0] = b0;
}
54
/**
 * Append one int (sizeof(int) bytes, host byte order) to the instruction
 * stream.
 *
 * The write position is an arbitrary byte offset into the code buffer, so
 * it is generally not aligned for an int.  The value is therefore copied
 * bytewise with memcpy() rather than stored through a cast int pointer.
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *dest = reserve(p, sizeof(i0));

   memcpy(dest, &i0, sizeof(i0));
}
60
/**
 * Append one unsigned byte to the instruction stream.
 */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *dest = reserve(p, 1);

   dest[0] = b0;
}
66
/**
 * Append two unsigned bytes (b0 first) to the instruction stream, using a
 * single reservation.
 */
static void emit_2ub( struct x86_function *p,
                      unsigned char b0,
                      unsigned char b1 )
{
   unsigned char *dest = reserve(p, 2);

   dest[0] = b0;
   dest[1] = b1;
}
73
/**
 * Append three unsigned bytes (b0 first) to the instruction stream, using
 * a single reservation.
 */
static void emit_3ub( struct x86_function *p,
                      unsigned char b0,
                      unsigned char b1,
                      unsigned char b2 )
{
   unsigned char *dest = reserve(p, 3);

   dest[0] = b0;
   dest[1] = b1;
   dest[2] = b2;
}
81
82
83 /* Build a modRM byte + possible displacement. No treatment of SIB
84 * indexing. BZZT - no way to encode an absolute address.
85 */
86 static void emit_modrm( struct x86_function *p,
87 struct x86_reg reg,
88 struct x86_reg regmem )
89 {
90 unsigned char val = 0;
91
92 assert(reg.mod == mod_REG);
93
94 val |= regmem.mod << 6; /* mod field */
95 val |= reg.idx << 3; /* reg field */
96 val |= regmem.idx; /* r/m field */
97
98 emit_1ub(p, val);
99
100 /* Oh-oh we've stumbled into the SIB thing.
101 */
102 if (regmem.file == file_REG32 &&
103 regmem.idx == reg_SP) {
104 emit_1ub(p, 0x24); /* simplistic! */
105 }
106
107 switch (regmem.mod) {
108 case mod_REG:
109 case mod_INDIRECT:
110 break;
111 case mod_DISP8:
112 emit_1b(p, regmem.disp);
113 break;
114 case mod_DISP32:
115 emit_1i(p, regmem.disp);
116 break;
117 default:
118 assert(0);
119 break;
120 }
121 }
122
123
124 static void emit_modrm_noreg( struct x86_function *p,
125 unsigned op,
126 struct x86_reg regmem )
127 {
128 struct x86_reg dummy = x86_make_reg(file_REG32, op);
129 emit_modrm(p, dummy, regmem);
130 }
131
132 /* Many x86 instructions have two opcodes to cope with the situations
133 * where the destination is a register or memory reference
134 * respectively. This function selects the correct opcode based on
135 * the arguments presented.
136 */
137 static void emit_op_modrm( struct x86_function *p,
138 unsigned char op_dst_is_reg,
139 unsigned char op_dst_is_mem,
140 struct x86_reg dst,
141 struct x86_reg src )
142 {
143 switch (dst.mod) {
144 case mod_REG:
145 emit_1ub(p, op_dst_is_reg);
146 emit_modrm(p, dst, src);
147 break;
148 case mod_INDIRECT:
149 case mod_DISP32:
150 case mod_DISP8:
151 assert(src.mod == mod_REG);
152 emit_1ub(p, op_dst_is_mem);
153 emit_modrm(p, src, dst);
154 break;
155 default:
156 assert(0);
157 break;
158 }
159 }
160
161
162
163
164
165
166
167 /* Create and manipulate registers and regmem values:
168 */
169 struct x86_reg x86_make_reg( enum x86_reg_file file,
170 enum x86_reg_name idx )
171 {
172 struct x86_reg reg;
173
174 reg.file = file;
175 reg.idx = idx;
176 reg.mod = mod_REG;
177 reg.disp = 0;
178
179 return reg;
180 }
181
182 struct x86_reg x86_make_disp( struct x86_reg reg,
183 int disp )
184 {
185 assert(reg.file == file_REG32);
186
187 if (reg.mod == mod_REG)
188 reg.disp = disp;
189 else
190 reg.disp += disp;
191
192 if (reg.disp == 0)
193 reg.mod = mod_INDIRECT;
194 else if (reg.disp <= 127 && reg.disp >= -128)
195 reg.mod = mod_DISP8;
196 else
197 reg.mod = mod_DISP32;
198
199 return reg;
200 }
201
202 struct x86_reg x86_deref( struct x86_reg reg )
203 {
204 return x86_make_disp(reg, 0);
205 }
206
207 struct x86_reg x86_get_base_reg( struct x86_reg reg )
208 {
209 return x86_make_reg( reg.file, reg.idx );
210 }
211
212 unsigned char *x86_get_label( struct x86_function *p )
213 {
214 return p->csr;
215 }
216
217
218
219 /***********************************************************************
220 * x86 instructions
221 */
222
223
224 void x86_jcc( struct x86_function *p,
225 enum x86_cc cc,
226 unsigned char *label )
227 {
228 int offset = label - (x86_get_label(p) + 2);
229
230 if (offset <= 127 && offset >= -128) {
231 emit_1ub(p, 0x70 + cc);
232 emit_1b(p, (char) offset);
233 }
234 else {
235 offset = label - (x86_get_label(p) + 6);
236 emit_2ub(p, 0x0f, 0x80 + cc);
237 emit_1i(p, offset);
238 }
239 }
240
241 /* Always use a 32bit offset for forward jumps:
242 */
243 unsigned char *x86_jcc_forward( struct x86_function *p,
244 enum x86_cc cc )
245 {
246 emit_2ub(p, 0x0f, 0x80 + cc);
247 emit_1i(p, 0);
248 return x86_get_label(p);
249 }
250
/**
 * Emit an unconditional near jump (E9) with a zero 32-bit placeholder.
 * \return the position just past the placeholder, for use with
 *         x86_fixup_fwd_jump().
 */
unsigned char *x86_jmp_forward( struct x86_function *p )
{
   const int placeholder = 0;

   emit_1ub(p, 0xe9);
   emit_1i(p, placeholder);

   return x86_get_label(p);
}
257
/**
 * Emit a relative call (E8) with a zero 32-bit placeholder.
 * \return the position just past the placeholder, for use with
 *         x86_fixup_fwd_jump().
 */
unsigned char *x86_call_forward( struct x86_function *p )
{
   const int placeholder = 0;

   emit_1ub(p, 0xe8);
   emit_1i(p, placeholder);

   return x86_get_label(p);
}
264
265 /* Fixup offset from forward jump:
266 */
267 void x86_fixup_fwd_jump( struct x86_function *p,
268 unsigned char *fixup )
269 {
270 *(int *)(fixup - 4) = x86_get_label(p) - fixup;
271 }
272
/**
 * Emit an unconditional near jump (E9 rel32) to an already known label.
 */
void x86_jmp( struct x86_function *p, unsigned char *label )
{
   int rel;

   emit_1ub(p, 0xe9);

   /* Relative to the end of the 4-byte offset that is about to be written. */
   rel = label - x86_get_label(p) - 4;
   emit_1i(p, rel);
}
278
279 #if 0
280 /* This doesn't work once we start reallocating & copying the
281 * generated code on buffer fills, because the call is relative to the
282 * current pc.
283 */
284 void x86_call( struct x86_function *p, void (*label)())
285 {
286 emit_1ub(p, 0xe8);
287 emit_1i(p, cptr(label) - x86_get_label(p) - 4);
288 }
289 #else
290 void x86_call( struct x86_function *p, struct x86_reg reg)
291 {
292 emit_1ub(p, 0xff);
293 emit_modrm(p, reg, reg);
294 }
295 #endif
296
297
298 /* michal:
299 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
300 * I load the immediate into general purpose register and use it.
301 */
302 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
303 {
304 assert(dst.mod == mod_REG);
305 emit_1ub(p, 0xb8 + dst.idx);
306 emit_1i(p, imm);
307 }
308
309 void x86_push( struct x86_function *p,
310 struct x86_reg reg )
311 {
312 assert(reg.mod == mod_REG);
313 emit_1ub(p, 0x50 + reg.idx);
314 p->stack_offset += 4;
315 }
316
317 void x86_pop( struct x86_function *p,
318 struct x86_reg reg )
319 {
320 assert(reg.mod == mod_REG);
321 emit_1ub(p, 0x58 + reg.idx);
322 p->stack_offset -= 4;
323 }
324
325 void x86_inc( struct x86_function *p,
326 struct x86_reg reg )
327 {
328 assert(reg.mod == mod_REG);
329 emit_1ub(p, 0x40 + reg.idx);
330 }
331
332 void x86_dec( struct x86_function *p,
333 struct x86_reg reg )
334 {
335 assert(reg.mod == mod_REG);
336 emit_1ub(p, 0x48 + reg.idx);
337 }
338
/**
 * Emit RET (C3).
 */
void x86_ret( struct x86_function *p )
{
   const unsigned char opcode = 0xc3;

   emit_1ub(p, opcode);
}
343
/**
 * Emit SAHF (9E).
 */
void x86_sahf( struct x86_function *p )
{
   const unsigned char opcode = 0x9e;

   emit_1ub(p, opcode);
}
348
349 void x86_mov( struct x86_function *p,
350 struct x86_reg dst,
351 struct x86_reg src )
352 {
353 emit_op_modrm( p, 0x8b, 0x89, dst, src );
354 }
355
356 void x86_xor( struct x86_function *p,
357 struct x86_reg dst,
358 struct x86_reg src )
359 {
360 emit_op_modrm( p, 0x33, 0x31, dst, src );
361 }
362
363 void x86_cmp( struct x86_function *p,
364 struct x86_reg dst,
365 struct x86_reg src )
366 {
367 emit_op_modrm( p, 0x3b, 0x39, dst, src );
368 }
369
370 void x86_lea( struct x86_function *p,
371 struct x86_reg dst,
372 struct x86_reg src )
373 {
374 emit_1ub(p, 0x8d);
375 emit_modrm( p, dst, src );
376 }
377
378 void x86_test( struct x86_function *p,
379 struct x86_reg dst,
380 struct x86_reg src )
381 {
382 emit_1ub(p, 0x85);
383 emit_modrm( p, dst, src );
384 }
385
386 void x86_add( struct x86_function *p,
387 struct x86_reg dst,
388 struct x86_reg src )
389 {
390 emit_op_modrm(p, 0x03, 0x01, dst, src );
391 }
392
393 void x86_mul( struct x86_function *p,
394 struct x86_reg src )
395 {
396 assert (src.file == file_REG32 && src.mod == mod_REG);
397 emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
398 }
399
400 void x86_sub( struct x86_function *p,
401 struct x86_reg dst,
402 struct x86_reg src )
403 {
404 emit_op_modrm(p, 0x2b, 0x29, dst, src );
405 }
406
407 void x86_or( struct x86_function *p,
408 struct x86_reg dst,
409 struct x86_reg src )
410 {
411 emit_op_modrm( p, 0x0b, 0x09, dst, src );
412 }
413
414 void x86_and( struct x86_function *p,
415 struct x86_reg dst,
416 struct x86_reg src )
417 {
418 emit_op_modrm( p, 0x23, 0x21, dst, src );
419 }
420
421
422
423 /***********************************************************************
424 * SSE instructions
425 */
426
427
428 void sse_movss( struct x86_function *p,
429 struct x86_reg dst,
430 struct x86_reg src )
431 {
432 emit_2ub(p, 0xF3, X86_TWOB);
433 emit_op_modrm( p, 0x10, 0x11, dst, src );
434 }
435
436 void sse_movaps( struct x86_function *p,
437 struct x86_reg dst,
438 struct x86_reg src )
439 {
440 emit_1ub(p, X86_TWOB);
441 emit_op_modrm( p, 0x28, 0x29, dst, src );
442 }
443
444 void sse_movups( struct x86_function *p,
445 struct x86_reg dst,
446 struct x86_reg src )
447 {
448 emit_1ub(p, X86_TWOB);
449 emit_op_modrm( p, 0x10, 0x11, dst, src );
450 }
451
452 void sse_movhps( struct x86_function *p,
453 struct x86_reg dst,
454 struct x86_reg src )
455 {
456 assert(dst.mod != mod_REG || src.mod != mod_REG);
457 emit_1ub(p, X86_TWOB);
458 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
459 }
460
461 void sse_movlps( struct x86_function *p,
462 struct x86_reg dst,
463 struct x86_reg src )
464 {
465 assert(dst.mod != mod_REG || src.mod != mod_REG);
466 emit_1ub(p, X86_TWOB);
467 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
468 }
469
470 void sse_maxps( struct x86_function *p,
471 struct x86_reg dst,
472 struct x86_reg src )
473 {
474 emit_2ub(p, X86_TWOB, 0x5F);
475 emit_modrm( p, dst, src );
476 }
477
478 void sse_maxss( struct x86_function *p,
479 struct x86_reg dst,
480 struct x86_reg src )
481 {
482 emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
483 emit_modrm( p, dst, src );
484 }
485
486 void sse_divss( struct x86_function *p,
487 struct x86_reg dst,
488 struct x86_reg src )
489 {
490 emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
491 emit_modrm( p, dst, src );
492 }
493
494 void sse_minps( struct x86_function *p,
495 struct x86_reg dst,
496 struct x86_reg src )
497 {
498 emit_2ub(p, X86_TWOB, 0x5D);
499 emit_modrm( p, dst, src );
500 }
501
502 void sse_subps( struct x86_function *p,
503 struct x86_reg dst,
504 struct x86_reg src )
505 {
506 emit_2ub(p, X86_TWOB, 0x5C);
507 emit_modrm( p, dst, src );
508 }
509
510 void sse_mulps( struct x86_function *p,
511 struct x86_reg dst,
512 struct x86_reg src )
513 {
514 emit_2ub(p, X86_TWOB, 0x59);
515 emit_modrm( p, dst, src );
516 }
517
518 void sse_mulss( struct x86_function *p,
519 struct x86_reg dst,
520 struct x86_reg src )
521 {
522 emit_3ub(p, 0xF3, X86_TWOB, 0x59);
523 emit_modrm( p, dst, src );
524 }
525
526 void sse_addps( struct x86_function *p,
527 struct x86_reg dst,
528 struct x86_reg src )
529 {
530 emit_2ub(p, X86_TWOB, 0x58);
531 emit_modrm( p, dst, src );
532 }
533
534 void sse_addss( struct x86_function *p,
535 struct x86_reg dst,
536 struct x86_reg src )
537 {
538 emit_3ub(p, 0xF3, X86_TWOB, 0x58);
539 emit_modrm( p, dst, src );
540 }
541
542 void sse_andnps( struct x86_function *p,
543 struct x86_reg dst,
544 struct x86_reg src )
545 {
546 emit_2ub(p, X86_TWOB, 0x55);
547 emit_modrm( p, dst, src );
548 }
549
550 void sse_andps( struct x86_function *p,
551 struct x86_reg dst,
552 struct x86_reg src )
553 {
554 emit_2ub(p, X86_TWOB, 0x54);
555 emit_modrm( p, dst, src );
556 }
557
558 void sse_rsqrtps( struct x86_function *p,
559 struct x86_reg dst,
560 struct x86_reg src )
561 {
562 emit_2ub(p, X86_TWOB, 0x52);
563 emit_modrm( p, dst, src );
564 }
565
566 void sse_rsqrtss( struct x86_function *p,
567 struct x86_reg dst,
568 struct x86_reg src )
569 {
570 emit_3ub(p, 0xF3, X86_TWOB, 0x52);
571 emit_modrm( p, dst, src );
572
573 }
574
575 void sse_movhlps( struct x86_function *p,
576 struct x86_reg dst,
577 struct x86_reg src )
578 {
579 assert(dst.mod == mod_REG && src.mod == mod_REG);
580 emit_2ub(p, X86_TWOB, 0x12);
581 emit_modrm( p, dst, src );
582 }
583
584 void sse_movlhps( struct x86_function *p,
585 struct x86_reg dst,
586 struct x86_reg src )
587 {
588 assert(dst.mod == mod_REG && src.mod == mod_REG);
589 emit_2ub(p, X86_TWOB, 0x16);
590 emit_modrm( p, dst, src );
591 }
592
593 void sse_orps( struct x86_function *p,
594 struct x86_reg dst,
595 struct x86_reg src )
596 {
597 emit_2ub(p, X86_TWOB, 0x56);
598 emit_modrm( p, dst, src );
599 }
600
601 void sse_xorps( struct x86_function *p,
602 struct x86_reg dst,
603 struct x86_reg src )
604 {
605 emit_2ub(p, X86_TWOB, 0x57);
606 emit_modrm( p, dst, src );
607 }
608
609 void sse_cvtps2pi( struct x86_function *p,
610 struct x86_reg dst,
611 struct x86_reg src )
612 {
613 assert(dst.file == file_MMX &&
614 (src.file == file_XMM || src.mod != mod_REG));
615
616 p->need_emms = 1;
617
618 emit_2ub(p, X86_TWOB, 0x2d);
619 emit_modrm( p, dst, src );
620 }
621
622
623 /* Shufps can also be used to implement a reduced swizzle when dest ==
624 * arg0.
625 */
626 void sse_shufps( struct x86_function *p,
627 struct x86_reg dest,
628 struct x86_reg arg0,
629 unsigned char shuf)
630 {
631 emit_2ub(p, X86_TWOB, 0xC6);
632 emit_modrm(p, dest, arg0);
633 emit_1ub(p, shuf);
634 }
635
636 void sse_cmpps( struct x86_function *p,
637 struct x86_reg dest,
638 struct x86_reg arg0,
639 unsigned char cc)
640 {
641 emit_2ub(p, X86_TWOB, 0xC2);
642 emit_modrm(p, dest, arg0);
643 emit_1ub(p, cc);
644 }
645
646 void sse_pmovmskb( struct x86_function *p,
647 struct x86_reg dest,
648 struct x86_reg src)
649 {
650 emit_3ub(p, 0x66, X86_TWOB, 0xD7);
651 emit_modrm(p, dest, src);
652 }
653
654 /***********************************************************************
655 * SSE2 instructions
656 */
657
658 /**
659 * Perform a reduced swizzle:
660 */
661 void sse2_pshufd( struct x86_function *p,
662 struct x86_reg dest,
663 struct x86_reg arg0,
664 unsigned char shuf)
665 {
666 emit_3ub(p, 0x66, X86_TWOB, 0x70);
667 emit_modrm(p, dest, arg0);
668 emit_1ub(p, shuf);
669 }
670
671 void sse2_cvttps2dq( struct x86_function *p,
672 struct x86_reg dst,
673 struct x86_reg src )
674 {
675 emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
676 emit_modrm( p, dst, src );
677 }
678
679 void sse2_cvtps2dq( struct x86_function *p,
680 struct x86_reg dst,
681 struct x86_reg src )
682 {
683 emit_3ub(p, 0x66, X86_TWOB, 0x5B);
684 emit_modrm( p, dst, src );
685 }
686
687 void sse2_packssdw( struct x86_function *p,
688 struct x86_reg dst,
689 struct x86_reg src )
690 {
691 emit_3ub(p, 0x66, X86_TWOB, 0x6B);
692 emit_modrm( p, dst, src );
693 }
694
695 void sse2_packsswb( struct x86_function *p,
696 struct x86_reg dst,
697 struct x86_reg src )
698 {
699 emit_3ub(p, 0x66, X86_TWOB, 0x63);
700 emit_modrm( p, dst, src );
701 }
702
703 void sse2_packuswb( struct x86_function *p,
704 struct x86_reg dst,
705 struct x86_reg src )
706 {
707 emit_3ub(p, 0x66, X86_TWOB, 0x67);
708 emit_modrm( p, dst, src );
709 }
710
711 void sse2_rcpps( struct x86_function *p,
712 struct x86_reg dst,
713 struct x86_reg src )
714 {
715 emit_2ub(p, X86_TWOB, 0x53);
716 emit_modrm( p, dst, src );
717 }
718
719 void sse2_rcpss( struct x86_function *p,
720 struct x86_reg dst,
721 struct x86_reg src )
722 {
723 emit_3ub(p, 0xF3, X86_TWOB, 0x53);
724 emit_modrm( p, dst, src );
725 }
726
727 void sse2_movd( struct x86_function *p,
728 struct x86_reg dst,
729 struct x86_reg src )
730 {
731 emit_2ub(p, 0x66, X86_TWOB);
732 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
733 }
734
735
736
737
738 /***********************************************************************
739 * x87 instructions
740 */
741 void x87_fist( struct x86_function *p, struct x86_reg dst )
742 {
743 emit_1ub(p, 0xdb);
744 emit_modrm_noreg(p, 2, dst);
745 }
746
747 void x87_fistp( struct x86_function *p, struct x86_reg dst )
748 {
749 emit_1ub(p, 0xdb);
750 emit_modrm_noreg(p, 3, dst);
751 }
752
753 void x87_fild( struct x86_function *p, struct x86_reg arg )
754 {
755 emit_1ub(p, 0xdf);
756 emit_modrm_noreg(p, 0, arg);
757 }
758
/**
 * Emit FLDZ (D9 EE).
 */
void x87_fldz( struct x86_function *p )
{
   const unsigned char second_byte = 0xee;

   emit_2ub(p, 0xd9, second_byte);
}
763
764
765 void x87_fldcw( struct x86_function *p, struct x86_reg arg )
766 {
767 assert(arg.file == file_REG32);
768 assert(arg.mod != mod_REG);
769 emit_1ub(p, 0xd9);
770 emit_modrm_noreg(p, 5, arg);
771 }
772
/**
 * Emit FLD1 (D9 E8).
 */
void x87_fld1( struct x86_function *p )
{
   const unsigned char second_byte = 0xe8;

   emit_2ub(p, 0xd9, second_byte);
}
777
/**
 * Emit FLDL2E (D9 EA).
 */
void x87_fldl2e( struct x86_function *p )
{
   const unsigned char second_byte = 0xea;

   emit_2ub(p, 0xd9, second_byte);
}
782
/**
 * Emit FLDLN2 (D9 ED).
 */
void x87_fldln2( struct x86_function *p )
{
   const unsigned char second_byte = 0xed;

   emit_2ub(p, 0xd9, second_byte);
}
787
/**
 * Emit FWAIT (9B).
 */
void x87_fwait( struct x86_function *p )
{
   const unsigned char opcode = 0x9b;

   emit_1ub(p, opcode);
}
792
/**
 * Emit FNCLEX (DB E2).
 */
void x87_fnclex( struct x86_function *p )
{
   const unsigned char second_byte = 0xe2;

   emit_2ub(p, 0xdb, second_byte);
}
797
/**
 * Emit FCLEX as the two-instruction sequence FWAIT followed by FNCLEX.
 */
void x87_fclex( struct x86_function *p )
{
   x87_fwait(p);     /* 9B */
   x87_fnclex(p);    /* DB E2 */
}
803
804
805 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
806 unsigned char dst0ub0,
807 unsigned char dst0ub1,
808 unsigned char arg0ub0,
809 unsigned char arg0ub1,
810 unsigned char argmem_noreg)
811 {
812 assert(dst.file == file_x87);
813
814 if (arg.file == file_x87) {
815 if (dst.idx == 0)
816 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
817 else if (arg.idx == 0)
818 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
819 else
820 assert(0);
821 }
822 else if (dst.idx == 0) {
823 assert(arg.file == file_REG32);
824 emit_1ub(p, 0xd8);
825 emit_modrm_noreg(p, argmem_noreg, arg);
826 }
827 else
828 assert(0);
829 }
830
831 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
832 {
833 x87_arith_op(p, dst, arg,
834 0xd8, 0xc8,
835 0xdc, 0xc8,
836 4);
837 }
838
839 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
840 {
841 x87_arith_op(p, dst, arg,
842 0xd8, 0xe0,
843 0xdc, 0xe8,
844 4);
845 }
846
847 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
848 {
849 x87_arith_op(p, dst, arg,
850 0xd8, 0xe8,
851 0xdc, 0xe0,
852 5);
853 }
854
855 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
856 {
857 x87_arith_op(p, dst, arg,
858 0xd8, 0xc0,
859 0xdc, 0xc0,
860 0);
861 }
862
863 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
864 {
865 x87_arith_op(p, dst, arg,
866 0xd8, 0xf0,
867 0xdc, 0xf8,
868 6);
869 }
870
871 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
872 {
873 x87_arith_op(p, dst, arg,
874 0xd8, 0xf8,
875 0xdc, 0xf0,
876 7);
877 }
878
879 void x87_fmulp( struct x86_function *p, struct x86_reg dst )
880 {
881 assert(dst.file == file_x87);
882 assert(dst.idx >= 1);
883 emit_2ub(p, 0xde, 0xc8+dst.idx);
884 }
885
886 void x87_fsubp( struct x86_function *p, struct x86_reg dst )
887 {
888 assert(dst.file == file_x87);
889 assert(dst.idx >= 1);
890 emit_2ub(p, 0xde, 0xe8+dst.idx);
891 }
892
893 void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
894 {
895 assert(dst.file == file_x87);
896 assert(dst.idx >= 1);
897 emit_2ub(p, 0xde, 0xe0+dst.idx);
898 }
899
900 void x87_faddp( struct x86_function *p, struct x86_reg dst )
901 {
902 assert(dst.file == file_x87);
903 assert(dst.idx >= 1);
904 emit_2ub(p, 0xde, 0xc0+dst.idx);
905 }
906
907 void x87_fdivp( struct x86_function *p, struct x86_reg dst )
908 {
909 assert(dst.file == file_x87);
910 assert(dst.idx >= 1);
911 emit_2ub(p, 0xde, 0xf8+dst.idx);
912 }
913
914 void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
915 {
916 assert(dst.file == file_x87);
917 assert(dst.idx >= 1);
918 emit_2ub(p, 0xde, 0xf0+dst.idx);
919 }
920
921 void x87_fucom( struct x86_function *p, struct x86_reg arg )
922 {
923 assert(arg.file == file_x87);
924 emit_2ub(p, 0xdd, 0xe0+arg.idx);
925 }
926
927 void x87_fucomp( struct x86_function *p, struct x86_reg arg )
928 {
929 assert(arg.file == file_x87);
930 emit_2ub(p, 0xdd, 0xe8+arg.idx);
931 }
932
/**
 * Emit FUCOMPP (DA E9).
 */
void x87_fucompp( struct x86_function *p )
{
   const unsigned char second_byte = 0xe9;

   emit_2ub(p, 0xda, second_byte);
}
937
938 void x87_fxch( struct x86_function *p, struct x86_reg arg )
939 {
940 assert(arg.file == file_x87);
941 emit_2ub(p, 0xd9, 0xc8+arg.idx);
942 }
943
/**
 * Emit FABS (D9 E1).
 */
void x87_fabs( struct x86_function *p )
{
   const unsigned char second_byte = 0xe1;

   emit_2ub(p, 0xd9, second_byte);
}
948
/**
 * Emit FCHS (D9 E0).
 */
void x87_fchs( struct x86_function *p )
{
   const unsigned char second_byte = 0xe0;

   emit_2ub(p, 0xd9, second_byte);
}
953
/**
 * Emit FCOS (D9 FF).
 */
void x87_fcos( struct x86_function *p )
{
   const unsigned char second_byte = 0xff;

   emit_2ub(p, 0xd9, second_byte);
}
958
959
/**
 * Emit FRNDINT (D9 FC).
 */
void x87_fprndint( struct x86_function *p )
{
   const unsigned char second_byte = 0xfc;

   emit_2ub(p, 0xd9, second_byte);
}
964
/**
 * Emit FSCALE (D9 FD).
 */
void x87_fscale( struct x86_function *p )
{
   const unsigned char second_byte = 0xfd;

   emit_2ub(p, 0xd9, second_byte);
}
969
/**
 * Emit FSIN (D9 FE).
 */
void x87_fsin( struct x86_function *p )
{
   const unsigned char second_byte = 0xfe;

   emit_2ub(p, 0xd9, second_byte);
}
974
/**
 * Emit FSINCOS (D9 FB).
 */
void x87_fsincos( struct x86_function *p )
{
   const unsigned char second_byte = 0xfb;

   emit_2ub(p, 0xd9, second_byte);
}
979
/**
 * Emit FSQRT (D9 FA).
 */
void x87_fsqrt( struct x86_function *p )
{
   const unsigned char second_byte = 0xfa;

   emit_2ub(p, 0xd9, second_byte);
}
984
/**
 * Emit FXTRACT (D9 F4).
 */
void x87_fxtract( struct x86_function *p )
{
   const unsigned char second_byte = 0xf4;

   emit_2ub(p, 0xd9, second_byte);
}
989
/**
 * Emit F2XM1 (D9 F0):
 *
 *    st0 = (2^st0) - 1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   const unsigned char second_byte = 0xf0;

   emit_2ub(p, 0xd9, second_byte);
}
998
/**
 * Emit FYL2X (D9 F1):
 *
 *    st1 = st1 * log2(st0);
 *    pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   const unsigned char second_byte = 0xf1;

   emit_2ub(p, 0xd9, second_byte);
}
1006
/**
 * Emit FYL2XP1 (D9 F9):
 *
 *    st1 = st1 * log2(st0 + 1.0);
 *    pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   const unsigned char second_byte = 0xf9;

   emit_2ub(p, 0xd9, second_byte);
}
1016
1017
1018 void x87_fld( struct x86_function *p, struct x86_reg arg )
1019 {
1020 if (arg.file == file_x87)
1021 emit_2ub(p, 0xd9, 0xc0 + arg.idx);
1022 else {
1023 emit_1ub(p, 0xd9);
1024 emit_modrm_noreg(p, 0, arg);
1025 }
1026 }
1027
1028 void x87_fst( struct x86_function *p, struct x86_reg dst )
1029 {
1030 if (dst.file == file_x87)
1031 emit_2ub(p, 0xdd, 0xd0 + dst.idx);
1032 else {
1033 emit_1ub(p, 0xd9);
1034 emit_modrm_noreg(p, 2, dst);
1035 }
1036 }
1037
1038 void x87_fstp( struct x86_function *p, struct x86_reg dst )
1039 {
1040 if (dst.file == file_x87)
1041 emit_2ub(p, 0xdd, 0xd8 + dst.idx);
1042 else {
1043 emit_1ub(p, 0xd9);
1044 emit_modrm_noreg(p, 3, dst);
1045 }
1046 }
1047
1048 void x87_fcom( struct x86_function *p, struct x86_reg dst )
1049 {
1050 if (dst.file == file_x87)
1051 emit_2ub(p, 0xd8, 0xd0 + dst.idx);
1052 else {
1053 emit_1ub(p, 0xd8);
1054 emit_modrm_noreg(p, 2, dst);
1055 }
1056 }
1057
1058 void x87_fcomp( struct x86_function *p, struct x86_reg dst )
1059 {
1060 if (dst.file == file_x87)
1061 emit_2ub(p, 0xd8, 0xd8 + dst.idx);
1062 else {
1063 emit_1ub(p, 0xd8);
1064 emit_modrm_noreg(p, 3, dst);
1065 }
1066 }
1067
1068
1069 void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
1070 {
1071 assert(dst.file == file_REG32);
1072
1073 if (dst.idx == reg_AX &&
1074 dst.mod == mod_REG)
1075 emit_2ub(p, 0xdf, 0xe0);
1076 else {
1077 emit_1ub(p, 0xdd);
1078 emit_modrm_noreg(p, 7, dst);
1079 }
1080 }
1081
1082
1083
1084
1085 /***********************************************************************
1086 * MMX instructions
1087 */
1088
1089 void mmx_emms( struct x86_function *p )
1090 {
1091 assert(p->need_emms);
1092 emit_2ub(p, 0x0f, 0x77);
1093 p->need_emms = 0;
1094 }
1095
1096 void mmx_packssdw( struct x86_function *p,
1097 struct x86_reg dst,
1098 struct x86_reg src )
1099 {
1100 assert(dst.file == file_MMX &&
1101 (src.file == file_MMX || src.mod != mod_REG));
1102
1103 p->need_emms = 1;
1104
1105 emit_2ub(p, X86_TWOB, 0x6b);
1106 emit_modrm( p, dst, src );
1107 }
1108
1109 void mmx_packuswb( struct x86_function *p,
1110 struct x86_reg dst,
1111 struct x86_reg src )
1112 {
1113 assert(dst.file == file_MMX &&
1114 (src.file == file_MMX || src.mod != mod_REG));
1115
1116 p->need_emms = 1;
1117
1118 emit_2ub(p, X86_TWOB, 0x67);
1119 emit_modrm( p, dst, src );
1120 }
1121
1122 void mmx_movd( struct x86_function *p,
1123 struct x86_reg dst,
1124 struct x86_reg src )
1125 {
1126 p->need_emms = 1;
1127 emit_1ub(p, X86_TWOB);
1128 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1129 }
1130
1131 void mmx_movq( struct x86_function *p,
1132 struct x86_reg dst,
1133 struct x86_reg src )
1134 {
1135 p->need_emms = 1;
1136 emit_1ub(p, X86_TWOB);
1137 emit_op_modrm( p, 0x6f, 0x7f, dst, src );
1138 }
1139
1140
1141 /***********************************************************************
1142 * Helper functions
1143 */
1144
1145
1146 /* Retreive a reference to one of the function arguments, taking into
1147 * account any push/pop activity:
1148 */
1149 struct x86_reg x86_fn_arg( struct x86_function *p,
1150 unsigned arg )
1151 {
1152 return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
1153 p->stack_offset + arg * 4); /* ??? */
1154 }
1155
1156
1157 void x86_init_func( struct x86_function *p )
1158 {
1159 p->size = 0;
1160 p->store = NULL;
1161 p->csr = p->store;
1162 }
1163
1164 int x86_init_func_size( struct x86_function *p, unsigned code_size )
1165 {
1166 p->size = code_size;
1167 p->store = _mesa_exec_malloc(code_size);
1168 p->csr = p->store;
1169 return p->store != NULL;
1170 }
1171
1172 void x86_release_func( struct x86_function *p )
1173 {
1174 _mesa_exec_free(p->store);
1175 p->store = NULL;
1176 p->csr = NULL;
1177 p->size = 0;
1178 }
1179
1180
1181 void (*x86_get_func( struct x86_function *p ))(void)
1182 {
1183 if (DISASSEM && p->store)
1184 _mesa_printf("disassemble %p %p\n", p->store, p->csr);
1185 return (void (*)(void)) (unsigned long) p->store;
1186 }
1187
1188 #else
1189
/**
 * Placeholder so that this translation unit is not empty when it is
 * built for a non-x86 target.
 */
void x86sse_dummy( void )
{
   return;
}
1193
1194 #endif