Fix some compiler warnings with -pedantic
[mesa.git] / src / mesa / x86 / rtasm / x86sse.c
1 #if defined(__i386__) || defined(__386__)
2
3 #include "imports.h"
4 #include "x86sse.h"
5
/* Debug switch: when non-zero, x86_get_func() prints the code buffer
 * bounds.  The per-instruction trace in disassem() is additionally
 * compiled out with "&& 0".
 */
#define DISASSEM 0

/* First byte of the two-byte opcode forms emitted below. */
#define X86_TWOB 0x0f
8
9 static GLubyte *cptr( void (*label)() )
10 {
11 return (char *)(unsigned long)label;
12 }
13
14
15 /* Emit bytes to the instruction stream:
16 */
17 static void emit_1b( struct x86_function *p, GLbyte b0 )
18 {
19 *(GLbyte *)(p->csr++) = b0;
20 }
21
22 static void emit_1i( struct x86_function *p, GLint i0 )
23 {
24 *(GLint *)(p->csr) = i0;
25 p->csr += 4;
26 }
27
/* Optional trace hook called before each opcode is emitted.
 *
 * When enabled it prints the current write address and the name of the
 * emitting function whenever that name changes.  The body is currently
 * compiled out, in which case the function does nothing.
 *
 * p  - function being assembled
 * fn - name of the emitting function, or 0 for no trace
 */
static void disassem( struct x86_function *p, const char *fn )
{
#if DISASSEM && 0
   if (fn && fn != p->fn) {
      /* %p with a void * argument is the portable way to print a
       * pointer; %x expects an unsigned int.
       */
      _mesa_printf("%p: %s\n", (void *) p->csr, fn);
      p->fn = fn;
   }
#else
   /* Silence unused-parameter warnings when tracing is disabled. */
   (void) p;
   (void) fn;
#endif
}
37
38 static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn )
39 {
40 disassem(p, fn);
41 *(p->csr++) = b0;
42 }
43
44 static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn )
45 {
46 disassem(p, fn);
47 *(p->csr++) = b0;
48 *(p->csr++) = b1;
49 }
50
51 static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
52 {
53 disassem(p, fn);
54 *(p->csr++) = b0;
55 *(p->csr++) = b1;
56 *(p->csr++) = b2;
57 }
58
/* Convenience wrappers that tag each emission with the name of the
 * calling function for the trace hook.
 */
#define emit_1ub(p, b0)         emit_1ub_fn((p), (b0), __FUNCTION__)
#define emit_2ub(p, b0, b1)     emit_2ub_fn((p), (b0), (b1), __FUNCTION__)
#define emit_3ub(p, b0, b1, b2) emit_3ub_fn((p), (b0), (b1), (b2), __FUNCTION__)
62
63
64
65 /* Build a modRM byte + possible displacement. No treatment of SIB
66 * indexing. BZZT - no way to encode an absolute address.
67 */
68 static void emit_modrm( struct x86_function *p,
69 struct x86_reg reg,
70 struct x86_reg regmem )
71 {
72 GLubyte val = 0;
73
74 assert(reg.mod == mod_REG);
75
76 val |= regmem.mod << 6; /* mod field */
77 val |= reg.idx << 3; /* reg field */
78 val |= regmem.idx; /* r/m field */
79
80 emit_1ub_fn(p, val, 0);
81
82 /* Oh-oh we've stumbled into the SIB thing.
83 */
84 if (regmem.file == file_REG32 &&
85 regmem.idx == reg_SP) {
86 emit_1ub_fn(p, 0x24, 0); /* simplistic! */
87 }
88
89 switch (regmem.mod) {
90 case mod_REG:
91 case mod_INDIRECT:
92 break;
93 case mod_DISP8:
94 emit_1b(p, regmem.disp);
95 break;
96 case mod_DISP32:
97 emit_1i(p, regmem.disp);
98 break;
99 default:
100 assert(0);
101 break;
102 }
103 }
104
105
106 static void emit_modrm_noreg( struct x86_function *p,
107 GLuint op,
108 struct x86_reg regmem )
109 {
110 struct x86_reg dummy = x86_make_reg(file_REG32, op);
111 emit_modrm(p, dummy, regmem);
112 }
113
114 /* Many x86 instructions have two opcodes to cope with the situations
115 * where the destination is a register or memory reference
116 * respectively. This function selects the correct opcode based on
117 * the arguments presented.
118 */
119 static void emit_op_modrm( struct x86_function *p,
120 GLubyte op_dst_is_reg,
121 GLubyte op_dst_is_mem,
122 struct x86_reg dst,
123 struct x86_reg src )
124 {
125 switch (dst.mod) {
126 case mod_REG:
127 emit_1ub_fn(p, op_dst_is_reg, 0);
128 emit_modrm(p, dst, src);
129 break;
130 case mod_INDIRECT:
131 case mod_DISP32:
132 case mod_DISP8:
133 assert(src.mod == mod_REG);
134 emit_1ub_fn(p, op_dst_is_mem, 0);
135 emit_modrm(p, src, dst);
136 break;
137 default:
138 assert(0);
139 break;
140 }
141 }
142
143
144
145
146
147
148
149 /* Create and manipulate registers and regmem values:
150 */
151 struct x86_reg x86_make_reg( enum x86_reg_file file,
152 enum x86_reg_name idx )
153 {
154 struct x86_reg reg;
155
156 reg.file = file;
157 reg.idx = idx;
158 reg.mod = mod_REG;
159 reg.disp = 0;
160
161 return reg;
162 }
163
164 struct x86_reg x86_make_disp( struct x86_reg reg,
165 GLint disp )
166 {
167 assert(reg.file == file_REG32);
168
169 if (reg.mod == mod_REG)
170 reg.disp = disp;
171 else
172 reg.disp += disp;
173
174 if (reg.disp == 0)
175 reg.mod = mod_INDIRECT;
176 else if (reg.disp <= 127 && reg.disp >= -128)
177 reg.mod = mod_DISP8;
178 else
179 reg.mod = mod_DISP32;
180
181 return reg;
182 }
183
184 struct x86_reg x86_deref( struct x86_reg reg )
185 {
186 return x86_make_disp(reg, 0);
187 }
188
189 struct x86_reg x86_get_base_reg( struct x86_reg reg )
190 {
191 return x86_make_reg( reg.file, reg.idx );
192 }
193
194 GLubyte *x86_get_label( struct x86_function *p )
195 {
196 return p->csr;
197 }
198
199
200
201 /***********************************************************************
202 * x86 instructions
203 */
204
205
206 void x86_jcc( struct x86_function *p,
207 enum x86_cc cc,
208 GLubyte *label )
209 {
210 GLint offset = label - (x86_get_label(p) + 2);
211
212 if (offset <= 127 && offset >= -128) {
213 emit_1ub(p, 0x70 + cc);
214 emit_1b(p, (GLbyte) offset);
215 }
216 else {
217 offset = label - (x86_get_label(p) + 6);
218 emit_2ub(p, 0x0f, 0x80 + cc);
219 emit_1i(p, offset);
220 }
221 }
222
223 /* Always use a 32bit offset for forward jumps:
224 */
225 GLubyte *x86_jcc_forward( struct x86_function *p,
226 enum x86_cc cc )
227 {
228 emit_2ub(p, 0x0f, 0x80 + cc);
229 emit_1i(p, 0);
230 return x86_get_label(p);
231 }
232
233 GLubyte *x86_jmp_forward( struct x86_function *p)
234 {
235 emit_1ub(p, 0xe9);
236 emit_1i(p, 0);
237 return x86_get_label(p);
238 }
239
240 GLubyte *x86_call_forward( struct x86_function *p)
241 {
242 emit_1ub(p, 0xe8);
243 emit_1i(p, 0);
244 return x86_get_label(p);
245 }
246
247 /* Fixup offset from forward jump:
248 */
249 void x86_fixup_fwd_jump( struct x86_function *p,
250 GLubyte *fixup )
251 {
252 *(int *)(fixup - 4) = x86_get_label(p) - fixup;
253 }
254
255 void x86_jmp( struct x86_function *p, GLubyte *label)
256 {
257 emit_1ub(p, 0xe9);
258 emit_1i(p, label - x86_get_label(p) - 4);
259 }
260
261 void x86_call( struct x86_function *p, void (*label)())
262 {
263 emit_1ub(p, 0xe8);
264 emit_1i(p, cptr(label) - x86_get_label(p) - 4);
265 }
266
267 /* michal:
268 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
269 * I load the immediate into general purpose register and use it.
270 */
271 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm )
272 {
273 assert(dst.mod == mod_REG);
274 emit_1ub(p, 0xb8 + dst.idx);
275 emit_1i(p, imm);
276 }
277
278 void x86_push( struct x86_function *p,
279 struct x86_reg reg )
280 {
281 assert(reg.mod == mod_REG);
282 emit_1ub(p, 0x50 + reg.idx);
283 p->stack_offset += 4;
284 }
285
286 void x86_pop( struct x86_function *p,
287 struct x86_reg reg )
288 {
289 assert(reg.mod == mod_REG);
290 emit_1ub(p, 0x58 + reg.idx);
291 p->stack_offset -= 4;
292 }
293
294 void x86_inc( struct x86_function *p,
295 struct x86_reg reg )
296 {
297 assert(reg.mod == mod_REG);
298 emit_1ub(p, 0x40 + reg.idx);
299 }
300
301 void x86_dec( struct x86_function *p,
302 struct x86_reg reg )
303 {
304 assert(reg.mod == mod_REG);
305 emit_1ub(p, 0x48 + reg.idx);
306 }
307
308 void x86_ret( struct x86_function *p )
309 {
310 emit_1ub(p, 0xc3);
311 }
312
313 void x86_sahf( struct x86_function *p )
314 {
315 emit_1ub(p, 0x9e);
316 }
317
318 void x86_mov( struct x86_function *p,
319 struct x86_reg dst,
320 struct x86_reg src )
321 {
322 emit_op_modrm( p, 0x8b, 0x89, dst, src );
323 }
324
325 void x86_xor( struct x86_function *p,
326 struct x86_reg dst,
327 struct x86_reg src )
328 {
329 emit_op_modrm( p, 0x33, 0x31, dst, src );
330 }
331
332 void x86_cmp( struct x86_function *p,
333 struct x86_reg dst,
334 struct x86_reg src )
335 {
336 emit_op_modrm( p, 0x3b, 0x39, dst, src );
337 }
338
339 void x86_lea( struct x86_function *p,
340 struct x86_reg dst,
341 struct x86_reg src )
342 {
343 emit_1ub(p, 0x8d);
344 emit_modrm( p, dst, src );
345 }
346
347 void x86_test( struct x86_function *p,
348 struct x86_reg dst,
349 struct x86_reg src )
350 {
351 emit_1ub(p, 0x85);
352 emit_modrm( p, dst, src );
353 }
354
355 void x86_add( struct x86_function *p,
356 struct x86_reg dst,
357 struct x86_reg src )
358 {
359 emit_op_modrm(p, 0x03, 0x01, dst, src );
360 }
361
362 void x86_mul( struct x86_function *p,
363 struct x86_reg src )
364 {
365 assert (src.file == file_REG32 && src.mod == mod_REG);
366 emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
367 }
368
369 void x86_sub( struct x86_function *p,
370 struct x86_reg dst,
371 struct x86_reg src )
372 {
373 emit_op_modrm(p, 0x2b, 0x29, dst, src );
374 }
375
376 void x86_or( struct x86_function *p,
377 struct x86_reg dst,
378 struct x86_reg src )
379 {
380 emit_op_modrm( p, 0x0b, 0x09, dst, src );
381 }
382
383 void x86_and( struct x86_function *p,
384 struct x86_reg dst,
385 struct x86_reg src )
386 {
387 emit_op_modrm( p, 0x23, 0x21, dst, src );
388 }
389
390
391
392 /***********************************************************************
393 * SSE instructions
394 */
395
396
397 void sse_movss( struct x86_function *p,
398 struct x86_reg dst,
399 struct x86_reg src )
400 {
401 emit_2ub(p, 0xF3, X86_TWOB);
402 emit_op_modrm( p, 0x10, 0x11, dst, src );
403 }
404
405 void sse_movaps( struct x86_function *p,
406 struct x86_reg dst,
407 struct x86_reg src )
408 {
409 emit_1ub(p, X86_TWOB);
410 emit_op_modrm( p, 0x28, 0x29, dst, src );
411 }
412
413 void sse_movups( struct x86_function *p,
414 struct x86_reg dst,
415 struct x86_reg src )
416 {
417 emit_1ub(p, X86_TWOB);
418 emit_op_modrm( p, 0x10, 0x11, dst, src );
419 }
420
421 void sse_movhps( struct x86_function *p,
422 struct x86_reg dst,
423 struct x86_reg src )
424 {
425 assert(dst.mod != mod_REG || src.mod != mod_REG);
426 emit_1ub(p, X86_TWOB);
427 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
428 }
429
430 void sse_movlps( struct x86_function *p,
431 struct x86_reg dst,
432 struct x86_reg src )
433 {
434 assert(dst.mod != mod_REG || src.mod != mod_REG);
435 emit_1ub(p, X86_TWOB);
436 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
437 }
438
439 void sse_maxps( struct x86_function *p,
440 struct x86_reg dst,
441 struct x86_reg src )
442 {
443 emit_2ub(p, X86_TWOB, 0x5F);
444 emit_modrm( p, dst, src );
445 }
446
447 void sse_maxss( struct x86_function *p,
448 struct x86_reg dst,
449 struct x86_reg src )
450 {
451 emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
452 emit_modrm( p, dst, src );
453 }
454
455 void sse_divss( struct x86_function *p,
456 struct x86_reg dst,
457 struct x86_reg src )
458 {
459 emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
460 emit_modrm( p, dst, src );
461 }
462
463 void sse_minps( struct x86_function *p,
464 struct x86_reg dst,
465 struct x86_reg src )
466 {
467 emit_2ub(p, X86_TWOB, 0x5D);
468 emit_modrm( p, dst, src );
469 }
470
471 void sse_subps( struct x86_function *p,
472 struct x86_reg dst,
473 struct x86_reg src )
474 {
475 emit_2ub(p, X86_TWOB, 0x5C);
476 emit_modrm( p, dst, src );
477 }
478
479 void sse_mulps( struct x86_function *p,
480 struct x86_reg dst,
481 struct x86_reg src )
482 {
483 emit_2ub(p, X86_TWOB, 0x59);
484 emit_modrm( p, dst, src );
485 }
486
487 void sse_mulss( struct x86_function *p,
488 struct x86_reg dst,
489 struct x86_reg src )
490 {
491 emit_3ub(p, 0xF3, X86_TWOB, 0x59);
492 emit_modrm( p, dst, src );
493 }
494
495 void sse_addps( struct x86_function *p,
496 struct x86_reg dst,
497 struct x86_reg src )
498 {
499 emit_2ub(p, X86_TWOB, 0x58);
500 emit_modrm( p, dst, src );
501 }
502
503 void sse_addss( struct x86_function *p,
504 struct x86_reg dst,
505 struct x86_reg src )
506 {
507 emit_3ub(p, 0xF3, X86_TWOB, 0x58);
508 emit_modrm( p, dst, src );
509 }
510
511 void sse_andnps( struct x86_function *p,
512 struct x86_reg dst,
513 struct x86_reg src )
514 {
515 emit_2ub(p, X86_TWOB, 0x55);
516 emit_modrm( p, dst, src );
517 }
518
519 void sse_andps( struct x86_function *p,
520 struct x86_reg dst,
521 struct x86_reg src )
522 {
523 emit_2ub(p, X86_TWOB, 0x54);
524 emit_modrm( p, dst, src );
525 }
526
527 void sse_rsqrtps( struct x86_function *p,
528 struct x86_reg dst,
529 struct x86_reg src )
530 {
531 emit_2ub(p, X86_TWOB, 0x52);
532 emit_modrm( p, dst, src );
533 }
534
535 void sse_rsqrtss( struct x86_function *p,
536 struct x86_reg dst,
537 struct x86_reg src )
538 {
539 emit_3ub(p, 0xF3, X86_TWOB, 0x52);
540 emit_modrm( p, dst, src );
541
542 }
543
544 void sse_movhlps( struct x86_function *p,
545 struct x86_reg dst,
546 struct x86_reg src )
547 {
548 assert(dst.mod == mod_REG && src.mod == mod_REG);
549 emit_2ub(p, X86_TWOB, 0x12);
550 emit_modrm( p, dst, src );
551 }
552
553 void sse_movlhps( struct x86_function *p,
554 struct x86_reg dst,
555 struct x86_reg src )
556 {
557 assert(dst.mod == mod_REG && src.mod == mod_REG);
558 emit_2ub(p, X86_TWOB, 0x16);
559 emit_modrm( p, dst, src );
560 }
561
562 void sse_orps( struct x86_function *p,
563 struct x86_reg dst,
564 struct x86_reg src )
565 {
566 emit_2ub(p, X86_TWOB, 0x56);
567 emit_modrm( p, dst, src );
568 }
569
570 void sse_xorps( struct x86_function *p,
571 struct x86_reg dst,
572 struct x86_reg src )
573 {
574 emit_2ub(p, X86_TWOB, 0x57);
575 emit_modrm( p, dst, src );
576 }
577
578 void sse_cvtps2pi( struct x86_function *p,
579 struct x86_reg dst,
580 struct x86_reg src )
581 {
582 assert(dst.file == file_MMX &&
583 (src.file == file_XMM || src.mod != mod_REG));
584
585 p->need_emms = 1;
586
587 emit_2ub(p, X86_TWOB, 0x2d);
588 emit_modrm( p, dst, src );
589 }
590
591
592 /* Shufps can also be used to implement a reduced swizzle when dest ==
593 * arg0.
594 */
595 void sse_shufps( struct x86_function *p,
596 struct x86_reg dest,
597 struct x86_reg arg0,
598 GLubyte shuf)
599 {
600 emit_2ub(p, X86_TWOB, 0xC6);
601 emit_modrm(p, dest, arg0);
602 emit_1ub(p, shuf);
603 }
604
605 void sse_cmpps( struct x86_function *p,
606 struct x86_reg dest,
607 struct x86_reg arg0,
608 GLubyte cc)
609 {
610 emit_2ub(p, X86_TWOB, 0xC2);
611 emit_modrm(p, dest, arg0);
612 emit_1ub(p, cc);
613 }
614
615 void sse_pmovmskb( struct x86_function *p,
616 struct x86_reg dest,
617 struct x86_reg src)
618 {
619 emit_3ub(p, 0x66, X86_TWOB, 0xD7);
620 emit_modrm(p, dest, src);
621 }
622
623 /***********************************************************************
624 * SSE2 instructions
625 */
626
627 /**
628 * Perform a reduced swizzle:
629 */
630 void sse2_pshufd( struct x86_function *p,
631 struct x86_reg dest,
632 struct x86_reg arg0,
633 GLubyte shuf)
634 {
635 emit_3ub(p, 0x66, X86_TWOB, 0x70);
636 emit_modrm(p, dest, arg0);
637 emit_1ub(p, shuf);
638 }
639
640 void sse2_cvttps2dq( struct x86_function *p,
641 struct x86_reg dst,
642 struct x86_reg src )
643 {
644 emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
645 emit_modrm( p, dst, src );
646 }
647
648 void sse2_cvtps2dq( struct x86_function *p,
649 struct x86_reg dst,
650 struct x86_reg src )
651 {
652 emit_3ub(p, 0x66, X86_TWOB, 0x5B);
653 emit_modrm( p, dst, src );
654 }
655
656 void sse2_packssdw( struct x86_function *p,
657 struct x86_reg dst,
658 struct x86_reg src )
659 {
660 emit_3ub(p, 0x66, X86_TWOB, 0x6B);
661 emit_modrm( p, dst, src );
662 }
663
664 void sse2_packsswb( struct x86_function *p,
665 struct x86_reg dst,
666 struct x86_reg src )
667 {
668 emit_3ub(p, 0x66, X86_TWOB, 0x63);
669 emit_modrm( p, dst, src );
670 }
671
672 void sse2_packuswb( struct x86_function *p,
673 struct x86_reg dst,
674 struct x86_reg src )
675 {
676 emit_3ub(p, 0x66, X86_TWOB, 0x67);
677 emit_modrm( p, dst, src );
678 }
679
680 void sse2_rcpps( struct x86_function *p,
681 struct x86_reg dst,
682 struct x86_reg src )
683 {
684 emit_2ub(p, X86_TWOB, 0x53);
685 emit_modrm( p, dst, src );
686 }
687
688 void sse2_rcpss( struct x86_function *p,
689 struct x86_reg dst,
690 struct x86_reg src )
691 {
692 emit_3ub(p, 0xF3, X86_TWOB, 0x53);
693 emit_modrm( p, dst, src );
694 }
695
696 void sse2_movd( struct x86_function *p,
697 struct x86_reg dst,
698 struct x86_reg src )
699 {
700 emit_2ub(p, 0x66, X86_TWOB);
701 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
702 }
703
704
705
706
707 /***********************************************************************
708 * x87 instructions
709 */
710 void x87_fist( struct x86_function *p, struct x86_reg dst )
711 {
712 emit_1ub(p, 0xdb);
713 emit_modrm_noreg(p, 2, dst);
714 }
715
716 void x87_fistp( struct x86_function *p, struct x86_reg dst )
717 {
718 emit_1ub(p, 0xdb);
719 emit_modrm_noreg(p, 3, dst);
720 }
721
722 void x87_fild( struct x86_function *p, struct x86_reg arg )
723 {
724 emit_1ub(p, 0xdf);
725 emit_modrm_noreg(p, 0, arg);
726 }
727
728 void x87_fldz( struct x86_function *p )
729 {
730 emit_2ub(p, 0xd9, 0xee);
731 }
732
733
734 void x87_fldcw( struct x86_function *p, struct x86_reg arg )
735 {
736 assert(arg.file == file_REG32);
737 assert(arg.mod != mod_REG);
738 emit_1ub(p, 0xd9);
739 emit_modrm_noreg(p, 5, arg);
740 }
741
742 void x87_fld1( struct x86_function *p )
743 {
744 emit_2ub(p, 0xd9, 0xe8);
745 }
746
747 void x87_fldl2e( struct x86_function *p )
748 {
749 emit_2ub(p, 0xd9, 0xea);
750 }
751
752 void x87_fldln2( struct x86_function *p )
753 {
754 emit_2ub(p, 0xd9, 0xed);
755 }
756
757 void x87_fwait( struct x86_function *p )
758 {
759 emit_1ub(p, 0x9b);
760 }
761
762 void x87_fnclex( struct x86_function *p )
763 {
764 emit_2ub(p, 0xdb, 0xe2);
765 }
766
/* Emit FCLEX, built here as FWAIT followed by FNCLEX. */
void x87_fclex( struct x86_function *p )
{
   x87_fwait( p );
   x87_fnclex( p );
}
772
773
774 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
775 GLubyte dst0ub0,
776 GLubyte dst0ub1,
777 GLubyte arg0ub0,
778 GLubyte arg0ub1,
779 GLubyte argmem_noreg)
780 {
781 assert(dst.file == file_x87);
782
783 if (arg.file == file_x87) {
784 if (dst.idx == 0)
785 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
786 else if (arg.idx == 0)
787 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
788 else
789 assert(0);
790 }
791 else if (dst.idx == 0) {
792 assert(arg.file = file_REG32);
793 emit_1ub(p, 0xd8);
794 emit_modrm_noreg(p, argmem_noreg, arg);
795 }
796 else
797 assert(0);
798 }
799
800 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
801 {
802 x87_arith_op(p, dst, arg,
803 0xd8, 0xc8,
804 0xdc, 0xc8,
805 4);
806 }
807
808 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
809 {
810 x87_arith_op(p, dst, arg,
811 0xd8, 0xe0,
812 0xdc, 0xe8,
813 4);
814 }
815
816 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
817 {
818 x87_arith_op(p, dst, arg,
819 0xd8, 0xe8,
820 0xdc, 0xe0,
821 5);
822 }
823
824 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
825 {
826 x87_arith_op(p, dst, arg,
827 0xd8, 0xc0,
828 0xdc, 0xc0,
829 0);
830 }
831
832 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
833 {
834 x87_arith_op(p, dst, arg,
835 0xd8, 0xf0,
836 0xdc, 0xf8,
837 6);
838 }
839
840 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
841 {
842 x87_arith_op(p, dst, arg,
843 0xd8, 0xf8,
844 0xdc, 0xf0,
845 7);
846 }
847
848 void x87_fmulp( struct x86_function *p, struct x86_reg dst )
849 {
850 assert(dst.file == file_x87);
851 assert(dst.idx >= 1);
852 emit_2ub(p, 0xde, 0xc8+dst.idx);
853 }
854
855 void x87_fsubp( struct x86_function *p, struct x86_reg dst )
856 {
857 assert(dst.file == file_x87);
858 assert(dst.idx >= 1);
859 emit_2ub(p, 0xde, 0xe8+dst.idx);
860 }
861
862 void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
863 {
864 assert(dst.file == file_x87);
865 assert(dst.idx >= 1);
866 emit_2ub(p, 0xde, 0xe0+dst.idx);
867 }
868
869 void x87_faddp( struct x86_function *p, struct x86_reg dst )
870 {
871 assert(dst.file == file_x87);
872 assert(dst.idx >= 1);
873 emit_2ub(p, 0xde, 0xc0+dst.idx);
874 }
875
876 void x87_fdivp( struct x86_function *p, struct x86_reg dst )
877 {
878 assert(dst.file == file_x87);
879 assert(dst.idx >= 1);
880 emit_2ub(p, 0xde, 0xf8+dst.idx);
881 }
882
883 void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
884 {
885 assert(dst.file == file_x87);
886 assert(dst.idx >= 1);
887 emit_2ub(p, 0xde, 0xf0+dst.idx);
888 }
889
890 void x87_fucom( struct x86_function *p, struct x86_reg arg )
891 {
892 assert(arg.file == file_x87);
893 emit_2ub(p, 0xdd, 0xe0+arg.idx);
894 }
895
896 void x87_fucomp( struct x86_function *p, struct x86_reg arg )
897 {
898 assert(arg.file == file_x87);
899 emit_2ub(p, 0xdd, 0xe8+arg.idx);
900 }
901
902 void x87_fucompp( struct x86_function *p )
903 {
904 emit_2ub(p, 0xda, 0xe9);
905 }
906
907 void x87_fxch( struct x86_function *p, struct x86_reg arg )
908 {
909 assert(arg.file == file_x87);
910 emit_2ub(p, 0xd9, 0xc8+arg.idx);
911 }
912
913 void x87_fabs( struct x86_function *p )
914 {
915 emit_2ub(p, 0xd9, 0xe1);
916 }
917
918 void x87_fchs( struct x86_function *p )
919 {
920 emit_2ub(p, 0xd9, 0xe0);
921 }
922
923 void x87_fcos( struct x86_function *p )
924 {
925 emit_2ub(p, 0xd9, 0xff);
926 }
927
928
929 void x87_fprndint( struct x86_function *p )
930 {
931 emit_2ub(p, 0xd9, 0xfc);
932 }
933
934 void x87_fscale( struct x86_function *p )
935 {
936 emit_2ub(p, 0xd9, 0xfd);
937 }
938
939 void x87_fsin( struct x86_function *p )
940 {
941 emit_2ub(p, 0xd9, 0xfe);
942 }
943
944 void x87_fsincos( struct x86_function *p )
945 {
946 emit_2ub(p, 0xd9, 0xfb);
947 }
948
949 void x87_fsqrt( struct x86_function *p )
950 {
951 emit_2ub(p, 0xd9, 0xfa);
952 }
953
954 void x87_fxtract( struct x86_function *p )
955 {
956 emit_2ub(p, 0xd9, 0xf4);
957 }
958
959 /* st0 = (2^st0)-1
960 *
961 * Restrictions: -1.0 <= st0 <= 1.0
962 */
963 void x87_f2xm1( struct x86_function *p )
964 {
965 emit_2ub(p, 0xd9, 0xf0);
966 }
967
968 /* st1 = st1 * log2(st0);
969 * pop_stack;
970 */
971 void x87_fyl2x( struct x86_function *p )
972 {
973 emit_2ub(p, 0xd9, 0xf1);
974 }
975
976 /* st1 = st1 * log2(st0 + 1.0);
977 * pop_stack;
978 *
979 * A fast operation, with restrictions: -.29 < st0 < .29
980 */
981 void x87_fyl2xp1( struct x86_function *p )
982 {
983 emit_2ub(p, 0xd9, 0xf9);
984 }
985
986
987 void x87_fld( struct x86_function *p, struct x86_reg arg )
988 {
989 if (arg.file == file_x87)
990 emit_2ub(p, 0xd9, 0xc0 + arg.idx);
991 else {
992 emit_1ub(p, 0xd9);
993 emit_modrm_noreg(p, 0, arg);
994 }
995 }
996
997 void x87_fst( struct x86_function *p, struct x86_reg dst )
998 {
999 if (dst.file == file_x87)
1000 emit_2ub(p, 0xdd, 0xd0 + dst.idx);
1001 else {
1002 emit_1ub(p, 0xd9);
1003 emit_modrm_noreg(p, 2, dst);
1004 }
1005 }
1006
1007 void x87_fstp( struct x86_function *p, struct x86_reg dst )
1008 {
1009 if (dst.file == file_x87)
1010 emit_2ub(p, 0xdd, 0xd8 + dst.idx);
1011 else {
1012 emit_1ub(p, 0xd9);
1013 emit_modrm_noreg(p, 3, dst);
1014 }
1015 }
1016
1017 void x87_fcom( struct x86_function *p, struct x86_reg dst )
1018 {
1019 if (dst.file == file_x87)
1020 emit_2ub(p, 0xd8, 0xd0 + dst.idx);
1021 else {
1022 emit_1ub(p, 0xd8);
1023 emit_modrm_noreg(p, 2, dst);
1024 }
1025 }
1026
1027 void x87_fcomp( struct x86_function *p, struct x86_reg dst )
1028 {
1029 if (dst.file == file_x87)
1030 emit_2ub(p, 0xd8, 0xd8 + dst.idx);
1031 else {
1032 emit_1ub(p, 0xd8);
1033 emit_modrm_noreg(p, 3, dst);
1034 }
1035 }
1036
1037
1038 void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
1039 {
1040 assert(dst.file == file_REG32);
1041
1042 if (dst.idx == reg_AX &&
1043 dst.mod == mod_REG)
1044 emit_2ub(p, 0xdf, 0xe0);
1045 else {
1046 emit_1ub(p, 0xdd);
1047 emit_modrm_noreg(p, 7, dst);
1048 }
1049 }
1050
1051
1052
1053
1054 /***********************************************************************
1055 * MMX instructions
1056 */
1057
1058 void mmx_emms( struct x86_function *p )
1059 {
1060 assert(p->need_emms);
1061 emit_2ub(p, 0x0f, 0x77);
1062 p->need_emms = 0;
1063 }
1064
1065 void mmx_packssdw( struct x86_function *p,
1066 struct x86_reg dst,
1067 struct x86_reg src )
1068 {
1069 assert(dst.file == file_MMX &&
1070 (src.file == file_MMX || src.mod != mod_REG));
1071
1072 p->need_emms = 1;
1073
1074 emit_2ub(p, X86_TWOB, 0x6b);
1075 emit_modrm( p, dst, src );
1076 }
1077
1078 void mmx_packuswb( struct x86_function *p,
1079 struct x86_reg dst,
1080 struct x86_reg src )
1081 {
1082 assert(dst.file == file_MMX &&
1083 (src.file == file_MMX || src.mod != mod_REG));
1084
1085 p->need_emms = 1;
1086
1087 emit_2ub(p, X86_TWOB, 0x67);
1088 emit_modrm( p, dst, src );
1089 }
1090
1091 void mmx_movd( struct x86_function *p,
1092 struct x86_reg dst,
1093 struct x86_reg src )
1094 {
1095 p->need_emms = 1;
1096 emit_1ub(p, X86_TWOB);
1097 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1098 }
1099
1100 void mmx_movq( struct x86_function *p,
1101 struct x86_reg dst,
1102 struct x86_reg src )
1103 {
1104 p->need_emms = 1;
1105 emit_1ub(p, X86_TWOB);
1106 emit_op_modrm( p, 0x6f, 0x7f, dst, src );
1107 }
1108
1109
1110 /***********************************************************************
1111 * Helper functions
1112 */
1113
1114
1115 /* Retreive a reference to one of the function arguments, taking into
1116 * account any push/pop activity:
1117 */
1118 struct x86_reg x86_fn_arg( struct x86_function *p,
1119 GLuint arg )
1120 {
1121 return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
1122 p->stack_offset + arg * 4); /* ??? */
1123 }
1124
1125
1126 void x86_init_func( struct x86_function *p )
1127 {
1128 x86_init_func_size(p, 1024);
1129 }
1130
1131 void x86_init_func_size( struct x86_function *p, GLuint code_size )
1132 {
1133 p->store = _mesa_exec_malloc(code_size);
1134 p->csr = p->store;
1135 }
1136
1137 void x86_release_func( struct x86_function *p )
1138 {
1139 _mesa_exec_free(p->store);
1140 }
1141
1142
1143 void (*x86_get_func( struct x86_function *p ))(void)
1144 {
1145 if (DISASSEM)
1146 _mesa_printf("disassemble %p %p\n", p->store, p->csr);
1147 return (void (*)(void)) (unsigned long) p->store;
1148 }
1149
1150 #else
1151
/* Placeholder compiled on non-x86 targets so that this translation
 * unit is never empty.
 */
void x86sse_dummy( void )
{
   return;
}
1155
1156 #endif