More GLSL code:
[mesa.git] / src / mesa / x86 / rtasm / x86sse.c
1 #if defined(USE_X86_ASM) || defined(SLANG_X86)
2
3 #include "imports.h"
4 #include "x86sse.h"
5
/* Debug switch: when non-zero, x86_get_func() prints the start and end
 * addresses of the generated code.
 */
#define DISASSEM 0
/* Escape byte that introduces the two-byte opcodes emitted below. */
#define X86_TWOB 0x0f
8
9 /* Emit bytes to the instruction stream:
10 */
11 static void emit_1b( struct x86_function *p, GLbyte b0 )
12 {
13 *(GLbyte *)(p->csr++) = b0;
14 }
15
16 static void emit_1i( struct x86_function *p, GLint i0 )
17 {
18 *(GLint *)(p->csr) = i0;
19 p->csr += 4;
20 }
21
/* Debug aid: print the current emit address and the name of the emitting
 * function whenever that name changes.  The body is currently compiled
 * out ("&& 0"), so this is a no-op.
 *
 * p  - function being assembled; p->fn remembers the last name printed.
 * fn - name of the emitting function, or NULL to stay silent.
 */
static void disassem( struct x86_function *p, const char *fn )
{
#if DISASSEM && 0
   if (fn && fn != p->fn) {
      /* p->csr is a pointer: print it with %p, as x86_get_func() does. */
      _mesa_printf("%p: %s\n", (void *) p->csr, fn);
      p->fn = fn;
   }
#else
   (void) p;
   (void) fn;
#endif
}
31
32 static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn )
33 {
34 disassem(p, fn);
35 *(p->csr++) = b0;
36 }
37
38 static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn )
39 {
40 disassem(p, fn);
41 *(p->csr++) = b0;
42 *(p->csr++) = b1;
43 }
44
45 static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
46 {
47 disassem(p, fn);
48 *(p->csr++) = b0;
49 *(p->csr++) = b1;
50 *(p->csr++) = b2;
51 }
52
/* Convenience wrappers that pass the name of the calling function
 * (__FUNCTION__) to the emit_Nub_fn() helpers for disassem().
 */
#define emit_1ub(p, b0) emit_1ub_fn(p, b0, __FUNCTION__)
#define emit_2ub(p, b0, b1) emit_2ub_fn(p, b0, b1, __FUNCTION__)
#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__)
56
57
58
59 /* Build a modRM byte + possible displacement. No treatment of SIB
60 * indexing. BZZT - no way to encode an absolute address.
61 */
62 static void emit_modrm( struct x86_function *p,
63 struct x86_reg reg,
64 struct x86_reg regmem )
65 {
66 GLubyte val = 0;
67
68 assert(reg.mod == mod_REG);
69
70 val |= regmem.mod << 6; /* mod field */
71 val |= reg.idx << 3; /* reg field */
72 val |= regmem.idx; /* r/m field */
73
74 emit_1ub_fn(p, val, 0);
75
76 /* Oh-oh we've stumbled into the SIB thing.
77 */
78 if (regmem.file == file_REG32 &&
79 regmem.idx == reg_SP) {
80 emit_1ub_fn(p, 0x24, 0); /* simplistic! */
81 }
82
83 switch (regmem.mod) {
84 case mod_REG:
85 case mod_INDIRECT:
86 break;
87 case mod_DISP8:
88 emit_1b(p, regmem.disp);
89 break;
90 case mod_DISP32:
91 emit_1i(p, regmem.disp);
92 break;
93 default:
94 assert(0);
95 break;
96 }
97 }
98
99
100 static void emit_modrm_noreg( struct x86_function *p,
101 GLuint op,
102 struct x86_reg regmem )
103 {
104 struct x86_reg dummy = x86_make_reg(file_REG32, op);
105 emit_modrm(p, dummy, regmem);
106 }
107
108 /* Many x86 instructions have two opcodes to cope with the situations
109 * where the destination is a register or memory reference
110 * respectively. This function selects the correct opcode based on
111 * the arguments presented.
112 */
113 static void emit_op_modrm( struct x86_function *p,
114 GLubyte op_dst_is_reg,
115 GLubyte op_dst_is_mem,
116 struct x86_reg dst,
117 struct x86_reg src )
118 {
119 switch (dst.mod) {
120 case mod_REG:
121 emit_1ub_fn(p, op_dst_is_reg, 0);
122 emit_modrm(p, dst, src);
123 break;
124 case mod_INDIRECT:
125 case mod_DISP32:
126 case mod_DISP8:
127 assert(src.mod == mod_REG);
128 emit_1ub_fn(p, op_dst_is_mem, 0);
129 emit_modrm(p, src, dst);
130 break;
131 default:
132 assert(0);
133 break;
134 }
135 }
136
137
138
139
140
141
142
143 /* Create and manipulate registers and regmem values:
144 */
145 struct x86_reg x86_make_reg( enum x86_reg_file file,
146 enum x86_reg_name idx )
147 {
148 struct x86_reg reg;
149
150 reg.file = file;
151 reg.idx = idx;
152 reg.mod = mod_REG;
153 reg.disp = 0;
154
155 return reg;
156 }
157
158 struct x86_reg x86_make_disp( struct x86_reg reg,
159 GLint disp )
160 {
161 assert(reg.file == file_REG32);
162
163 if (reg.mod == mod_REG)
164 reg.disp = disp;
165 else
166 reg.disp += disp;
167
168 if (reg.disp == 0)
169 reg.mod = mod_INDIRECT;
170 else if (reg.disp <= 127 && reg.disp >= -128)
171 reg.mod = mod_DISP8;
172 else
173 reg.mod = mod_DISP32;
174
175 return reg;
176 }
177
178 struct x86_reg x86_deref( struct x86_reg reg )
179 {
180 return x86_make_disp(reg, 0);
181 }
182
183 struct x86_reg x86_get_base_reg( struct x86_reg reg )
184 {
185 return x86_make_reg( reg.file, reg.idx );
186 }
187
188 GLubyte *x86_get_label( struct x86_function *p )
189 {
190 return p->csr;
191 }
192
193
194
195 /***********************************************************************
196 * x86 instructions
197 */
198
199
200 void x86_jcc( struct x86_function *p,
201 enum x86_cc cc,
202 GLubyte *label )
203 {
204 GLint offset = label - (x86_get_label(p) + 2);
205
206 if (offset <= 127 && offset >= -128) {
207 emit_1ub(p, 0x70 + cc);
208 emit_1b(p, (GLbyte) offset);
209 }
210 else {
211 offset = label - (x86_get_label(p) + 6);
212 emit_2ub(p, 0x0f, 0x80 + cc);
213 emit_1i(p, offset);
214 }
215 }
216
217 /* Always use a 32bit offset for forward jumps:
218 */
219 GLubyte *x86_jcc_forward( struct x86_function *p,
220 enum x86_cc cc )
221 {
222 emit_2ub(p, 0x0f, 0x80 + cc);
223 emit_1i(p, 0);
224 return x86_get_label(p);
225 }
226
227 GLubyte *x86_jmp_forward( struct x86_function *p)
228 {
229 emit_1ub(p, 0xe9);
230 emit_1i(p, 0);
231 return x86_get_label(p);
232 }
233
234 GLubyte *x86_call_forward( struct x86_function *p)
235 {
236 emit_1ub(p, 0xe8);
237 emit_1i(p, 0);
238 return x86_get_label(p);
239 }
240
241 /* Fixup offset from forward jump:
242 */
243 void x86_fixup_fwd_jump( struct x86_function *p,
244 GLubyte *fixup )
245 {
246 *(int *)(fixup - 4) = x86_get_label(p) - fixup;
247 }
248
249 void x86_jmp( struct x86_function *p, GLubyte *label)
250 {
251 emit_1ub(p, 0xe9);
252 emit_1i(p, label - x86_get_label(p) - 4);
253 }
254
255 void x86_call( struct x86_function *p, GLubyte *label)
256 {
257 emit_1ub(p, 0xe8);
258 emit_1i(p, label - x86_get_label(p) - 4);
259 }
260
261 /* michal:
262 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
263 * I load the immediate into general purpose register and use it.
264 */
265 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm )
266 {
267 assert(dst.mod == mod_REG);
268 emit_1ub(p, 0xb8 + dst.idx);
269 emit_1i(p, imm);
270 }
271
272 void x86_push( struct x86_function *p,
273 struct x86_reg reg )
274 {
275 assert(reg.mod == mod_REG);
276 emit_1ub(p, 0x50 + reg.idx);
277 p->stack_offset += 4;
278 }
279
280 void x86_pop( struct x86_function *p,
281 struct x86_reg reg )
282 {
283 assert(reg.mod == mod_REG);
284 emit_1ub(p, 0x58 + reg.idx);
285 p->stack_offset -= 4;
286 }
287
288 void x86_inc( struct x86_function *p,
289 struct x86_reg reg )
290 {
291 assert(reg.mod == mod_REG);
292 emit_1ub(p, 0x40 + reg.idx);
293 }
294
295 void x86_dec( struct x86_function *p,
296 struct x86_reg reg )
297 {
298 assert(reg.mod == mod_REG);
299 emit_1ub(p, 0x48 + reg.idx);
300 }
301
/* RET (0xc3). */
void x86_ret(struct x86_function *p)
{
   emit_1ub(p, 0xc3);
}
306
/* SAHF (0x9e). */
void x86_sahf(struct x86_function *p)
{
   emit_1ub(p, 0x9e);
}
311
312 void x86_mov( struct x86_function *p,
313 struct x86_reg dst,
314 struct x86_reg src )
315 {
316 emit_op_modrm( p, 0x8b, 0x89, dst, src );
317 }
318
319 void x86_xor( struct x86_function *p,
320 struct x86_reg dst,
321 struct x86_reg src )
322 {
323 emit_op_modrm( p, 0x33, 0x31, dst, src );
324 }
325
326 void x86_cmp( struct x86_function *p,
327 struct x86_reg dst,
328 struct x86_reg src )
329 {
330 emit_op_modrm( p, 0x3b, 0x39, dst, src );
331 }
332
333 void x86_lea( struct x86_function *p,
334 struct x86_reg dst,
335 struct x86_reg src )
336 {
337 emit_1ub(p, 0x8d);
338 emit_modrm( p, dst, src );
339 }
340
341 void x86_test( struct x86_function *p,
342 struct x86_reg dst,
343 struct x86_reg src )
344 {
345 emit_1ub(p, 0x85);
346 emit_modrm( p, dst, src );
347 }
348
349 void x86_add( struct x86_function *p,
350 struct x86_reg dst,
351 struct x86_reg src )
352 {
353 emit_op_modrm(p, 0x03, 0x01, dst, src );
354 }
355
356 void x86_mul( struct x86_function *p,
357 struct x86_reg src )
358 {
359 assert (src.file == file_REG32 && src.mod == mod_REG);
360 emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
361 }
362
363 void x86_sub( struct x86_function *p,
364 struct x86_reg dst,
365 struct x86_reg src )
366 {
367 emit_op_modrm(p, 0x2b, 0x29, dst, src );
368 }
369
370
371
372 /***********************************************************************
373 * SSE instructions
374 */
375
376
377 void sse_movss( struct x86_function *p,
378 struct x86_reg dst,
379 struct x86_reg src )
380 {
381 emit_2ub(p, 0xF3, X86_TWOB);
382 emit_op_modrm( p, 0x10, 0x11, dst, src );
383 }
384
385 void sse_movaps( struct x86_function *p,
386 struct x86_reg dst,
387 struct x86_reg src )
388 {
389 emit_1ub(p, X86_TWOB);
390 emit_op_modrm( p, 0x28, 0x29, dst, src );
391 }
392
393 void sse_movups( struct x86_function *p,
394 struct x86_reg dst,
395 struct x86_reg src )
396 {
397 emit_1ub(p, X86_TWOB);
398 emit_op_modrm( p, 0x10, 0x11, dst, src );
399 }
400
401 void sse_movhps( struct x86_function *p,
402 struct x86_reg dst,
403 struct x86_reg src )
404 {
405 assert(dst.mod != mod_REG || src.mod != mod_REG);
406 emit_1ub(p, X86_TWOB);
407 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
408 }
409
410 void sse_movlps( struct x86_function *p,
411 struct x86_reg dst,
412 struct x86_reg src )
413 {
414 assert(dst.mod != mod_REG || src.mod != mod_REG);
415 emit_1ub(p, X86_TWOB);
416 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
417 }
418
419 void sse_maxps( struct x86_function *p,
420 struct x86_reg dst,
421 struct x86_reg src )
422 {
423 emit_2ub(p, X86_TWOB, 0x5F);
424 emit_modrm( p, dst, src );
425 }
426
427 void sse_divss( struct x86_function *p,
428 struct x86_reg dst,
429 struct x86_reg src )
430 {
431 emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
432 emit_modrm( p, dst, src );
433 }
434
435 void sse_minps( struct x86_function *p,
436 struct x86_reg dst,
437 struct x86_reg src )
438 {
439 emit_2ub(p, X86_TWOB, 0x5D);
440 emit_modrm( p, dst, src );
441 }
442
443 void sse_subps( struct x86_function *p,
444 struct x86_reg dst,
445 struct x86_reg src )
446 {
447 emit_2ub(p, X86_TWOB, 0x5C);
448 emit_modrm( p, dst, src );
449 }
450
451 void sse_mulps( struct x86_function *p,
452 struct x86_reg dst,
453 struct x86_reg src )
454 {
455 emit_2ub(p, X86_TWOB, 0x59);
456 emit_modrm( p, dst, src );
457 }
458
459 void sse_addps( struct x86_function *p,
460 struct x86_reg dst,
461 struct x86_reg src )
462 {
463 emit_2ub(p, X86_TWOB, 0x58);
464 emit_modrm( p, dst, src );
465 }
466
467 void sse_addss( struct x86_function *p,
468 struct x86_reg dst,
469 struct x86_reg src )
470 {
471 emit_3ub(p, 0xF3, X86_TWOB, 0x58);
472 emit_modrm( p, dst, src );
473 }
474
475 void sse_andps( struct x86_function *p,
476 struct x86_reg dst,
477 struct x86_reg src )
478 {
479 emit_2ub(p, X86_TWOB, 0x54);
480 emit_modrm( p, dst, src );
481 }
482
483
484 void sse_rsqrtss( struct x86_function *p,
485 struct x86_reg dst,
486 struct x86_reg src )
487 {
488 emit_3ub(p, 0xF3, X86_TWOB, 0x52);
489 emit_modrm( p, dst, src );
490
491 }
492
493 void sse_movhlps( struct x86_function *p,
494 struct x86_reg dst,
495 struct x86_reg src )
496 {
497 assert(dst.mod == mod_REG && src.mod == mod_REG);
498 emit_2ub(p, X86_TWOB, 0x12);
499 emit_modrm( p, dst, src );
500 }
501
502 void sse_movlhps( struct x86_function *p,
503 struct x86_reg dst,
504 struct x86_reg src )
505 {
506 assert(dst.mod == mod_REG && src.mod == mod_REG);
507 emit_2ub(p, X86_TWOB, 0x16);
508 emit_modrm( p, dst, src );
509 }
510
511
512 void sse_cvtps2pi( struct x86_function *p,
513 struct x86_reg dst,
514 struct x86_reg src )
515 {
516 assert(dst.file == file_MMX &&
517 (src.file == file_XMM || src.mod != mod_REG));
518
519 p->need_emms = 1;
520
521 emit_2ub(p, X86_TWOB, 0x2d);
522 emit_modrm( p, dst, src );
523 }
524
525
526 /* Shufps can also be used to implement a reduced swizzle when dest ==
527 * arg0.
528 */
529 void sse_shufps( struct x86_function *p,
530 struct x86_reg dest,
531 struct x86_reg arg0,
532 GLubyte shuf)
533 {
534 emit_2ub(p, X86_TWOB, 0xC6);
535 emit_modrm(p, dest, arg0);
536 emit_1ub(p, shuf);
537 }
538
539 void sse_cmpps( struct x86_function *p,
540 struct x86_reg dest,
541 struct x86_reg arg0,
542 GLubyte cc)
543 {
544 emit_2ub(p, X86_TWOB, 0xC2);
545 emit_modrm(p, dest, arg0);
546 emit_1ub(p, cc);
547 }
548
549 /***********************************************************************
550 * SSE2 instructions
551 */
552
553 /**
554 * Perform a reduced swizzle:
555 */
556 void sse2_pshufd( struct x86_function *p,
557 struct x86_reg dest,
558 struct x86_reg arg0,
559 GLubyte shuf)
560 {
561 emit_3ub(p, 0x66, X86_TWOB, 0x70);
562 emit_modrm(p, dest, arg0);
563 emit_1ub(p, shuf);
564 }
565
566 void sse2_cvtps2dq( struct x86_function *p,
567 struct x86_reg dst,
568 struct x86_reg src )
569 {
570 emit_3ub(p, 0x66, X86_TWOB, 0x5B);
571 emit_modrm( p, dst, src );
572 }
573
574 void sse2_packssdw( struct x86_function *p,
575 struct x86_reg dst,
576 struct x86_reg src )
577 {
578 emit_3ub(p, 0x66, X86_TWOB, 0x6B);
579 emit_modrm( p, dst, src );
580 }
581
582 void sse2_packsswb( struct x86_function *p,
583 struct x86_reg dst,
584 struct x86_reg src )
585 {
586 emit_3ub(p, 0x66, X86_TWOB, 0x63);
587 emit_modrm( p, dst, src );
588 }
589
590 void sse2_packuswb( struct x86_function *p,
591 struct x86_reg dst,
592 struct x86_reg src )
593 {
594 emit_3ub(p, 0x66, X86_TWOB, 0x67);
595 emit_modrm( p, dst, src );
596 }
597
598 void sse2_rcpss( struct x86_function *p,
599 struct x86_reg dst,
600 struct x86_reg src )
601 {
602 emit_3ub(p, 0xF3, X86_TWOB, 0x53);
603 emit_modrm( p, dst, src );
604 }
605
606 void sse2_movd( struct x86_function *p,
607 struct x86_reg dst,
608 struct x86_reg src )
609 {
610 emit_2ub(p, 0x66, X86_TWOB);
611 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
612 }
613
614
615
616
617 /***********************************************************************
618 * x87 instructions
619 */
620 void x87_fist( struct x86_function *p, struct x86_reg dst )
621 {
622 emit_1ub(p, 0xdb);
623 emit_modrm_noreg(p, 2, dst);
624 }
625
626 void x87_fistp( struct x86_function *p, struct x86_reg dst )
627 {
628 emit_1ub(p, 0xdb);
629 emit_modrm_noreg(p, 3, dst);
630 }
631
/* FLDZ (D9 EE). */
void x87_fldz(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xee);
}
636
637
638 void x87_fldcw( struct x86_function *p, struct x86_reg arg )
639 {
640 assert(arg.file == file_REG32);
641 assert(arg.mod != mod_REG);
642 emit_1ub(p, 0xd9);
643 emit_modrm_noreg(p, 5, arg);
644 }
645
/* FLD1 (D9 E8). */
void x87_fld1(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xe8);
}
650
/* FLDL2E (D9 EA). */
void x87_fldl2e(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xea);
}
655
/* FLDLN2 (D9 ED). */
void x87_fldln2(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xed);
}
660
/* FWAIT (9B). */
void x87_fwait(struct x86_function *p)
{
   emit_1ub(p, 0x9b);
}
665
/* FNCLEX (DB E2). */
void x87_fnclex(struct x86_function *p)
{
   emit_2ub(p, 0xdb, 0xe2);
}
670
/* FCLEX: an FWAIT (9B) immediately followed by FNCLEX (DB E2). */
void x87_fclex(struct x86_function *p)
{
   emit_1ub(p, 0x9b);            /* fwait */
   emit_2ub(p, 0xdb, 0xe2);      /* fnclex */
}
676
677
678 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
679 GLubyte dst0ub0,
680 GLubyte dst0ub1,
681 GLubyte arg0ub0,
682 GLubyte arg0ub1,
683 GLubyte argmem_noreg)
684 {
685 assert(dst.file == file_x87);
686
687 if (arg.file == file_x87) {
688 if (dst.idx == 0)
689 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
690 else if (arg.idx == 0)
691 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
692 else
693 assert(0);
694 }
695 else if (dst.idx == 0) {
696 assert(arg.file = file_REG32);
697 emit_1ub(p, 0xd8);
698 emit_modrm_noreg(p, argmem_noreg, arg);
699 }
700 else
701 assert(0);
702 }
703
704 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
705 {
706 x87_arith_op(p, dst, arg,
707 0xd8, 0xc8,
708 0xdc, 0xc8,
709 4);
710 }
711
712 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
713 {
714 x87_arith_op(p, dst, arg,
715 0xd8, 0xe0,
716 0xdc, 0xe8,
717 4);
718 }
719
720 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
721 {
722 x87_arith_op(p, dst, arg,
723 0xd8, 0xe8,
724 0xdc, 0xe0,
725 5);
726 }
727
728 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
729 {
730 x87_arith_op(p, dst, arg,
731 0xd8, 0xc0,
732 0xdc, 0xc0,
733 0);
734 }
735
736 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
737 {
738 x87_arith_op(p, dst, arg,
739 0xd8, 0xf0,
740 0xdc, 0xf8,
741 6);
742 }
743
744 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
745 {
746 x87_arith_op(p, dst, arg,
747 0xd8, 0xf8,
748 0xdc, 0xf0,
749 7);
750 }
751
752 void x87_fmulp( struct x86_function *p, struct x86_reg dst )
753 {
754 assert(dst.file == file_x87);
755 assert(dst.idx >= 1);
756 emit_2ub(p, 0xde, 0xc8+dst.idx);
757 }
758
759 void x87_fsubp( struct x86_function *p, struct x86_reg dst )
760 {
761 assert(dst.file == file_x87);
762 assert(dst.idx >= 1);
763 emit_2ub(p, 0xde, 0xe8+dst.idx);
764 }
765
766 void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
767 {
768 assert(dst.file == file_x87);
769 assert(dst.idx >= 1);
770 emit_2ub(p, 0xde, 0xe0+dst.idx);
771 }
772
773 void x87_faddp( struct x86_function *p, struct x86_reg dst )
774 {
775 assert(dst.file == file_x87);
776 assert(dst.idx >= 1);
777 emit_2ub(p, 0xde, 0xc0+dst.idx);
778 }
779
780 void x87_fdivp( struct x86_function *p, struct x86_reg dst )
781 {
782 assert(dst.file == file_x87);
783 assert(dst.idx >= 1);
784 emit_2ub(p, 0xde, 0xf8+dst.idx);
785 }
786
787 void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
788 {
789 assert(dst.file == file_x87);
790 assert(dst.idx >= 1);
791 emit_2ub(p, 0xde, 0xf0+dst.idx);
792 }
793
794 void x87_fucom( struct x86_function *p, struct x86_reg arg )
795 {
796 assert(arg.file == file_x87);
797 emit_2ub(p, 0xdd, 0xe0+arg.idx);
798 }
799
800 void x87_fucomp( struct x86_function *p, struct x86_reg arg )
801 {
802 assert(arg.file == file_x87);
803 emit_2ub(p, 0xdd, 0xe8+arg.idx);
804 }
805
/* FUCOMPP (DA E9). */
void x87_fucompp(struct x86_function *p)
{
   emit_2ub(p, 0xda, 0xe9);
}
810
811 void x87_fxch( struct x86_function *p, struct x86_reg arg )
812 {
813 assert(arg.file == file_x87);
814 emit_2ub(p, 0xd9, 0xc8+arg.idx);
815 }
816
/* FABS (D9 E1). */
void x87_fabs(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xe1);
}
821
/* FCHS (D9 E0). */
void x87_fchs(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xe0);
}
826
/* FCOS (D9 FF). */
void x87_fcos(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xff);
}
831
832
/* FRNDINT (D9 FC). */
void x87_fprndint(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfc);
}
837
/* FSCALE (D9 FD). */
void x87_fscale(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfd);
}
842
/* FSIN (D9 FE). */
void x87_fsin(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfe);
}
847
/* FSINCOS (D9 FB). */
void x87_fsincos(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfb);
}
852
/* FSQRT (D9 FA). */
void x87_fsqrt(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfa);
}
857
/* FXTRACT (D9 F4). */
void x87_fxtract(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf4);
}
862
/* F2XM1 (D9 F0): st0 = (2^st0) - 1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf0);
}
871
/* FYL2X (D9 F1): st1 = st1 * log2(st0), then pop the stack. */
void x87_fyl2x(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf1);
}
879
/* FYL2XP1 (D9 F9): st1 = st1 * log2(st0 + 1.0), then pop the stack.
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf9);
}
889
890
891 void x87_fld( struct x86_function *p, struct x86_reg arg )
892 {
893 if (arg.file == file_x87)
894 emit_2ub(p, 0xd9, 0xc0 + arg.idx);
895 else {
896 emit_1ub(p, 0xd9);
897 emit_modrm_noreg(p, 0, arg);
898 }
899 }
900
901 void x87_fst( struct x86_function *p, struct x86_reg dst )
902 {
903 if (dst.file == file_x87)
904 emit_2ub(p, 0xdd, 0xd0 + dst.idx);
905 else {
906 emit_1ub(p, 0xd9);
907 emit_modrm_noreg(p, 2, dst);
908 }
909 }
910
911 void x87_fstp( struct x86_function *p, struct x86_reg dst )
912 {
913 if (dst.file == file_x87)
914 emit_2ub(p, 0xdd, 0xd8 + dst.idx);
915 else {
916 emit_1ub(p, 0xd9);
917 emit_modrm_noreg(p, 3, dst);
918 }
919 }
920
921 void x87_fcom( struct x86_function *p, struct x86_reg dst )
922 {
923 if (dst.file == file_x87)
924 emit_2ub(p, 0xd8, 0xd0 + dst.idx);
925 else {
926 emit_1ub(p, 0xd8);
927 emit_modrm_noreg(p, 2, dst);
928 }
929 }
930
931 void x87_fcomp( struct x86_function *p, struct x86_reg dst )
932 {
933 if (dst.file == file_x87)
934 emit_2ub(p, 0xd8, 0xd8 + dst.idx);
935 else {
936 emit_1ub(p, 0xd8);
937 emit_modrm_noreg(p, 3, dst);
938 }
939 }
940
941
942 void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
943 {
944 assert(dst.file == file_REG32);
945
946 if (dst.idx == reg_AX &&
947 dst.mod == mod_REG)
948 emit_2ub(p, 0xdf, 0xe0);
949 else {
950 emit_1ub(p, 0xdd);
951 emit_modrm_noreg(p, 7, dst);
952 }
953 }
954
955
956
957
958 /***********************************************************************
959 * MMX instructions
960 */
961
962 void mmx_emms( struct x86_function *p )
963 {
964 assert(p->need_emms);
965 emit_2ub(p, 0x0f, 0x77);
966 p->need_emms = 0;
967 }
968
969 void mmx_packssdw( struct x86_function *p,
970 struct x86_reg dst,
971 struct x86_reg src )
972 {
973 assert(dst.file == file_MMX &&
974 (src.file == file_MMX || src.mod != mod_REG));
975
976 p->need_emms = 1;
977
978 emit_2ub(p, X86_TWOB, 0x6b);
979 emit_modrm( p, dst, src );
980 }
981
982 void mmx_packuswb( struct x86_function *p,
983 struct x86_reg dst,
984 struct x86_reg src )
985 {
986 assert(dst.file == file_MMX &&
987 (src.file == file_MMX || src.mod != mod_REG));
988
989 p->need_emms = 1;
990
991 emit_2ub(p, X86_TWOB, 0x67);
992 emit_modrm( p, dst, src );
993 }
994
995 void mmx_movd( struct x86_function *p,
996 struct x86_reg dst,
997 struct x86_reg src )
998 {
999 p->need_emms = 1;
1000 emit_1ub(p, X86_TWOB);
1001 emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1002 }
1003
1004 void mmx_movq( struct x86_function *p,
1005 struct x86_reg dst,
1006 struct x86_reg src )
1007 {
1008 p->need_emms = 1;
1009 emit_1ub(p, X86_TWOB);
1010 emit_op_modrm( p, 0x6f, 0x7f, dst, src );
1011 }
1012
1013
1014 /***********************************************************************
1015 * Helper functions
1016 */
1017
1018
1019 /* Retreive a reference to one of the function arguments, taking into
1020 * account any push/pop activity:
1021 */
1022 struct x86_reg x86_fn_arg( struct x86_function *p,
1023 GLuint arg )
1024 {
1025 return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
1026 p->stack_offset + arg * 4); /* ??? */
1027 }
1028
1029
1030 void x86_init_func( struct x86_function *p )
1031 {
1032 x86_init_func_size(p, 1024);
1033 }
1034
1035 void x86_init_func_size( struct x86_function *p, GLuint code_size )
1036 {
1037 p->store = _mesa_exec_malloc(code_size);
1038 p->csr = p->store;
1039 }
1040
1041 void x86_release_func( struct x86_function *p )
1042 {
1043 _mesa_exec_free(p->store);
1044 }
1045
1046
1047 void (*x86_get_func( struct x86_function *p ))(void)
1048 {
1049 if (DISASSEM)
1050 _mesa_printf("disassemble %p %p\n", p->store, p->csr);
1051 return (void (*)(void))p->store;
1052 }
1053
1054 #else
1055
/* Placeholder defined when the x86 runtime assembler is compiled out. */
void x86sse_dummy(void)
{
   /* intentionally empty */
}
1059
1060 #endif