More GLSL code - more support for vec4 operations on x86 back-end.
[mesa.git] / src / mesa / x86 / rtasm / x86sse.c
#if defined(USE_X86_ASM) || defined(SLANG_X86)

#include "imports.h"
#include "x86sse.h"

#define DISASSEM 0
#define X86_TWOB 0x0f

/* Emit bytes to the instruction stream:
 */
static void emit_1b( struct x86_function *p, GLbyte b0 )
{
   *(GLbyte *)(p->csr++) = b0;
}

static void emit_1i( struct x86_function *p, GLint i0 )
{
   *(GLint *)(p->csr) = i0;
   p->csr += 4;
}

static void disassem( struct x86_function *p, const char *fn )
{
#if DISASSEM && 0
   if (fn && fn != p->fn) {
      _mesa_printf("0x%x: %s\n", p->csr, fn);
      p->fn = fn;
   }
#endif
}

static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn )
{
   disassem(p, fn);
   *(p->csr++) = b0;
}

static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn )
{
   disassem(p, fn);
   *(p->csr++) = b0;
   *(p->csr++) = b1;
}

static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
{
   disassem(p, fn);
   *(p->csr++) = b0;
   *(p->csr++) = b1;
   *(p->csr++) = b2;
}

#define emit_1ub(p, b0)         emit_1ub_fn(p, b0, __FUNCTION__)
#define emit_2ub(p, b0, b1)     emit_2ub_fn(p, b0, b1, __FUNCTION__)
#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__)



/* Build a modRM byte + possible displacement.  No treatment of SIB
 * indexing.  BZZT - no way to encode an absolute address.
 */
static void emit_modrm( struct x86_function *p,
                        struct x86_reg reg,
                        struct x86_reg regmem )
{
   GLubyte val = 0;

   assert(reg.mod == mod_REG);

   val |= regmem.mod << 6;      /* mod field */
   val |= reg.idx << 3;         /* reg field */
   val |= regmem.idx;           /* r/m field */

   emit_1ub_fn(p, val, 0);

   /* Oh-oh we've stumbled into the SIB thing.
    */
   if (regmem.file == file_REG32 &&
       regmem.idx == reg_SP) {
      emit_1ub_fn(p, 0x24, 0);  /* simplistic! */
   }

   switch (regmem.mod) {
   case mod_REG:
   case mod_INDIRECT:
      break;
   case mod_DISP8:
      emit_1b(p, regmem.disp);
      break;
   case mod_DISP32:
      emit_1i(p, regmem.disp);
      break;
   default:
      assert(0);
      break;
   }
}
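/* For example (illustrative), a load of EAX from [ESP+4] produced with
 * these routines comes out as
 *
 *    8B 44 24 04          mov eax, [esp+4]
 *
 * where 0x44 is the ModRM byte (mod 01 = disp8, reg 000 = eax,
 * r/m 100 = esp) and 0x24 is the SIB byte added by the special case
 * above for ESP-based addressing.
 */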


static void emit_modrm_noreg( struct x86_function *p,
                              GLuint op,
                              struct x86_reg regmem )
{
   struct x86_reg dummy = x86_make_reg(file_REG32, op);
   emit_modrm(p, dummy, regmem);
}

/* Many x86 instructions have two opcodes to cope with the situations
 * where the destination is a register or memory reference
 * respectively.  This function selects the correct opcode based on
 * the arguments presented.
 */
static void emit_op_modrm( struct x86_function *p,
                           GLubyte op_dst_is_reg,
                           GLubyte op_dst_is_mem,
                           struct x86_reg dst,
                           struct x86_reg src )
{
   switch (dst.mod) {
   case mod_REG:
      emit_1ub_fn(p, op_dst_is_reg, 0);
      emit_modrm(p, dst, src);
      break;
   case mod_INDIRECT:
   case mod_DISP32:
   case mod_DISP8:
      assert(src.mod == mod_REG);
      emit_1ub_fn(p, op_dst_is_mem, 0);
      emit_modrm(p, src, dst);
      break;
   default:
      assert(0);
      break;
   }
}
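/* Worked example (illustrative): with ECX holding a pointer,
 *
 *    x86_mov(p, eax, [ecx+8])   emits   8B 41 08    (dst is a register)
 *    x86_mov(p, [ecx+8], eax)   emits   89 41 08    (dst is memory)
 *
 * Both directions share the ModRM byte 0x41 (mod 01 = disp8,
 * reg 000 = eax, r/m 001 = ecx) followed by the 8-bit displacement;
 * only the opcode differs.
 */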




/* Create and manipulate registers and regmem values:
 */
struct x86_reg x86_make_reg( enum x86_reg_file file,
                             enum x86_reg_name idx )
{
   struct x86_reg reg;

   reg.file = file;
   reg.idx = idx;
   reg.mod = mod_REG;
   reg.disp = 0;

   return reg;
}

struct x86_reg x86_make_disp( struct x86_reg reg,
                              GLint disp )
{
   assert(reg.file == file_REG32);

   if (reg.mod == mod_REG)
      reg.disp = disp;
   else
      reg.disp += disp;

   if (reg.disp == 0)
      reg.mod = mod_INDIRECT;
   else if (reg.disp <= 127 && reg.disp >= -128)
      reg.mod = mod_DISP8;
   else
      reg.mod = mod_DISP32;

   return reg;
}
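/* Examples (illustrative): with eax = x86_make_reg(file_REG32, reg_AX),
 *
 *    x86_make_disp(eax, 0)       ->  mod_INDIRECT    [eax]
 *    x86_make_disp(eax, 8)       ->  mod_DISP8       [eax+8]
 *    x86_make_disp(eax, 0x200)   ->  mod_DISP32      [eax+0x200]
 */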

struct x86_reg x86_deref( struct x86_reg reg )
{
   return x86_make_disp(reg, 0);
}

struct x86_reg x86_get_base_reg( struct x86_reg reg )
{
   return x86_make_reg( reg.file, reg.idx );
}

GLubyte *x86_get_label( struct x86_function *p )
{
   return p->csr;
}



/***********************************************************************
 * x86 instructions
 */


void x86_jcc( struct x86_function *p,
              enum x86_cc cc,
              GLubyte *label )
{
   GLint offset = label - (x86_get_label(p) + 2);

   if (offset <= 127 && offset >= -128) {
      emit_1ub(p, 0x70 + cc);
      emit_1b(p, (GLbyte) offset);
   }
   else {
      offset = label - (x86_get_label(p) + 6);
      emit_2ub(p, 0x0f, 0x80 + cc);
      emit_1i(p, offset);
   }
}

/* Always use a 32bit offset for forward jumps:
 */
GLubyte *x86_jcc_forward( struct x86_function *p,
                          enum x86_cc cc )
{
   emit_2ub(p, 0x0f, 0x80 + cc);
   emit_1i(p, 0);
   return x86_get_label(p);
}

GLubyte *x86_jmp_forward( struct x86_function *p)
{
   emit_1ub(p, 0xe9);
   emit_1i(p, 0);
   return x86_get_label(p);
}

GLubyte *x86_call_forward( struct x86_function *p)
{
   emit_1ub(p, 0xe8);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Fixup offset from forward jump:
 */
void x86_fixup_fwd_jump( struct x86_function *p,
                         GLubyte *fixup )
{
   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
}
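/* Typical usage (sketch, assuming cc_E names the equal/zero condition in
 * x86sse.h):
 *
 *    GLubyte *fixup = x86_jcc_forward(p, cc_E);
 *    ... emit the code that is skipped when the condition holds ...
 *    x86_fixup_fwd_jump(p, fixup);       -- the branch lands here
 */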

void x86_jmp( struct x86_function *p, GLubyte *label)
{
   emit_1ub(p, 0xe9);
   emit_1i(p, label - x86_get_label(p) - 4);
}

void x86_call( struct x86_function *p, GLubyte *label)
{
   emit_1ub(p, 0xe8);
   emit_1i(p, label - x86_get_label(p) - 4);
}

/* michal:
 * Temporary.  As I need immediate operands and don't want to mess with
 * the codegen, I load the immediate into a general-purpose register and
 * use that.
 */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm )
{
   assert(dst.mod == mod_REG);
   emit_1ub(p, 0xb8 + dst.idx);
   emit_1i(p, imm);
}
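/* Sketch of the workaround described above: to multiply EAX by a
 * constant, load the constant into a scratch register first (ecx here
 * stands for x86_make_reg(file_REG32, reg_CX)):
 *
 *    x86_mov_reg_imm(p, ecx, 9);         -- ecx = 9
 *    x86_mul(p, ecx);                    -- edx:eax = eax * ecx
 */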

void x86_push( struct x86_function *p,
               struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x50 + reg.idx);
   p->stack_offset += 4;
}

void x86_pop( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x58 + reg.idx);
   p->stack_offset -= 4;
}

void x86_inc( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x40 + reg.idx);
}

void x86_dec( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x48 + reg.idx);
}

void x86_ret( struct x86_function *p )
{
   emit_1ub(p, 0xc3);
}

void x86_sahf( struct x86_function *p )
{
   emit_1ub(p, 0x9e);
}

void x86_mov( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

void x86_xor( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x33, 0x31, dst, src );
}

void x86_cmp( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x3b, 0x39, dst, src );
}

void x86_lea( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_1ub(p, 0x8d);
   emit_modrm( p, dst, src );
}

void x86_test( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   emit_1ub(p, 0x85);
   emit_modrm( p, dst, src );
}

void x86_add( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm(p, 0x03, 0x01, dst, src );
}

void x86_mul( struct x86_function *p,
              struct x86_reg src )
{
   assert (src.file == file_REG32 && src.mod == mod_REG);
   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
}
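/* Note on x86_mul() above: MUL r/m32 is encoded as 0xF7 with opcode
 * extension /4, so reg_SP (register index 4) is passed only to fill the
 * reg field of the ModRM byte; it does not name a real operand.
 */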

void x86_sub( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm(p, 0x2b, 0x29, dst, src );
}

void x86_or( struct x86_function *p,
             struct x86_reg dst,
             struct x86_reg src )
{
   emit_op_modrm( p, 0x0b, 0x09, dst, src );
}

void x86_and( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x23, 0x21, dst, src );
}



/***********************************************************************
 * SSE instructions
 */


void sse_movss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, 0xF3, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

void sse_movaps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x28, 0x29, dst, src );
}

void sse_movups( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

void sse_movhps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
}

void sse_movlps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
}

void sse_maxps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_maxss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_divss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
   emit_modrm( p, dst, src );
}

void sse_minps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5D);
   emit_modrm( p, dst, src );
}

void sse_subps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5C);
   emit_modrm( p, dst, src );
}

void sse_mulps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_mulss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_addps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_addss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_andps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x54);
   emit_modrm( p, dst, src );
}


void sse_rsqrtss( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );
}

void sse_movhlps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x12);
   emit_modrm( p, dst, src );
}

void sse_movlhps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x16);
   emit_modrm( p, dst, src );
}


void sse_cvtps2pi( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_XMM || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x2d);
   emit_modrm( p, dst, src );
}


/* Shufps can also be used to implement a reduced swizzle when dest ==
 * arg0.
 */
void sse_shufps( struct x86_function *p,
                 struct x86_reg dest,
                 struct x86_reg arg0,
                 GLubyte shuf)
{
   emit_2ub(p, X86_TWOB, 0xC6);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, shuf);
}
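/* The shuf immediate picks two elements of dest (bits 1:0 and 3:2) for
 * the low half of the result and two elements of arg0 (bits 5:4 and 7:6)
 * for the high half.  For example (illustrative), sse_shufps(p, r, r, 0x00)
 * with the same register for both operands broadcasts element 0 into all
 * four lanes.
 */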

void sse_cmpps( struct x86_function *p,
                struct x86_reg dest,
                struct x86_reg arg0,
                GLubyte cc)
{
   emit_2ub(p, X86_TWOB, 0xC2);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, cc);
}

/***********************************************************************
 * SSE2 instructions
 */

/**
 * Perform a reduced swizzle:
 */
void sse2_pshufd( struct x86_function *p,
                  struct x86_reg dest,
                  struct x86_reg arg0,
                  GLubyte shuf)
{
   emit_3ub(p, 0x66, X86_TWOB, 0x70);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, shuf);
}

void sse2_cvtps2dq( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
   emit_modrm( p, dst, src );
}

void sse2_packssdw( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
   emit_modrm( p, dst, src );
}

void sse2_packsswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x63);
   emit_modrm( p, dst, src );
}

void sse2_packuswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

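/* Note: RCPSS (F3 0F 53) is an SSE instruction, despite the sse2_ prefix
 * on the emitter below.
 */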
void sse2_rcpss( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}

void sse2_movd( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, 0x66, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}




/***********************************************************************
 * x87 instructions
 */
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 2, dst);
}

void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 3, dst);
}

void x87_fild( struct x86_function *p, struct x86_reg arg )
{
   emit_1ub(p, 0xdf);
   emit_modrm_noreg(p, 0, arg);
}

void x87_fldz( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xee);
}


void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_REG32);
   assert(arg.mod != mod_REG);
   emit_1ub(p, 0xd9);
   emit_modrm_noreg(p, 5, arg);
}

void x87_fld1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xe8);
}

void x87_fldl2e( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xea);
}

void x87_fldln2( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xed);
}

void x87_fwait( struct x86_function *p )
{
   emit_1ub(p, 0x9b);
}

void x87_fnclex( struct x86_function *p )
{
   emit_2ub(p, 0xdb, 0xe2);
}

void x87_fclex( struct x86_function *p )
{
   x87_fwait(p);
   x87_fnclex(p);
}

static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
                          GLubyte dst0ub0,
                          GLubyte dst0ub1,
                          GLubyte arg0ub0,
                          GLubyte arg0ub1,
                          GLubyte argmem_noreg)
{
   assert(dst.file == file_x87);

   if (arg.file == file_x87) {
      if (dst.idx == 0)
         emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
      else if (arg.idx == 0)
         emit_2ub(p, arg0ub0, arg0ub1+dst.idx);
      else
         assert(0);
   }
   else if (dst.idx == 0) {
      assert(arg.file == file_REG32);
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, argmem_noreg, arg);
   }
   else
      assert(0);
}
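/* Examples (illustrative) of the selection above, taking x87_fmul():
 *
 *    dst = st(0), arg = st(3)   ->  D8 CB       fmul st(0), st(3)
 *    dst = st(3), arg = st(0)   ->  DC CB       fmul st(3), st(0)
 *    dst = st(0), arg = memory  ->  D8 /1 m32   fmul dword ptr [mem]
 */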

void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xc8,
                0xdc, 0xc8,
                1);
}

void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xe0,
                0xdc, 0xe8,
                4);
}

void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xe8,
                0xdc, 0xe0,
                5);
}

void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xc0,
                0xdc, 0xc0,
                0);
}

void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xf0,
                0xdc, 0xf8,
                6);
}

void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xf8,
                0xdc, 0xf0,
                7);
}

void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc8+dst.idx);
}

void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe8+dst.idx);
}

void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe0+dst.idx);
}

void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc0+dst.idx);
}

void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf8+dst.idx);
}

void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf0+dst.idx);
}

void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe0+arg.idx);
}

void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe8+arg.idx);
}

void x87_fucompp( struct x86_function *p )
{
   emit_2ub(p, 0xda, 0xe9);
}

void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_x87);
   emit_2ub(p, 0xd9, 0xc8+arg.idx);
}

void x87_fabs( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xe1);
}

void x87_fchs( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xe0);
}

void x87_fcos( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xff);
}


void x87_fprndint( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfc);
}

void x87_fscale( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfd);
}

void x87_fsin( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfe);
}

void x87_fsincos( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfb);
}

void x87_fsqrt( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfa);
}

void x87_fxtract( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf4);
}

/* st0 = (2^st0)-1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf0);
}
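/* Illustrative sketch (not from this file) of how F2XM1 combines with the
 * opcodes above to evaluate 2^x for arbitrary x: split x into an integer
 * part i and a fraction f so that F2XM1's input range is respected
 * (assumes round-to-nearest, giving |f| <= 0.5):
 *
 *    fld   st(0)              x x
 *    frndint                  i x
 *    fxch  st(1)              x i
 *    fsub  st(0), st(1)       f i              (f = x - i)
 *    f2xm1                    2^f-1 i
 *    fld1                     1 2^f-1 i
 *    faddp st(1), st(0)       2^f i
 *    fscale                   2^f * 2^i i      (= 2^x)
 *    fstp  st(1)              2^x
 */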

/* st1 = st1 * log2(st0);
 * pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf1);
}

/* st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf9);
}


void x87_fld( struct x86_function *p, struct x86_reg arg )
{
   if (arg.file == file_x87)
      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 0, arg);
   }
}

void x87_fst( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 2, dst);
   }
}

void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 3, dst);
   }
}

void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 2, dst);
   }
}

void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 3, dst);
   }
}


void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_REG32);

   if (dst.idx == reg_AX &&
       dst.mod == mod_REG)
      emit_2ub(p, 0xdf, 0xe0);
   else {
      emit_1ub(p, 0xdd);
      emit_modrm_noreg(p, 7, dst);
   }
}




/***********************************************************************
 * MMX instructions
 */

void mmx_emms( struct x86_function *p )
{
   assert(p->need_emms);
   emit_2ub(p, 0x0f, 0x77);
   p->need_emms = 0;
}

void mmx_packssdw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x6b);
   emit_modrm( p, dst, src );
}

void mmx_packuswb( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

void mmx_movd( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

void mmx_movq( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}


/***********************************************************************
 * Helper functions
 */


/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity:
 */
struct x86_reg x86_fn_arg( struct x86_function *p,
                           GLuint arg )
{
   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                        p->stack_offset + arg * 4);     /* ??? */
}
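/* Example (illustrative): on entry to a generated function the return
 * address is at [esp], so the first 32-bit argument lives at [esp+4] and
 * x86_fn_arg(p, 1) yields exactly that reference.  Subsequent x86_push()/
 * x86_pop() calls are compensated for via p->stack_offset.
 */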


void x86_init_func( struct x86_function *p )
{
   x86_init_func_size(p, 1024);
}

void x86_init_func_size( struct x86_function *p, GLuint code_size )
{
   p->store = _mesa_exec_malloc(code_size);
   p->csr = p->store;
}

void x86_release_func( struct x86_function *p )
{
   _mesa_exec_free(p->store);
}


void (*x86_get_func( struct x86_function *p ))(void)
{
   if (DISASSEM)
      _mesa_printf("disassemble %p %p\n", p->store, p->csr);
   return (void (*)(void))p->store;
}
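/* Putting it together (illustrative sketch): generate, run and release a
 * trivial function that returns 42 in EAX.
 *
 *    struct x86_function f;
 *    x86_init_func(&f);
 *    x86_mov_reg_imm(&f, x86_make_reg(file_REG32, reg_AX), 42);
 *    x86_ret(&f);
 *    x86_get_func(&f)();
 *    x86_release_func(&f);
 */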

#else

void x86sse_dummy( void )
{
}

#endif