/* mesa.git: src/mesa/x86/rtasm/x86sse.c  (commit: "Turn off DISASSEM") */
1 #if defined(USE_X86_ASM)
2
#include <string.h>

#include "x86sse.h"
4
5 #define DISASSEM 0
6 #define X86_TWOB 0x0f
7
8 /* Emit bytes to the instruction stream:
9 */
/* Append a single signed byte to the instruction stream. */
static void emit_1b( struct x86_function *p, GLbyte b0 )
{
   *(GLbyte *)(p->csr++) = b0;
}
14
15 static void emit_1i( struct x86_function *p, GLint i0 )
16 {
17 *(GLint *)(p->csr) = i0;
18 p->csr += 4;
19 }
20
/* When DISASSEM is enabled, print the current code address and the
 * name of the emitting function whenever emission moves to a new
 * function; p->fn remembers the last name printed so each emitter is
 * announced only once per run of instructions.
 */
static void disassem( struct x86_function *p, const char *fn )
{
#if DISASSEM
   if (fn && fn != p->fn) {
      _mesa_printf("0x%x: %s\n", p->csr, fn);
      p->fn = fn;
   }
#endif
}

/* Emit one opcode byte, tagged with the calling emitter's name for
 * the disassembly trace (fn may be 0 to suppress the tag).
 */
static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn )
{
   disassem(p, fn);
   *(p->csr++) = b0;
}

/* Emit two opcode bytes (e.g. a 0x0f-prefixed two-byte opcode). */
static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn )
{
   disassem(p, fn);
   *(p->csr++) = b0;
   *(p->csr++) = b1;
}

/* Emit three opcode bytes (prefix + 0x0f + opcode). */
static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
{
   disassem(p, fn);
   *(p->csr++) = b0;
   *(p->csr++) = b1;
   *(p->csr++) = b2;
}

/* Convenience wrappers that pass the calling function's own name
 * through to the disassembly trace.
 */
#define emit_1ub(p, b0)         emit_1ub_fn(p, b0, __FUNCTION__)
#define emit_2ub(p, b0, b1)     emit_2ub_fn(p, b0, b1, __FUNCTION__)
#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__)
55
56
57
/* Build a modRM byte + possible displacement.  No treatment of SIB
 * indexing.  BZZT - no way to encode an absolute address.
 *
 * 'reg' must be a direct register and supplies the modRM reg field;
 * 'regmem' may be direct or memory and supplies the mod and r/m
 * fields plus any trailing displacement bytes.
 */
static void emit_modrm( struct x86_function *p,
                        struct x86_reg reg,
                        struct x86_reg regmem )
{
   GLubyte val = 0;

   assert(reg.mod == mod_REG);

   val |= regmem.mod << 6;      /* mod field */
   val |= reg.idx << 3;         /* reg field */
   val |= regmem.idx;           /* r/m field */

   emit_1ub_fn(p, val, 0);

   /* Oh-oh we've stumbled into the SIB thing.
    * r/m == ESP selects a SIB byte; 0x24 encodes base=ESP, no index,
    * scale 1, which recovers plain [ESP]-relative addressing.
    */
   if (regmem.file == file_REG32 &&
       regmem.idx == reg_SP) {
      emit_1ub_fn(p, 0x24, 0);          /* simplistic! */
   }

   /* Trailing displacement, sized according to the mod field: */
   switch (regmem.mod) {
   case mod_REG:
   case mod_INDIRECT:
      break;
   case mod_DISP8:
      emit_1b(p, regmem.disp);
      break;
   case mod_DISP32:
      emit_1i(p, regmem.disp);
      break;
   default:
      assert(0);
      break;
   }
}
97
/* Many x86 instructions have two opcodes to cope with the situations
 * where the destination is a register or memory reference
 * respectively.  This function selects the correct opcode based on
 * the arguments presented.
 */
static void emit_op_modrm( struct x86_function *p,
                           GLubyte op_dst_is_reg,
                           GLubyte op_dst_is_mem,
                           struct x86_reg dst,
                           struct x86_reg src )
{
   switch (dst.mod) {
   case mod_REG:
      /* Register destination: reg field = dst, r/m = src. */
      emit_1ub_fn(p, op_dst_is_reg, 0);
      emit_modrm(p, dst, src);
      break;
   case mod_INDIRECT:
   case mod_DISP32:
   case mod_DISP8:
      /* Memory destination: the operands swap roles in the modRM
       * byte, and the source must then be a direct register.
       */
      assert(src.mod == mod_REG);
      emit_1ub_fn(p, op_dst_is_mem, 0);
      emit_modrm(p, src, dst);
      break;
   default:
      assert(0);
      break;
   }
}
126
127
128
129
130
131
132
133 /* Create and manipulate registers and regmem values:
134 */
135 struct x86_reg x86_make_reg( GLuint file,
136 GLuint idx )
137 {
138 struct x86_reg reg;
139
140 reg.file = file;
141 reg.idx = idx;
142 reg.mod = mod_REG;
143 reg.disp = 0;
144
145 return reg;
146 }
147
148 struct x86_reg x86_make_disp( struct x86_reg reg,
149 GLint disp )
150 {
151 assert(reg.file == file_REG32);
152
153 if (reg.mod == mod_REG)
154 reg.disp = disp;
155 else
156 reg.disp += disp;
157
158 if (reg.disp == 0)
159 reg.mod = mod_INDIRECT;
160 else if (reg.disp <= 127 && reg.disp >= -128)
161 reg.mod = mod_DISP8;
162 else
163 reg.mod = mod_DISP32;
164
165 return reg;
166 }
167
/* Return the memory reference [reg], i.e. reg with zero displacement. */
struct x86_reg x86_deref( struct x86_reg reg )
{
   return x86_make_disp(reg, 0);
}

/* Strip any indirection/displacement, yielding the bare register. */
struct x86_reg x86_get_base_reg( struct x86_reg reg )
{
   return x86_make_reg( reg.file, reg.idx );
}
177
178
179
/* Labels, jumps and fixup:
 */

/* Return the current emission address, used as a label/jump target. */
GLubyte *x86_get_label( struct x86_function *p )
{
   return p->csr;
}
186
/* Emit a conditional jump (condition code 'cc') to 'label', normally
 * a backward target obtained from x86_get_label().  Uses the short
 * 2-byte rel8 form when the displacement fits in a signed byte, else
 * the 6-byte 0F 8x rel32 form.  Displacements are relative to the end
 * of the instruction, hence the +2 / +6 bias.
 */
void x86_jcc( struct x86_function *p,
              GLuint cc,
              GLubyte *label )
{
   GLint offset = label - (x86_get_label(p) + 2);

   if (offset <= 127 && offset >= -128) {
      emit_1ub(p, 0x70 + cc);
      emit_1b(p, (GLbyte) offset);
   }
   else {
      /* Re-bias the offset for the longer 6-byte encoding: */
      offset = label - (x86_get_label(p) + 6);
      emit_2ub(p, 0x0f, 0x80 + cc);
      emit_1i(p, offset);
   }
}
203
/* Always use a 32bit offset for forward jumps:
 */
GLubyte *x86_jcc_forward( struct x86_function *p,
                          GLuint cc )
{
   /* Emit 0F 8x with a zero placeholder rel32; the returned label
    * (address just past the instruction) is what
    * x86_fixup_fwd_jump() patches once the target is known.
    */
   emit_2ub(p, 0x0f, 0x80 + cc);
   emit_1i(p, 0);
   return x86_get_label(p);
}
213
214 /* Fixup offset from forward jump:
215 */
216 void x86_fixup_fwd_jump( struct x86_function *p,
217 GLubyte *fixup )
218 {
219 *(int *)(fixup - 4) = x86_get_label(p) - fixup;
220 }
221
/* push reg (0x50+r).  stack_offset tracks how far ESP has moved so
 * that x86_fn_arg() can still locate the caller's arguments.
 */
void x86_push( struct x86_function *p,
               struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x50 + reg.idx);
   p->stack_offset += 4;
}

/* pop reg (0x58+r); undoes the bookkeeping done by x86_push(). */
void x86_pop( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x58 + reg.idx);
   p->stack_offset -= 4;
}
237
/* inc reg (0x40+r); register operand only. */
void x86_inc( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x40 + reg.idx);
}

/* dec reg (0x48+r); register operand only. */
void x86_dec( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x48 + reg.idx);
}

/* ret (0xc3). */
void x86_ret( struct x86_function *p )
{
   emit_1ub(p, 0xc3);
}
256
/* emms (0F 77) — clear the MMX state so x87 code can run again.  The
 * need_emms flag is set by the MMX emitters below and cleared here;
 * the assert catches a redundant emms.
 */
void mmx_emms( struct x86_function *p )
{
   assert(p->need_emms);
   emit_2ub(p, 0x0f, 0x77);
   p->need_emms = 0;
}
263
264
265
266
/* mov — opcode 0x8b (reg <- r/m) or 0x89 (r/m <- reg). */
void x86_mov( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

/* xor — opcode 0x33 (reg <- r/m) or 0x31 (r/m <- reg). */
void x86_xor( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x33, 0x31, dst, src );
}

/* cmp — opcode 0x3b (reg, r/m) or 0x39 (r/m, reg); sets EFLAGS only. */
void x86_cmp( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x3b, 0x39, dst, src );
}
287
/* movd — 66 0F 6E (xmm <- r/m32) or 66 0F 7E (r/m32 <- xmm). */
void sse2_movd( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, 0x66, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

/* movd — 0F 6E (mm <- r/m32) or 0F 7E (r/m32 <- mm).  Flags that an
 * emms will be needed before returning to x87 code.
 */
void mmx_movd( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

/* movq — 0F 6F (mm <- mm/m64) or 0F 7F (mm/m64 <- mm). */
void mmx_movq( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}
313
314
/* movss — F3 0F 10 / F3 0F 11: scalar single-precision move. */
void sse_movss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, 0xF3, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

/* movaps — 0F 28 / 0F 29: aligned 128-bit move. */
void sse_movaps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x28, 0x29, dst, src );
}

/* movups — 0F 10 / 0F 11: unaligned 128-bit move. */
void sse_movups( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

/* movhps — 0F 16 / 0F 17: move high 64 bits to/from memory.  A
 * reg,reg operand pair would encode movlhps instead, hence the
 * assert requiring one memory operand.
 */
void sse_movhps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
}

/* movlps — 0F 12 / 0F 13: move low 64 bits to/from memory.  A
 * reg,reg pair would encode movhlps, hence the assert.
 */
void sse_movlps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
}
356
/* SSE operations often only have one format, with dest constrained to
 * be a register:
 */

/* maxps — 0F 5F. */
void sse_maxps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

/* divss — F3 0F 5E: scalar single-precision divide. */
void sse_divss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
   emit_modrm( p, dst, src );
}

/* minps — 0F 5D. */
void sse_minps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5D);
   emit_modrm( p, dst, src );
}

/* subps — 0F 5C. */
void sse_subps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5C);
   emit_modrm( p, dst, src );
}

/* mulps — 0F 59. */
void sse_mulps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

/* addps — 0F 58. */
void sse_addps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

/* addss — F3 0F 58: scalar single-precision add. */
void sse_addss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

/* andps — 0F 54: bitwise AND. */
void sse_andps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x54);
   emit_modrm( p, dst, src );
}

/* rcpss — F3 0F 53: scalar reciprocal approximation.
 * NOTE(review): RCPSS is an SSE1 instruction; the sse2_ prefix looks
 * like a misnomer — confirm before renaming (callers use this name).
 */
void sse2_rcpss( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}

/* rsqrtss — F3 0F 52: scalar reciprocal square-root approximation. */
void sse_rsqrtss( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );

}
440
/* movhlps — 0F 12, register-to-register only (the memory form of this
 * opcode is movlps, cf. sse_movlps above).
 */
void sse_movhlps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x12);
   emit_modrm( p, dst, src );
}

/* movlhps — 0F 16, register-to-register only (memory form is movhps). */
void sse_movlhps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x16);
   emit_modrm( p, dst, src );
}
458
/* cvtps2dq — 66 0F 5B: packed float to packed dword conversion. */
void sse2_cvtps2dq( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
   emit_modrm( p, dst, src );
}

/* packssdw — 66 0F 6B: pack dwords to words with signed saturation. */
void sse2_packssdw( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
   emit_modrm( p, dst, src );
}

/* packsswb — 66 0F 63: pack words to bytes with signed saturation. */
void sse2_packsswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x63);
   emit_modrm( p, dst, src );
}

/* packuswb — 66 0F 67: pack words to bytes with unsigned saturation. */
void sse2_packuswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}
490
/* cvtps2pi — 0F 2D: convert two packed floats to an MMX register.
 * Uses MMX state, so an emms becomes necessary.
 */
void sse_cvtps2pi( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_XMM || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x2d);
   emit_modrm( p, dst, src );
}

/* packssdw (MMX) — 0F 6B: pack dwords to words, signed saturation. */
void mmx_packssdw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x6b);
   emit_modrm( p, dst, src );
}

/* packuswb (MMX) — 0F 67: pack words to bytes, unsigned saturation. */
void mmx_packuswb( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}
529
530
/* Load effective address:
 * lea — 0x8d: dst receives the address computed from src's
 * register+displacement, without touching memory.
 */
void x86_lea( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_1ub(p, 0x8d);
   emit_modrm( p, dst, src );
}

/* test — 0x85: AND the operands, set EFLAGS, discard the result. */
void x86_test( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   emit_1ub(p, 0x85);
   emit_modrm( p, dst, src );
}
548
549
/**
 * Perform a reduced swizzle:
 * pshufd — 66 0F 70 /r imm8: each 2-bit field of 'shuf' selects which
 * source dword lands in the corresponding destination dword.
 */
void sse2_pshufd( struct x86_function *p,
                  struct x86_reg dest,
                  struct x86_reg arg0,
                  GLubyte shuf)
{
   emit_3ub(p, 0x66, X86_TWOB, 0x70);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, shuf);
}
562
563
/* Shufps can also be used to implement a reduced swizzle when dest ==
 * arg0.
 * shufps — 0F C6 /r imm8.
 */
void sse_shufps( struct x86_function *p,
                 struct x86_reg dest,
                 struct x86_reg arg0,
                 GLubyte shuf)
{
   emit_2ub(p, X86_TWOB, 0xC6);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, shuf);
}

/* cmpps — 0F C2 /r imm8: packed compare; 'cc' is the 3-bit comparison
 * predicate (eq/lt/le/...), result is an all-ones/all-zeros mask.
 */
void sse_cmpps( struct x86_function *p,
                struct x86_reg dest,
                struct x86_reg arg0,
                GLubyte cc)
{
   emit_2ub(p, X86_TWOB, 0xC2);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, cc);
}
586
587
/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity (tracked in p->stack_offset).
 * 'arg' is 1-based: arg 1 sits just above the return address at
 * [ESP + stack_offset + 4].
 */
struct x86_reg x86_fn_arg( struct x86_function *p,
                           GLuint arg )
{
   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                        p->stack_offset + arg * 4);    /* ??? */
}
597
598
/* Allocate the code buffer and point the emission cursor at its
 * start.
 * NOTE(review): the malloc result is unchecked and nothing guards
 * against emitting past the fixed 1024-byte capacity — confirm all
 * generated functions stay well under this limit.
 */
void x86_init_func( struct x86_function *p )
{
   p->store = malloc(1024);
   p->csr = p->store;
}

/* Free the code buffer allocated by x86_init_func(). */
void x86_release_func( struct x86_function *p )
{
   free(p->store);
}
609
610
/* Return the start of the generated code as a callable function
 * pointer.  With DISASSEM enabled, also prints the code range so it
 * can be disassembled externally.
 */
void (*x86_get_func( struct x86_function *p ))(void)
{
   if (DISASSEM)
      _mesa_printf("disassemble %p %p\n", p->store, p->csr);
   return (void (*)())p->store;
}
617
618 #else
619
/* Stub so this translation unit is never empty when USE_X86_ASM is
 * not defined.
 */
void x86sse_dummy( void )
{
}
623
624 #endif