-
-/* Emit bytes to the instruction stream:
- */
-/* Append a single signed byte at the cursor and step past it. */
-static void emit_1b( struct x86_program *p, GLbyte b0 )
-{
-   GLbyte *slot = (GLbyte *) p->csr;
-
-   *slot = b0;
-   p->csr++;
-}
-
-/* Append a 32-bit value at the cursor, least significant byte first
- * (the order x86 expects for immediates and displacements), and
- * advance the cursor by four bytes.
- *
- * The value is stored one byte at a time: the cursor sits at an
- * arbitrary byte offset in the instruction stream, so a single store
- * through a GLint pointer would be a misaligned access and would also
- * depend on the byte order of the host.
- */
-static void emit_1i( struct x86_program *p, GLint i0 )
-{
-   /* Shift an unsigned copy; right-shifting a negative signed value is
-    * implementation-defined.
-    */
-   const GLuint bits = (GLuint) i0;
-   GLuint shift;
-
-   for (shift = 0; shift < 32; shift += 8) {
-      *(p->csr++) = (GLubyte) (bits >> shift);
-   }
-}
-
-/* Debug aid: when DISASSEM is enabled, print the current emit address
- * together with the name of the emitting function each time that name
- * changes.  A NULL fn is ignored.  With DISASSEM disabled this is an
- * empty function.
- */
-static void disassem( struct x86_program *p, const char *fn )
-{
-#if DISASSEM
-   /* Names are compared by pointer, not by content. */
-   static const char *last_fn;
-
-   if (fn && fn != last_fn) {
-      /* %p rather than %x: p->csr is a pointer, and passing it for an
-       * unsigned int conversion is undefined and truncates wherever
-       * pointers are wider than int.
-       */
-      _mesa_printf("%p: %s\n", (void *) p->csr, fn);
-      last_fn = fn;
-   }
-#endif
-}
-
-/* Append one unsigned byte on behalf of the function named fn (fn may
- * be NULL; it is only handed to disassem()).
- */
-static void emit_1ub_fn( struct x86_program *p, GLubyte b0, const char *fn )
-{
-   disassem(p, fn);
-
-   p->csr[0] = b0;
-   p->csr += 1;
-}
-
-/* Append two unsigned bytes, b0 first, on behalf of the function named
- * fn (fn is only handed to disassem()).
- */
-static void emit_2ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, const char *fn )
-{
-   disassem(p, fn);
-
-   p->csr[0] = b0;
-   p->csr[1] = b1;
-   p->csr += 2;
-}
-
-/* Append three unsigned bytes in the order b0, b1, b2 on behalf of the
- * function named fn (fn is only handed to disassem()).
- */
-static void emit_3ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
-{
-   disassem(p, fn);
-
-   p->csr[0] = b0;
-   p->csr[1] = b1;
-   p->csr[2] = b2;
-   p->csr += 3;
-}
-
-/* Wrappers that pass the name of the calling function along with the
- * bytes, so that disassem() can report which emitter produced them.
- */
-#define emit_1ub(p, b0)          emit_1ub_fn((p), (b0), __FUNCTION__)
-#define emit_2ub(p, b0, b1)      emit_2ub_fn((p), (b0), (b1), __FUNCTION__)
-#define emit_3ub(p, b0, b1, b2)  emit_3ub_fn((p), (b0), (b1), (b2), __FUNCTION__)
-
-
-/* Labels, jumps and fixup:
- */
-/* Return the current emit position, i.e. the address the next emitted
- * byte will occupy.
- */
-static GLubyte *get_label( struct x86_program *p )
-{
-   GLubyte *here = p->csr;
-
-   return here;
-}
-
-/* Emit a conditional jump with condition code cc to an already known
- * address.  The two-byte form (0x70+cc, 8-bit offset) is used when the
- * target is in range, otherwise the six-byte form (0x0f, 0x80+cc,
- * 32-bit offset).  Offsets are measured from the end of the jump
- * instruction itself, hence the +2 / +6.
- */
-static void x86_jcc( struct x86_program *p,
-                     GLuint cc,
-                     GLubyte *label )
-{
-   GLint offset = label - (get_label(p) + 2);
-
-   if (offset > 127 || offset < -128) {
-      /* Out of reach for the short form. */
-      offset = label - (get_label(p) + 6);
-      emit_2ub(p, 0x0f, 0x80 + cc);
-      emit_1i(p, offset);
-      return;
-   }
-
-   emit_1ub(p, 0x70 + cc);
-   emit_1b(p, (GLbyte) offset);
-}
-
-/* Emit a conditional jump whose target is not known yet.  The 32-bit
- * offset form is always used, with a zero placeholder offset.  The
- * returned pointer addresses the byte just past the instruction; hand
- * it to do_fixup() once the target has been reached.
- */
-static GLubyte *x86_jcc_forward( struct x86_program *p,
-                                 GLuint cc )
-{
-   GLubyte *after_jump;
-
-   emit_2ub(p, 0x0f, 0x80 + cc);
-   emit_1i(p, 0);
-
-   after_jump = get_label(p);
-   return after_jump;
-}
-
-/* Resolve a forward jump: overwrite the four offset bytes that end at
- * fixup with the distance from fixup to the current emit position.
- * fixup is the value returned when the jump was emitted.
- *
- * The offset is written one byte at a time, least significant first.
- * The field lies at an arbitrary byte offset in the instruction
- * stream, so storing through an int pointer would be a misaligned
- * access and would depend on the byte order of the host.
- */
-static void do_fixup( struct x86_program *p,
-                      GLubyte *fixup )
-{
-   const GLint distance = get_label(p) - fixup;
-   const GLuint bits = (GLuint) distance;
-   GLubyte *field = fixup - 4;
-   GLuint i;
-
-   for (i = 0; i < 4; i++) {
-      field[i] = (GLubyte) (bits >> (8 * i));
-   }
-}
-
-/* Emit a one-byte push (0x50 + register index) and record the four
- * extra bytes in p->stack_offset.  reg must be a direct register.
- */
-static void x86_push( struct x86_program *p,
-                      struct x86_reg reg )
-{
-   const GLubyte opcode = 0x50 + reg.idx;
-
-   assert(reg.mod == mod_REG);
-
-   emit_1ub(p, opcode);
-   p->stack_offset = p->stack_offset + 4;
-}
-
-/* Emit a one-byte pop (0x58 + register index) and take the four bytes
- * back off p->stack_offset.  reg must be a direct register.
- */
-static void x86_pop( struct x86_program *p,
-                     struct x86_reg reg )
-{
-   const GLubyte opcode = 0x58 + reg.idx;
-
-   assert(reg.mod == mod_REG);
-
-   emit_1ub(p, opcode);
-   p->stack_offset = p->stack_offset - 4;
-}
-
-/* Emit a one-byte increment (0x40 + register index).  reg must be a
- * direct register.
- */
-static void x86_inc( struct x86_program *p,
-                     struct x86_reg reg )
-{
-   const GLubyte opcode = 0x40 + reg.idx;
-
-   assert(reg.mod == mod_REG);
-
-   emit_1ub(p, opcode);
-}
-
-/* Emit a one-byte decrement (0x48 + register index).  reg must be a
- * direct register.
- */
-static void x86_dec( struct x86_program *p,
-                     struct x86_reg reg )
-{
-   const GLubyte opcode = 0x48 + reg.idx;
-
-   assert(reg.mod == mod_REG);
-
-   emit_1ub(p, opcode);
-}
-
-/* Emit a near return (single byte 0xc3). */
-static void x86_ret( struct x86_program *p )
-{
-   const GLubyte opcode = 0xc3;
-
-   emit_1ub(p, opcode);
-}
-
-/* Emit emms (0x0f 0x77) and clear the need_emms flag.  Asserts that
- * the flag was actually set, i.e. that some earlier emitter asked for
- * it.
- */
-static void mmx_emms( struct x86_program *p )
-{
-   const GLubyte escape = 0x0f;
-   const GLubyte opcode = 0x77;
-
-   assert(p->need_emms);
-
-   emit_2ub(p, escape, opcode);
-   p->need_emms = 0;
-}
-
-
-
-
-/* Build a modRM byte for the (reg, regmem) operand pair and emit it,
- * followed by a SIB byte and/or displacement where regmem calls for
- * one.  Only plain [base + displacement] addressing is handled: no
- * scaled-index SIB forms, and no way to encode an absolute address.
- *
- * reg must be a direct register (mod_REG); its index fills the reg
- * field.  regmem supplies the mod and r/m fields, plus the
- * displacement for mod_DISP8 / mod_DISP32.  An unrecognised regmem.mod
- * is reported and aborts.
- */
-static void emit_modrm( struct x86_program *p,
-                        struct x86_reg reg,
-                        struct x86_reg regmem )
-{
-   GLubyte val = 0;
-
-   assert(reg.mod == mod_REG);
-
-   val |= regmem.mod << 6;     /* mod field */
-   val |= reg.idx << 3;        /* reg field */
-   val |= regmem.idx;          /* r/m field */
-
-   emit_1ub_fn(p, val, 0);
-
-   /* An r/m field equal to reg_SP requests a SIB byte, but only in the
-    * memory addressing modes.  With mod_REG the same index simply
-    * names a register (the stack pointer, or the MMX/XMM register
-    * sharing that index) and nothing follows the modRM byte, so a SIB
-    * byte emitted there would corrupt the instruction stream.
-    *
-    * 0x24 is the SIB encoding for "base = stack pointer, no index" --
-    * simplistic, but the only form needed here.
-    */
-   if (regmem.mod != mod_REG && regmem.idx == reg_SP) {
-      emit_1ub_fn(p, 0x24, 0);
-   }
-
-   switch (regmem.mod) {
-   case mod_REG:
-   case mod_INDIRECT:
-      /* No displacement bytes. */
-      break;
-   case mod_DISP8:
-      emit_1b(p, regmem.disp);
-      break;
-   case mod_DISP32:
-      emit_1i(p, regmem.disp);
-      break;
-   default:
-      _mesa_printf("unknown regmem.mod %d\n", regmem.mod);
-      abort();
-      break;
-   }
-}
-
-/* Many x86 instructions come as a pair of opcodes: one for a register
- * destination and one for a memory destination.  Pick the right one
- * from dst.mod and emit it together with the matching modRM bytes.
- *
- * op_dst_is_reg is used when dst is a direct register; src may then be
- * a register or a memory reference.  op_dst_is_mem is used when dst is
- * a memory reference; src must then be a direct register, and the
- * operands swap places in the modRM encoding.  Any other dst.mod is
- * reported and aborts.
- */
-static void emit_op_modrm( struct x86_program *p,
-                           GLubyte op_dst_is_reg,
-                           GLubyte op_dst_is_mem,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   if (dst.mod == mod_REG) {
-      emit_1ub_fn(p, op_dst_is_reg, 0);
-      emit_modrm(p, dst, src);
-   }
-   else if (dst.mod == mod_INDIRECT ||
-            dst.mod == mod_DISP32 ||
-            dst.mod == mod_DISP8) {
-      assert(src.mod == mod_REG);
-      emit_1ub_fn(p, op_dst_is_mem, 0);
-      emit_modrm(p, src, dst);
-   }
-   else {
-      _mesa_printf("unknown dst.mod %d\n", dst.mod);
-      abort();
-   }
-}
-
-/* mov dst, src: opcode 0x8b for a register destination, 0x89 for a
- * memory destination.
- */
-static void x86_mov( struct x86_program *p,
-                     struct x86_reg dst,
-                     struct x86_reg src )
-{
-   const GLubyte to_reg = 0x8b;
-   const GLubyte to_mem = 0x89;
-
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* xor dst, src: opcode 0x33 for a register destination, 0x31 for a
- * memory destination.
- */
-static void x86_xor( struct x86_program *p,
-                     struct x86_reg dst,
-                     struct x86_reg src )
-{
-   const GLubyte to_reg = 0x33;
-   const GLubyte to_mem = 0x31;
-
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* cmp dst, src: opcode 0x3b when dst is a register, 0x39 when dst is a
- * memory reference.
- */
-static void x86_cmp( struct x86_program *p,
-                     struct x86_reg dst,
-                     struct x86_reg src )
-{
-   const GLubyte to_reg = 0x3b;
-   const GLubyte to_mem = 0x39;
-
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* SSE2 movd: 0x66 prefix and two-byte escape, then opcode 0x6e for a
- * register destination or 0x7e for a memory destination.  Asserts that
- * SSE2 is available.
- */
-static void sse2_movd( struct x86_program *p,
-                       struct x86_reg dst,
-                       struct x86_reg src )
-{
-   const GLubyte to_reg = 0x6e;
-   const GLubyte to_mem = 0x7e;
-
-   assert(p->have_sse2);
-
-   emit_2ub(p, 0x66, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* MMX movd: two-byte escape, then opcode 0x6e for a register
- * destination or 0x7e for a memory destination.  Flags the program as
- * needing emms.
- */
-static void mmx_movd( struct x86_program *p,
-                      struct x86_reg dst,
-                      struct x86_reg src )
-{
-   const GLubyte to_reg = 0x6e;
-   const GLubyte to_mem = 0x7e;
-
-   p->need_emms = 1;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* MMX movq: two-byte escape, then opcode 0x6f for a register
- * destination or 0x7f for a memory destination.  Flags the program as
- * needing emms.
- */
-static void mmx_movq( struct x86_program *p,
-                      struct x86_reg dst,
-                      struct x86_reg src )
-{
-   const GLubyte to_reg = 0x6f;
-   const GLubyte to_mem = 0x7f;
-
-   p->need_emms = 1;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-
-/* movss: 0xF3 prefix and two-byte escape, then opcode 0x10 for a
- * register destination or 0x11 for a memory destination.
- */
-static void sse_movss( struct x86_program *p,
-                       struct x86_reg dst,
-                       struct x86_reg src )
-{
-   const GLubyte to_reg = 0x10;
-   const GLubyte to_mem = 0x11;
-
-   emit_2ub(p, 0xF3, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* movaps: two-byte escape, then opcode 0x28 for a register
- * destination or 0x29 for a memory destination.
- */
-static void sse_movaps( struct x86_program *p,
-                        struct x86_reg dst,
-                        struct x86_reg src )
-{
-   const GLubyte to_reg = 0x28;
-   const GLubyte to_mem = 0x29;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* movups: two-byte escape, then opcode 0x10 for a register
- * destination or 0x11 for a memory destination.
- */
-static void sse_movups( struct x86_program *p,
-                        struct x86_reg dst,
-                        struct x86_reg src )
-{
-   const GLubyte to_reg = 0x10;
-   const GLubyte to_mem = 0x11;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* movhps: two-byte escape, then opcode 0x16 for a register
- * destination or 0x17 for a memory destination.  At least one operand
- * must be a memory reference; the register-to-register encoding of
- * 0x16 is what sse_movlhps emits.
- */
-static void sse_movhps( struct x86_program *p,
-                        struct x86_reg dst,
-                        struct x86_reg src )
-{
-   const GLubyte to_reg = 0x16;
-   const GLubyte to_mem = 0x17;
-
-   assert(dst.mod != mod_REG || src.mod != mod_REG);
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* movlps: two-byte escape, then opcode 0x12 for a register
- * destination or 0x13 for a memory destination.  At least one operand
- * must be a memory reference; the register-to-register encoding of
- * 0x12 is what sse_movhlps emits.
- */
-static void sse_movlps( struct x86_program *p,
-                        struct x86_reg dst,
-                        struct x86_reg src )
-{
-   const GLubyte to_reg = 0x12;
-   const GLubyte to_mem = 0x13;
-
-   assert(dst.mod != mod_REG || src.mod != mod_REG);
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, to_reg, to_mem, dst, src);
-}
-
-/* SSE operations often only have one format, with dest constrained to
- * be a register:
- */
-/* mulps dst, src: two-byte escape plus opcode 0x59, then modRM with
- * dst in the reg field.
- */
-static void sse_mulps( struct x86_program *p,
-                       struct x86_reg dst,
-                       struct x86_reg src )
-{
-   const GLubyte opcode = 0x59;
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* addps dst, src: two-byte escape plus opcode 0x58, then modRM with
- * dst in the reg field.
- */
-static void sse_addps( struct x86_program *p,
-                       struct x86_reg dst,
-                       struct x86_reg src )
-{
-   const GLubyte opcode = 0x58;
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* movhlps dst, src: two-byte escape plus opcode 0x12.  Both operands
- * must be direct registers.
- */
-static void sse_movhlps( struct x86_program *p,
-                         struct x86_reg dst,
-                         struct x86_reg src )
-{
-   const GLubyte opcode = 0x12;
-
-   assert(dst.mod == mod_REG && src.mod == mod_REG);
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* movlhps dst, src: two-byte escape plus opcode 0x16.  Both operands
- * must be direct registers.
- */
-static void sse_movlhps( struct x86_program *p,
-                         struct x86_reg dst,
-                         struct x86_reg src )
-{
-   const GLubyte opcode = 0x16;
-
-   assert(dst.mod == mod_REG && src.mod == mod_REG);
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* cvtps2dq dst, src: 0x66 prefix, two-byte escape, opcode 0x5B, then
- * modRM with dst in the reg field.  Asserts that SSE2 is available.
- */
-static void sse2_cvtps2dq( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x5B;
-
-   assert(p->have_sse2);
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* SSE2 packssdw dst, src: 0x66 prefix, two-byte escape, opcode 0x6B,
- * then modRM with dst in the reg field.  Asserts that SSE2 is
- * available.
- */
-static void sse2_packssdw( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x6B;
-
-   assert(p->have_sse2);
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* SSE2 packsswb dst, src: 0x66 prefix, two-byte escape, opcode 0x63,
- * then modRM with dst in the reg field.  Asserts that SSE2 is
- * available.
- */
-static void sse2_packsswb( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x63;
-
-   assert(p->have_sse2);
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* SSE2 packuswb dst, src: 0x66 prefix, two-byte escape, opcode 0x67,
- * then modRM with dst in the reg field.  Asserts that SSE2 is
- * available.
- */
-static void sse2_packuswb( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x67;
-
-   assert(p->have_sse2);
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* cvtps2pi dst, src: two-byte escape plus opcode 0x2d.  dst must be an
- * MMX register; src must be an XMM register or a memory reference.
- * Flags the program as needing emms.
- */
-static void sse_cvtps2pi( struct x86_program *p,
-                          struct x86_reg dst,
-                          struct x86_reg src )
-{
-   const GLubyte opcode = 0x2d;
-
-   assert(dst.file == file_MMX &&
-          (src.file == file_XMM || src.mod != mod_REG));
-
-   p->need_emms = 1;
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* MMX packssdw dst, src: two-byte escape plus opcode 0x6b.  dst must
- * be an MMX register; src must be an MMX register or a memory
- * reference.  Flags the program as needing emms.
- */
-static void mmx_packssdw( struct x86_program *p,
-                          struct x86_reg dst,
-                          struct x86_reg src )
-{
-   const GLubyte opcode = 0x6b;
-
-   assert(dst.file == file_MMX &&
-          (src.file == file_MMX || src.mod != mod_REG));
-
-   p->need_emms = 1;
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* MMX packuswb dst, src: two-byte escape plus opcode 0x67.  dst must
- * be an MMX register; src must be an MMX register or a memory
- * reference.  Flags the program as needing emms.
- */
-static void mmx_packuswb( struct x86_program *p,
-                          struct x86_reg dst,
-                          struct x86_reg src )
-{
-   const GLubyte opcode = 0x67;
-
-   assert(dst.file == file_MMX &&
-          (src.file == file_MMX || src.mod != mod_REG));
-
-   p->need_emms = 1;
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-
-/* Load effective address: opcode 0x8d followed by modRM, with dst in
- * the reg field and src supplying the address expression.
- */
-static void x86_lea( struct x86_program *p,
-                     struct x86_reg dst,
-                     struct x86_reg src )
-{
-   const GLubyte opcode = 0x8d;
-
-   emit_1ub(p, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* test: opcode 0x85 followed by modRM, with dst in the reg field and
- * src in the mod and r/m fields.
- */
-static void x86_test( struct x86_program *p,
-                      struct x86_reg dst,
-                      struct x86_reg src )
-{
-   const GLubyte opcode = 0x85;
-
-   emit_1ub(p, opcode);
-   emit_modrm(p, dst, src);
-}
-
-
-
-
-/**
- * Perform a reduced swizzle with pshufd (0x66 prefix, two-byte escape,
- * opcode 0x70).  The four selectors are packed into the trailing
- * immediate byte two bits apart, x lowest and w highest.  Asserts that
- * SSE2 is available.
- */
-static void sse2_pshufd( struct x86_program *p,
-                         struct x86_reg dest,
-                         struct x86_reg arg0,
-                         GLubyte x,
-                         GLubyte y,
-                         GLubyte z,
-                         GLubyte w)
-{
-   const GLubyte selector = (x | (y << 2) | (z << 4) | (w << 6));
-
-   assert(p->have_sse2);
-
-   emit_3ub(p, 0x66, X86_TWOB, 0x70);
-   emit_modrm(p, dest, arg0);
-   emit_1ub(p, selector);
-}
-
-
-/* Shufps (two-byte escape, opcode 0xC6) can also be used to implement
- * a reduced swizzle when dest == arg0.  The four selectors are packed
- * into the trailing immediate byte two bits apart, x lowest and w
- * highest.
- */
-static void sse_shufps( struct x86_program *p,
-                        struct x86_reg dest,
-                        struct x86_reg arg0,
-                        GLubyte x,
-                        GLubyte y,
-                        GLubyte z,
-                        GLubyte w)
-{
-   const GLubyte selector = (x | (y << 2) | (z << 4) | (w << 6));
-
-   emit_2ub(p, X86_TWOB, 0xC6);
-   emit_modrm(p, dest, arg0);
-   emit_1ub(p, selector);
-}
-
-