-/* Hand out the SSE scratch register.  There is no allocator behind
- * this: register 7 of the XMM file is always returned and 'p' is not
- * consulted.
- */
-static struct x86_reg get_sse_temp( struct x86_program *p )
-{
-   struct x86_reg scratch = make_reg(file_XMM, 7);   /* hardwired */
-
-   (void) p;
-   return scratch;
-}
-
-/* Give back a register obtained from get_sse_temp().  Nothing is
- * actually freed; the call only checks (in debug builds) that the
- * register really is the hardwired XMM7 temporary.
- */
-static void release_temp( struct x86_program *p,
-                          struct x86_reg reg )
-{
-   (void) p;
-   assert(reg.file == file_XMM && reg.idx == 7);
-}
-
-/* Emit bytes to the instruction stream:
- */
-/* Append one signed byte at the cursor and advance the cursor by one.
- */
-static void emit_1b( struct x86_program *p, GLbyte b0 )
-{
-   GLbyte *slot = (GLbyte *) p->csr;
-
-   *slot = b0;
-   p->csr++;
-}
-
-/* Append a 32-bit immediate at the cursor and advance the cursor by
- * four bytes.
- *
- * The value is stored one byte at a time, least significant byte
- * first (the order x86 uses for immediates and displacements).  The
- * cursor can sit at any byte offset, so storing through a cast
- * 'GLint *' would be a misaligned, type-punned access.
- */
-static void emit_1i( struct x86_program *p, GLint i0 )
-{
-   const GLuint bits = (GLuint) i0;
-
-   p->csr[0] = (GLubyte) (bits & 0xff);
-   p->csr[1] = (GLubyte) ((bits >> 8) & 0xff);
-   p->csr[2] = (GLubyte) ((bits >> 16) & 0xff);
-   p->csr[3] = (GLubyte) ((bits >> 24) & 0xff);
-   p->csr += 4;
-}
-
-/* Debug trace, compiled in only when DISASSEM is non-zero.
- *
- * Prints the current cursor address together with 'fn' whenever 'fn'
- * is non-NULL and is not the same pointer as the one printed last
- * time (pointer comparison, not string comparison).  Callers pass
- * NULL to suppress tracing for a byte.
- *
- * The address is printed with %p: passing a pointer to %x is a
- * format/argument mismatch and truncates on 64-bit targets.
- */
-static void disassem( struct x86_program *p, const char *fn )
-{
-#if DISASSEM
-   static const char *last_fn;
-   if (fn && fn != last_fn) {
-      _mesa_printf("%p: %s\n", (void *) p->csr, fn);
-      last_fn = fn;
-   }
-#else
-   (void) p;
-   (void) fn;
-#endif
-}
-
-/* Append one unsigned byte at the cursor.  'fn' is handed to
- * disassem() first so the trace shows the address before the byte is
- * written; pass NULL for no trace.
- */
-static void emit_1ub_fn( struct x86_program *p, GLubyte b0, const char *fn )
-{
-   disassem(p, fn);
-
-   p->csr[0] = b0;
-   p->csr += 1;
-}
-
-/* Append two unsigned bytes, b0 then b1, at the cursor.  'fn' is
- * handed to disassem() before anything is written.
- */
-static void emit_2ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, const char *fn )
-{
-   disassem(p, fn);
-
-   p->csr[0] = b0;
-   p->csr[1] = b1;
-   p->csr += 2;
-}
-
-/* Append three unsigned bytes, b0, b1, b2 in that order, at the
- * cursor.  'fn' is handed to disassem() before anything is written.
- */
-static void emit_3ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
-{
-   disassem(p, fn);
-
-   p->csr[0] = b0;
-   p->csr[1] = b1;
-   p->csr[2] = b2;
-   p->csr += 3;
-}
-
-/* Convenience wrappers that tag each emitted byte sequence with the
- * name of the calling function, for the disassem() trace.
- */
-#define emit_1ub(p, b0)         emit_1ub_fn((p), (b0), __FUNCTION__)
-#define emit_2ub(p, b0, b1)     emit_2ub_fn((p), (b0), (b1), __FUNCTION__)
-#define emit_3ub(p, b0, b1, b2) emit_3ub_fn((p), (b0), (b1), (b2), __FUNCTION__)
-
-
-/* Labels, jumps and fixup:
- */
-/* A label is simply the current cursor position in the instruction
- * stream.
- */
-static GLubyte *get_label( struct x86_program *p )
-{
-   GLubyte *here = p->csr;
-
-   return here;
-}
-
-/* Emit a conditional jump to an already-known label.
- *
- * Offsets are measured from the end of the jump instruction.  If the
- * target is within signed 8-bit range of the 2-byte form (0x70+cc,
- * rel8) that form is used; otherwise the 6-byte form (0x0f, 0x80+cc,
- * rel32) is emitted.
- */
-static void emit_jcc( struct x86_program *p,
-                      GLuint cc,
-                      GLubyte *label )
-{
-   const GLint rel8 = label - (get_label(p) + 2);
-   const GLint rel32 = label - (get_label(p) + 6);
-
-   if (rel8 >= -128 && rel8 <= 127) {
-      emit_1ub(p, 0x70 + cc);
-      emit_1b(p, (GLbyte) rel8);
-      return;
-   }
-
-   emit_2ub(p, 0x0f, 0x80 + cc);
-   emit_1i(p, rel32);
-}
-
-/* Emit a conditional jump whose target is not known yet.  The long
- * (32-bit offset) form is always used, with a zero placeholder for
- * the offset.  The returned pointer is the position just past the
- * instruction; hand it to do_fixup() once the target is reached.
- */
-static GLubyte *emit_jcc_forward( struct x86_program *p,
-                                  GLuint cc )
-{
-   GLubyte *after_jump;
-
-   emit_2ub(p, 0x0f, 0x80 + cc);
-   emit_1i(p, 0);
-
-   after_jump = get_label(p);
-   return after_jump;
-}
-
-/* Fixup offset from forward jump:
- *
- * 'fixup' is the pointer returned by emit_jcc_forward(), i.e. the
- * address just past the jump.  The four bytes before it hold the
- * placeholder offset, which is overwritten with the distance from
- * 'fixup' to the current cursor.
- *
- * The offset is stored byte by byte, least significant first, because
- * 'fixup - 4' can be at any byte offset and a store through a cast
- * 'int *' would be a misaligned, type-punned access.
- */
-static void do_fixup( struct x86_program *p,
-                      GLubyte *fixup )
-{
-   const GLint rel = (GLint) (get_label(p) - fixup);
-   const GLuint bits = (GLuint) rel;
-
-   fixup[-4] = (GLubyte) (bits & 0xff);
-   fixup[-3] = (GLubyte) ((bits >> 8) & 0xff);
-   fixup[-2] = (GLubyte) ((bits >> 16) & 0xff);
-   fixup[-1] = (GLubyte) ((bits >> 24) & 0xff);
-}
-
-/* Emit the one-byte PUSH of a register (0x50 + register index) and
- * record that the stack has grown by four bytes.
- */
-static void emit_push( struct x86_program *p,
-                       struct x86_reg reg )
-{
-   GLubyte opcode;
-
-   assert(reg.mod == mod_REG);
-
-   opcode = 0x50 + reg.idx;
-   emit_1ub(p, opcode);
-   p->stack_offset += 4;
-}
-
-/* Emit the one-byte POP of a register (0x58 + register index) and
- * record that the stack has shrunk by four bytes.
- */
-static void emit_pop( struct x86_program *p,
-                      struct x86_reg reg )
-{
-   GLubyte opcode;
-
-   assert(reg.mod == mod_REG);
-
-   opcode = 0x58 + reg.idx;
-   emit_1ub(p, opcode);
-   p->stack_offset -= 4;
-}
-
-/* Emit the one-byte INC of a register (0x40 + register index).
- */
-static void emit_inc( struct x86_program *p,
-                      struct x86_reg reg )
-{
-   GLubyte opcode;
-
-   assert(reg.mod == mod_REG);
-
-   opcode = 0x40 + reg.idx;
-   emit_1ub(p, opcode);
-}
-
-/* Emit the one-byte DEC of a register (0x48 + register index).
- */
-static void emit_dec( struct x86_program *p,
-                      struct x86_reg reg )
-{
-   GLubyte opcode;
-
-   assert(reg.mod == mod_REG);
-
-   opcode = 0x48 + reg.idx;
-   emit_1ub(p, opcode);
-}
-
-/* Emit a near RET (single byte 0xc3).
- */
-static void emit_ret( struct x86_program *p )
-{
-   const GLubyte opcode_ret = 0xc3;
-
-   emit_1ub(p, opcode_ret);
-}
-
-
-
-
-/* Build a modRM byte + possible SIB byte and displacement.
- *
- *   reg    - goes in the modRM 'reg' field; must be a plain register.
- *   regmem - goes in the 'mod' and 'r/m' fields; either a register
- *            (mod_REG) or a base-register memory reference with no,
- *            8-bit or 32-bit displacement.
- *
- * No treatment of general SIB indexing.  BZZT - no way to encode an
- * absolute address.
- *
- * NOTE(review): in the x86 encoding, mod_INDIRECT with r/m == 5 means
- * "disp32, no base" rather than [EBP]; nothing here guards against
- * that combination -- confirm callers never produce it.
- */
-static void emit_modrm( struct x86_program *p,
-                        struct x86_reg reg,
-                        struct x86_reg regmem )
-{
-   GLubyte val = 0;
-
-   assert(reg.mod == mod_REG);
-
-   val |= regmem.mod << 6;     /* mod field */
-   val |= reg.idx << 3;        /* reg field */
-   val |= regmem.idx;          /* r/m field */
-
-   /* Passing 0 as the function name keeps these bytes out of the
-    * disassem() trace.
-    */
-   emit_1ub_fn(p, val, 0);
-
-   /* Oh-oh we've stumbled into the SIB thing.
-    *
-    * For a memory operand, r/m == reg_SP does not mean "base is ESP";
-    * it announces a SIB byte.  0x24 is the SIB for base ESP with no
-    * index.  This applies to memory forms only: with mod_REG the same
-    * r/m value just names register 4 (e.g. XMM4) and a SIB byte there
-    * would corrupt the instruction stream.
-    */
-   if (regmem.mod != mod_REG && regmem.idx == reg_SP) {
-      emit_1ub_fn(p, 0x24, 0);         /* simplistic! */
-   }
-
-   switch (regmem.mod) {
-   case mod_REG:
-   case mod_INDIRECT:
-      break;
-   case mod_DISP8:
-      emit_1b(p, regmem.disp);
-      break;
-   case mod_DISP32:
-      emit_1i(p, regmem.disp);
-      break;
-   }
-}
-
-/* Two-operand instructions usually come as a pair of opcodes: one
- * for "register <- register/memory" and one for "memory <- register".
- * Pick the right opcode from the kind of destination and emit it
- * followed by the modRM encoding of the operands.
- *
- *   op_dst_is_reg - opcode used when dst is a register
- *   op_dst_is_mem - opcode used when dst is a memory reference; src
- *                   must then be a register
- */
-static void emit_op_modrm( struct x86_program *p,
-                           GLubyte op_dst_is_reg,
-                           GLubyte op_dst_is_mem,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   if (dst.mod == mod_REG) {
-      /* Register destination: dst occupies the modRM reg field. */
-      emit_1ub_fn(p, op_dst_is_reg, 0);
-      emit_modrm(p, dst, src);
-      return;
-   }
-
-   switch (dst.mod) {
-   case mod_INDIRECT:
-   case mod_DISP8:
-   case mod_DISP32:
-      /* Memory destination: the operands swap roles in modRM. */
-      assert(src.mod == mod_REG);
-      emit_1ub_fn(p, op_dst_is_mem, 0);
-      emit_modrm(p, src, dst);
-      break;
-   default:
-      break;
-   }
-}
-
-/* 32-bit MOV: opcode 0x8b loads into a register, 0x89 stores to
- * memory.
- */
-static void emit_mov( struct x86_program *p,
-                      struct x86_reg dst,
-                      struct x86_reg src )
-{
-   const GLubyte op_to_reg = 0x8b;
-   const GLubyte op_to_mem = 0x89;
-
-   emit_op_modrm(p, op_to_reg, op_to_mem, dst, src);
-}
-
-/* 32-bit XOR: opcode 0x33 for a register destination, 0x31 for a
- * memory destination.
- */
-static void emit_xor( struct x86_program *p,
-                      struct x86_reg dst,
-                      struct x86_reg src )
-{
-   const GLubyte op_to_reg = 0x33;
-   const GLubyte op_to_mem = 0x31;
-
-   emit_op_modrm(p, op_to_reg, op_to_mem, dst, src);
-}
-
-/* 32-bit CMP: opcode 0x3b when the first operand is a register,
- * 0x39 when it is a memory reference.
- */
-static void emit_cmp( struct x86_program *p,
-                      struct x86_reg dst,
-                      struct x86_reg src )
-{
-   const GLubyte op_to_reg = 0x3b;
-   const GLubyte op_to_mem = 0x39;
-
-   emit_op_modrm(p, op_to_reg, op_to_mem, dst, src);
-}
-
-/* MOVLPS: two-byte-opcode escape followed by 0x12 (load into
- * register) or 0x13 (store to memory).
- */
-static void emit_movlps( struct x86_program *p,
-                         struct x86_reg dst,
-                         struct x86_reg src )
-{
-   const GLubyte op_load = 0x12;
-   const GLubyte op_store = 0x13;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, op_load, op_store, dst, src);
-}
-
-/* MOVHPS: two-byte-opcode escape followed by 0x16 (load into
- * register) or 0x17 (store to memory).
- */
-static void emit_movhps( struct x86_program *p,
-                         struct x86_reg dst,
-                         struct x86_reg src )
-{
-   const GLubyte op_load = 0x16;
-   const GLubyte op_store = 0x17;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, op_load, op_store, dst, src);
-}
-
-/* MOVD: 0x66 prefix and two-byte-opcode escape, then 0x6e when the
- * destination is a register or 0x7e when it is a memory reference.
- */
-static void emit_movd( struct x86_program *p,
-                       struct x86_reg dst,
-                       struct x86_reg src )
-{
-   const GLubyte op_load = 0x6e;
-   const GLubyte op_store = 0x7e;
-
-   emit_2ub(p, 0x66, X86_TWOB);
-   emit_op_modrm(p, op_load, op_store, dst, src);
-}
-
-/* MOVSS: 0xF3 prefix and two-byte-opcode escape, then 0x10 (load
- * into register) or 0x11 (store to memory).
- */
-static void emit_movss( struct x86_program *p,
-                        struct x86_reg dst,
-                        struct x86_reg src )
-{
-   const GLubyte op_load = 0x10;
-   const GLubyte op_store = 0x11;
-
-   emit_2ub(p, 0xF3, X86_TWOB);
-   emit_op_modrm(p, op_load, op_store, dst, src);
-}
-
-/* MOVAPS: two-byte-opcode escape followed by 0x28 (load into
- * register) or 0x29 (store to memory).
- */
-static void emit_movaps( struct x86_program *p,
-                         struct x86_reg dst,
-                         struct x86_reg src )
-{
-   const GLubyte op_load = 0x28;
-   const GLubyte op_store = 0x29;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, op_load, op_store, dst, src);
-}
-
-/* MOVUPS: two-byte-opcode escape followed by 0x10 (load into
- * register) or 0x11 (store to memory).
- */
-static void emit_movups( struct x86_program *p,
-                         struct x86_reg dst,
-                         struct x86_reg src )
-{
-   const GLubyte op_load = 0x10;
-   const GLubyte op_store = 0x11;
-
-   emit_1ub(p, X86_TWOB);
-   emit_op_modrm(p, op_load, op_store, dst, src);
-}
-
-/* SSE operations often only have one format, with dest constrained to
- * be a register:
- */
-/* MULPS dst, src: escape byte + 0x59.  dst must be a register (it is
- * placed in the modRM reg field).
- */
-static void emit_mulps( struct x86_program *p,
-                        struct x86_reg dst,
-                        struct x86_reg src )
-{
-   const GLubyte opcode = 0x59;
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* ADDPS dst, src: escape byte + 0x58.  dst must be a register (it is
- * placed in the modRM reg field).
- */
-static void emit_addps( struct x86_program *p,
-                        struct x86_reg dst,
-                        struct x86_reg src )
-{
-   const GLubyte opcode = 0x58;
-
-   emit_2ub(p, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* CVTPS2DQ dst, src: 0x66 prefix, escape byte, 0x5B.  dst must be a
- * register.
- */
-static void emit_cvtps2dq( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x5B;
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* PACKSSDW dst, src: 0x66 prefix, escape byte, 0x6B.  dst must be a
- * register.
- */
-static void emit_packssdw( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x6B;
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* PACKSSWB dst, src: 0x66 prefix, escape byte, 0x63.  dst must be a
- * register.
- */
-static void emit_packsswb( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x63;
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* PACKUSWB dst, src: 0x66 prefix, escape byte, 0x67.  dst must be a
- * register.
- */
-static void emit_packuswb( struct x86_program *p,
-                           struct x86_reg dst,
-                           struct x86_reg src )
-{
-   const GLubyte opcode = 0x67;
-
-   emit_3ub(p, 0x66, X86_TWOB, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* LEA dst, src (opcode 0x8d): load the effective address described
- * by 'src' into register 'dst'.
- */
-static void emit_lea( struct x86_program *p,
-                      struct x86_reg dst,
-                      struct x86_reg src )
-{
-   const GLubyte opcode = 0x8d;
-
-   emit_1ub(p, opcode);
-   emit_modrm(p, dst, src);
-}
-
-/* dst = src + value, implemented as an LEA of 'src' displaced by
- * 'value'.
- */
-static void emit_add_imm( struct x86_program *p,
-                          struct x86_reg dst,
-                          struct x86_reg src,
-                          GLint value )
-{
-   struct x86_reg displaced = make_disp(src, value);
-
-   emit_lea(p, dst, displaced);
-}
-
-/* TEST (opcode 0x85).  'dst' is placed in the modRM reg field and so
- * must be a register; 'src' goes in the r/m field.
- */
-static void emit_test( struct x86_program *p,
-                       struct x86_reg dst,
-                       struct x86_reg src )
-{
-   const GLubyte opcode = 0x85;
-
-   emit_1ub(p, opcode);
-   emit_modrm(p, dst, src);
-}
-
-
-
-
-/**
- * Perform a reduced swizzle: PSHUFD dest, arg0, imm8.
- *
- * x, y, z and w are packed two bits each into the immediate byte,
- * x in bits 0-1 up to w in bits 6-7.
- */
-static void emit_pshufd( struct x86_program *p,
-                         struct x86_reg dest,
-                         struct x86_reg arg0,
-                         GLubyte x,
-                         GLubyte y,
-                         GLubyte z,
-                         GLubyte w)
-{
-   const GLubyte selector = (GLubyte) (x | (y << 2) | (z << 4) | (w << 6));
-
-   emit_3ub(p, 0x66, X86_TWOB, 0x70);
-   emit_modrm(p, dest, arg0);
-   emit_1ub(p, selector);
-}
-
-
-/* Pack floats from 'arg0' into unsigned bytes in 'dest', as a
- * three-instruction sequence operating in place on 'dest' after the
- * initial conversion.
- */
-static void emit_pk4ub( struct x86_program *p,
-                        struct x86_reg dest,
-                        struct x86_reg arg0 )
-{
-   /* float -> int32 */
-   emit_cvtps2dq(p, dest, arg0);
-
-   /* int32 -> int16, then int16 -> uint8 */
-   emit_packssdw(p, dest, dest);
-   emit_packuswb(p, dest, dest);
-}
-