if (swz != SSE_SWIZZLE_NOOP || negs != 0 || abs != 0) {
struct x86_reg dst = aos_get_xmm_reg(cp);
- if (swz != SSE_SWIZZLE_NOOP) {
+ if (swz != SSE_SWIZZLE_NOOP)
emit_pshufd(cp, dst, arg0, swz);
- arg0 = dst;
- }
+ else
+ sse_movaps(cp->func, dst, arg0);
+ arg0 = dst;
if (negs && negs != 0xf) {
struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
(negs & 2) ? 1 : 0,
(negs & 4) ? 1 : 0,
(negs & 8) ? 1 : 0));
- sse_mulps(cp->func, dst, arg0);
+ sse_mulps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
- arg0 = dst;
}
else if (negs) {
struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
sse_mulps(cp->func, dst, imm_negs);
- arg0 = dst;
}
sse_movaps(cp->func, tmp, arg0);
sse_mulps(cp->func, tmp, neg);
- sse_maxps(cp->func, dst, arg0);
+ sse_maxps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
- arg0 = dst;
}
}
return TRUE;
}
-#if 1
-
-/* The x87 version.
- */
-static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
-{
- struct x86_reg st1 = x86_make_reg( file_x87, 1 );
-
- x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
- x87_fld_src( cp, &op->FullSrcRegisters[1], 0 );
- x87_fmulp( cp->func, st1 );
- x87_fld_src( cp, &op->FullSrcRegisters[0], 1 );
- x87_fld_src( cp, &op->FullSrcRegisters[1], 1 );
- x87_fmulp( cp->func, st1 );
- x87_faddp( cp->func, st1 );
- x87_fld_src( cp, &op->FullSrcRegisters[0], 2 );
- x87_fld_src( cp, &op->FullSrcRegisters[1], 2 );
- x87_fmulp( cp->func, st1 );
- x87_faddp( cp->func, st1 );
-
- x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
-
- return TRUE;
-}
-
-#else
-
/* The dotproduct instructions don't really do that well in sse:
* XXX: produces wrong results -- disabled.
*/
return TRUE;
}
-#endif
-
static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);