sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
sse_addss(cp->func, dst, tmp);
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
aos_release_xmm_reg(cp, tmp.idx);
store_dest(cp, &op->FullDstRegisters[0], dst);
sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, a*x+c*z, b*y+d*w */
emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
sse_addss(cp->func, dst, tmp);
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
aos_release_xmm_reg(cp, tmp.idx);
store_dest(cp, &op->FullDstRegisters[0], dst);
sse_addss(cp->func, dst, tmp);
emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W));
sse_addss(cp->func, dst, tmp);
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
aos_release_xmm_reg(cp, tmp.idx);
store_dest(cp, &op->FullDstRegisters[0], dst);
sse_divss(cp->func, dst, arg0);
}
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
store_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
/* Extend precision here...
*/
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
store_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;