emmintrin.h (_mm_cvtsi128_si32): Move earlier.
author Richard Henderson <rth@redhat.com>
Sun, 9 Jan 2005 00:51:31 +0000 (16:51 -0800)
committer Richard Henderson <rth@gcc.gnu.org>
Sun, 9 Jan 2005 00:51:31 +0000 (16:51 -0800)
* config/i386/emmintrin.h (_mm_cvtsi128_si32): Move earlier.
(_mm_cvtsi128_si64x): Likewise.
(_mm_srl_epi64, _mm_srl_epi32, _mm_srl_epi16, _mm_sra_epi32,
_mm_sra_epi16, _mm_sll_epi64, _mm_sll_epi32, _mm_sll_epi16): Use
the _mm_{srl,sll}i_foo counterpart, and _mm_cvtsi128_si32.
* config/i386/i386-modes.def: Add V16HI, V32QI, V4DF, V8SF.
* config/i386/i386-protos.h: Update.
* config/i386/i386.c (print_operand): Add 'H'.
(ix86_fixup_binary_operands): Split out from ...
(ix86_expand_binary_operator): ... here.
(ix86_fixup_binary_operands_no_copy): New.
(ix86_expand_fp_absneg_operator): Handle vector mode results.
(bdesc_2arg): Update names for sse{,2,3}_ prefixes.
(ix86_init_mmx_sse_builtins): Remove *maskncmp* special cases.
(safe_vector_operand): Use CONST0_RTX.
(ix86_expand_binop_builtin): Use ix86_fixup_binary_operands.
(ix86_expand_builtin): Merge CODE_FOR_sse2_maskmovdqu_rex64 and
CODE_FOR_sse2_maskmovdqu.  Special case SSE version of MASKMOVDQU
expansion.  Update names for sse{,2,3}_ prefixes.  Remove *maskncmp*
special cases.
* config/i386/i386.h (IX86_BUILTIN_CMPNGTSS): New.
(IX86_BUILTIN_CMPNGESS): New.
* config/i386/i386.md (UNSPEC_FIX_NOTRUNC): New.
(attr type): Add sselog1.
(attr unit, attr memory): Handle it.
(movti, movti_internal, movti_rex64): Move near other integer moves.
(movtf, movtf_internal): Move near other fp moves.
(SSEMODE, SSEMODEI, vec_setv2df, vec_extractv2df, vec_initv2df,
vec_setv4sf, vec_extractv4sf, vec_initv4sf, movv4sf, movv4sf_internal,
movv2df, movv2df_internal, mov<SSEMODEI>, mov<SSEMODEI>_internal,
movmisalign<SSEMODE>, sse_movups_1, sse_movmskps, sse_movntv4sf,
sse_movhlps, sse_movlhps, sse_storehps, sse_loadhps, sse_storelps,
sse_loadlps, sse_loadss, sse_loadss_1, sse_movss, sse_storess,
sse_shufps, addv4sf3, vmaddv4sf3, subv4sf3, vmsubv4sf3, negv4sf2,
mulv4sf3, vmmulv4sf3, divv4sf3, vmdivv4sf3, rcpv4sf2, vmrcpv4sf2,
rsqrtv4sf2, vmrsqrtv4sf2, sqrtv4sf2, vmsqrtv4sf2, sse_andv4sf3,
sse_nandv4sf3, sse_iorv4sf3, sse_xorv4sf3, sse2_andv2df3,
sse2_nandv2df3, sse2_iorv2df3, sse2_xorv2df3, sse2_andv2di3,
sse2_nandv2di3, sse2_iorv2di3, sse2_xorv2di3, maskcmpv4sf3,
vmmaskcmpv4sf3, sse_comi, sse_ucomi, sse_unpckhps, sse_unpcklps,
smaxv4sf3, vmsmaxv4sf3, sminv4sf3, vmsminv4sf3, cvtpi2ps, cvtps2pi,
cvttps2pi, cvtsi2ss, cvtsi2ssq, cvtss2si, cvtss2siq, cvttss2si,
cvttss2siq, addv2df3, vmaddv2df3, subv2df3, vmsubv2df3, mulv2df3,
vmmulv2df3, divv2df3, vmdivv2df3, smaxv2df3, vmsmaxv2df3, sminv2df3,
vmsminv2df3, sqrtv2df2, vmsqrtv2df2, maskcmpv2df3, vmmaskcmpv2df3,
sse2_comi, sse2_ucomi, sse2_movmskpd, sse2_pmovmskb, sse2_maskmovdqu,
sse2_maskmovdqu_rex64, sse2_movntv2df, sse2_movntv2di, sse2_movntsi,
cvtdq2ps, cvtps2dq, cvttps2dq, cvtdq2pd, cvtpd2dq, cvttpd2dq,
cvtpd2pi, cvttpd2pi, cvtpi2pd, cvtsd2si, cvtsd2siq, cvttsd2si,
cvttsd2siq, cvtsi2sd, cvtsi2sdq, cvtsd2ss, cvtss2sd, cvtpd2ps,
cvtps2pd, addv16qi3, addv8hi3, addv4si3, addv2di3, ssaddv16qi3,
ssaddv8hi3, usaddv16qi3, usaddv8hi3, subv16qi3, subv8hi3, subv4si3,
subv2di3, sssubv16qi3, sssubv8hi3, ussubv16qi3, ussubv8hi3, mulv8hi3,
smulv8hi3_highpart, umulv8hi3_highpart, sse2_umulsidi3,
sse2_umulv2siv2di3, sse2_pmaddwd, sse2_uavgv16qi3, sse2_uavgv8hi3,
sse2_psadbw, sse2_pinsrw, sse2_pextrw, sse2_pshufd, sse2_pshuflw,
sse2_pshufhw, eqv16qi3, eqv8hi3, eqv4si3, gtv16qi3, gtv8hi3,
gtv4si3, umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3, ashrv8hi3,
ashrv4si3, lshrv8hi3, lshrv4si3, lshrv2di3, ashlv8hi3, ashlv4si3,
ashlv2di3, sse2_ashlti3, sse2_lshrti3, sse2_unpckhpd, sse2_unpcklpd,
sse2_packsswb, sse2_packssdw, sse2_packuswb, sse2_punpckhbw,
sse2_punpckhwd, sse2_punpckhdq, sse2_punpcklbw, sse2_punpcklwd,
sse2_punpckldq, sse2_punpcklqdq, sse2_punpckhqdq, sse2_movupd,
sse2_movdqu, sse2_movdq2q, sse2_movdq2q_rex64, sse2_movq2dq,
sse2_movq2dq_rex64, sse2_loadd, sse2_stored, sse2_storehpd,
sse2_loadhpd, sse2_storelpd, sse2_loadlpd, sse2_movsd, sse2_loadsd,
sse2_loadsd_1, sse2_storesd, sse2_shufpd, sse2_clflush, sse2_mfence,
mfence_insn, sse2_lfence, lfence_insn, mwait, monitor, addsubv4sf3,
addsubv2df3, haddv4sf3, haddv2df3, hsubv4sf3, hsubv2df3, movshdup,
movsldup, lddqu, loadddup, movddup): Move to sse.md.  Any with
non-optabs meanings renamed with an "sse{,2,3}_" prefix at the
same time.
(SSEPUSH, push<SSEPUSH>): Remove.
(MMXPUSH, push<MMXPUSH>): Remove.
(sse_movaps, sse_movaps_1, sse_movups): Remove.
(sse2_movapd, sse2_movdqa, sse2_movq): Remove.
(sse2_andti3, sse2_nandti3, sse2_iorti3, sse2_xorti3): Remove.
(sse_clrv4sf, sse_clrv2df, sse2_clrti): Remove.
(maskncmpv4sf3, vmmaskncmpv4sf3): Remove.
(maskncmpv2df3, vmmaskncmpv2df3): Remove.
(ashrv8hi3_ti, ashrv4si3_ti, lshrv8hi3_ti, lshrv4si3_ti): Remove.
(lshrv2di3_ti, ashlv8hi3_ti, ashlv4si3_ti, ashlv2di3_ti): Remove.
* config/i386/athlon.md (athlon_sselog_load): Handle sselog1.
(athlon_sselog_load_k8, athlon_sselog, athlon_sselog_k8): Likewise.
* config/i386/ppro.md (ppro_sse_div_V4SF_load): Fix memory attr.
(ppro_sse_log_V4SF_load): Similarly.  Handle sselog1.
(ppro_sse_log_V4SF): Handle sselog1.
* config/i386/predicates.md (const_0_to_1_operand): New.
(const_0_to_255_mul_8_operand): New.
(const_1_to_31_operand): Rename from const_int_1_31_operand.
(const_2_to_3_operand, const_4_to_7_operand): New.
* config/i386/sse.md: New file.
(SSEMODE12, SSEMODE24, SSEMODE124, SSEMODE248, ssevecsize): New.
(sse_movups): Rename from sse_movups_1.
(sse_loadlss): Rename from sse_loadss_1.
(andv4sf3, iorv4sf3, xorv4sf3, andv2df3): Remove the sse prefix
from the name.
(negv4sf2): Use ix86_expand_fp_absneg_operator.
(absv4sf2, negv2df2, absv2df2): New.
(addv4sf3): Add expander to call ix86_fixup_binary_operands_no_copy.
(subv4sf3, mulv4sf3, divv4sf3, smaxv4sf3, sminv4sf3, andv4sf3,
iorv4sf3, xorv4sf3, addv2df3, subv2df3, mulv2df3, divv2df3,
smaxv2df3, sminv2df3, andv2df3, iorv2df3, xorv2df3, mulv8hi3,
umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3): Likewise.
(sse3_addsubv4sf3): Model correctly.
(sse3_haddv4sf3, sse3_hsubv4sf3, sse3_addsubv2df3, sse3_haddv2df3,
sse3_hsubv2df3, sse2_ashlti3, sse2_lshrti3): Likewise.
(sse_movhlps): Model with vec_select+vec_concat.
(sse_movlhps, sse_unpckhps, sse_unpcklps, sse3_movshdup,
sse3_movsldup, sse_shufps, sse_shufps_1, sse2_unpckhpd, sse3_movddup,
sse2_unpcklpd, sse2_shufpd, sse2_shufpd_1, sse2_punpckhbw,
sse2_punpcklbw, sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq,
sse2_punpckldq, sse2_punpckhqdq, sse2_punpcklqdq, sse2_pshufd,
sse2_pshufd_1, sse2_pshuflw, sse2_pshuflw_1, sse2_pshufhw,
sse2_pshufhw_1): Likewise.
(neg<SSEMODEI>2, one_cmpl<SSEMODEI>2): New.
(add<SSEMODEI>3, sse2_ssadd<SSEMODE12>3, sse2_usadd<SSEMODE12>3,
sub<SSEMODEI>3, sse2_sssub<SSEMODE12>3, sse2_ussub<SSEMODE12>3,
ashr<SSEMODE24>3, lshr<SSEMODE248>3, sse2_eq<SSEMODE124>3,
sse2_gt<SSEMODE124>3, and<SSEMODEI>3, sse_nand<SSEMODEI>3,
ior<SSEMODEI>3, xor<SSEMODEI>3): Macroize from existing patterns.
(addv4sf3, sse_vmaddv4sf3, mulv4sf3, sse_vmmulv4sf3, smaxv4sf3,
sse_vmsmaxv4sf3, sminv4sf3, sse_vmsminv4sf3, addv2df3, sse2_vmaddv2df3,
mulv2df3, sse2_vmmulv2df3, smaxv2df3, sse2_vmsmaxv2df3, sminv2df3,
sse2_vmsminv2df3, umaxv16qi3, smaxv8hi3, uminv16qi3,
sminv8hi3): Mark commutative
operands.  Use ix86_binary_operator_ok.
(sse_unpckhps, sse_unpcklps, sse2_packsswb, sse2_packssdw,
sse2_packuswb, sse2_punpckhbw, sse2_punpcklbw, sse2_punpckhwd,
sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq, sse2_punpckhqdq,
sse2_punpcklqdq): Allow operand2 in memory.
(sse_movhlps, sse_movlhps, sse2_unpckhpd, sse2_unpcklpd,
sse2_movsd): Add memory alternatives.
(sse_storelps): Turn expander into an insn; split after reload.
(sse_storess, sse2_loadhpd, sse2_loadlpd): Add non-xmm inputs.
(sse2_storehpd, sse2_storelpd): Add non-xmm outputs.

From-SVN: r93101

13 files changed:
gcc/ChangeLog
gcc/config/i386/athlon.md
gcc/config/i386/emmintrin.h
gcc/config/i386/i386-modes.def
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/config/i386/ppro.md
gcc/config/i386/predicates.md
gcc/config/i386/sse.md [new file with mode: 0644]
gcc/testsuite/ChangeLog
gcc/testsuite/lib/target-supports.exp

index 243d0eb2bb2c2176c41b42bc2c662ba28e9f3ed5..5238e1e32764c96703732c686cfd486a2362067d 100644 (file)
@@ -1,3 +1,142 @@
+2005-01-08  Richard Henderson  <rth@redhat.com>
+
+       * config/i386/emmintrin.h (_mm_cvtsi128_si32): Move earlier.
+       (_mm_cvtsi128_si64x): Likewise.
+       (_mm_srl_epi64, _mm_srl_epi32, _mm_srl_epi16, _mm_sra_epi32,
+       _mm_sra_epi16, _mm_sll_epi64, _mm_sll_epi32, _mm_sll_epi16): Use
+       the _mm_{srl,sll}i_foo counterpart, and _mm_cvtsi128_si32.
+       * config/i386/i386-modes.def: Add V16HI, V32QI, V4DF, V8SF.
+       * config/i386/i386-protos.h: Update.
+       * config/i386/i386.c (print_operand): Add 'H'.
+       (ix86_fixup_binary_operands): Split out from ...
+       (ix86_expand_binary_operator): ... here.
+       (ix86_fixup_binary_operands_no_copy): New.
+       (ix86_expand_fp_absneg_operator): Handle vector mode results.
+       (bdesc_2arg): Update names for sse{,2,3}_ prefixes.
+       (ix86_init_mmx_sse_builtins): Remove *maskncmp* special cases.
+       (safe_vector_operand): Use CONST0_RTX.
+       (ix86_expand_binop_builtin): Use ix86_fixup_binary_operands.
+       (ix86_expand_builtin): Merge CODE_FOR_sse2_maskmovdqu_rex64 and
+       CODE_FOR_sse2_maskmovdqu.  Special case SSE version of MASKMOVDQU
+       expansion.  Update names for sse{,2,3}_ prefixes.  Remove *maskncmp*
+       special cases.
+       * config/i386/i386.h (IX86_BUILTIN_CMPNGTSS): New.
+       (IX86_BUILTIN_CMPNGESS): New.
+       * config/i386/i386.md (UNSPEC_FIX_NOTRUNC): New.
+       (attr type): Add sselog1.
+       (attr unit, attr memory): Handle it.
+       (movti, movti_internal, movti_rex64): Move near other integer moves.
+       (movtf, movtf_internal): Move near other fp moves.
+       (SSEMODE, SSEMODEI, vec_setv2df, vec_extractv2df, vec_initv2df,
+       vec_setv4sf, vec_extractv4sf, vec_initv4sf, movv4sf, movv4sf_internal,
+       movv2df, movv2df_internal, mov<SSEMODEI>, mov<SSEMODEI>_internal, 
+       movmisalign<SSEMODE>, sse_movups_1, sse_movmskps, sse_movntv4sf,
+       sse_movhlps, sse_movlhps, sse_storehps, sse_loadhps, sse_storelps,
+       sse_loadlps, sse_loadss, sse_loadss_1, sse_movss, sse_storess,
+       sse_shufps, addv4sf3, vmaddv4sf3, subv4sf3, vmsubv4sf3, negv4sf2,
+       mulv4sf3, vmmulv4sf3, divv4sf3, vmdivv4sf3, rcpv4sf2, vmrcpv4sf2,
+       rsqrtv4sf2, vmrsqrtv4sf2, sqrtv4sf2, vmsqrtv4sf2, sse_andv4sf3,
+       sse_nandv4sf3, sse_iorv4sf3, sse_xorv4sf3, sse2_andv2df3, 
+       sse2_nandv2df3, sse2_iorv2df3, sse2_xorv2df3, sse2_andv2di3, 
+       sse2_nandv2di3, sse2_iorv2di3, sse2_xorv2di3, maskcmpv4sf3, 
+       vmmaskcmpv4sf3, sse_comi, sse_ucomi, sse_unpckhps, sse_unpcklps,
+       smaxv4sf3, vmsmaxv4sf3, sminv4sf3, vmsminv4sf3, cvtpi2ps, cvtps2pi,
+       cvttps2pi, cvtsi2ss, cvtsi2ssq, cvtss2si, cvtss2siq, cvttss2si,
+       cvttss2siq, addv2df3, vmaddv2df3, subv2df3, vmsubv2df3, mulv2df3,
+       vmmulv2df3, divv2df3, vmdivv2df3, smaxv2df3, vmsmaxv2df3, sminv2df3,
+       vmsminv2df3, sqrtv2df2, vmsqrtv2df2, maskcmpv2df3, vmmaskcmpv2df3,
+       sse2_comi, sse2_ucomi, sse2_movmskpd, sse2_pmovmskb, sse2_maskmovdqu,
+       sse2_maskmovdqu_rex64, sse2_movntv2df, sse2_movntv2di, sse2_movntsi,
+       cvtdq2ps, cvtps2dq, cvttps2dq, cvtdq2pd, cvtpd2dq, cvttpd2dq,
+       cvtpd2pi, cvttpd2pi, cvtpi2pd, cvtsd2si, cvtsd2siq, cvttsd2si,
+       cvttsd2siq, cvtsi2sd, cvtsi2sdq, cvtsd2ss, cvtss2sd, cvtpd2ps,
+       cvtps2pd, addv16qi3, addv8hi3, addv4si3, addv2di3, ssaddv16qi3,
+       ssaddv8hi3, usaddv16qi3, usaddv8hi3, subv16qi3, subv8hi3, subv4si3,
+       subv2di3, sssubv16qi3, sssubv8hi3, ussubv16qi3, ussubv8hi3, mulv8hi3,
+       smulv8hi3_highpart, umulv8hi3_highpart, sse2_umulsidi3,
+       sse2_umulv2siv2di3, sse2_pmaddwd, sse2_uavgv16qi3, sse2_uavgv8hi3,
+       sse2_psadbw, sse2_pinsrw, sse2_pextrw, sse2_pshufd, sse2_pshuflw,
+       sse2_pshufhw, eqv16qi3, eqv8hi3, eqv4si3, gtv16qi3, gtv8hi3, 
+       gtv4si3, umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3, ashrv8hi3,
+       ashrv4si3, lshrv8hi3, lshrv4si3, lshrv2di3, ashlv8hi3, ashlv4si3,
+       ashlv2di3, sse2_ashlti3, sse2_lshrti3, sse2_unpckhpd, sse2_unpcklpd,
+       sse2_packsswb, sse2_packssdw, sse2_packuswb, sse2_punpckhbw, 
+       sse2_punpckhwd, sse2_punpckhdq, sse2_punpcklbw, sse2_punpcklwd,
+       sse2_punpckldq, sse2_punpcklqdq, sse2_punpckhqdq, sse2_movupd,
+       sse2_movdqu, sse2_movdq2q, sse2_movdq2q_rex64, sse2_movq2dq, 
+       sse2_movq2dq_rex64, sse2_loadd, sse2_stored, sse2_storehpd,
+       sse2_loadhpd, sse2_storelpd, sse2_loadlpd, sse2_movsd, sse2_loadsd,
+       sse2_loadsd_1, sse2_storesd, sse2_shufpd, sse2_clflush, sse2_mfence,
+       mfence_insn, sse2_lfence, lfence_insn, mwait, monitor, addsubv4sf3,
+       addsubv2df3, haddv4sf3, haddv2df3, hsubv4sf3, hsubv2df3, movshdup,
+       movsldup, lddqu, loadddup, movddup): Move to sse.md.  Any with
+       non-optabs meanings renamed with an "sse{,2,3}_" prefix at the
+       same time.
+       (SSEPUSH, push<SSEPUSH>): Remove.
+       (MMXPUSH, push<MMXPUSH>): Remove.
+       (sse_movaps, sse_movaps_1, sse_movups): Remove.
+       (sse2_movapd, sse2_movdqa, sse2_movq): Remove.
+       (sse2_andti3, sse2_nandti3, sse2_iorti3, sse2_xorti3): Remove.
+       (sse_clrv4sf, sse_clrv2df, sse2_clrti): Remove.
+       (maskncmpv4sf3, vmmaskncmpv4sf3): Remove.
+       (maskncmpv2df3, vmmaskncmpv2df3): Remove.
+       (ashrv8hi3_ti, ashrv4si3_ti, lshrv8hi3_ti, lshrv4si3_ti): Remove.
+       (lshrv2di3_ti, ashlv8hi3_ti, ashlv4si3_ti, ashlv2di3_ti): Remove.
+       * config/i386/athlon.md (athlon_sselog_load): Handle sselog1.
+       (athlon_sselog_load_k8, athlon_sselog, athlon_sselog_k8): Likewise.
+       * config/i386/ppro.md (ppro_sse_div_V4SF_load): Fix memory attr.
+       (ppro_sse_log_V4SF_load): Similarly.  Handle sselog1.
+       (ppro_sse_log_V4SF): Handle sselog1.
+       * config/i386/predicates.md (const_0_to_1_operand): New.
+       (const_0_to_255_mul_8_operand): New.
+       (const_1_to_31_operand): Rename from const_int_1_31_operand.
+       (const_2_to_3_operand, const_4_to_7_operand): New.
+       * config/i386/sse.md: New file.
+       (SSEMODE12, SSEMODE24, SSEMODE124, SSEMODE248, ssevecsize): New.
+       (sse_movups): Rename from sse_movups_1.
+       (sse_loadlss): Rename from sse_loadss_1.
+       (andv4sf3, iorv4sf3, xorv4sf3, andv2df3): Remove the sse prefix
+       from the name.
+       (negv4sf2): Use ix86_expand_fp_absneg_operator.
+       (absv4sf2, negv2df2, absv2df2): New.
+       (addv4sf3): Add expander to call ix86_fixup_binary_operands_no_copy.
+       (subv4sf3, mulv4sf3, divv4sf3, smaxv4sf3, sminv4sf3, andv4sf3,
+       iorv4sf3, xorv4sf3, addv2df3, subv2df3, mulv2df3, divv2df3,
+       smaxv2df3, sminv2df3, andv2df3, iorv2df3, xorv2df3, mulv8hi3,
+       umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3): Likewise.
+       (sse3_addsubv4sf3): Model correctly.
+       (sse3_haddv4sf3, sse3_hsubv4sf3, sse3_addsubv2df3, sse3_haddv2df3,
+       sse3_hsubv2df3, sse2_ashlti3, sse2_lshrti3): Likewise.
+       (sse_movhlps): Model with vec_select+vec_concat.
+       (sse_movlhps, sse_unpckhps, sse_unpcklps, sse3_movshdup, 
+       sse3_movsldup, sse_shufps, sse_shufps_1, sse2_unpckhpd, sse3_movddup,
+       sse2_unpcklpd, sse2_shufpd, sse2_shufpd_1, sse2_punpckhbw,
+       sse2_punpcklbw, sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq,
+       sse2_punpckldq, sse2_punpckhqdq, sse2_punpcklqdq, sse2_pshufd,
+       sse2_pshufd_1, sse2_pshuflw, sse2_pshuflw_1, sse2_pshufhw, 
+       sse2_pshufhw_1): Likewise.
+       (neg<SSEMODEI>2, one_cmpl<SSEMODEI>2): New.
+       (add<SSEMODEI>3, sse2_ssadd<SSEMODE12>3, sse2_usadd<SSEMODE12>3,
+       sub<SSEMODEI>3, sse2_sssub<SSEMODE12>3, sse2_ussub<SSEMODE12>3,
+       ashr<SSEMODE24>3, lshr<SSEMODE248>3, sse2_eq<SSEMODE124>3,
+       sse2_gt<SSEMODE124>3, and<SSEMODEI>3, sse_nand<SSEMODEI>3,
+       ior<SSEMODEI>3, xor<SSEMODEI>3): Macroize from existing patterns.       
+       (addv4sf3, sse_vmaddv4sf3, mulv4sf3, sse_vmmulv4sf3, smaxv4sf3,
+       sse_vmsmaxv4sf3, sminv4sf3, sse_vmsminv4sf3, addv2df3, sse2_vmaddv2df3,
+       mulv2df3, sse2_vmmulv2df3, smaxv2df3, sse2_vmsmaxv2df3, sminv2df3,
+       sse2_vmsminv2df3, umaxv16qi3, smaxv8hi3, uminv16qi3,
+       sminv8hi3): Mark commutative
+       operands.  Use ix86_binary_operator_ok.
+       (sse_unpckhps, sse_unpcklps, sse2_packsswb, sse2_packssdw,
+       sse2_packuswb, sse2_punpckhbw, sse2_punpcklbw, sse2_punpckhwd,
+       sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq, sse2_punpckhqdq,
+       sse2_punpcklqdq): Allow operand2 in memory.
+       (sse_movhlps, sse_movlhps, sse2_unpckhpd, sse2_unpcklpd,
+       sse2_movsd): Add memory alternatives.
+       (sse_storelps): Turn expander into an insn; split after reload.
+       (sse_storess, sse2_loadhpd, sse2_loadlpd): Add non-xmm inputs.
+       (sse2_storehpd, sse2_storelpd): Add non-xmm outputs.
+
 2005-01-08  Eric Botcazou  <ebotcazou@libertysurf.fr>
 
        * configure.ac (DWARF-2 debug_line): Use objdump.
index 79c9e023fa6bd8e0a4a613f85884ddc5e2d29d7d..1029a818196fc9d1fdd10a284d1e2397fcd566b8 100644 (file)
 
 (define_insn_reservation "athlon_sselog_load" 3
                         (and (eq_attr "cpu" "athlon")
-                             (and (eq_attr "type" "sselog")
+                             (and (eq_attr "type" "sselog,sselog1")
                                   (eq_attr "memory" "load")))
                         "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
 (define_insn_reservation "athlon_sselog_load_k8" 5
                         (and (eq_attr "cpu" "k8")
-                             (and (eq_attr "type" "sselog")
+                             (and (eq_attr "type" "sselog,sselog1")
                                   (eq_attr "memory" "load")))
                         "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
 (define_insn_reservation "athlon_sselog" 3
                         (and (eq_attr "cpu" "athlon")
-                             (eq_attr "type" "sselog"))
+                             (eq_attr "type" "sselog,sselog1"))
                         "athlon-vector,athlon-fpsched,athlon-fmul*2")
 (define_insn_reservation "athlon_sselog_k8" 3
                         (and (eq_attr "cpu" "k8")
-                             (eq_attr "type" "sselog"))
+                             (eq_attr "type" "sselog,sselog1"))
                         "athlon-double,athlon-fpsched,athlon-fmul")
 ;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
 (define_insn_reservation "athlon_ssecmp_load" 2
index 49c6a7f8147672ce21120a12c73ece94e71049f5..2d2b710d734e654830c7b086fe0494345a995a02 100644 (file)
@@ -177,6 +177,22 @@ _mm_storer_pd (double *__P, __m128d __A)
   __builtin_ia32_storeapd (__P, __tmp);
 }
 
+static __inline int
+_mm_cvtsi128_si32 (__m128i __A)
+{
+  int __tmp;
+  __builtin_ia32_stored (&__tmp, (__v4si)__A);
+  return __tmp;
+}
+
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsi128_si64x (__m128i __A)
+{
+  return __builtin_ia32_movdq2q ((__v2di)__A);
+}
+#endif
+
 /* Sets the low DPFP value of A from the low value of B.  */
 static __inline __m128d
 _mm_move_sd (__m128d __A, __m128d __B)
@@ -1157,115 +1173,118 @@ _mm_mul_epu32 (__m128i __A, __m128i __B)
 }
 
 static __inline __m128i
-_mm_sll_epi16 (__m128i __A, __m128i __B)
+_mm_slli_epi16 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
 }
 
 static __inline __m128i
-_mm_sll_epi32 (__m128i __A, __m128i __B)
+_mm_slli_epi32 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
 }
 
 static __inline __m128i
-_mm_sll_epi64 (__m128i __A, __m128i __B)
+_mm_slli_epi64 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
 }
 
 static __inline __m128i
-_mm_sra_epi16 (__m128i __A, __m128i __B)
+_mm_srai_epi16 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
 }
 
 static __inline __m128i
-_mm_sra_epi32 (__m128i __A, __m128i __B)
+_mm_srai_epi32 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
 }
 
-static __inline __m128i
-_mm_srl_epi16 (__m128i __A, __m128i __B)
+#if 0
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, const int __B)
 {
-  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B);
+  return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
 }
 
-static __inline __m128i
-_mm_srl_epi32 (__m128i __A, __m128i __B)
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, const int __B)
 {
-  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B);
+  return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
 }
+#else
+#define _mm_srli_si128(__A, __B) \
+  ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8))
+#define _mm_slli_si128(__A, __B) \
+  ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8))
+#endif
 
 static __inline __m128i
-_mm_srl_epi64 (__m128i __A, __m128i __B)
+_mm_srli_epi16 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
 }
 
 static __inline __m128i
-_mm_slli_epi16 (__m128i __A, int __B)
+_mm_srli_epi32 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
 }
 
 static __inline __m128i
-_mm_slli_epi32 (__m128i __A, int __B)
+_mm_srli_epi64 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
 }
 
 static __inline __m128i
-_mm_slli_epi64 (__m128i __A, int __B)
+_mm_sll_epi16 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+  return _mm_slli_epi16 (__A, _mm_cvtsi128_si32 (__B));
 }
 
 static __inline __m128i
-_mm_srai_epi16 (__m128i __A, int __B)
+_mm_sll_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+  return _mm_slli_epi32 (__A, _mm_cvtsi128_si32 (__B));
 }
 
 static __inline __m128i
-_mm_srai_epi32 (__m128i __A, int __B)
+_mm_sll_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+  return _mm_slli_epi64 (__A, _mm_cvtsi128_si32 (__B));
 }
 
-#if 0
-static __m128i __attribute__((__always_inline__))
-_mm_srli_si128 (__m128i __A, const int __B)
+static __inline __m128i
+_mm_sra_epi16 (__m128i __A, __m128i __B)
 {
-  return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
+  return _mm_srai_epi16 (__A, _mm_cvtsi128_si32 (__B));
 }
 
-static __m128i __attribute__((__always_inline__))
-_mm_srli_si128 (__m128i __A, const int __B)
+static __inline __m128i
+_mm_sra_epi32 (__m128i __A, __m128i __B)
 {
-  return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
+  return _mm_srai_epi32 (__A, _mm_cvtsi128_si32 (__B));
 }
-#endif
-#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
-#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
 
 static __inline __m128i
-_mm_srli_epi16 (__m128i __A, int __B)
+_mm_srl_epi16 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+  return _mm_srli_epi16 (__A, _mm_cvtsi128_si32 (__B));
 }
 
 static __inline __m128i
-_mm_srli_epi32 (__m128i __A, int __B)
+_mm_srl_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+  return _mm_srli_epi32 (__A, _mm_cvtsi128_si32 (__B));
 }
 
 static __inline __m128i
-_mm_srli_epi64 (__m128i __A, int __B)
+_mm_srl_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+  return _mm_srli_epi64 (__A, _mm_cvtsi128_si32 (__B));
 }
 
 static __inline __m128i
@@ -1470,22 +1489,6 @@ _mm_cvtsi64x_si128 (long long __A)
 }
 #endif
 
-static __inline int
-_mm_cvtsi128_si32 (__m128i __A)
-{
-  int __tmp;
-  __builtin_ia32_stored (&__tmp, (__v4si)__A);
-  return __tmp;
-}
-
-#ifdef __x86_64__
-static __inline long long
-_mm_cvtsi128_si64x (__m128i __A)
-{
-  return __builtin_ia32_movdq2q ((__v2di)__A);
-}
-#endif
-
 #endif /* __SSE2__  */
 
 #endif /* _EMMINTRIN_H_INCLUDED */
index ea35a2a1768ee12c9effa3dbd31587266f7c5b8a..6a6e68d8b1fb7baff4cef479cf8a00f286a69fe3 100644 (file)
@@ -70,6 +70,10 @@ VECTOR_MODES (FLOAT, 8);      /*            V4HF V2SF */
 VECTOR_MODES (FLOAT, 16);     /*       V8HF V4SF V2DF */
 VECTOR_MODE (INT, DI, 4);     /*                 V4DI */
 VECTOR_MODE (INT, SI, 8);     /*                 V8SI */
+VECTOR_MODE (INT, HI, 16);    /*                V16HI */
+VECTOR_MODE (INT, QI, 32);    /*                V32QI */
+VECTOR_MODE (FLOAT, DF, 4);   /*                 V4DF */
+VECTOR_MODE (FLOAT, SF, 8);   /*                 V8SF */
 
 /* The symbol Pmode stands for one of the above machine modes (usually SImode).
    The tm.h file specifies which one.  It is not a distinct mode.  */
index 58e4e23471b95fc32c58bfe15eefd14547a44f07..5920c9f1fdd2380c2271b4cc686cb313ed994f4f 100644 (file)
@@ -126,6 +126,10 @@ extern void ix86_expand_clear (rtx);
 extern void ix86_expand_move (enum machine_mode, rtx[]);
 extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
 extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
+extern rtx ix86_fixup_binary_operands (enum rtx_code,
+                                      enum machine_mode, rtx[]);
+extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
+                                               enum machine_mode, rtx[]);
 extern void ix86_expand_binary_operator (enum rtx_code,
                                         enum machine_mode, rtx[]);
 extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
index 0031959278597403bb99f1fb86c027a0d134ee36..7edd97c3c991624c31ed186060cdbec4f2fd9c0a 100644 (file)
@@ -6312,6 +6312,7 @@ get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
    P -- if PIC, print an @PLT suffix.
    X -- don't print any sort of PIC '@' suffix for a symbol.
    & -- print some in-use local-dynamic symbol name.
+   H -- print a memory address offset by 8; used for sse high-parts
  */
 
 void
@@ -6539,6 +6540,13 @@ print_operand (FILE *file, rtx x, int code)
 #endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;
+
+       case 'H':
+         /* It doesn't actually matter what mode we use here, as we're
+            only going to use this for printing.  */
+         x = adjust_address_nv (x, DImode, 8);
+         break;
+
        case '+':
          {
            rtx x;
@@ -7714,16 +7722,16 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 }
 
 
-/* Attempt to expand a binary operator.  Make the expansion closer to the
-   actual machine, then just general_operand, which will allow 3 separate
-   memory references (one output, two input) in a single insn.  */
+/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
+   destination to use for the operation.  If different from the true
+   destination in operands[0], a copy operation will be required.  */
 
-void
-ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
-                            rtx operands[])
+rtx
+ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
+                           rtx operands[])
 {
   int matching_memory;
-  rtx src1, src2, dst, op, clob;
+  rtx src1, src2, dst;
 
   dst = operands[0];
   src1 = operands[1];
@@ -7780,7 +7788,37 @@ ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
        src2 = force_reg (mode, src2);
     }
 
-  /* Emit the instruction.  */
+  src1 = operands[1] = src1;
+  src2 = operands[2] = src2;
+  return dst;
+}
+
+/* Similarly, but assume that the destination has already been
+   set up properly.  */
+
+void
+ix86_fixup_binary_operands_no_copy (enum rtx_code code,
+                                   enum machine_mode mode, rtx operands[])
+{
+  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+  gcc_assert (dst == operands[0]);
+}
+
+/* Attempt to expand a binary operator.  Make the expansion closer to the
+   actual machine, than just general_operand, which will allow 3 separate
+   memory references (one output, two input) in a single insn.  */
+
+void
+ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
+                            rtx operands[])
+{
+  rtx src1, src2, dst, op, clob;
+
+  dst = ix86_fixup_binary_operands (code, mode, operands);
+  src1 = operands[1];
+  src2 = operands[2];
+
+ /* Emit the instruction.  */
 
   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
   if (reload_in_progress)
@@ -7916,13 +7954,28 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
   rtx mask, set, use, clob, dst, src;
   bool matching_memory;
   bool use_sse = false;
+  bool vector_mode = VECTOR_MODE_P (mode);
+  enum machine_mode elt_mode = mode;
+  enum machine_mode vec_mode = VOIDmode;
 
+  if (vector_mode)
+    {
+      elt_mode = GET_MODE_INNER (mode);
+      vec_mode = mode;
+      use_sse = true;
+    }
   if (TARGET_SSE_MATH)
     {
       if (mode == SFmode)
-       use_sse = true;
+       {
+         use_sse = true;
+         vec_mode = V4SFmode;
+       }
       else if (mode == DFmode && TARGET_SSE2)
-       use_sse = true;
+       {
+         use_sse = true;
+         vec_mode = V2DFmode;
+       }
     }
 
   /* NEG and ABS performed with SSE use bitwise mask operations.
@@ -7931,9 +7984,10 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
     {
       HOST_WIDE_INT hi, lo;
       int shift = 63;
+      rtvec v;
 
       /* Find the sign bit, sign extended to 2*HWI.  */
-      if (mode == SFmode)
+      if (elt_mode == SFmode)
         lo = 0x80000000, hi = lo < 0;
       else if (HOST_BITS_PER_WIDE_INT >= 64)
         lo = (HOST_WIDE_INT)1 << shift, hi = -1;
@@ -7948,15 +8002,32 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
       /* Force this value into the low part of a fp vector constant.  */
       mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
       mask = gen_lowpart (mode, mask);
-      if (mode == SFmode)
-        mask = gen_rtx_CONST_VECTOR (V4SFmode,
-                                    gen_rtvec (4, mask, CONST0_RTX (SFmode),
-                                               CONST0_RTX (SFmode),
-                                               CONST0_RTX (SFmode)));
-      else
-        mask = gen_rtx_CONST_VECTOR (V2DFmode,
-                                    gen_rtvec (2, mask, CONST0_RTX (DFmode)));
-      mask = force_reg (GET_MODE (mask), mask);
+
+      switch (mode)
+       {
+       case SFmode:
+         v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
+                        CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+         break;
+
+       case DFmode:
+         v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
+         break;
+
+       case V4SFmode:
+         v = gen_rtvec (4, mask, mask, mask, mask);
+         break;
+
+       case V4DFmode:
+         v = gen_rtvec (2, mask, mask);
+         break;
+
+       default:
+         gcc_unreachable ();
+       }
+
+      mask = gen_rtx_CONST_VECTOR (vec_mode, v);
+      mask = force_reg (vec_mode, mask);
     }
   else
     {
@@ -7982,11 +8053,20 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
   if (MEM_P (src) && !matching_memory)
     src = force_reg (mode, src);
 
-  set = gen_rtx_fmt_e (code, mode, src);
-  set = gen_rtx_SET (VOIDmode, dst, set);
-  use = gen_rtx_USE (VOIDmode, mask);
-  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
+  if (vector_mode)
+    {
+      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
+      set = gen_rtx_SET (VOIDmode, dst, set);
+      emit_insn (set);
+    }
+  else
+    {
+      set = gen_rtx_fmt_e (code, mode, src);
+      set = gen_rtx_SET (VOIDmode, dst, set);
+      use = gen_rtx_USE (VOIDmode, mask);
+      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
+    }
 
   if (dst != operands[0])
     emit_move_insn (operands[0], dst);
@@ -12128,45 +12208,49 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
   { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
   { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
-  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
-  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
-  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
-  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
-
-  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
-  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
-  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
-  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
+  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
+
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
-  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
-  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
-  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
-  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT,
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE,
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
-  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
+  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
+    BUILTIN_DESC_SWAP_OPERANDS },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
+    BUILTIN_DESC_SWAP_OPERANDS },
+  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
 
   { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
   { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
-  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
-  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
 
-  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
   { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
-  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
-  { MASK_SSE, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
 
   { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
   { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
@@ -12229,9 +12313,9 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
 
-  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
-  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
-  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
 
   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
@@ -12260,45 +12344,45 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
   { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
   { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
-
-  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
-  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
-  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
-  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
+  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
+
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
-  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
-  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
-  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
-  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT,
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE,
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
     BUILTIN_DESC_SWAP_OPERANDS },
-  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
-  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
 
   { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
   { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
@@ -12314,32 +12398,32 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
   { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
 
-  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
-  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
-  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
-  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
-  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
-  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
-  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
-  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
+  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
   { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
@@ -12359,45 +12443,37 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
   { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
   { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
   { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
   { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
   { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
   { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
   { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
   { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
-  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
 
   /* SSE3 MMX */
-  { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
-  { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
-  { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
-  { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
-  { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
-  { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
+  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
+  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
+  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
+  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
+  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
+  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
 };
 
 static const struct builtin_description bdesc_1arg[] =
@@ -12406,49 +12482,45 @@ static const struct builtin_description bdesc_1arg[] =
   { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
 
   { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
-  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
-  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
 
-  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
-  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
-  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
-  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
-  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
-  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
+  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
-  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
-
-  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
-  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
-  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
-  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
 
-  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
 
   /* SSE3 */
-  { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
-  { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
-  { MASK_SSE3, CODE_FOR_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
+  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
+  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
+  { MASK_SSE3, CODE_FOR_sse3_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
 };
 
 void
@@ -12857,16 +12929,12 @@ ix86_init_mmx_sse_builtins (void)
        }
 
       /* Override for comparisons.  */
-      if (d->icode == CODE_FOR_maskcmpv4sf3
-         || d->icode == CODE_FOR_maskncmpv4sf3
-         || d->icode == CODE_FOR_vmmaskcmpv4sf3
-         || d->icode == CODE_FOR_vmmaskncmpv4sf3)
+      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
+         || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;
 
-      if (d->icode == CODE_FOR_maskcmpv2df3
-         || d->icode == CODE_FOR_maskncmpv2df3
-         || d->icode == CODE_FOR_vmmaskcmpv2df3
-         || d->icode == CODE_FOR_vmmaskncmpv2df3)
+      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
+         || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;
 
       def_builtin (d->mask, d->name, type, d->code);
@@ -13118,17 +13186,8 @@ ix86_init_mmx_sse_builtins (void)
 static rtx
 safe_vector_operand (rtx x, enum machine_mode mode)
 {
-  if (x != const0_rtx)
-    return x;
-  x = gen_reg_rtx (mode);
-
-  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
-    emit_insn (gen_mmx_clrdi (mode == DImode ? x
-                             : gen_rtx_SUBREG (DImode, x, 0)));
-  else
-    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
-                               : gen_rtx_SUBREG (V4SFmode, x, 0),
-                               CONST0_RTX (V4SFmode)));
+  if (x == const0_rtx)
+    x = CONST0_RTX (mode);
   return x;
 }
 
@@ -13137,7 +13196,7 @@ safe_vector_operand (rtx x, enum machine_mode mode)
 static rtx
 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
 {
-  rtx pat;
+  rtx pat, xops[3];
   tree arg0 = TREE_VALUE (arglist);
   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
@@ -13169,20 +13228,17 @@ ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
       || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
     abort ();
 
-  if ((optimize && !register_operand (op0, mode0))
-      || !(*insn_data[icode].operand[1].predicate) (op0, mode0))
+  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
     op0 = copy_to_mode_reg (mode0, op0);
-  if ((optimize && !register_operand (op1, mode1))
-      || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
+  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
     op1 = copy_to_mode_reg (mode1, op1);
 
-  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
-     yet one of the two must not be a memory.  This is normally enforced
-     by expanders, but we didn't bother to create one here.  */
-  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
-    op0 = copy_to_mode_reg (mode0, op0);
+  xops[0] = target;
+  xops[1] = op0;
+  xops[2] = op1;
+  target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
 
-  pat = GEN_FCN (icode) (target, op0, op1);
+  pat = GEN_FCN (icode) (target, xops[1], xops[2]);
   if (! pat)
     return 0;
   emit_insn (pat);
@@ -13495,8 +13551,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex
                  : CODE_FOR_mmx_maskmovq)
-              : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
-                 : CODE_FOR_sse2_maskmovdqu));
+              : CODE_FOR_sse2_maskmovdqu);
       /* Note the arg order is different from the operand order.  */
       arg1 = TREE_VALUE (arglist);
       arg2 = TREE_VALUE (TREE_CHAIN (arglist));
@@ -13508,6 +13563,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       mode1 = insn_data[icode].operand[1].mode;
       mode2 = insn_data[icode].operand[2].mode;
 
+      if (fcode == IX86_BUILTIN_MASKMOVDQU)
+       {
+         op0 = force_reg (Pmode, op0);
+         op0 = gen_rtx_MEM (V16QImode, op0);
+       }
+
       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
@@ -13521,20 +13582,20 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       return 0;
 
     case IX86_BUILTIN_SQRTSS:
-      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
+      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
     case IX86_BUILTIN_RSQRTSS:
-      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
+      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
     case IX86_BUILTIN_RCPSS:
-      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
+      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
 
     case IX86_BUILTIN_LOADAPS:
-      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
+      return ix86_expand_unop_builtin (CODE_FOR_movv4sf, arglist, target, 1);
 
     case IX86_BUILTIN_LOADUPS:
       return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
 
     case IX86_BUILTIN_STOREAPS:
-      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
+      return ix86_expand_store_builtin (CODE_FOR_movv4sf, arglist);
 
     case IX86_BUILTIN_STOREUPS:
       return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
@@ -13794,9 +13855,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
 
     case IX86_BUILTIN_SSE_ZERO:
-      target = gen_reg_rtx (V4SFmode);
-      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
-      return target;
+      return CONST0_RTX (V4SFmode);
 
     case IX86_BUILTIN_MMX_ZERO:
       target = gen_reg_rtx (DImode);
@@ -13804,20 +13863,17 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       return target;
 
     case IX86_BUILTIN_CLRTI:
-      target = gen_reg_rtx (V2DImode);
-      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
-      return target;
-
+      return const0_rtx;
 
     case IX86_BUILTIN_SQRTSD:
-      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
+      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
     case IX86_BUILTIN_LOADAPD:
-      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
+      return ix86_expand_unop_builtin (CODE_FOR_movv2df, arglist, target, 1);
     case IX86_BUILTIN_LOADUPD:
       return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
 
     case IX86_BUILTIN_STOREAPD:
-      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
+      return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist);
     case IX86_BUILTIN_STOREUPD:
       return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
 
@@ -13825,7 +13881,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
 
     case IX86_BUILTIN_STORESD:
-      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
+      return ix86_expand_store_builtin (CODE_FOR_sse2_storelpd, arglist);
 
     case IX86_BUILTIN_SETPD1:
       target = assign_386_stack_local (DFmode, 0);
@@ -13846,11 +13902,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
       op0 = gen_reg_rtx (V2DFmode);
-      emit_insn (gen_sse2_movapd (op0, target));
+      emit_move_insn (op0, target);
       return op0;
 
     case IX86_BUILTIN_LOADRPD:
-      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
+      target = ix86_expand_unop_builtin (CODE_FOR_movv2df, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
       emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
       return target;
@@ -13862,14 +13918,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       return target;
 
     case IX86_BUILTIN_STOREPD1:
-      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
+      return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist);
     case IX86_BUILTIN_STORERPD:
-      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
+      return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist);
 
     case IX86_BUILTIN_CLRPD:
-      target = gen_reg_rtx (V2DFmode);
-      emit_insn (gen_sse_clrv2df (target));
-      return target;
+      return CONST0_RTX (V2DFmode);
 
     case IX86_BUILTIN_MFENCE:
        emit_insn (gen_sse2_mfence ());
@@ -13896,14 +13950,14 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
 
     case IX86_BUILTIN_LOADDQA:
-      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
+      return ix86_expand_unop_builtin (CODE_FOR_movv2di, arglist, target, 1);
     case IX86_BUILTIN_LOADDQU:
       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
     case IX86_BUILTIN_LOADD:
       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
 
     case IX86_BUILTIN_STOREDQA:
-      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
+      return ix86_expand_store_builtin (CODE_FOR_movv2di, arglist);
     case IX86_BUILTIN_STOREDQU:
       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
     case IX86_BUILTIN_STORED:
@@ -13922,7 +13976,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
        op1 = copy_to_mode_reg (SImode, op1);
       if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
-      emit_insn (gen_monitor (op0, op1, op2));
+      emit_insn (gen_sse3_monitor (op0, op1, op2));
       return 0;
 
     case IX86_BUILTIN_MWAIT:
@@ -13934,14 +13988,14 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
        op0 = copy_to_mode_reg (SImode, op0);
       if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
-      emit_insn (gen_mwait (op0, op1));
+      emit_insn (gen_sse3_mwait (op0, op1));
       return 0;
 
     case IX86_BUILTIN_LOADDDUP:
-      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
+      return ix86_expand_unop_builtin (CODE_FOR_sse3_loadddup, arglist, target, 1);
 
     case IX86_BUILTIN_LDDQU:
-      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
+      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, target,
                                       1);
 
     default:
@@ -13952,14 +14006,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     if (d->code == fcode)
       {
        /* Compares are treated specially.  */
-       if (d->icode == CODE_FOR_maskcmpv4sf3
-           || d->icode == CODE_FOR_vmmaskcmpv4sf3
-           || d->icode == CODE_FOR_maskncmpv4sf3
-           || d->icode == CODE_FOR_vmmaskncmpv4sf3
-           || d->icode == CODE_FOR_maskcmpv2df3
-           || d->icode == CODE_FOR_vmmaskcmpv2df3
-           || d->icode == CODE_FOR_maskncmpv2df3
-           || d->icode == CODE_FOR_vmmaskncmpv2df3)
+       if (d->icode == CODE_FOR_sse_maskcmpv4sf3
+           || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
+           || d->icode == CODE_FOR_sse2_maskcmpv2df3
+           || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);
 
        return ix86_expand_binop_builtin (d->icode, arglist, target);
index 47d7035d46674a34eb785e08f2e86270567d61eb..0a0db2e102f907b8e141462bf2e2dfc9e5ac0af6 100644 (file)
@@ -2062,6 +2062,8 @@ enum ix86_builtins
   IX86_BUILTIN_CMPNEQSS,
   IX86_BUILTIN_CMPNLTSS,
   IX86_BUILTIN_CMPNLESS,
+  IX86_BUILTIN_CMPNGTSS,
+  IX86_BUILTIN_CMPNGESS,
   IX86_BUILTIN_CMPORDSS,
   IX86_BUILTIN_CMPUNORDSS,
   IX86_BUILTIN_CMPNESS,
index 0851dde2f8053a9677d8fd2793f4e0fe6444d11d..08aa382fb9d4f93f0de0e89e100ff44eb53440d7 100644 (file)
@@ -84,6 +84,7 @@
 
    ; For SSE/MMX support:
    (UNSPEC_FIX                 30)
+   (UNSPEC_FIX_NOTRUNC         31)
    (UNSPEC_MASKMOV             32)
    (UNSPEC_MOVMSK              33)
    (UNSPEC_MOVNT               34)
    push,pop,call,callv,leave,
    str,cld,
    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint,
-   sselog,sseiadd,sseishft,sseimul,
+   sselog,sselog1,sseiadd,sseishft,sseimul,
    sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
   (const_string "other"))
 (define_attr "unit" "integer,i387,sse,mmx,unknown"
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint")
           (const_string "i387")
-        (eq_attr "type" "sselog,sseiadd,sseishft,sseimul,
+        (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
                          sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
           (const_string "sse")
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (if_then_else (match_operand 1 "constant_call_address_operand" "")
             (const_string "none")
             (const_string "load"))
-        (and (eq_attr "type" "alu1,negnot,ishift1")
+        (and (eq_attr "type" "alu1,negnot,ishift1,sselog1")
              (match_operand 1 "memory_operand" ""))
           (const_string "both")
         (and (match_operand 0 "memory_operand" "")
                 "!alu1,negnot,ishift1,
                   imov,imovx,icmp,test,
                   fmov,fcmp,fsgn,
-                  sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,
+                  sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
                   mmx,mmxmov,mmxcmp,mmxcvt")
              (match_operand 2 "memory_operand" ""))
           (const_string "load")
    (set_attr "pent_pair" "np")
    (set_attr "athlon_decode" "vector")])
 
+;; Expander for TImode moves.  Enabled when either TARGET_SSE or
+;; TARGET_64BIT holds; both operands use the nonimmediate_operand
+;; predicate.  All RTL is emitted by the C body, which ends with DONE.
+(define_expand "movti"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (match_operand:TI 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE || TARGET_64BIT"
+{
+  /* 64-bit targets go through the general move expander; every other
+     target that reaches here uses the vector move expander.  */
+  if (TARGET_64BIT)
+    ix86_expand_move (TImode, operands);
+  else
+    ix86_expand_vector_move (TImode, operands);
+  DONE;
+})
+
+;; TImode move through SSE registers for TARGET_SSE && !TARGET_64BIT.
+;; The insn condition rejects memory-to-memory moves.  Alternatives:
+;;   0: constraint "C" source into an SSE register, emitted as an xor
+;;      of the destination with itself;
+;;   1: SSE register or memory into an SSE register;
+;;   2: SSE register into memory.
+;; The "mode" attribute selects between the xorps/movaps and the
+;; pxor/movdqa spellings of each alternative.
+(define_insn "*movti_internal"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+       (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE && !TARGET_64BIT
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      /* Clear the destination register by xor-ing it with itself.  */
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
+    case 1:
+    case 2:
+      /* Load/copy and store share the same mnemonic choice.  */
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
+    default:
+      abort ();
+    }
+}
+  [(set_attr "type" "ssemov,ssemov,ssemov")
+   ;; Mode is V4SF whenever TARGET_SSE2 is off; otherwise V4SF when
+   ;; optimize_size is nonzero and TI when it is zero.
+   ;; NOTE(review): the "0,1" and "2" arms below are identical, and the
+   ;; store arm does not test TARGET_SSE_TYPELESS_STORES the way
+   ;; *movti_rex64 does -- confirm whether that is intentional.
+   (set (attr "mode")
+        (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
+                (const_string "V4SF")
+
+              (eq_attr "alternative" "0,1")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))
+              (eq_attr "alternative" "2")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))]
+              (const_string "TI")))])
+
+;; TImode move for TARGET_64BIT.  The insn condition rejects
+;; memory-to-memory moves.  Alternatives:
+;;   0,1: "r"/"o" destinations; the template is "#", so no assembly is
+;;        printed here and the insn has to be split (see the TImode
+;;        define_split that requires !SSE_REG_P on both operands);
+;;   2:   constraint "C" source into an SSE register, emitted as an xor
+;;        of the destination with itself;
+;;   3:   SSE register or memory into an SSE register;
+;;   4:   SSE register into an SSE register or memory.
+(define_insn "*movti_rex64"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
+       (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
+  "TARGET_64BIT
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      /* No direct output for these alternatives; they must be split.  */
+      return "#";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
+    case 3:
+    case 4:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
+    default:
+      abort ();
+    }
+}
+  [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
+   ;; Alternatives 2,3: V4SF when optimize_size is nonzero, else TI.
+   ;; Alternative 4: V4SF when TARGET_SSE_TYPELESS_STORES or
+   ;; optimize_size is nonzero, else TI.
+   ;; Alternatives 0,1 fall through to the default, DI.
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "2,3")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))
+              (eq_attr "alternative" "4")
+                (if_then_else
+                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                           (const_int 0))
+                       (ne (symbol_ref "optimize_size")
+                           (const_int 0)))
+                  (const_string "V4SF")
+                  (const_string "TI"))]
+              (const_string "DI")))])
+
+;; After reload, split a TImode move in which neither operand is an SSE
+;; register.  The replacement pattern is only the placeholder
+;; (const_int 0): the C fragment calls ix86_split_long_move to emit the
+;; replacement insns itself and then finishes with DONE.
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+        (match_operand:TI 1 "general_operand" ""))]
+  "reload_completed && !SSE_REG_P (operands[0])
+   && !SSE_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
 (define_expand "movsf"
   [(set (match_operand:SF 0 "nonimmediate_operand" "")
        (match_operand:SF 1 "general_operand" ""))]
 }
   [(set_attr "type" "fxch")
    (set_attr "mode" "XF")])
+
+;; Expander for TFmode moves, available only when TARGET_64BIT holds.
+;; Both operands use the nonimmediate_operand predicate; the C body
+;; hands the whole move to ix86_expand_move and ends with DONE.
+(define_expand "movtf"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+       (match_operand:TF 1 "nonimmediate_operand" ""))]
+  "TARGET_64BIT"
+{
+  ix86_expand_move (TFmode, operands);
+  DONE;
+})
+
+;; TFmode move for TARGET_64BIT.  The insn condition rejects
+;; memory-to-memory moves.  Alternatives:
+;;   0,1: "r"/"o" destinations; the template is "#", so no assembly is
+;;        printed here and the insn has to be split (see the TFmode
+;;        define_split that requires !SSE_REG_P on both operands);
+;;   2:   constraint "C" source into an SSE register, emitted as an xor
+;;        of the destination with itself;
+;;   3:   SSE register or memory into an SSE register;
+;;   4:   SSE register into an SSE register or memory.
+(define_insn "*movtf_internal"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm")
+       (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))]
+  "TARGET_64BIT
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      /* No direct output for these alternatives; they must be split.  */
+      return "#";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
+    case 3:
+    case 4:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
+    default:
+      abort ();
+    }
+}
+  [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
+   ;; Alternatives 2,3: V4SF when optimize_size is nonzero, else TI.
+   ;; Alternative 4: V4SF when TARGET_SSE_TYPELESS_STORES or
+   ;; optimize_size is nonzero, else TI.
+   ;; Alternatives 0,1 fall through to the default, DI.
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "2,3")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))
+              (eq_attr "alternative" "4")
+                (if_then_else
+                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                           (const_int 0))
+                       (ne (symbol_ref "optimize_size")
+                           (const_int 0)))
+                  (const_string "V4SF")
+                  (const_string "TI"))]
+              (const_string "DI")))])
+
+;; After reload, split a TFmode move in which neither operand is an SSE
+;; register.  The replacement pattern is only the placeholder
+;; (const_int 0): the C fragment calls ix86_split_long_move to emit the
+;; replacement insns itself and then finishes with DONE.
+(define_split
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+        (match_operand:TF 1 "general_operand" ""))]
+  "reload_completed && !SSE_REG_P (operands[0])
+   && !SSE_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
 \f
 ;; Zero extension instructions
 
 \f
 ;; SSE extract/set expanders
 
-(define_expand "vec_setv2df"
-  [(match_operand:V2DF 0 "register_operand" "")
-   (match_operand:DF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE2"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1]));
-      break;
-    case 1:
-      emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1]));
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
-
-(define_expand "vec_extractv2df"
-  [(match_operand:DF 0 "register_operand" "")
-   (match_operand:V2DF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE2"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_insn (gen_sse2_storelpd (operands[0], operands[1]));
-      break;
-    case 1:
-      emit_insn (gen_sse2_storehpd (operands[0], operands[1]));
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
-
-(define_expand "vec_initv2df"
-  [(match_operand:V2DF 0 "register_operand" "")
-   (match_operand 1 "" "")]
-  "TARGET_SSE2"
-{
-  ix86_expand_vector_init (operands[0], operands[1]);
-  DONE;
-})
-
-(define_expand "vec_setv4sf"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand:SF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_insn (gen_sse_movss (operands[0], operands[0],
-                               simplify_gen_subreg (V4SFmode, operands[1],
-                                                    SFmode, 0)));
-      break;
-    case 1:
-      {
-       rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[0]);
-       emit_insn (gen_sse_unpcklps (operands[0], operands[0], operands[0]));
-       emit_insn (gen_sse_movss (operands[0], operands[0], op1));
-        emit_insn (gen_sse_shufps (operands[0], operands[0], tmp,
-                                   GEN_INT (1 + (0<<2) + (2<<4) + (3<<6))));
-      }
-      break;
-    case 2:
-      {
-        rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-        rtx tmp = gen_reg_rtx (V4SFmode);
-
-        emit_move_insn (tmp, operands[0]);
-        emit_insn (gen_sse_movss (tmp, tmp, op1));
-        emit_insn (gen_sse_shufps (operands[0], operands[0], tmp,
-                                   GEN_INT (0 + (1<<2) + (0<<4) + (3<<6))));
-      }
-      break;
-    case 3:
-      {
-        rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-        rtx tmp = gen_reg_rtx (V4SFmode);
-
-        emit_move_insn (tmp, operands[0]);
-        emit_insn (gen_sse_movss (tmp, tmp, op1));
-        emit_insn (gen_sse_shufps (operands[0], operands[0], tmp,
-                                   GEN_INT (0 + (1<<2) + (2<<4) + (0<<6))));
-      }
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
-
-(define_expand "vec_extractv4sf"
-  [(match_operand:SF 0 "register_operand" "")
-   (match_operand:V4SF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_move_insn (operands[0], gen_lowpart (SFmode, operands[1]));
-      break;
-    case 1:
-      {
-       rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[1]);
-        emit_insn (gen_sse_shufps (op0, tmp, tmp,
-                                   const1_rtx));
-      }
-      break;
-    case 2:
-      {
-       rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[1]);
-        emit_insn (gen_sse_unpckhps (op0, tmp, tmp));
-      }
-      break;
-    case 3:
-      {
-       rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[1]);
-        emit_insn (gen_sse_shufps (op0, tmp, tmp,
-                                   GEN_INT (3)));
-      }
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
-
-(define_expand "vec_initv4sf"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand 1 "" "")]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_init (operands[0], operands[1]);
-  DONE;
-})
 \f
 ;; Add instructions
 
   [(set (reg FLAGS_REG)
        (compare
          (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashift:SI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (ashift:SI (match_operand:SI 1 "register_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   [(set (reg FLAGS_REG)
        (compare
          (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashift:HI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashift:QI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:SI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:HI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashiftrt:QI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:HI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (lshiftrt:QI (match_dup 1) (match_dup 2)))]
   RET;
 })
 
-       ;; Pentium III SIMD instructions.
+;; Pentium III SIMD instructions.
 
 ;; Moves for SSE/MMX regs.
 
-(define_expand "movv4sf"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
-       (match_operand:V4SF 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
+;; 8 byte integral modes handled by MMX (and by extension, SSE)
+(define_mode_macro MMXMODEI [V8QI V4HI V2SI])
+
+(define_expand "mov<mode>"
+  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
+       (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
+  "TARGET_MMX"
 {
-  ix86_expand_vector_move (V4SFmode, operands);
-  DONE;
-})
-
-(define_insn "*movv4sf_internal"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
-       (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_SSE"
-  "@
-    xorps\t%0, %0
-    movaps\t{%1, %0|%0, %1}
-    movaps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF")])
-
-(define_split
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
-  "TARGET_SSE && reload_completed"
-  [(set (match_dup 0)
-       (vec_merge:V4SF
-        (vec_duplicate:V4SF (match_dup 1))
-        (match_dup 2)
-        (const_int 1)))]
-{
-  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
-  operands[2] = CONST0_RTX (V4SFmode);
-})
-
-(define_expand "movv2df"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
-       (match_operand:V2DF 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_move (V2DFmode, operands);
-  DONE;
-})
-
-(define_insn "*movv2df_internal"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
-       (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "xorpd\t%0, %0";
-    case 1:
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movapd\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set (attr "mode")
-        (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
-                (const_string "V4SF")
-              (eq_attr "alternative" "0,1")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "V2DF"))
-              (eq_attr "alternative" "2")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "V2DF"))]
-              (const_string "V2DF")))])
-
-(define_split
-  [(set (match_operand:V2DF 0 "register_operand" "")
-       (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
-  "TARGET_SSE2 && reload_completed"
-  [(set (match_dup 0)
-       (vec_merge:V2DF
-        (vec_duplicate:V2DF (match_dup 1))
-        (match_dup 2)
-        (const_int 1)))]
-{
-  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
-  operands[2] = CONST0_RTX (V2DFmode);
-})
-
-;; 16 byte integral modes handled by SSE, minus TImode, which gets
-;; special-cased for TARGET_64BIT.
-(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
-
-(define_expand "mov<mode>"
-  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
-       (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_move (<MODE>mode, operands);
-  DONE;
-})
-
-(define_insn "*mov<mode>_internal"
-  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
-       (match_operand:SSEMODEI 1 "vector_move_operand"  "C ,xm,x"))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 1:
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set (attr "mode")
-        (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
-                (const_string "V4SF")
-
-              (eq_attr "alternative" "0,1")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "2")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "TI")))])
-
-;; 8 byte integral modes handled by MMX (and by extension, SSE)
-(define_mode_macro MMXMODEI [V8QI V4HI V2SI])
-
-(define_expand "mov<mode>"
-  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
-       (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
-  "TARGET_MMX"
-{
-  ix86_expand_vector_move (<MODE>mode, operands);
+  ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
 })
 
   [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov")
    (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V2SF,V2SF")])
 
-(define_expand "movti"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "")
-       (match_operand:TI 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE || TARGET_64BIT"
+;; All 8-byte vector modes handled by MMX
+(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+       (match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
+  "TARGET_MMX"
 {
-  if (TARGET_64BIT)
-    ix86_expand_move (TImode, operands);
-  else
-    ix86_expand_vector_move (TImode, operands);
+  ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
 })
 
-(define_insn "*movti_internal"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
-       (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_SSE && !TARGET_64BIT
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 1:
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "ssemov,ssemov,ssemov")
-   (set (attr "mode")
-        (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
-                (const_string "V4SF")
+;; SSE Strange Moves.
 
-              (eq_attr "alternative" "0,1")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "2")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "TI")))])
+(define_insn "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+                  UNSPEC_MOVMSK))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+  "pmovmskb\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
 
-(define_insn "*movti_rex64"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
-       (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
-  "TARGET_64BIT
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return "#";
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 3:
-    case 4:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "2,3")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "4")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "DI")))])
 
-(define_expand "movtf"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-       (match_operand:TF 1 "nonimmediate_operand" ""))]
-  "TARGET_64BIT"
-{
-  ix86_expand_move (TFmode, operands);
-  DONE;
-})
-
-(define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm")
-       (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))]
-  "TARGET_64BIT
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return "#";
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 3:
-    case 4:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "2,3")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "4")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "DI")))])
-
-(define_mode_macro SSEPUSH [V16QI V8HI V4SI V2DI TI V4SF V2DF])
-
-(define_insn "*push<mode>"
-  [(set (match_operand:SSEPUSH 0 "push_operand" "=<")
-       (match_operand:SSEPUSH 1 "register_operand" "x"))]
-  "TARGET_SSE"
-  "#")
-
-(define_mode_macro MMXPUSH [V8QI V4HI V2SI V2SF])
-
-(define_insn "*push<mode>"
-  [(set (match_operand:MMXPUSH 0 "push_operand" "=<")
-       (match_operand:MMXPUSH 1 "register_operand" "xy"))]
-  "TARGET_MMX"
-  "#")
-
-(define_split
-  [(set (match_operand 0 "push_operand" "")
-       (match_operand 1 "register_operand" ""))]
-  "!TARGET_64BIT && reload_completed
-   && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 3)))
-   (set (match_dup 2) (match_dup 1))]
-  "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
-                                stack_pointer_rtx);
-   operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
-
-(define_split
-  [(set (match_operand 0 "push_operand" "")
-       (match_operand 1 "register_operand" ""))]
-  "TARGET_64BIT && reload_completed
-   && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 3)))
-   (set (match_dup 2) (match_dup 1))]
-  "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
-                                stack_pointer_rtx);
-   operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
-
-
-(define_split
-  [(set (match_operand:TI 0 "nonimmediate_operand" "")
-        (match_operand:TI 1 "general_operand" ""))]
-  "reload_completed && !SSE_REG_P (operands[0])
-   && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed && !SSE_REG_P (operands[0])
-   && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
-;; All 16-byte vector modes handled by SSE
-(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
-
-(define_expand "movmisalign<mode>"
-  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
-       (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_move_misalign (<MODE>mode, operands);
-  DONE;
-})
-
-;; All 8-byte vector modes handled by MMX
-(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF])
-
-(define_expand "movmisalign<mode>"
-  [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
-       (match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
-  "TARGET_MMX"
-{
-  ix86_expand_vector_move (<MODE>mode, operands);
-  DONE;
-})
-
-;; These two patterns are useful for specifying exactly whether to use
-;; movaps or movups
-(define_expand "sse_movaps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
-                    UNSPEC_MOVA))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
-    {
-      rtx tmp = gen_reg_rtx (V4SFmode);
-      emit_insn (gen_sse_movaps (tmp, operands[1]));
-      emit_move_insn (operands[0], tmp);
-      DONE;
-    }
-})
-
-(define_insn "*sse_movaps_1"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVA))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movaps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov,ssemov")
-   (set_attr "mode" "V4SF")])
-
-(define_expand "sse_movups"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
-                    UNSPEC_MOVU))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
-    {
-      rtx tmp = gen_reg_rtx (V4SFmode);
-      emit_insn (gen_sse_movups (tmp, operands[1]));
-      emit_move_insn (operands[0], tmp);
-      DONE;
-    }
-})
-
-(define_insn "*sse_movups_1"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVU))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movups\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt,ssecvt")
-   (set_attr "mode" "V4SF")])
-
-;; SSE Strange Moves.
-
-(define_insn "sse_movmskps"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE"
-  "movmskps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-
-(define_insn "mmx_maskmovq"
-  [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
-       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
-                     (match_operand:V8QI 2 "register_operand" "y")]
-                    UNSPEC_MASKMOV))]
-  "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "maskmovq\t{%2, %1|%1, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+(define_insn "mmx_maskmovq"
+  [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
+       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+                     (match_operand:V8QI 2 "register_operand" "y")]
+                    UNSPEC_MASKMOV))]
+  "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovq\t{%2, %1|%1, %2}"
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "DI")])
 
 (define_insn "mmx_maskmovq_rex"
   [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D"))
   [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
-(define_insn "sse_movntv4sf"
-  [(set (match_operand:V4SF 0 "memory_operand" "=m")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
-                    UNSPEC_MOVNT))]
-  "TARGET_SSE"
-  "movntps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF")])
-
 (define_insn "sse_movntdi"
   [(set (match_operand:DI 0 "memory_operand" "=m")
        (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
   [(set_attr "type" "mmxmov")
    (set_attr "mode" "DI")])
 
-(define_insn "sse_movhlps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 2)
-                                    (const_int 3)
-                                    (const_int 0)
-                                    (const_int 1)]))
-        (const_int 3)))]
-  "TARGET_SSE"
-  "movhlps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
+;; MMX insns
 
-(define_insn "sse_movlhps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 2)
-                                    (const_int 3)
-                                    (const_int 0)
-                                    (const_int 1)]))
-        (const_int 12)))]
-  "TARGET_SSE"
-  "movlhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
+;; MMX arithmetic
 
-;; Store the high V2SF of the source vector to the destination.
-(define_insn "sse_storehps"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
-       (vec_select:V2SF
-         (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
-         (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_SSE"
-  "@
-   movhps\t{%1, %0|%0, %1}
-   movhlps\t{%1, %0|%0, %1}
-   #"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2SF")])
+(define_insn "addv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
+                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_split
-  [(set (match_operand:V2SF 0 "register_operand" "")
-       (vec_select:V2SF
-         (match_operand:V4SF 1 "memory_operand" "")
-         (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_SSE && reload_completed"
-  [(const_int 0)]
-{
-  emit_move_insn (operands[0], adjust_address (operands[1], V2SFmode, 8));
-  DONE;
-})
+(define_insn "addv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-;; Load the high V2SF of the target vector from the source vector.
-(define_insn "sse_loadhps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
-       (vec_concat:V4SF
-         (vec_select:V2SF
-           (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
-           (parallel [(const_int 0) (const_int 1)]))
-         (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
-  "TARGET_SSE"
-  "@
-   movhps\t{%2, %0|%0, %2}
-   movlhps\t{%2, %0|%0, %2}
-   #"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2SF")])
+(define_insn "addv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
+                  (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_split
-  [(set (match_operand:V4SF 0 "memory_operand" "")
-       (vec_concat:V4SF
-         (vec_select:V2SF
-           (match_dup 0)
-           (parallel [(const_int 0) (const_int 1)]))
-         (match_operand:V2SF 1 "register_operand" "")))]
-  "TARGET_SSE && reload_completed"
-  [(const_int 0)]
-{
-  emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[1]);
-  DONE;
-})
+(define_insn "mmx_adddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(plus:DI (match_operand:DI 1 "register_operand" "%0")
+                  (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+        UNSPEC_NOP))]
+  "TARGET_MMX"
+  "paddq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-;; Store the low V2SF of the source vector to the destination.
-(define_expand "sse_storelps"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
-       (vec_select:V2SF
-         (match_operand:V4SF 1 "nonimmediate_operand" "")
-         (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-{
-  operands[1] = gen_lowpart (V2SFmode, operands[1]);
-  emit_move_insn (operands[0], operands[1]);
-  DONE;
-})
+(define_insn "ssaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
+                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-;; Load the low V2SF of the target vector from the source vector.
-(define_insn "sse_loadlps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
-       (vec_concat:V4SF
-         (match_operand:V2SF 2 "nonimmediate_operand" "m,0,x")
-         (vec_select:V2SF
-           (match_operand:V4SF 1 "nonimmediate_operand" "0,x,0")
-           (parallel [(const_int 2) (const_int 3)]))))]
-  "TARGET_SSE"
-{
-  static const char * const alt[] = {
-    "movlps\t{%2, %0|%0, %2}",
-    "shufps\t{%2, %1, %0|%0, %1, %2}",
-    "movlps\t{%2, %0|%0, %2}"
-  };
+(define_insn "ssaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
+                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-  if (which_alternative == 1)
-    operands[2] = GEN_INT (0xe4);
+(define_insn "usaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
+                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-  return alt[which_alternative];
-}
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2SF")])
+(define_insn "usaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
+                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_expand "sse_loadss"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand:SF 1 "memory_operand" "")]
-  "TARGET_SSE"
-{
-  emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
-                              CONST0_RTX (V4SFmode)));
-  DONE;
-})
+(define_insn "subv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse_loadss_1"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
-        (match_operand:V4SF 2 "const0_operand" "X")
-        (const_int 1)))]
-  "TARGET_SSE"
-  "movss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
+(define_insn "subv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse_movss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (match_operand:V4SF 2 "register_operand" "x")
-        (const_int 14)))]
-  "TARGET_SSE"
-  "movss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
+(define_insn "subv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                   (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse_storess"
-  [(set (match_operand:SF 0 "memory_operand" "=m")
-       (vec_select:SF
-        (match_operand:V4SF 1 "register_operand" "x")
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "movss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
+(define_insn "mmx_subdi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(minus:DI (match_operand:DI 1 "register_operand" "0")
+                   (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+        UNSPEC_NOP))]
+  "TARGET_MMX"
+  "psubq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse_shufps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-                     (match_operand:SI 3 "const_int_operand" "n")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE"
-  "shufps\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
+(define_insn "sssubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
+(define_insn "sssubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-;; SSE arithmetic
+(define_insn "ussubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "addv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "addps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
+(define_insn "ussubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "vmaddv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "addss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
+(define_insn "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pmullw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxmul")
+   (set_attr "mode" "DI")])
 
-(define_insn "subv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "subps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
+(define_insn "smulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (truncate:V4HI
+        (lshiftrt:V4SI
+         (mult:V4SI (sign_extend:V4SI
+                     (match_operand:V4HI 1 "register_operand" "0"))
+                    (sign_extend:V4SI
+                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+         (const_int 16))))]
+  "TARGET_MMX"
+  "pmulhw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxmul")
+   (set_attr "mode" "DI")])
 
-(define_insn "vmsubv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "subss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
+(define_insn "umulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (truncate:V4HI
+        (lshiftrt:V4SI
+         (mult:V4SI (zero_extend:V4SI
+                     (match_operand:V4HI 1 "register_operand" "0"))
+                    (zero_extend:V4SI
+                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+         (const_int 16))))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+  "pmulhuw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxmul")
+   (set_attr "mode" "DI")])
 
-;; ??? Should probably be done by generic code instead.
-(define_expand "negv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-                 (match_dup 2)))]
-  "TARGET_SSE"
-{
-  rtx m0 = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode));
-  rtx vm0 = gen_rtx_CONST_VECTOR (V4SFmode, gen_rtvec (4, m0, m0, m0, m0));
-  operands[2] = force_reg (V4SFmode, vm0);
-})
+(define_insn "mmx_pmaddwd"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (plus:V2SI
+        (mult:V2SI
+         (sign_extend:V2SI
+          (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
+                           (parallel [(const_int 0) (const_int 2)])))
+         (sign_extend:V2SI
+          (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+                           (parallel [(const_int 0) (const_int 2)]))))
+        (mult:V2SI
+         (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
+                                            (parallel [(const_int 1)
+                                                       (const_int 3)])))
+         (sign_extend:V2SI (vec_select:V2HI (match_dup 2)
+                                            (parallel [(const_int 1)
+                                                       (const_int 3)]))))))]
+  "TARGET_MMX"
+  "pmaddwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxmul")
+   (set_attr "mode" "DI")])
 
-(define_insn "mulv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "mulps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "V4SF")])
+(define_insn "sse2_umulsidi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (mult:DI
+         (zero_extend:DI
+           (vec_select:SI
+             (match_operand:V2SI 1 "register_operand" "0")
+             (parallel [(const_int 0)])))
+         (zero_extend:DI
+           (vec_select:SI
+             (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+             (parallel [(const_int 0)])))))]
+  "TARGET_SSE2"
+  "pmuludq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxmul")
+   (set_attr "mode" "DI")])
 
-(define_insn "vmmulv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "mulss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "SF")])
 
-(define_insn "divv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "divps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "V4SF")])
+;; MMX logical operations
+;; Note we don't want to declare these as regular iordi3 insns to prevent
+;; normal code that also wants to use the FPU from getting broken.
+;; The UNSPECs are there to prevent the combiner from getting overly clever.
+(define_insn "mmx_iordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(ior:DI (match_operand:DI 1 "register_operand" "%0")
+                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+        UNSPEC_NOP))]
+  "TARGET_MMX"
+  "por\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "vmdivv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "divss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "SF")])
+(define_insn "mmx_xordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(xor:DI (match_operand:DI 1 "register_operand" "%0")
+                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+        UNSPEC_NOP))]
+  "TARGET_MMX"
+  "pxor\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")
+   (set_attr "memory" "none")])
 
+;; Same as pxor, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "mmx_clrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI [(const_int 0)] UNSPEC_NOP))]
+  "TARGET_MMX"
+  "pxor\t{%0, %0|%0, %0}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")
+   (set_attr "memory" "none")])
 
-;; SSE square root/reciprocal
+(define_insn "mmx_anddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(and:DI (match_operand:DI 1 "register_operand" "%0")
+                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+        UNSPEC_NOP))]
+  "TARGET_MMX"
+  "pand\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "rcpv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
-  "TARGET_SSE"
-  "rcpps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+(define_insn "mmx_nanddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
+                         (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+        UNSPEC_NOP))]
+  "TARGET_MMX"
+  "pandn\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "vmrcpv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_RCP)
-        (match_operand:V4SF 2 "register_operand" "0")
-        (const_int 1)))]
-  "TARGET_SSE"
-  "rcpss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
 
-(define_insn "rsqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
-  "TARGET_SSE"
-  "rsqrtps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+;; MMX unsigned averages/sum of absolute differences
 
-(define_insn "vmrsqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_RSQRT)
-        (match_operand:V4SF 2 "register_operand" "0")
+(define_insn "mmx_uavgv8qi3"  ;; per-element ((op1 + op2) + 1) >> 1 on V8QI, emitted as pavgb
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ashiftrt:V8QI  ;; NOTE(review): arithmetic shift in V8QI although the name says unsigned average -- confirm intended
+        (plus:V8QI (plus:V8QI
+                    (match_operand:V8QI 1 "register_operand" "0")
+                    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+                   (const_vector:V8QI [(const_int 1)  ;; vector of eight 1s added before the shift
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)]))
         (const_int 1)))]
-  "TARGET_SSE"
-  "rsqrtss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-(define_insn "sqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "sqrtps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pavgb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
+   (set_attr "mode" "DI")])
 
-(define_insn "vmsqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
-        (match_operand:V4SF 2 "register_operand" "0")
+(define_insn "mmx_uavgv4hi3"  ;; per-element ((op1 + op2) + 1) >> 1 on V4HI, emitted as pavgw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashiftrt:V4HI  ;; NOTE(review): arithmetic shift in V4HI although the name says unsigned average -- confirm intended
+        (plus:V4HI (plus:V4HI
+                    (match_operand:V4HI 1 "register_operand" "0")
+                    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+                   (const_vector:V4HI [(const_int 1)  ;; vector of four 1s added before the shift
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)]))
         (const_int 1)))]
-  "TARGET_SSE"
-  "sqrtss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pavgw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
+   (set_attr "mode" "DI")])
 
-;; SSE logical operations.
+(define_insn "mmx_psadbw"  ;; two V8QI inputs, one DImode result; emitted as psadbw
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI [(match_operand:V8QI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+                  UNSPEC_PSADBW))]  ;; the operation is opaque RTL: only the unspec code identifies it
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "psadbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
+   (set_attr "mode" "DI")])
 
-;; SSE defines logical operations on floating point values.  This brings
-;; interesting challenge to RTL representation where logicals are only valid
-;; on integral types.  We deal with this by representing the floating point
-;; logical as logical on arguments casted to TImode as this is what hardware
-;; really does.  Unfortunately hardware requires the type information to be
-;; present and thus we must avoid subregs from being simplified and eliminated
-;; in later compilation phases.
-;;
-;; We have following variants from each instruction:
-;; sse_andsf3 - the operation taking V4SF vector operands
-;;              and doing TImode cast on them
-;; *sse_andsf3_memory - the operation taking one memory operand casted to
-;;                      TImode, since backend insist on eliminating casts
-;;                      on memory operands
-;; sse_andti3_sf_1 - the operation taking SF scalar operands.
-;;                   We cannot accept memory operand here as instruction reads
-;;                  whole scalar.  This is generated only post reload by GCC
-;;                  scalar float operations that expands to logicals (fabs)
-;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode
-;;                  memory operand.  Eventually combine can be able
-;;                  to synthesize these using splitter.
-;; sse2_anddf3, *sse2_anddf3_memory
-;;              
-;; 
-;; These are not called andti3 etc. because we really really don't want
-;; the compiler to widen DImode ands to TImode ands and then try to move
-;; into DImode subregs of SSE registers, and them together, and move out
-;; of DImode subregs again!
-;; SSE1 single precision floating point logical operation
-(define_expand "sse_andv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (and:V4SF (match_operand:V4SF 1 "register_operand" "")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
 
-(define_insn "*sse_andv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "andps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+;; MMX insert/extract/shuffle
 
-(define_expand "sse_nandv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" ""))
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
+(define_expand "mmx_pinsrw"  ;; expander: takes an SImode value (op2) and an element index 0..3 (op3)
+  [(set (match_operand:V4HI 0 "register_operand" "")
+        (vec_merge:V4HI  ;; NOTE(review): vec_merge selects from its FIRST operand where a mask bit is set; first operand here is the original vector op1 -- confirm operand order
+         (match_operand:V4HI 1 "register_operand" "")
+          (vec_duplicate:V4HI
+            (match_operand:SI 2 "nonimmediate_operand" ""))  ;; replaced by its HImode lowpart in the preparation code below
+          (match_operand:SI 3 "const_0_to_3_operand" "")))]  ;; rewritten to the one-bit mask 1 << index in the preparation code below
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+{
+  operands[2] = gen_lowpart (HImode, operands[2]);
+  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
+})
 
-(define_insn "*sse_nandv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "andnps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+(define_insn "*mmx_pinsrw"  ;; matcher for the vec_merge form with an HImode value and a power-of-two mask
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (vec_merge:V4HI  ;; NOTE(review): vec_merge selects from its FIRST operand where a mask bit is set; first operand here is the original vector op1 -- confirm operand order
+         (match_operand:V4HI 1 "register_operand" "0")
+          (vec_duplicate:V4HI
+            (match_operand:HI 2 "nonimmediate_operand" "rm"))
+          (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))]  ;; mask form; the output code below converts it with exact_log2 before printing
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "DI")])
 
-(define_expand "sse_iorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (ior:V4SF (match_operand:V4SF 1 "register_operand" "")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
+(define_insn "mmx_pextrw"  ;; selects one HImode element of a V4HI and zero-extends it to SImode; emitted as pextrw
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+                                      (parallel
+                                       [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))]  ;; op2 is the element index, predicate const_0_to_3_operand
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sse_iorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "orps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+(define_insn "mmx_pshufw"  ;; V4HI shuffle controlled by an immediate; emitted as pshufw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "nonimmediate_operand" "ym")  ;; source is not tied to op0 (no matching constraint)
+                     (match_operand:SI 2 "immediate_operand" "i")]
+                    UNSPEC_SHUFFLE))]  ;; the permutation is opaque RTL: only the unspec code identifies it
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pshufw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "DI")])
 
-(define_expand "sse_xorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (xor:V4SF (match_operand:V4SF 1 "register_operand" "")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
 
-(define_insn "*sse_xorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "xorps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
+;; MMX mask-generating comparisons
 
-;; SSE2 double precision floating point logical operation
+(define_insn "eqv8qi3"  ;; V8QI eq comparison of op1 and op2, emitted as pcmpeqb
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (eq:V8QI (match_operand:V8QI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "DI")])
 
-(define_expand "sse2_andv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (and:V2DF (match_operand:V2DF 1 "register_operand" "")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
+(define_insn "eqv4hi3"  ;; V4HI eq comparison of op1 and op2, emitted as pcmpeqw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (eq:V4HI (match_operand:V4HI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sse2_andv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "andpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "sse2_nandv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" ""))
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
+(define_insn "eqv2si3"  ;; V2SI eq comparison of op1 and op2, emitted as pcmpeqd
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (eq:V2SI (match_operand:V2SI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sse2_nandv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "andnpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "sse2_iorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (ior:V2DF (match_operand:V2DF 1 "register_operand" "")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
+(define_insn "gtv8qi3"  ;; V8QI gt comparison (op1 > op2), emitted as pcmpgtb
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (gt:V8QI (match_operand:V8QI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sse2_iorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "orpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "sse2_xorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
+(define_insn "gtv4hi3"  ;; V4HI gt comparison (op1 > op2), emitted as pcmpgtw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (gt:V4HI (match_operand:V4HI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sse2_xorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "xorpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-;; SSE2 integral logicals.  These patterns must always come after floating
-;; point ones since we don't want compiler to use integer opcodes on floating
-;; point SSE values to avoid matching of subregs in the match_operand.
-(define_insn "*sse2_andti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pand\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+(define_insn "gtv2si3"  ;; V2SI gt comparison (op1 > op2), emitted as pcmpgtd
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (gt:V2SI (match_operand:V2SI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse2_andv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pand\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
 
-(define_insn "*sse2_nandti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+;; MMX max/min insns
 
-(define_insn "sse2_nandv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (and:V2DI (not:V2DI (match_operand:V2DI 1 "register_operand" "0"))
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+(define_insn "umaxv8qi3"  ;; V8QI umax of op1 and op2, emitted as pmaxub
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pmaxub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sse2_iorti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "por\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+(define_insn "smaxv4hi3"  ;; V4HI smax of op1 and op2, emitted as pmaxsw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pmaxsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse2_iorv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "por\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+(define_insn "uminv8qi3"  ;; V8QI umin of op1 and op2, emitted as pminub
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pminub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "*sse2_xorti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pxor\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
+(define_insn "sminv4hi3"  ;; V4HI smin of op1 and op2, emitted as pminsw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE || TARGET_3DNOW_A"  ;; enabled when either target flag is set
+  "pminsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
 
-(define_insn "sse2_xorv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pxor\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
 
-;; Use xor, but don't show input operands so they aren't live before
-;; this insn.
-(define_insn "sse_clrv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (match_operand:V4SF 1 "const0_operand" "X"))]
-  "TARGET_SSE"
-{
-  if (get_attr_mode (insn) == MODE_TI)
-    return "pxor\t{%0, %0|%0, %0}";
-  else
-    return "xorps\t{%0, %0|%0, %0}";
-}
-  [(set_attr "type" "sselog")
-   (set_attr "memory" "none")
-   (set (attr "mode")
-       (if_then_else
-          (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
-                        (const_int 0))
-                    (ne (symbol_ref "TARGET_SSE2")
-                        (const_int 0)))
-               (eq (symbol_ref "optimize_size")
-                   (const_int 0)))
-        (const_string "TI")
-        (const_string "V4SF")))])
-
-;; Use xor, but don't show input operands so they aren't live before
-;; this insn.
-(define_insn "sse_clrv2df"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(const_int 0)] UNSPEC_NOP))]
-  "TARGET_SSE2"
-  "xorpd\t{%0, %0|%0, %0}"
-  [(set_attr "type" "sselog")
-   (set_attr "memory" "none")
-   (set_attr "mode" "V4SF")])
+;; MMX shifts
 
-;; SSE mask-generating compares
+(define_insn "ashrv4hi3"  ;; V4HI arithmetic right shift of op1 by op2, emitted as psraw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]  ;; shift count is a single DImode operand, constraints "yi"
+  "TARGET_MMX"
+  "psraw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
+   (set_attr "mode" "DI")])
 
-(define_insn "maskcmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")]))]
-  "TARGET_SSE"
-  "cmp%D3ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V4SF")])
+(define_insn "ashrv2si3"  ;; V2SI arithmetic right shift of op1 by op2, emitted as psrad
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]  ;; shift count is a single DImode operand, constraints "yi"
+  "TARGET_MMX"
+  "psrad\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
+   (set_attr "mode" "DI")])
 
-(define_insn "maskncmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (not:V4SI
-        (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")])))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordps\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3ps\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmmaskcmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")])
-        (subreg:V4SI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "cmp%D3ss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
-
-(define_insn "vmmaskncmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (not:V4SI
-         (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")]))
-        (subreg:V4SI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordss\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3ss\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_comi"
-  [(set (reg:CCFP FLAGS_REG)
-        (compare:CCFP (vec_select:SF
-                      (match_operand:V4SF 0 "register_operand" "x")
-                      (parallel [(const_int 0)]))
-                     (vec_select:SF
-                      (match_operand:V4SF 1 "register_operand" "x")
-                      (parallel [(const_int 0)]))))]
-  "TARGET_SSE"
-  "comiss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_ucomi"
-  [(set (reg:CCFPU FLAGS_REG)
-       (compare:CCFPU (vec_select:SF
-                       (match_operand:V4SF 0 "register_operand" "x")
-                       (parallel [(const_int 0)]))
-                      (vec_select:SF
-                       (match_operand:V4SF 1 "register_operand" "x")
-                       (parallel [(const_int 0)]))))]
-  "TARGET_SSE"
-  "ucomiss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "SF")])
-
-
-;; SSE unpack
-
-(define_insn "sse_unpckhps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (const_int 5)))]
-  "TARGET_SSE"
-  "unpckhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_unpcklps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (const_int 5)))]
-  "TARGET_SSE"
-  "unpcklps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-
-;; SSE min/max
-
-(define_insn "smaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "maxps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmsmaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "maxss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-(define_insn "sminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "minps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmsminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-;; SSE <-> integer/MMX conversions
-
-(define_insn "cvtpi2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (vec_duplicate:V4SF
-         (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
-        (const_int 12)))]
-  "TARGET_SSE"
-  "cvtpi2ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_select:V2SI
-        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
-        (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
+(define_insn "lshrv4hi3"  ;; V4HI logical right shift of op1 by op2, emitted as psrlw
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]  ;; shift count is a single DImode operand, constraints "yi"
+  "TARGET_MMX"
+  "psrlw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
+   (set_attr "mode" "DI")])
 
-(define_insn "cvttps2pi"
+(define_insn "lshrv2si3"  ;; V2SI logical right shift of op1 by op2, emitted as psrld
   [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_select:V2SI
-        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_FIX)
-        (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtsi2ss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0,0")
-        (vec_duplicate:V4SF
-         (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,rm")))
-        (const_int 14)))]
-  "TARGET_SSE"
-  "cvtsi2ss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtsi2ssq"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0,0")
-        (vec_duplicate:V4SF
-         (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
-        (const_int 14)))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvtsi2ssq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtss2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (vec_select:SI
-        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "cvtss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "mode" "SI")])
-
-(define_insn "cvtss2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (vec_select:DI
-        (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "cvtss2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
+        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]  ;; shift count is a single DImode operand, constraints "yi"
+  "TARGET_MMX"
+  "psrld\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "cvttss2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (vec_select:SI
-        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
-                     UNSPEC_FIX)
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "cvttss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double,vector")])
-
-(define_insn "cvttss2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (vec_select:DI
-        (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
-                     UNSPEC_FIX)
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvttss2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double,vector")])
-
-
-;; MMX insns
-
-;; MMX arithmetic
-
-(define_insn "addv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
-                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+;; See logical MMX insns.
+(define_insn "mmx_lshrdi3"  ;; DImode logical right shift of op1 by op2, emitted as psrlq
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI  ;; the shift is wrapped in an unspec, not exposed as a bare lshiftrt:DI
+         [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                      (match_operand:DI 2 "nonmemory_operand" "yi"))]
+         UNSPEC_NOP))]
   "TARGET_MMX"
-  "paddb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "psrlq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "addv4hi3"
+(define_insn "ashlv4hi3"  ;; V4HI left shift of op1 by op2, emitted as psllw
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]  ;; shift count is a single DImode operand, constraints "yi"
   "TARGET_MMX"
-  "paddw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "psllw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "addv2si3"
+(define_insn "ashlv2si3"  ;; V2SI left shift of op1 by op2, emitted as pslld
   [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
-                  (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]  ;; shift count is a single DImode operand, constraints "yi"
   "TARGET_MMX"
-  "paddd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "pslld\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_adddi3"
+;; See logical MMX insns.
+(define_insn "mmx_ashldi3"  ;; DImode left shift of op1 by op2 (wrapped in UNSPEC_NOP), emitted as psllq
   [(set (match_operand:DI 0 "register_operand" "=y")
         (unspec:DI
-        [(plus:DI (match_operand:DI 1 "register_operand" "%0")
-                  (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+        [(ashift:DI (match_operand:DI 1 "register_operand" "0")  ;; op1 is tied to op0 by constraint "0"
+                    (match_operand:DI 2 "nonmemory_operand" "yi"))]  ;; shift count: DImode nonmemory operand, constraints "yi"
         UNSPEC_NOP))]
   "TARGET_MMX"
-  "paddq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "psllq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "ssaddv8qi3"
+
+;; MMX pack/unpack insns.
+
+(define_insn "mmx_packsswb"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
-                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+       (vec_concat:V8QI
+        (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+        (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
   "TARGET_MMX"
-  "paddsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "packsswb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "ssaddv4hi3"
+(define_insn "mmx_packssdw"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+       (vec_concat:V4HI
+        (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
+        (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
   "TARGET_MMX"
-  "paddsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "packssdw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "usaddv8qi3"
+(define_insn "mmx_packuswb"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
-                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-(define_insn "usaddv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+       (vec_concat:V8QI
+        (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+        (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
   "TARGET_MMX"
-  "paddusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "packuswb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "subv8qi3"
+(define_insn "mmx_punpckhbw"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+       (vec_merge:V8QI
+        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                         (parallel [(const_int 4)
+                                    (const_int 0)
+                                    (const_int 5)
+                                    (const_int 1)
+                                    (const_int 6)
+                                    (const_int 2)
+                                    (const_int 7)
+                                    (const_int 3)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 4)
+                                    (const_int 1)
+                                    (const_int 5)
+                                    (const_int 2)
+                                    (const_int 6)
+                                    (const_int 3)
+                                    (const_int 7)]))
+        (const_int 85)))]
   "TARGET_MMX"
-  "psubb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "punpckhbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
-(define_insn "subv4hi3"
+(define_insn "mmx_punpckhwd"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+       (vec_merge:V4HI
+        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (const_int 5)))]
   "TARGET_MMX"
-  "psubw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "punpckhwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
-(define_insn "subv2si3"
+(define_insn "mmx_punpckhdq"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                   (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_subdi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(minus:DI (match_operand:DI 1 "register_operand" "0")
-                   (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
+       (vec_merge:V2SI
+        (match_operand:V2SI 1 "register_operand" "0")
+        (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+                         (parallel [(const_int 1)
+                                    (const_int 0)]))
+        (const_int 1)))]
   "TARGET_MMX"
-  "psubq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "punpckhdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
-(define_insn "sssubv8qi3"
+(define_insn "mmx_punpcklbw"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+       (vec_merge:V8QI
+        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                         (parallel [(const_int 0)
+                                    (const_int 4)
+                                    (const_int 1)
+                                    (const_int 5)
+                                    (const_int 2)
+                                    (const_int 6)
+                                    (const_int 3)
+                                    (const_int 7)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+                         (parallel [(const_int 4)
+                                    (const_int 0)
+                                    (const_int 5)
+                                    (const_int 1)
+                                    (const_int 6)
+                                    (const_int 2)
+                                    (const_int 7)
+                                    (const_int 3)]))
+        (const_int 85)))]
   "TARGET_MMX"
-  "psubsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "punpcklbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
-(define_insn "sssubv4hi3"
+(define_insn "mmx_punpcklwd"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+       (vec_merge:V4HI
+        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (const_int 5)))]
   "TARGET_MMX"
-  "psubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "punpcklwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
-(define_insn "ussubv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+(define_insn "mmx_punpckldq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_merge:V2SI
+        (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                          (parallel [(const_int 1)
+                                     (const_int 0)]))
+        (match_operand:V2SI 2 "register_operand" "y")
+        (const_int 1)))]
   "TARGET_MMX"
-  "psubusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "punpckldq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
-(define_insn "ussubv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
 
-(define_insn "mulv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+;; Miscellaneous stuff
 
-(define_insn "smulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (truncate:V4HI
-        (lshiftrt:V4SI
-         (mult:V4SI (sign_extend:V4SI
-                     (match_operand:V4HI 1 "register_operand" "0"))
-                    (sign_extend:V4SI
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-         (const_int 16))))]
+(define_insn "emms"
+  [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
+   (clobber (reg:XF 8))
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 29))
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))
+   (clobber (reg:DI 35))
+   (clobber (reg:DI 36))]
   "TARGET_MMX"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
-
-(define_insn "umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (truncate:V4HI
-        (lshiftrt:V4SI
-         (mult:V4SI (zero_extend:V4SI
-                     (match_operand:V4HI 1 "register_operand" "0"))
-                    (zero_extend:V4SI
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-         (const_int 16))))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "emms"
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "unknown")])
 
-(define_insn "mmx_pmaddwd"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (plus:V2SI
-        (mult:V2SI
-         (sign_extend:V2SI
-          (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
-                           (parallel [(const_int 0) (const_int 2)])))
-         (sign_extend:V2SI
-          (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
-                           (parallel [(const_int 0) (const_int 2)]))))
-        (mult:V2SI
-         (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)])))
-         (sign_extend:V2SI (vec_select:V2HI (match_dup 2)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)]))))))]
-  "TARGET_MMX"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+(define_insn "ldmxcsr"
+  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
+                   UNSPECV_LDMXCSR)]
+  "TARGET_SSE"
+  "ldmxcsr\t%0"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "load")])
 
+(define_insn "stmxcsr"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+       (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
+  "TARGET_SSE"
+  "stmxcsr\t%0"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "store")])
 
-;; MMX logical operations
-;; Note we don't want to declare these as regular iordi3 insns to prevent
-;; normal code that also wants to use the FPU from getting broken.
-;; The UNSPECs are there to prevent the combiner from getting overly clever.
-(define_insn "mmx_iordi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(ior:DI (match_operand:DI 1 "register_operand" "%0")
-                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "por\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+(define_expand "sfence"
+  [(set (match_dup 0)
+       (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
 
-(define_insn "mmx_xordi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(xor:DI (match_operand:DI 1 "register_operand" "%0")
-                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pxor\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")
-   (set_attr "memory" "none")])
+(define_insn "*sfence_insn"
+  [(set (match_operand:BLK 0 "" "")
+       (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+  "sfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
 
-;; Same as pxor, but don't show input operands so that we don't think
-;; they are live.
-(define_insn "mmx_clrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI [(const_int 0)] UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pxor\t{%0, %0|%0, %0}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")
-   (set_attr "memory" "none")])
-
-(define_insn "mmx_anddi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(and:DI (match_operand:DI 1 "register_operand" "%0")
-                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pand\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_nanddi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
-                         (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-
-;; MMX unsigned averages/sum of absolute differences
-
-(define_insn "mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (ashiftrt:V8QI
-        (plus:V8QI (plus:V8QI
-                    (match_operand:V8QI 1 "register_operand" "0")
-                    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
-                   (const_vector:V8QI [(const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pavgb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ashiftrt:V4HI
-        (plus:V4HI (plus:V4HI
-                    (match_operand:V4HI 1 "register_operand" "0")
-                    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
-                   (const_vector:V4HI [(const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_psadbw"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI [(match_operand:V8QI 1 "register_operand" "0")
-                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-                  UNSPEC_PSADBW))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-
-;; MMX insert/extract/shuffle
-
-(define_expand "mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "")
-        (vec_merge:V4HI
-         (match_operand:V4HI 1 "register_operand" "")
-          (vec_duplicate:V4HI
-            (match_operand:SI 2 "nonimmediate_operand" ""))
-          (match_operand:SI 3 "const_0_to_3_operand" "")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-{
-  operands[2] = gen_lowpart (HImode, operands[2]);
-  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
-})
+(define_expand "sse_prologue_save"
+  [(parallel [(set (match_operand:BLK 0 "" "")
+                  (unspec:BLK [(reg:DI 21)
+                               (reg:DI 22)
+                               (reg:DI 23)
+                               (reg:DI 24)
+                               (reg:DI 25)
+                               (reg:DI 26)
+                               (reg:DI 27)
+                               (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+             (use (match_operand:DI 1 "register_operand" ""))
+             (use (match_operand:DI 2 "immediate_operand" ""))
+             (use (label_ref:DI (match_operand 3 "" "")))])]
+  "TARGET_64BIT"
+  "")
 
-(define_insn "*mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (vec_merge:V4HI
-         (match_operand:V4HI 1 "register_operand" "0")
-          (vec_duplicate:V4HI
-            (match_operand:HI 2 "nonimmediate_operand" "rm"))
-          (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+(define_insn "*sse_prologue_save_insn"
+  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+                         (match_operand:DI 4 "const_int_operand" "n")))
+       (unspec:BLK [(reg:DI 21)
+                    (reg:DI 22)
+                    (reg:DI 23)
+                    (reg:DI 24)
+                    (reg:DI 25)
+                    (reg:DI 26)
+                    (reg:DI 27)
+                    (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+   (use (match_operand:DI 1 "register_operand" "r"))
+   (use (match_operand:DI 2 "const_int_operand" "i"))
+   (use (label_ref:DI (match_operand 3 "" "X")))]
+  "TARGET_64BIT
+   && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128
+   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
+  "*
 {
-  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+  int i;
+  operands[0] = gen_rtx_MEM (Pmode,
+                            gen_rtx_PLUS (Pmode, operands[0], operands[4]));
+  output_asm_insn (\"jmp\\t%A1\", operands);
+  for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
+    {
+      operands[4] = adjust_address (operands[0], DImode, i*16);
+      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
+      PUT_MODE (operands[4], TImode);
+      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
+        output_asm_insn (\"rex\", operands);
+      output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands);
+    }
+  (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+                            CODE_LABEL_NUMBER (operands[3]));
+  RET;
 }
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
-                                      (parallel
-                                       [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_pshufw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (unspec:V4HI [(match_operand:V4HI 1 "nonimmediate_operand" "ym")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pshufw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-
-;; MMX mask-generating comparisons
-
-(define_insn "eqv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (eq:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpeqb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-(define_insn "eqv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (eq:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpeqw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-(define_insn "eqv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (eq:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpeqd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-(define_insn "gtv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (gt:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgtb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-(define_insn "gtv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (gt:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgtw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-(define_insn "gtv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (gt:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgtd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-
-;; MMX max/min insns
-
-(define_insn "umaxv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmaxub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "
+  [(set_attr "type" "other")
+   (set_attr "length_immediate" "0")
+   (set_attr "length_address" "0")
+   (set_attr "length" "135")
+   (set_attr "memory" "store")
+   (set_attr "modrm" "0")
    (set_attr "mode" "DI")])
 
-(define_insn "smaxv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmaxsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+;; 3Dnow! instructions
 
-(define_insn "uminv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pminub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-(define_insn "sminv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pminsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-
-;; MMX shifts
-
-(define_insn "ashrv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psraw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "ashrv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psrad\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "lshrv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psrlw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "lshrv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psrld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; See logical MMX insns.
-(define_insn "mmx_lshrdi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-         [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi"))]
-         UNSPEC_NOP))]
-  "TARGET_MMX"
-  "psrlq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "ashlv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psllw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "ashlv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "pslld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; See logical MMX insns.
-(define_insn "mmx_ashldi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(ashift:DI (match_operand:DI 1 "register_operand" "0")
-                    (match_operand:DI 2 "nonmemory_operand" "yi"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "psllq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-
-;; MMX pack/unpack insns.
-
-(define_insn "mmx_packsswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_concat:V8QI
-        (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
-        (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
-  "TARGET_MMX"
-  "packsswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_packssdw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (vec_concat:V4HI
-        (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
-        (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
-  "TARGET_MMX"
-  "packssdw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_packuswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_concat:V8QI
-        (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
-        (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
-  "TARGET_MMX"
-  "packuswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_merge:V8QI
-        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                         (parallel [(const_int 4)
-                                    (const_int 0)
-                                    (const_int 5)
-                                    (const_int 1)
-                                    (const_int 6)
-                                    (const_int 2)
-                                    (const_int 7)
-                                    (const_int 3)]))
-        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
-                         (parallel [(const_int 0)
-                                    (const_int 4)
-                                    (const_int 1)
-                                    (const_int 5)
-                                    (const_int 2)
-                                    (const_int 6)
-                                    (const_int 3)
-                                    (const_int 7)]))
-        (const_int 85)))]
-  "TARGET_MMX"
-  "punpckhbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_punpckhwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (vec_merge:V4HI
-        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (const_int 5)))]
-  "TARGET_MMX"
-  "punpckhwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_punpckhdq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_merge:V2SI
-        (match_operand:V2SI 1 "register_operand" "0")
-        (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
-                         (parallel [(const_int 1)
-                                    (const_int 0)]))
-        (const_int 1)))]
-  "TARGET_MMX"
-  "punpckhdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; V8QI vec_merge (mask 85) of a permutation of operand 1 (tied to the
-;; output) and a permutation of operand 2.  Emits punpcklbw under TARGET_MMX.
-(define_insn "mmx_punpcklbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_merge:V8QI
-        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                         (parallel [(const_int 0)
-                                    (const_int 4)
-                                    (const_int 1)
-                                    (const_int 5)
-                                    (const_int 2)
-                                    (const_int 6)
-                                    (const_int 3)
-                                    (const_int 7)]))
-        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
-                         (parallel [(const_int 4)
-                                    (const_int 0)
-                                    (const_int 5)
-                                    (const_int 1)
-                                    (const_int 6)
-                                    (const_int 2)
-                                    (const_int 7)
-                                    (const_int 3)]))
-        (const_int 85)))]
-  "TARGET_MMX"
-  "punpcklbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; V4HI vec_merge (mask 5) of a permutation of operand 1 (tied to the
-;; output) and a permutation of operand 2.  Emits punpcklwd under TARGET_MMX.
-(define_insn "mmx_punpcklwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (vec_merge:V4HI
-        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (const_int 5)))]
-  "TARGET_MMX"
-  "punpcklwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; V2SI vec_merge (mask 1) of the element-swapped operand 1 (tied to the
-;; output) with operand 2.  Emits punpckldq under TARGET_MMX.
-(define_insn "mmx_punpckldq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_merge:V2SI
-        (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                          (parallel [(const_int 1)
-                                     (const_int 0)]))
-        (match_operand:V2SI 2 "register_operand" "y")
-        (const_int 1)))]
-  "TARGET_MMX"
-  "punpckldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-
-;; Miscellaneous stuff
-
-;; Volatile unspec UNSPECV_EMMS that emits "emms" under TARGET_MMX.  It
-;; clobbers hard registers 8-15 in XFmode and 29-36 in DImode.
-;; NOTE(review): presumably these are the x87 and MMX register files --
-;; confirm against the register numbering in i386.h.
-(define_insn "emms"
-  [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
-   (clobber (reg:XF 8))
-   (clobber (reg:XF 9))
-   (clobber (reg:XF 10))
-   (clobber (reg:XF 11))
-   (clobber (reg:XF 12))
-   (clobber (reg:XF 13))
-   (clobber (reg:XF 14))
-   (clobber (reg:XF 15))
-   (clobber (reg:DI 29))
-   (clobber (reg:DI 30))
-   (clobber (reg:DI 31))
-   (clobber (reg:DI 32))
-   (clobber (reg:DI 33))
-   (clobber (reg:DI 34))
-   (clobber (reg:DI 35))
-   (clobber (reg:DI 36))]
-  "TARGET_MMX"
-  "emms"
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "unknown")])
-
-;; Volatile unspec UNSPECV_LDMXCSR taking an SImode memory operand.
-;; Emits "ldmxcsr" under TARGET_SSE; the memory attribute is "load".
-(define_insn "ldmxcsr"
-  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
-                   UNSPECV_LDMXCSR)]
-  "TARGET_SSE"
-  "ldmxcsr\t%0"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "load")])
-
-;; Stores the volatile unspec UNSPECV_STMXCSR into an SImode memory operand.
-;; Emits "stmxcsr" under TARGET_SSE; the memory attribute is "store".
-(define_insn "stmxcsr"
-  [(set (match_operand:SI 0 "memory_operand" "=m")
-       (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
-  "TARGET_SSE"
-  "stmxcsr\t%0"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "store")])
-
-;; Expander for sfence.  The preparation code creates operand 0 itself: a
-;; volatile BLKmode MEM whose address is a SCRATCH, which the pattern both
-;; sets and uses through UNSPEC_SFENCE.
-(define_expand "sfence"
-  [(set (match_dup 0)
-       (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-{
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
-})
-
-;; Matches a BLKmode operand set to UNSPEC_SFENCE of itself and emits
-;; "sfence".  Operand 0 has an empty predicate and constraint.
-(define_insn "*sfence_insn"
-  [(set (match_operand:BLK 0 "" "")
-       (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "sfence"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
-
-;; Expander (TARGET_64BIT only) producing a parallel that stores an
-;; UNSPEC_SSE_PROLOGUE_SAVE of hard registers 21-28 into BLKmode operand 0,
-;; with uses of a DImode register (operand 1), an immediate (operand 2)
-;; and a label (operand 3).  There is no preparation code.
-(define_expand "sse_prologue_save"
-  [(parallel [(set (match_operand:BLK 0 "" "")
-                  (unspec:BLK [(reg:DI 21)
-                               (reg:DI 22)
-                               (reg:DI 23)
-                               (reg:DI 24)
-                               (reg:DI 25)
-                               (reg:DI 26)
-                               (reg:DI 27)
-                               (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
-             (use (match_operand:DI 1 "register_operand" ""))
-             (use (match_operand:DI 2 "immediate_operand" ""))
-             (use (label_ref:DI (match_operand 3 "" "")))])]
-  "TARGET_64BIT"
-  "")
-
-;; Insn form of the SSE prologue save.  The destination is a BLKmode MEM at
-;; register operand 0 plus constant operand 4.  The output code emits
-;; "jmp %A1", then for i from SSE_REGPARM_MAX - 1 down to INTVAL (operands[2])
-;; a movaps of SSE register i to offset i*16 from that address, and finally
-;; the internal label for operand 3.  A "rex" line is emitted before each
-;; movaps when the base address is not a PLUS.
-;; NOTE(review): the insn condition bounds the offsets to [-128, 128),
-;; presumably so every store uses an 8-bit displacement and has the same
-;; size -- confirm against the code that computes the jump target.
-(define_insn "*sse_prologue_save_insn"
-  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-                         (match_operand:DI 4 "const_int_operand" "n")))
-       (unspec:BLK [(reg:DI 21)
-                    (reg:DI 22)
-                    (reg:DI 23)
-                    (reg:DI 24)
-                    (reg:DI 25)
-                    (reg:DI 26)
-                    (reg:DI 27)
-                    (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
-   (use (match_operand:DI 1 "register_operand" "r"))
-   (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))]
-  "TARGET_64BIT
-   && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128
-   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-  "*
-{
-  int i;
-  operands[0] = gen_rtx_MEM (Pmode,
-                            gen_rtx_PLUS (Pmode, operands[0], operands[4]));
-  output_asm_insn (\"jmp\\t%A1\", operands);
-  for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
-    {
-      operands[4] = adjust_address (operands[0], DImode, i*16);
-      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
-      PUT_MODE (operands[4], TImode);
-      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
-        output_asm_insn (\"rex\", operands);
-      output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands);
-    }
-  (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
-                            CODE_LABEL_NUMBER (operands[3]));
-  RET;
-}
-  "
-  [(set_attr "type" "other")
-   (set_attr "length_immediate" "0")
-   (set_attr "length_address" "0")
-   (set_attr "length" "135")
-   (set_attr "memory" "store")
-   (set_attr "modrm" "0")
-   (set_attr "mode" "DI")])
-
-;; 3Dnow! instructions
-
-;; V2SF addition: operand 0 = operand 1 (tied to the output) + operand 2
-;; (register or memory).  Emits pfadd under TARGET_3DNOW.
-(define_insn "addv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (plus:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfadd\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; V2SF subtraction: operand 0 = operand 1 (tied to the output) - operand 2.
-;; Emits pfsub under TARGET_3DNOW.
-(define_insn "subv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (minus:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfsub\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Reversed V2SF subtraction: operand 0 = operand 2 - operand 1, where
-;; operand 1 is the one tied to the output.  Emits pfsubr under TARGET_3DNOW.
-(define_insn "subrv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
-                    (match_operand:V2SF 1 "register_operand" "0")))]
-  "TARGET_3DNOW"
-  "pfsubr\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Greater-than comparison of two V2SF operands producing a V2SI result in
-;; the register tied to operand 1.  Emits pfcmpgt under TARGET_3DNOW.
-(define_insn "gtv2sf3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
-                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfcmpgt\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "V2SF")])
-
-;; Greater-or-equal comparison of two V2SF operands producing a V2SI result
-;; in the register tied to operand 1.  Emits pfcmpge under TARGET_3DNOW.
-(define_insn "gev2sf3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
-                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfcmpge\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "V2SF")])
-
-;; Equality comparison of two V2SF operands producing a V2SI result in the
-;; register tied to operand 1.  Emits pfcmpeq under TARGET_3DNOW.
-(define_insn "eqv2sf3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (eq:V2SI (match_operand:V2SF 1 "register_operand" "0")
-                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfcmpeq\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "V2SF")])
-
-;; V2SF smax of operand 1 (tied to the output) and operand 2.
-;; Emits pfmax under TARGET_3DNOW.
-(define_insn "pfmaxv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (smax:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfmax\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; V2SF smin of operand 1 (tied to the output) and operand 2.
-;; Emits pfmin under TARGET_3DNOW.
-(define_insn "pfminv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (smin:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfmin\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; V2SF multiplication: operand 0 = operand 1 (tied to the output) * operand 2.
-;; Emits pfmul under TARGET_3DNOW.
-(define_insn "mulv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (mult:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfmul\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "V2SF")])
-
-;; Volatile unspec UNSPECV_FEMMS that emits "femms" under TARGET_3DNOW.  It
-;; clobbers hard registers 8-15 in XFmode and 29-36 in DImode, the same list
-;; as the "emms" pattern, but its memory attribute is "none".
-(define_insn "femms"
-  [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
-   (clobber (reg:XF 8))
-   (clobber (reg:XF 9))
-   (clobber (reg:XF 10))
-   (clobber (reg:XF 11))
-   (clobber (reg:XF 12))
-   (clobber (reg:XF 13))
-   (clobber (reg:XF 14))
-   (clobber (reg:XF 15))
-   (clobber (reg:DI 29))
-   (clobber (reg:DI 30))
-   (clobber (reg:DI 31))
-   (clobber (reg:DI 32))
-   (clobber (reg:DI 33))
-   (clobber (reg:DI 34))
-   (clobber (reg:DI 35))
-   (clobber (reg:DI 36))]
-  "TARGET_3DNOW"
-  "femms"
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "none")]) 
-
-;; Float-to-integer conversion (fix) from a V2SF register or memory operand
-;; to a V2SI register.  Emits pf2id under TARGET_3DNOW.
-(define_insn "pf2id"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pf2id\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-;; Converts V2SF to V2SI (fix), saturating-truncates each element to HImode
-;; (ss_truncate to V2HI) and sign-extends back to V2SI.  Emits pf2iw under
-;; TARGET_3DNOW_A.
-(define_insn "pf2iw"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (sign_extend:V2SI
-          (ss_truncate:V2HI
-             (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
-  "TARGET_3DNOW_A"
-  "pf2iw\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-;; V2SF accumulate.  The first half of the vec_concat is the sum of the two
-;; elements of operand 1 (tied to the output); the second half is the sum
-;; of the two elements of operand 2.  Emits pfacc under TARGET_3DNOW.
-(define_insn "pfacc"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (vec_concat:V2SF
-          (plus:SF
-             (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
-                            (parallel [(const_int  0)]))
-             (vec_select:SF (match_dup 1)
-                            (parallel [(const_int 1)])))
-           (plus:SF
-              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
-                            (parallel [(const_int  0)]))
-              (vec_select:SF (match_dup 2)
-                            (parallel [(const_int 1)])))))]
-  "TARGET_3DNOW"
-  "pfacc\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; V2SF negative accumulate.  The first half of the vec_concat is element 0
-;; minus element 1 of operand 1 (tied to the output); the second half is
-;; element 0 minus element 1 of operand 2.  Emits pfnacc under TARGET_3DNOW_A.
-(define_insn "pfnacc"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (vec_concat:V2SF
-           (minus:SF
-              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
-                            (parallel [(const_int 0)]))
-              (vec_select:SF (match_dup 1)
-                            (parallel [(const_int 1)])))
-           (minus:SF
-              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
-                            (parallel [(const_int  0)]))
-              (vec_select:SF (match_dup 2)
-                            (parallel [(const_int 1)])))))]
-  "TARGET_3DNOW_A"
-  "pfnacc\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; V2SF mixed accumulate.  The first half of the vec_concat is element 0
-;; minus element 1 of operand 1 (tied to the output); the second half is
-;; the sum of the two elements of operand 2.  Emits pfpnacc under
-;; TARGET_3DNOW_A.
-(define_insn "pfpnacc"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (vec_concat:V2SF
-           (minus:SF
-              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
-                            (parallel [(const_int 0)]))
-              (vec_select:SF (match_dup 1)
-                            (parallel [(const_int 1)])))
-           (plus:SF
-              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
-                            (parallel [(const_int 0)]))
-              (vec_select:SF (match_dup 2)
-                            (parallel [(const_int 1)])))))]
-  "TARGET_3DNOW_A"
-  "pfpnacc\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Integer-to-float conversion that first narrows each SImode element of
-;; operand 1 to HImode (truncate) and sign-extends it back to SImode, then
-;; converts the resulting V2SI to V2SF.  Emits pi2fw under TARGET_3DNOW_A.
-(define_insn "pi2fw"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (float:V2SF
-          (vec_concat:V2SI
-             (sign_extend:SI
-                (truncate:HI
-                   (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
-                                  (parallel [(const_int 0)]))))
-              (sign_extend:SI
-                (truncate:HI
-                    (vec_select:SI (match_dup 1)
-                                  (parallel [(const_int  1)])))))))]
-  "TARGET_3DNOW_A"
-  "pi2fw\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-;; Integer-to-float conversion from a V2SI register or memory operand to a
-;; V2SF register.  Emits pi2fd under TARGET_3DNOW.
-(define_insn "floatv2si2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pi2fd\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-;; This insn is identical to pavgb in operation, but the opcode is
-;; different.  To avoid accidentally matching pavgb, use an unspec.
-
-;; Two-operand V8QI unspec (UNSPEC_PAVGUSB): operand 1 is tied to the
-;; output and operand 2 may be a register or memory.  Emits pavgusb under
-;; TARGET_3DNOW.
-(define_insn "pavgusb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (unspec:V8QI
-          [(match_operand:V8QI 1 "register_operand" "0")
-           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-         UNSPEC_PAVGUSB))]
-  "TARGET_3DNOW"
-  "pavgusb\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "TI")])
-
-;; 3DNow reciprocal and sqrt
-;; One-operand V2SF unspec (UNSPEC_PFRCP) of a register or memory source.
-;; Emits pfrcp under TARGET_3DNOW.
-(define_insn "pfrcpv2sf2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
-       UNSPEC_PFRCP))]
-  "TARGET_3DNOW"
-  "pfrcp\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-;; Two-operand V2SF unspec (UNSPEC_PFRCPIT1): operand 1 is tied to the
-;; output and operand 2 may be a register or memory.  Emits pfrcpit1 under
-;; TARGET_3DNOW.
-(define_insn "pfrcpit1v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
-                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRCPIT1))]
-  "TARGET_3DNOW"
-  "pfrcpit1\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-;; Two-operand V2SF unspec (UNSPEC_PFRCPIT2): operand 1 is tied to the
-;; output and operand 2 may be a register or memory.  Emits pfrcpit2 under
-;; TARGET_3DNOW.
-(define_insn "pfrcpit2v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
-                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRCPIT2))]
-  "TARGET_3DNOW"
-  "pfrcpit2\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-;; One-operand V2SF unspec (UNSPEC_PFRSQRT) of a register or memory source.
-;; Emits pfrsqrt under TARGET_3DNOW.
-(define_insn "pfrsqrtv2sf2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRSQRT))]
-  "TARGET_3DNOW"
-  "pfrsqrt\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-               
-;; Two-operand V2SF unspec (UNSPEC_PFRSQIT1): operand 1 is tied to the
-;; output and operand 2 may be a register or memory.  Emits pfrsqit1 under
-;; TARGET_3DNOW.
-(define_insn "pfrsqit1v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
-                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRSQIT1))]
-  "TARGET_3DNOW"
-  "pfrsqit1\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-;; Rounded high-half multiply on V4HI: both operands are sign-extended to
-;; V4SI and multiplied, 32768 is added to each product, the sum is shifted
-;; right logically by 16 and truncated back to V4HI.  Emits pmulhrw under
-;; TARGET_3DNOW.
-(define_insn "pmulhrwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (truncate:V4HI
-          (lshiftrt:V4SI
-             (plus:V4SI
-                (mult:V4SI
-                   (sign_extend:V4SI
-                      (match_operand:V4HI 1 "register_operand" "0"))
-                   (sign_extend:V4SI
-                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-                (const_vector:V4SI [(const_int 32768)
-                                    (const_int 32768)
-                                    (const_int 32768)
-                                    (const_int 32768)]))
-             (const_int 16))))]
-  "TARGET_3DNOW"
-  "pmulhrw\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "TI")])
-
-;; Swaps the two elements of a V2SI register or memory operand
-;; (vec_select [1 0]).  Emits pswapd under TARGET_3DNOW_A.
-(define_insn "pswapdv2si2"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
-                        (parallel [(const_int 1) (const_int 0)])))]
-  "TARGET_3DNOW_A"
-  "pswapd\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "TI")])
-
-;; Swaps the two elements of a V2SF register or memory operand
-;; (vec_select [1 0]).  Emits pswapd under TARGET_3DNOW_A.
-(define_insn "pswapdv2sf2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
-                        (parallel [(const_int 1) (const_int 0)])))]
-  "TARGET_3DNOW_A"
-  "pswapd\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "TI")])
-
-;; Expander for the prefetch pattern.  Operand 0 is the address, operand 1
-;; the read/write flag (must be 0 or 1) and operand 2 the locality (must be
-;; 0..3); anything else aborts.  The preparation code then rewrites one of
-;; the constants so that exactly one family of prefetch insns matches:
-;; locality is forced to 3 for the 3DNow! form, or the read/write flag is
-;; forced to 0 for the SSE form.
-(define_expand "prefetch"
-  [(prefetch (match_operand 0 "address_operand" "")
-            (match_operand:SI 1 "const_int_operand" "")
-            (match_operand:SI 2 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
-{
-  int rw = INTVAL (operands[1]);
-  int locality = INTVAL (operands[2]);
-
-  if (rw != 0 && rw != 1)
-    abort ();
-  if (locality < 0 || locality > 3)
-    abort ();
-  if (GET_MODE (operands[0]) != Pmode && GET_MODE (operands[0]) != VOIDmode)
-    abort ();
-
-  /* Use 3dNOW prefetch in case we are asking for write prefetch not
-     supported by SSE counterpart or the SSE prefetch is not available
-     (K6 machines).  Otherwise use SSE prefetch as it allows specifying
-     of locality.  */
-  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
-    operands[2] = GEN_INT (3);
-  else
-    operands[1] = const0_rtx;
-})
-
-;; SSE prefetch for 32-bit addresses.  The read/write slot must be constant
-;; zero; operand 1 is the locality and selects the mnemonic
-;; (0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0).
-;; Any other locality aborts.
-(define_insn "*prefetch_sse"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
-            (const_int 0)
-            (match_operand:SI 1 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE && !TARGET_64BIT"
-{
-  switch (INTVAL (operands[1]))
-    {
-    case 0:
-      return "prefetchnta\t%a0";
-    case 1:
-      return "prefetcht2\t%a0";
-    case 2:
-      return "prefetcht1\t%a0";
-    case 3:
-      return "prefetcht0\t%a0";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "sse")
-   (set_attr "memory" "none")])
-
-;; SSE prefetch for 64-bit (DImode) addresses.  The read/write slot must be
-;; constant zero; operand 1 is the locality and selects the mnemonic
-;; (0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0).
-;; Any other locality aborts.
-(define_insn "*prefetch_sse_rex"
-  [(prefetch (match_operand:DI 0 "address_operand" "p")
-            (const_int 0)
-            (match_operand:SI 1 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE && TARGET_64BIT"
-{
-  switch (INTVAL (operands[1]))
-    {
-    case 0:
-      return "prefetchnta\t%a0";
-    case 1:
-      return "prefetcht2\t%a0";
-    case 2:
-      return "prefetcht1\t%a0";
-    case 3:
-      return "prefetcht0\t%a0";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "sse")
-   (set_attr "memory" "none")])
-
-;; 3DNow! prefetch for 32-bit addresses.  The locality slot must be constant
-;; 3; operand 1 sits in the read/write slot and picks "prefetch" when it is
-;; zero and "prefetchw" otherwise.
-(define_insn "*prefetch_3dnow"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
-            (match_operand:SI 1 "const_int_operand" "n")
-            (const_int 3))]
-  "TARGET_3DNOW && !TARGET_64BIT"
-{
-  return INTVAL (operands[1]) != 0 ? "prefetchw\t%a0" : "prefetch\t%a0";
-}
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "none")])
-
-;; 3DNow! prefetch for 64-bit (DImode) addresses.  The locality slot must be
-;; constant 3; operand 1 sits in the read/write slot and picks "prefetch"
-;; when it is zero and "prefetchw" otherwise.
-(define_insn "*prefetch_3dnow_rex"
-  [(prefetch (match_operand:DI 0 "address_operand" "p")
-            (match_operand:SI 1 "const_int_operand" "n")
-            (const_int 3))]
-  "TARGET_3DNOW && TARGET_64BIT"
-{
-  return INTVAL (operands[1]) != 0 ? "prefetchw\t%a0" : "prefetch\t%a0";
-}
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "none")])
-
-;; SSE2 support
-
-;; V2DF addition: operand 0 = operand 1 (tied to the output) + operand 2
-;; (register or memory).  Emits addpd under TARGET_SSE2.
-(define_insn "addv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "addpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar form of V2DF addition: the vec_merge with mask 1 takes element 0
-;; from the sum and the other element from operand 1.  Emits addsd under
-;; TARGET_SSE2.
-(define_insn "vmaddv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "addsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-;; V2DF subtraction: operand 0 = operand 1 (tied to the output) - operand 2.
-;; Emits subpd under TARGET_SSE2.
-(define_insn "subv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "subpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar form of V2DF subtraction: the vec_merge with mask 1 takes element
-;; 0 from the difference and the other element from operand 1.  Emits subsd
-;; under TARGET_SSE2.
-(define_insn "vmsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "subsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-;; V2DF multiplication: operand 0 = operand 1 (tied to the output) * operand 2.
-;; Emits mulpd under TARGET_SSE2.
-(define_insn "mulv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "mulpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar form of V2DF multiplication: the vec_merge with mask 1 takes
-;; element 0 from the product and the other element from operand 1.  Emits
-;; mulsd under TARGET_SSE2.
-(define_insn "vmmulv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "mulsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "DF")])
-
-;; V2DF division: operand 0 = operand 1 (tied to the output) / operand 2.
-;; Emits divpd under TARGET_SSE2.
-(define_insn "divv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "divpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar form of V2DF division: the vec_merge with mask 1 takes element 0
-;; from the quotient and the other element from operand 1.  Emits divsd
-;; under TARGET_SSE2.
-(define_insn "vmdivv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "divsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "DF")])
-
-;; SSE min/max
-
-;; V2DF smax of operand 1 (tied to the output) and operand 2.
-;; Emits maxpd under TARGET_SSE2.
-(define_insn "smaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "maxpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar form of V2DF smax: the vec_merge with mask 1 takes element 0 from
-;; the maximum and the other element from operand 1.  Emits maxsd under
-;; TARGET_SSE2.
-(define_insn "vmsmaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "maxsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-;; V2DF smin of operand 1 (tied to the output) and operand 2.
-;; Emits minpd under TARGET_SSE2.
-(define_insn "sminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "minpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar form of V2DF smin: the vec_merge with mask 1 takes element 0 from
-;; the minimum and the other element from operand 1.  Emits minsd under
-;; TARGET_SSE2.
-(define_insn "vmsminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "minsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-;; SSE2 square root.  There doesn't appear to be an extension for the
-;; reciprocal/rsqrt instructions if the Intel manual is to be believed.
-
-;; V2DF square root of operand 1.  Emits sqrtpd under TARGET_SSE2.
-;; NOTE(review): operand 1 uses the "register_operand" predicate although
-;; its constraint is "xm"; the memory alternative can therefore only be
-;; reached through reload.  Confirm whether "nonimmediate_operand" was
-;; intended, as in the other V2DF arithmetic patterns.
-(define_insn "sqrtv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))]
-  "TARGET_SSE2"
-  "sqrtpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar form of V2DF square root: the vec_merge with mask 1 takes element
-;; 0 from sqrt (operand 1) and the other element from operand 2, which is
-;; tied to the output.  Emits sqrtsd under TARGET_SSE2.
-;; The "mode" attribute is DF: sqrtsd is a double-precision scalar insn,
-;; and every other vm*v2df pattern here is tagged DF as well.
-(define_insn "vmsqrtv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
-                        (match_operand:V2DF 2 "register_operand" "0")
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "sqrtsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "DF")])
-
-;; SSE mask-generating compares
-
-;; Mask-generating packed compare: a V2DI result from applying comparison
-;; operator 3 (any sse_comparison_operator) to two V2DF operands.  Emits
-;; cmp<cond>pd under TARGET_SSE2.
-;; NOTE(review): %D3 presumably prints the condition suffix for operator 3
-;; -- confirm in print_operand.  Operand 2 uses "nonimmediate_operand" but
-;; its constraint is "x" only, so a memory operand is always reloaded.
-(define_insn "maskcmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (match_operator:V2DI 3 "sse_comparison_operator"
-                            [(match_operand:V2DF 1 "register_operand" "0")
-                             (match_operand:V2DF 2 "nonimmediate_operand" "x")]))]
-  "TARGET_SSE2"
-  "cmp%D3pd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V2DF")])
-
-;; Negated mask-generating packed compare: the V2DI complement of
-;; comparison operator 3 applied to two V2DF operands.  "not unordered" is
-;; emitted as the "ordered" compare; every other operator is emitted as
-;; cmpn<cond>pd.
-;; The ordered case must use the packed-double mnemonic (cmpordpd).  The
-;; packed-single cmpordps would compare the 128-bit operands as four floats
-;; and build the wrong mask for V2DF data.
-(define_insn "maskncmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (not:V2DI
-        (match_operator:V2DI 3 "sse_comparison_operator"
-                             [(match_operand:V2DF 1 "register_operand" "0")
-                              (match_operand:V2DF 2 "nonimmediate_operand" "x")])))]
-  "TARGET_SSE2"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordpd\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3pd\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V2DF")])
-
-;; Scalar mask-generating compare: the vec_merge with mask 1 takes element
-;; 0 from the comparison result and the other element from operand 1 viewed
-;; as V2DI (subreg).  Emits cmp<cond>sd under TARGET_SSE2.
-(define_insn "vmmaskcmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (match_operator:V2DI 3 "sse_comparison_operator"
-                             [(match_operand:V2DF 1 "register_operand" "0")
-                              (match_operand:V2DF 2 "nonimmediate_operand" "x")])
-        (subreg:V2DI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "cmp%D3sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
-
-;; Scalar negated mask-generating compare: the vec_merge with mask 1 takes
-;; element 0 from the complemented comparison and the other element from
-;; operand 1 viewed as V2DI.  "not unordered" is emitted as cmpordsd; every
-;; other operator is emitted as cmpn<cond>sd.
-(define_insn "vmmaskncmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (not:V2DI
-         (match_operator:V2DI 3 "sse_comparison_operator"
-                              [(match_operand:V2DF 1 "register_operand" "0")
-                               (match_operand:V2DF 2 "nonimmediate_operand" "x")]))
-        (subreg:V2DI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE2"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordsd\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3sd\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
-
-;; Sets FLAGS_REG in CCFPmode from a comparison of element 0 of two V2DF
-;; registers.  Emits comisd under TARGET_SSE2.
-(define_insn "sse2_comi"
-  [(set (reg:CCFP FLAGS_REG)
-        (compare:CCFP (vec_select:DF
-                      (match_operand:V2DF 0 "register_operand" "x")
-                      (parallel [(const_int 0)]))
-                     (vec_select:DF
-                      (match_operand:V2DF 1 "register_operand" "x")
-                      (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "comisd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "DF")])
-
-;; Sets FLAGS_REG in CCFPUmode from a comparison of element 0 of two V2DF
-;; registers.  Emits ucomisd under TARGET_SSE2.
-(define_insn "sse2_ucomi"
-  [(set (reg:CCFPU FLAGS_REG)
-       (compare:CCFPU (vec_select:DF
-                        (match_operand:V2DF 0 "register_operand" "x")
-                        (parallel [(const_int 0)]))
-                       (vec_select:DF
-                        (match_operand:V2DF 1 "register_operand" "x")
-                        (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "ucomisd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "DF")])
-
-;; SSE Strange Moves.
-
-;; UNSPEC_MOVMSK of a V2DF register, producing an SImode value in a general
-;; register.  Emits movmskpd under TARGET_SSE2.
-(define_insn "sse2_movmskpd"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE2"
-  "movmskpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; UNSPEC_MOVMSK of a V16QI register, producing an SImode value in a general
-;; register.  Emits pmovmskb under TARGET_SSE2.
-;; NOTE(review): the "mode" attribute is V2DF although the source is an
-;; integer vector -- confirm whether this is deliberate (e.g. for length or
-;; prefix computation) before changing it.
-(define_insn "sse2_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE2"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; Stores UNSPEC_MASKMOV of two V16QI registers to the V16QI memory location
-;; addressed by SImode register operand 0 (constraint "D").  Emits
-;; maskmovdqu with operands 1 and 2 only; the address register is implicit
-;; in the template.  The insn condition is TARGET_SSE2 alone.
-(define_insn "sse2_maskmovdqu"
-  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
-       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
-                      (match_operand:V16QI 2 "register_operand" "x")]
-                     UNSPEC_MASKMOV))]
-  "TARGET_SSE2"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "maskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; DImode-address variant of sse2_maskmovdqu: stores UNSPEC_MASKMOV of two
-;; V16QI registers to the memory addressed by DImode register operand 0
-;; (constraint "D").
-;; NOTE(review): the insn condition is TARGET_SSE2 only, with no
-;; TARGET_64BIT test -- presumably callers select this pattern explicitly;
-;; confirm.
-(define_insn "sse2_maskmovdqu_rex64"
-  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
-       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
-                      (match_operand:V16QI 2 "register_operand" "x")]
-                     UNSPEC_MASKMOV))]
-  "TARGET_SSE2"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "maskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; Stores UNSPEC_MOVNT of a V2DF register to a V2DF memory operand.
-;; Emits movntpd under TARGET_SSE2.
-(define_insn "sse2_movntv2df"
-  [(set (match_operand:V2DF 0 "memory_operand" "=m")
-       (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
-                    UNSPEC_MOVNT))]
-  "TARGET_SSE2"
-  "movntpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; Stores UNSPEC_MOVNT of a V2DI register to a V2DI memory operand.
-;; Emits movntdq under TARGET_SSE2.
-(define_insn "sse2_movntv2di"
-  [(set (match_operand:V2DI 0 "memory_operand" "=m")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
-                    UNSPEC_MOVNT))]
-  "TARGET_SSE2"
-  "movntdq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; Stores UNSPEC_MOVNT of an SImode general register to an SImode memory
-;; operand.  Emits movnti under TARGET_SSE2.
-;; NOTE(review): the "mode" attribute is V2DF although both operands are
-;; SImode -- confirm the intended value.
-(define_insn "sse2_movntsi"
-  [(set (match_operand:SI 0 "memory_operand" "=m")
-       (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
-                  UNSPEC_MOVNT))]
-  "TARGET_SSE2"
-  "movnti\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; SSE <-> integer/MMX conversions
-
-;; Conversions between SI and SF
-
-;; Integer-to-float conversion from a V4SI register or memory operand to a
-;; V4SF register.  Emits cvtdq2ps under TARGET_SSE2.
-;; NOTE(review): the "mode" attribute is V2DF although the result is V4SF
-;; -- confirm the intended value.
-(define_insn "cvtdq2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvtdq2ps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; Float-to-integer conversion (fix) from a V4SF register or memory operand
-;; to a V4SI register.  Emits cvtps2dq under TARGET_SSE2.
-(define_insn "cvtps2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvtps2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; V4SF to V4SI conversion expressed as UNSPEC_FIX rather than the fix RTL
-;; code used by cvtps2dq.  Emits cvttps2dq under TARGET_SSE2.
-(define_insn "cvttps2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                    UNSPEC_FIX))]
-  "TARGET_SSE2"
-  "cvttps2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; Conversions between SI and DF
-
-;; Converts elements 0 and 1 of a V4SI register or memory operand to a V2DF
-;; result.  Emits cvtdq2pd under TARGET_SSE2.
-(define_insn "cvtdq2pd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (float:V2DF (vec_select:V2SI
-                    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
-                    (parallel
-                     [(const_int 0)
-                      (const_int 1)]))))]
-  "TARGET_SSE2"
-  "cvtdq2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "cvtpd2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_concat:V4SI
-        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
-        (const_vector:V2SI [(const_int 0) (const_int 0)])))]
-  "TARGET_SSE2"
-  "cvtpd2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvttpd2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_concat:V4SI
-        (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_FIX)
-        (const_vector:V2SI [(const_int 0) (const_int 0)])))]
-  "TARGET_SSE2"
-  "cvttpd2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvtpd2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvtpd2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvttpd2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
-                    UNSPEC_FIX))]
-  "TARGET_SSE2"
-  "cvttpd2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvtpi2pd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE2"
-  "cvtpi2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; Conversions between SI and DF
-
-(define_insn "cvtsd2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
-                              (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "cvtsd2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "mode" "SI")])
-
-(define_insn "cvtsd2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
-                              (parallel [(const_int 0)]))))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "cvtsd2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "mode" "DI")])
-
-(define_insn "cvttsd2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
-                                  (parallel [(const_int 0)]))] UNSPEC_FIX))]
-  "TARGET_SSE2"
-  "cvttsd2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SI")
-   (set_attr "athlon_decode" "double,vector")])
-
-(define_insn "cvttsd2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
-                                  (parallel [(const_int 0)]))] UNSPEC_FIX))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "cvttsd2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DI")
-   (set_attr "athlon_decode" "double,vector")])
-
-(define_insn "cvtsi2sd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
-                       (vec_duplicate:V2DF
-                         (float:DF
-                           (match_operand:SI 2 "nonimmediate_operand" "r,rm")))
-                       (const_int 2)))]
-  "TARGET_SSE2"
-  "cvtsi2sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,direct")])
-
-(define_insn "cvtsi2sdq"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
-                       (vec_duplicate:V2DF
-                         (float:DF
-                           (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
-                       (const_int 2)))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "cvtsi2sdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,direct")])
-
-;; Conversions between SF and DF
-
-(define_insn "cvtsd2ss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0,0")
-                       (vec_duplicate:V4SF
-                         (float_truncate:V2SF
-                           (match_operand:V2DF 2 "nonimmediate_operand" "x,xm")))
-                       (const_int 14)))]
-  "TARGET_SSE2"
-  "cvtsd2ss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtss2sd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                       (float_extend:V2DF
-                         (vec_select:V2SF
-                           (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-                           (parallel [(const_int 0)
-                                      (const_int 1)])))
-                       (const_int 2)))]
-  "TARGET_SSE2"
-  "cvtss2sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
-
-(define_insn "cvtpd2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (subreg:V4SF
-         (vec_concat:V4SI
-           (subreg:V2SI (float_truncate:V2SF
-                          (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0)
-           (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))]
-  "TARGET_SSE2"
-  "cvtpd2ps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "cvtps2pd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (float_extend:V2DF
-         (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
-                          (parallel [(const_int 0)
-                                     (const_int 1)]))))]
-  "TARGET_SSE2"
-  "cvtps2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; SSE2 variants of MMX insns
-
-;; MMX arithmetic
-
-(define_insn "addv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                   (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "addv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "addv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0")
-                  (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "addv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0")
-                  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssaddv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssaddv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
-                     (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "usaddv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "usaddv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
-                     (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (minus:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                   (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (minus:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                   (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sssubv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sssubv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ussubv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ussubv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "mulv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (mult:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "smulv8hi3_highpart"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (truncate:V8HI
-        (lshiftrt:V8SI
-         (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
-                    (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
-         (const_int 16))))]
-  "TARGET_SSE2"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "umulv8hi3_highpart"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (truncate:V8HI
-        (lshiftrt:V8SI
-         (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
-                    (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
-         (const_int 16))))]
-  "TARGET_SSE2"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_umulsidi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (mult:DI (zero_extend:DI (vec_select:SI
-                                 (match_operand:V2SI 1 "register_operand" "0")
-                                 (parallel [(const_int 0)])))
-                (zero_extend:DI (vec_select:SI
-                                 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
-                                 (parallel [(const_int 0)])))))]
-  "TARGET_SSE2"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse2_umulv2siv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (mult:V2DI (zero_extend:V2DI
-                    (vec_select:V2SI
-                      (match_operand:V4SI 1 "register_operand" "0")
-                      (parallel [(const_int 0) (const_int 2)])))
-                  (zero_extend:V2DI
-                    (vec_select:V2SI
-                      (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-                      (parallel [(const_int 0) (const_int 2)])))))]
-  "TARGET_SSE2"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pmaddwd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (plus:V4SI
-        (mult:V4SI
-         (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0")
-                                            (parallel [(const_int 0)
-                                                       (const_int 2)
-                                                       (const_int 4)
-                                                       (const_int 6)])))
-         (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
-                                            (parallel [(const_int 0)
-                                                       (const_int 2)
-                                                       (const_int 4)
-                                                       (const_int 6)]))))
-        (mult:V4SI
-         (sign_extend:V4SI (vec_select:V4HI (match_dup 1)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)
-                                                       (const_int 5)
-                                                       (const_int 7)])))
-         (sign_extend:V4SI (vec_select:V4HI (match_dup 2)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)
-                                                       (const_int 5)
-                                                       (const_int 7)]))))))]
-  "TARGET_SSE2"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-;; Same as pxor, but don't show input operands so that we don't think
-;; they are live.
-(define_insn "sse2_clrti"
-  [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
-  "TARGET_SSE2"
-{
-  if (get_attr_mode (insn) == MODE_TI)
-    return "pxor\t%0, %0";
-  else
-    return "xorps\t%0, %0";
-}
-  [(set_attr "type" "ssemov")
-   (set_attr "memory" "none")
-   (set (attr "mode")
-             (if_then_else
-               (ne (symbol_ref "optimize_size")
-                   (const_int 0))
-               (const_string "V4SF")
-               (const_string "TI")))])
-
-;; MMX unsigned averages/sum of absolute differences
-
-(define_insn "sse2_uavgv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (ashiftrt:V16QI
-        (plus:V16QI (plus:V16QI
-                    (match_operand:V16QI 1 "register_operand" "0")
-                    (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
-                    (const_vector:V16QI [(const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "pavgb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_uavgv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashiftrt:V8HI
-        (plus:V8HI (plus:V8HI
-                    (match_operand:V8HI 1 "register_operand" "0")
-                    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
-                   (const_vector:V8HI [(const_int 1) (const_int 1)
-                                       (const_int 1) (const_int 1)
-                                       (const_int 1) (const_int 1)
-                                       (const_int 1) (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-;; @@@ this isn't the right representation.
-(define_insn "sse2_psadbw"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
-                     (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_PSADBW))]
-  "TARGET_SSE2"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-
-;; MMX insert/extract/shuffle
-
-(define_expand "sse2_pinsrw"
-  [(set (match_operand:V8HI 0 "register_operand" "")
-        (vec_merge:V8HI
-         (match_operand:V8HI 1 "register_operand" "")
-          (vec_duplicate:V8HI
-            (match_operand:SI 2 "nonimmediate_operand" ""))
-          (match_operand:SI 3 "const_0_to_7_operand" "")))]
-  "TARGET_SSE2"
-{
-  operands[2] = gen_lowpart (HImode, operands[2]);
-  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
-})
-
-(define_insn "*sse2_pinsrw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (vec_merge:V8HI
-         (match_operand:V8HI 1 "register_operand" "0")
-          (vec_duplicate:V8HI
-            (match_operand:HI 2 "nonimmediate_operand" "rm"))
-          (match_operand:SI 3 "const_pow2_1_to_128_operand" "N")))]
-  "TARGET_SSE2"
-{
-  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
-}
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI
-         (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
-                        (parallel
-                         [(match_operand:SI 2 "const_0_to_7_operand" "N")]))))]
-  "TARGET_SSE2"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pshufd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (unspec:V4SI [(match_operand:V4SI 1 "nonimmediate_operand" "xm")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE2"
-  "pshufd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pshuflw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_PSHUFLW))]
-  "TARGET_SSE2"
-  "pshuflw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pshufhw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_PSHUFHW))]
-  "TARGET_SSE2"
-  "pshufhw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; MMX mask-generating comparisons
-
-(define_insn "eqv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (eq:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpeqb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "eqv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (eq:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpeqw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "eqv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (eq:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpeqd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "gtv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (gt:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpgtb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "gtv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (gt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpgtw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "gtv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (gt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpgtd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-
-;; MMX max/min insns
-
-(define_insn "umaxv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (umax:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pmaxub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "smaxv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (smax:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pmaxsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "uminv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (umin:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pminub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sminv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (smin:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pminsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-
-;; MMX shifts
-
-(define_insn "ashrv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psraw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashrv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrad\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrlw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrlq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                    (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psllw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                    (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "pslld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                    (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psllq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashrv8hi3_ti"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psraw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashrv4si3_ti"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrad\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv8hi3_ti"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrlw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv4si3_ti"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv2di3_ti"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrlq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv8hi3_ti"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                    (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psllw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv4si3_ti"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                    (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "pslld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv2di3_ti"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                    (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psllq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-;; See logical MMX insns for the reason for the unspec.  Strictly speaking
-;; we wouldn't need here it since we never generate TImode arithmetic.
-
-;; There has to be some kind of prize for the weirdest new instruction...
-(define_insn "sse2_ashlti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (unspec:TI
-        [(ashift:TI (match_operand:TI 1 "register_operand" "0")
-                    (mult:SI (match_operand:SI 2 "immediate_operand" "i")
-                              (const_int 8)))] UNSPEC_NOP))]
-  "TARGET_SSE2"
-  "pslldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
+(define_insn "addv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (plus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfadd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_lshrti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (unspec:TI
-        [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
-                      (mult:SI (match_operand:SI 2 "immediate_operand" "i")
-                               (const_int 8)))] UNSPEC_NOP))]
-  "TARGET_SSE2"
-  "psrldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
+(define_insn "subv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfsub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-;; SSE unpack
+(define_insn "subrv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+                    (match_operand:V2SF 1 "register_operand" "0")))]
+  "TARGET_3DNOW"
+  "pfsubr\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_unpckhpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_concat:V2DF
-        (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
-                       (parallel [(const_int 1)]))
-        (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
-                       (parallel [(const_int 1)]))))]
-  "TARGET_SSE2"
-  "unpckhpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_unpcklpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_concat:V2DF
-        (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
-                       (parallel [(const_int 0)]))
-        (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
-                       (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "unpcklpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+(define_insn "gtv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpgt\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "V2SF")])
 
-;; MMX pack/unpack insns.
+(define_insn "gev2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpge\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_packsswb"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_concat:V16QI
-        (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
-        (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
-  "TARGET_SSE2"
-  "packsswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "eqv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (eq:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpeq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_packssdw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (vec_concat:V8HI
-        (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0"))
-        (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))]
-  "TARGET_SSE2"
-  "packssdw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pfmaxv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smax:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmax\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_packuswb"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_concat:V16QI
-        (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
-        (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
-  "TARGET_SSE2"
-  "packuswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pfminv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smin:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmin\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_punpckhbw"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_merge:V16QI
-        (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                          (parallel [(const_int 8) (const_int 0)
-                                     (const_int 9) (const_int 1)
-                                     (const_int 10) (const_int 2)
-                                     (const_int 11) (const_int 3)
-                                     (const_int 12) (const_int 4)
-                                     (const_int 13) (const_int 5)
-                                     (const_int 14) (const_int 6)
-                                     (const_int 15) (const_int 7)]))
-        (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
-                          (parallel [(const_int 0) (const_int 8)
-                                     (const_int 1) (const_int 9)
-                                     (const_int 2) (const_int 10)
-                                     (const_int 3) (const_int 11)
-                                     (const_int 4) (const_int 12)
-                                     (const_int 5) (const_int 13)
-                                     (const_int 6) (const_int 14)
-                                     (const_int 7) (const_int 15)]))
-        (const_int 21845)))]
-  "TARGET_SSE2"
-  "punpckhbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "mulv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (mult:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmul\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxmul")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_punpckhwd"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (vec_merge:V8HI
-        (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                         (parallel [(const_int 4) (const_int 0)
-                                    (const_int 5) (const_int 1)
-                                    (const_int 6) (const_int 2)
-                                    (const_int 7) (const_int 3)]))
-        (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
-                         (parallel [(const_int 0) (const_int 4)
-                                    (const_int 1) (const_int 5)
-                                    (const_int 2) (const_int 6)
-                                    (const_int 3) (const_int 7)]))
-        (const_int 85)))]
-  "TARGET_SSE2"
-  "punpckhwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "femms"
+  [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
+   (clobber (reg:XF 8))
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 29))
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))
+   (clobber (reg:DI 35))
+   (clobber (reg:DI 36))]
+  "TARGET_3DNOW"
+  "femms"
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "none")]) 
 
-(define_insn "sse2_punpckhdq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                         (parallel [(const_int 2) (const_int 0)
-                                    (const_int 3) (const_int 1)]))
-        (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
-                         (parallel [(const_int 0) (const_int 2)
-                                    (const_int 1) (const_int 3)]))
-        (const_int 5)))]
-  "TARGET_SSE2"
-  "punpckhdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pf2id"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pf2id\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_punpcklbw"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_merge:V16QI
-        (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                          (parallel [(const_int 0) (const_int 8)
-                                     (const_int 1) (const_int 9)
-                                     (const_int 2) (const_int 10)
-                                     (const_int 3) (const_int 11)
-                                     (const_int 4) (const_int 12)
-                                     (const_int 5) (const_int 13)
-                                     (const_int 6) (const_int 14)
-                                     (const_int 7) (const_int 15)]))
-        (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
-                          (parallel [(const_int 8) (const_int 0)
-                                     (const_int 9) (const_int 1)
-                                     (const_int 10) (const_int 2)
-                                     (const_int 11) (const_int 3)
-                                     (const_int 12) (const_int 4)
-                                     (const_int 13) (const_int 5)
-                                     (const_int 14) (const_int 6)
-                                     (const_int 15) (const_int 7)]))
-        (const_int 21845)))]
-  "TARGET_SSE2"
-  "punpcklbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pf2iw"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (sign_extend:V2SI
+          (ss_truncate:V2HI
+             (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+  "TARGET_3DNOW_A"
+  "pf2iw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_punpcklwd"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (vec_merge:V8HI
-        (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                         (parallel [(const_int 0) (const_int 4)
-                                    (const_int 1) (const_int 5)
-                                    (const_int 2) (const_int 6)
-                                    (const_int 3) (const_int 7)]))
-        (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
-                         (parallel [(const_int 4) (const_int 0)
-                                    (const_int 5) (const_int 1)
-                                    (const_int 6) (const_int 2)
-                                    (const_int 7) (const_int 3)]))
-        (const_int 85)))]
-  "TARGET_SSE2"
-  "punpcklwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pfacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (vec_concat:V2SF
+          (plus:SF
+             (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                            (parallel [(const_int  0)]))
+             (vec_select:SF (match_dup 1)
+                            (parallel [(const_int 1)])))
+           (plus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+                            (parallel [(const_int  0)]))
+              (vec_select:SF (match_dup 2)
+                            (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW"
+  "pfacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_punpckldq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                         (parallel [(const_int 0) (const_int 2)
-                                    (const_int 1) (const_int 3)]))
-        (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
-                         (parallel [(const_int 2) (const_int 0)
-                                    (const_int 3) (const_int 1)]))
-        (const_int 5)))]
-  "TARGET_SSE2"
-  "punpckldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pfnacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (vec_concat:V2SF
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                            (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1)
+                            (parallel [(const_int 1)])))
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+                            (parallel [(const_int  0)]))
+              (vec_select:SF (match_dup 2)
+                            (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"
+  "pfnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_punpcklqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
-                         (parallel [(const_int 1)
-                                    (const_int 0)]))
-        (match_operand:V2DI 1 "register_operand" "0")
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "punpcklqdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pfpnacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_concat:V2SF
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                            (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1)
+                            (parallel [(const_int 1)])))
+           (plus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+                            (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 2)
+                            (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"
+  "pfpnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_punpckhqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (match_operand:V2DI 1 "register_operand" "0")
-        (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
-                         (parallel [(const_int 1)
-                                    (const_int 0)]))
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "punpckhqdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "pi2fw"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (float:V2SF
+          (vec_concat:V2SI
+             (sign_extend:SI
+                (truncate:HI
+                   (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+                                  (parallel [(const_int 0)]))))
+              (sign_extend:SI
+                (truncate:HI
+                    (vec_select:SI (match_dup 1)
+                                  (parallel [(const_int  1)])))))))]
+  "TARGET_3DNOW_A"
+  "pi2fw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "V2SF")])
 
-;; SSE2 moves
+(define_insn "floatv2si2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pi2fd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "V2SF")])
 
-(define_insn "sse2_movapd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVA))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movapd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_movupd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVU))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movupd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+;; This insn is identical to pavgb in operation, but the opcode is
+;; different.  To avoid accidentally matching pavgb, use an unspec.
 
-(define_insn "sse2_movdqa"
-  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
-       (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
-                      UNSPEC_MOVA))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movdqa\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
+(define_insn "pavgusb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+       (unspec:V8QI
+          [(match_operand:V8QI 1 "register_operand" "0")
+           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+         UNSPEC_PAVGUSB))]
+  "TARGET_3DNOW"
+  "pavgusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxshft")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_movdqu"
-  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
-       (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
-                      UNSPEC_MOVU))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movdqu\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+;; 3DNow! reciprocal and reciprocal square root
+(define_insn "pfrcpv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+       UNSPEC_PFRCP))]
+  "TARGET_3DNOW"
+  "pfrcp\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_movdq2q"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
-       (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
-                      (parallel [(const_int 0)])))]
-  "TARGET_SSE2 && !TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movdq2q\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+(define_insn "pfrcpit1v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+                    UNSPEC_PFRCPIT1))]
+  "TARGET_3DNOW"
+  "pfrcpit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_movdq2q_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
-       (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
-                      (parallel [(const_int 0)])))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movdq2q\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
+(define_insn "pfrcpit2v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+                    UNSPEC_PFRCPIT2))]
+  "TARGET_3DNOW"
+  "pfrcpit2\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_movq2dq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,?x")
-       (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
-                        (const_int 0)))]
-  "TARGET_SSE2 && !TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movq2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt,ssemov")
+(define_insn "pfrsqrtv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+                    UNSPEC_PFRSQRT))]
+  "TARGET_3DNOW"
+  "pfrsqrt\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")
    (set_attr "mode" "TI")])
-
-(define_insn "sse2_movq2dq_rex64"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
-       (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
-                        (const_int 0)))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movq2dq\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt,ssemov,ssecvt")
+               
+(define_insn "pfrsqit1v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+                    UNSPEC_PFRSQIT1))]
+  "TARGET_3DNOW"
+  "pfrsqit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_movq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_concat:V2DI (vec_select:DI
-                         (match_operand:V2DI 1 "nonimmediate_operand" "xm")
-                         (parallel [(const_int 0)]))
-                        (const_int 0)))]
-  "TARGET_SSE2"
-  "movq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
+(define_insn "pmulhrwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (truncate:V4HI
+          (lshiftrt:V4SI
+             (plus:V4SI
+                (mult:V4SI
+                   (sign_extend:V4SI
+                      (match_operand:V4HI 1 "register_operand" "0"))
+                   (sign_extend:V4SI
+                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+                (const_vector:V4SI [(const_int 32768)
+                                    (const_int 32768)
+                                    (const_int 32768)
+                                    (const_int 32768)]))
+             (const_int 16))))]
+  "TARGET_3DNOW"
+  "pmulhrw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxmul")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_loadd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr"))
-        (const_vector:V4SI [(const_int 0)
-                            (const_int 0)
-                            (const_int 0)
-                            (const_int 0)])
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
+(define_insn "pswapdv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+                        (parallel [(const_int 1) (const_int 0)])))]
+  "TARGET_3DNOW_A"
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_stored"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=mr")
-       (vec_select:SI
-        (match_operand:V4SI 1 "register_operand" "x")
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE2"
-  "movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
+(define_insn "pswapdv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+                        (parallel [(const_int 1) (const_int 0)])))]
+  "TARGET_3DNOW_A"
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmxcvt")
    (set_attr "mode" "TI")])
 
-;; Store the high double of the source vector into the double destination.
-(define_insn "sse2_storehpd"
-  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,Y,Y")
-       (vec_select:DF
-         (match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o")
-         (parallel [(const_int 1)])))]
-  "TARGET_SSE2"
-  "@
-   movhpd\t{%1, %0|%0, %1}
-   unpckhpd\t%0, %0
-   #"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (vec_select:DF
-         (match_operand:V2DF 1 "memory_operand" "")
-         (parallel [(const_int 1)])))]
-  "TARGET_SSE2 && reload_completed"
-  [(const_int 0)]
+(define_expand "prefetch"
+  [(prefetch (match_operand 0 "address_operand" "")
+            (match_operand:SI 1 "const_int_operand" "")
+            (match_operand:SI 2 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
 {
-  emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8));
-  DONE;
-})
-
-;; Load the high double of the target vector from the source scalar.
-(define_insn "sse2_loadhpd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,o")
-       (vec_concat:V2DF
-         (vec_select:DF
-           (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
-           (parallel [(const_int 0)]))
-         (match_operand:DF 2 "nonimmediate_operand"     " m,Y,Y")))]
-  "TARGET_SSE2"
-  "@
-   movhpd\t{%2, %0|%0, %2}
-   unpcklpd\t{%2, %0|%0, %2}
-   #"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+  int rw = INTVAL (operands[1]);
+  int locality = INTVAL (operands[2]);
 
-(define_split
-  [(set (match_operand:V2DF 0 "memory_operand" "")
-       (vec_concat:V2DF
-         (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
-         (match_operand:DF 1 "register_operand" "")))]
-  "TARGET_SSE2 && reload_completed"
-  [(const_int 0)]
-{
-  emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]);
-  DONE;
-})
+  if (rw != 0 && rw != 1)
+    abort ();
+  if (locality < 0 || locality > 3)
+    abort ();
+  if (GET_MODE (operands[0]) != Pmode && GET_MODE (operands[0]) != VOIDmode)
+    abort ();
 
-;; Store the low double of the source vector into the double destination.
-(define_expand "sse2_storelpd"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-       (vec_select:DF
-         (match_operand:V2DF 1 "nonimmediate_operand" "")
-         (parallel [(const_int 0)])))]
-  "TARGET_SSE2"
-{
-  operands[1] = gen_lowpart (DFmode, operands[1]);
-  emit_move_insn (operands[0], operands[1]);
-  DONE;
+  /* Use the 3DNow! prefetch when a write prefetch is requested, which
+     the SSE counterpart does not support, or when the SSE prefetch is
+     not available (K6 machines).  Otherwise use the SSE prefetch, as it
+     allows the locality to be specified.  */
+  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+    operands[2] = GEN_INT (3);
+  else
+    operands[1] = const0_rtx;
 })
 
-;; Load the low double of the target vector from the source scalar.
-(define_insn "sse2_loadlpd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,m")
-       (vec_concat:V2DF
-         (match_operand:DF 2 "nonimmediate_operand"     " m,Y,Y")
-         (vec_select:DF
-           (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
-           (parallel [(const_int 1)]))))]
-  "TARGET_SSE2"
-  "@
-   movlpd\t{%2, %0|%0, %2}
-   movsd\t{%2, %0|%0, %2}
-   movlpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; Merge the low part of the source vector into the low part of the target.
-(define_insn "sse2_movsd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
-        (vec_merge:V2DF
-         (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
-         (match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y")
-         (const_int 2)))]
-  "TARGET_SSE2"
-  "@movsd\t{%2, %0|%0, %2}
-    movlpd\t{%2, %0|%0, %2}
-    movlpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF,V2DF,V2DF")])
-
-(define_expand "sse2_loadsd"
-  [(match_operand:V2DF 0 "register_operand" "")
-   (match_operand:DF 1 "memory_operand" "")]
-  "TARGET_SSE2"
+(define_insn "*prefetch_sse"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+            (const_int 0)
+            (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE && !TARGET_64BIT"
 {
-  emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
-                               CONST0_RTX (V2DFmode)));
-  DONE;
-})
-
-(define_insn "sse2_loadsd_1"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF
-        (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
-        (match_operand:V2DF 2 "const0_operand" "X")
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "movsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse2_storesd"
-  [(set (match_operand:DF 0 "memory_operand" "=m")
-       (vec_select:DF
-        (match_operand:V2DF 1 "register_operand" "x")
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE2"
-  "movsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse2_shufpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-                     (match_operand:SI 3 "immediate_operand" "i")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE2"
-  ;; @@@ check operand order for intel/nonintel syntax
-  "shufpd\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_clflush"
-  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
-                   UNSPECV_CLFLUSH)]
-  "TARGET_SSE2"
-  "clflush\t%a0"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
 
-(define_expand "sse2_mfence"
-  [(set (match_dup 0)
-       (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
-  "TARGET_SSE2"
-{
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
-})
+  int locality = INTVAL (operands[1]);
+  if (locality < 0 || locality > 3)
+    abort ();
 
-(define_insn "*mfence_insn"
-  [(set (match_operand:BLK 0 "" "")
-       (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
-  "TARGET_SSE2"
-  "mfence"
+  return patterns[locality];  
+}
   [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
+   (set_attr "memory" "none")])
 
-(define_expand "sse2_lfence"
-  [(set (match_dup 0)
-       (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
-  "TARGET_SSE2"
+(define_insn "*prefetch_sse_rex"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+            (const_int 0)
+            (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE && TARGET_64BIT"
 {
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
-})
-
-(define_insn "*lfence_insn"
-  [(set (match_operand:BLK 0 "" "")
-       (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
-  "TARGET_SSE2"
-  "lfence"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
-
-;; SSE3
-
-(define_insn "mwait"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
-                    (match_operand:SI 1 "register_operand" "c")]
-                   UNSPECV_MWAIT)]
-  "TARGET_SSE3"
-  "mwait\t%0, %1"
-  [(set_attr "length" "3")])
-
-(define_insn "monitor"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
-                    (match_operand:SI 1 "register_operand" "c")
-                    (match_operand:SI 2 "register_operand" "d")]
-                   UNSPECV_MONITOR)]
-  "TARGET_SSE3"
-  "monitor\t%0, %1, %2"
-  [(set_attr "length" "3")])
-
-;; SSE3 arithmetic
-
-(define_insn "addsubv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_ADDSUB))]
-  "TARGET_SSE3"
-  "addsubps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "addsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_ADDSUB))]
-  "TARGET_SSE3"
-  "addsubpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "haddv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HADD))]
-  "TARGET_SSE3"
-  "haddps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "haddv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HADD))]
-  "TARGET_SSE3"
-  "haddpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "hsubv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HSUB))]
-  "TARGET_SSE3"
-  "hsubps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
 
-(define_insn "hsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HSUB))]
-  "TARGET_SSE3"
-  "hsubpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "movshdup"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))]
-  "TARGET_SSE3"
-  "movshdup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+  int locality = INTVAL (operands[1]);
+  if (locality < 0 || locality > 3)
+    abort ();
 
-(define_insn "movsldup"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))]
-  "TARGET_SSE3"
-  "movsldup\t{%1, %0|%0, %1}"
+  return patterns[locality];  
+}
   [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
+   (set_attr "memory" "none")])
 
-(define_insn "lddqu"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
-                      UNSPEC_LDQQU))]
-  "TARGET_SSE3"
-  "lddqu\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+;; 3DNow! prefetch for 32-bit targets.  Only matches a prefetch rtx whose
+;; last operand is the constant 3.  Operand 1 selects the instruction:
+;; zero emits "prefetch", any other value emits "prefetchw".
+(define_insn "*prefetch_3dnow"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+            (match_operand:SI 1 "const_int_operand" "n")
+            (const_int 3))]
+  "TARGET_3DNOW && !TARGET_64BIT"
+{
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "none")])
 
-(define_insn "loadddup"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))]
-  "TARGET_SSE3"
-  "movddup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
+;; 64-bit counterpart of the 3DNow! prefetch pattern: the address is
+;; DImode and the insn is enabled only for TARGET_64BIT.  Operand 1
+;; equal to zero emits "prefetch", otherwise "prefetchw".
+(define_insn "*prefetch_3dnow_rex"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+            (match_operand:SI 1 "const_int_operand" "n")
+            (const_int 3))]
+  "TARGET_3DNOW && TARGET_64BIT"
+{
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "none")])
 
-(define_insn "movddup"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_duplicate:V2DF
-        (vec_select:DF (match_operand:V2DF 1 "register_operand" "x")
-                       (parallel [(const_int 0)]))))]
-  "TARGET_SSE3"
-  "movddup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
+(include "sse.md")
index ec82977d8d25352a969c1e3f5179bd5f102f45bf..29dd4aafcb45e4da01dbaad060a86d41446d0b08 100644 (file)
 
 (define_insn_reservation "ppro_sse_div_V4SF_load" 48
                         (and (eq_attr "cpu" "pentiumpro")
-                             (and (eq_attr "memory" "none")
+                             (and (eq_attr "memory" "load")
                                   (and (eq_attr "mode" "V4SF")
                                        (eq_attr "type" "ssediv"))))
                         "decoder0,(p2+p0)*2,p0*32")
                         (and (eq_attr "cpu" "pentiumpro")
                              (and (eq_attr "memory" "none")
                                   (and (eq_attr "mode" "V4SF")
-                                       (eq_attr "type" "sselog"))))
+                                       (eq_attr "type" "sselog,sselog1"))))
                         "decodern,p1")
 
 (define_insn_reservation "ppro_sse_log_V4SF_load" 2
                         (and (eq_attr "cpu" "pentiumpro")
-                             (and (eq_attr "memory" "none")
+                             (and (eq_attr "memory" "load")
                                   (and (eq_attr "mode" "V4SF")
-                                       (eq_attr "type" "sselog"))))
+                                       (eq_attr "type" "sselog,sselog1"))))
                         "decoder0,(p2+p1)")
 
 (define_insn_reservation "ppro_sse_mov_V4SF" 1
index 5fa93fa05b95a935b4eaf6523ba0a627ebd8d4b7..fde85dd6d693f602cb695fd3f47a3c295af3f9fe 100644 (file)
         (and (match_operand 0 "const_double_operand")
              (match_test "GET_MODE_SIZE (mode) <= 8")))))
 
-;; Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
-;; for shift & compare patterns, as shifting by 0 does not change flags).
-(define_predicate "const_int_1_31_operand"
-  (and (match_code "const_int")
-       (match_test "INTVAL (op) >= 1 && INTVAL (op) <= 31")))
-
 ;; Returns nonzero if OP is either a symbol reference or a sum of a symbol
 ;; reference and a constant.
 (define_predicate "symbolic_operand"
   return i == 2 || i == 4 || i == 8;
 })
 
+;; Match the integer constants 0 or 1.  The test compares OP against
+;; const0_rtx and const1_rtx directly instead of reading INTVAL.
+(define_predicate "const_0_to_1_operand"
+  (and (match_code "const_int")
+       (match_test "op == const0_rtx || op == const1_rtx")))
+
 ;; Match 0 to 3.
 (define_predicate "const_0_to_3_operand"
   (and (match_code "const_int")
   (and (match_code "const_int")
        (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 255")))
 
+;; Match a CONST_INT that is a multiple of 8 in the range 0 .. 255*8.
+(define_predicate "const_0_to_255_mul_8_operand"
+  (match_code "const_int")
+{
+  /* The value is held as unsigned, so a negative INTVAL turns into a
+     very large number and fails the upper-bound test; no separate
+     lower-bound check is needed.  */
+  unsigned HOST_WIDE_INT val = INTVAL (op);
+  return val <= 255*8 && val % 8 == 0;
+})
+
+;; Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
+;; for shift & compare patterns, as shifting by 0 does not change flags).
+(define_predicate "const_1_to_31_operand"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) >= 1 && INTVAL (op) <= 31")))
+
+;; Match 2 or 3.
+(define_predicate "const_2_to_3_operand"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) == 2 || INTVAL (op) == 3")))
+
+;; Match 4 to 7.
+(define_predicate "const_4_to_7_operand"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) >= 4 && INTVAL (op) <= 7")))
+
 ;; Match exactly one bit in 4-bit mask.
 (define_predicate "const_pow2_1_to_8_operand"
   (match_code "const_int")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
new file mode 100644 (file)
index 0000000..ee90d06
--- /dev/null
@@ -0,0 +1,3111 @@
+;; GCC machine description for SSE instructions
+;; Copyright (C) 2005
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+
+;; 16 byte integral modes handled by SSE, minus TImode, which gets
+;; special-cased for TARGET_64BIT.
+(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
+
+;; All 16-byte vector modes handled by SSE
+(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Mix-n-match
+(define_mode_macro SSEMODE12 [V16QI V8HI])
+(define_mode_macro SSEMODE24 [V8HI V4SI])
+(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
+(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
+
+;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Move patterns
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; All of these patterns are enabled for SSE1 as well as SSE2.
+;; This is essential for maintaining stable calling conventions.
+
+;; Move expander for each integer vector mode in SSEMODEI.  No RTL from
+;; the template is emitted; ix86_expand_vector_move does all the work.
+(define_expand "mov<mode>"
+  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
+       (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_move (<MODE>mode, operands);
+  DONE;
+})
+
+;; Integer vector move insn.  Memory-to-memory moves are rejected by the
+;; insn condition.
+;;   alternative 0: source is the constant accepted by "C"; it is
+;;                  materialized by xor-ing the destination with itself.
+;;   alternative 1: register or memory source into a register.
+;;   alternative 2: register source into memory.
+;; The "mode" attribute picks the encoding that the output code tests:
+;; V4SF (xorps/movaps) or TI (pxor/movdqa).
+(define_insn "*mov<mode>_internal"
+  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
+       (match_operand:SSEMODEI 1 "vector_move_operand"  "C ,xm,x"))]
+  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
+    case 1:
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
+    default:
+      abort ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   ;; Mode selection: without SSE2 only the V4SF forms are chosen.
+   ;; Otherwise V4SF is chosen when optimizing for size, and also for
+   ;; stores when TARGET_SSE_TYPELESS_STORES is set; TI is the default.
+   (set (attr "mode")
+       (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
+                (const_string "V4SF")
+
+              (eq_attr "alternative" "0,1")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))
+              (eq_attr "alternative" "2")
+                (if_then_else
+                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                           (const_int 0))
+                       (ne (symbol_ref "optimize_size")
+                           (const_int 0)))
+                  (const_string "V4SF")
+                  (const_string "TI"))]
+              (const_string "TI")))])
+
+;; Move expander for V4SF; delegates to ix86_expand_vector_move.
+(define_expand "movv4sf"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+       (match_operand:V4SF 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_move (V4SFmode, operands);
+  DONE;
+})
+
+;; V4SF move insn: xorps materializes the constant accepted by "C",
+;; movaps handles the register/memory forms.  Memory-to-memory moves are
+;; rejected by the insn condition, matching *mov<mode>_internal and
+;; *movv2df_internal; none of the alternatives can implement them.
+(define_insn "*movv4sf_internal"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+       (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   xorps\t%0, %0
+   movaps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "mode" "V4SF")])
+
+;; After reload, rewrite a V4SF register load whose source satisfies
+;; zero_extended_scalar_load_operand as an sse_loadss of the SFmode
+;; subreg at byte offset 0 of that source.
+(define_split
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+  "TARGET_SSE && reload_completed"
+  [(const_int 0)]
+{
+  rtx x = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
+  emit_insn (gen_sse_loadss (operands[0], x));
+  DONE;
+})
+
+;; Move expander for V2DF; delegates to ix86_expand_vector_move.  Note
+;; the condition is TARGET_SSE, not TARGET_SSE2, so the move is
+;; available with SSE1 alone.
+(define_expand "movv2df"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+       (match_operand:V2DF 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_move (V2DFmode, operands);
+  DONE;
+})
+
+;; V2DF move insn.  Memory-to-memory moves are rejected by the insn
+;; condition.  Alternative 0 materializes the constant accepted by "C"
+;; with an xor of the destination with itself; alternatives 1 and 2 are
+;; the load/register and store forms.  The "mode" attribute selects the
+;; ps (V4SF) or pd (V2DF) spelling of each instruction.
+(define_insn "*movv2df_internal"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+       (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "xorpd\t%0, %0";
+    case 1:
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movapd\t{%1, %0|%0, %1}";
+    default:
+      abort ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   ;; Mode selection: without SSE2 only the V4SF forms are chosen.
+   ;; Otherwise V4SF is chosen when optimizing for size, and also for
+   ;; stores when TARGET_SSE_TYPELESS_STORES is set; V2DF is the default.
+   (set (attr "mode")
+       (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
+                (const_string "V4SF")
+              (eq_attr "alternative" "0,1")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "V2DF"))
+              (eq_attr "alternative" "2")
+                (if_then_else
+                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                           (const_int 0))
+                       (ne (symbol_ref "optimize_size")
+                           (const_int 0)))
+                  (const_string "V4SF")
+                  (const_string "V2DF"))]
+              (const_string "V2DF")))])
+
+;; After reload, rewrite a V2DF register load whose source satisfies
+;; zero_extended_scalar_load_operand as an sse2_loadsd of the DFmode
+;; subreg at byte offset 0 of that source.  Requires SSE2.
+(define_split
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+  "TARGET_SSE2 && reload_completed"
+  [(const_int 0)]
+{
+  rtx x = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
+  emit_insn (gen_sse2_loadsd (operands[0], x));
+  DONE;
+})
+
+;; Misaligned move expander for every mode in SSEMODE; delegates to
+;; ix86_expand_vector_move_misalign.
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
+       (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_move_misalign (<MODE>mode, operands);
+  DONE;
+})
+
+;; Unaligned V4SF move (movups), kept distinct from the ordinary move
+;; by the UNSPEC_MOVU wrapper.  Memory-to-memory is rejected.  The
+;; "mode" attribute is V4SF to agree with the operand mode and the ps
+;; mnemonic.
+(define_insn "sse_movups"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
+                    UNSPEC_MOVU))]
+  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "movups\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF")])
+
+;; Unaligned V2DF move (movupd), wrapped in UNSPEC_MOVU.  Memory-to-
+;; memory is rejected by the insn condition.
+(define_insn "sse2_movupd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+       (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
+                    UNSPEC_MOVU))]
+  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "movupd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2DF")])
+
+;; Unaligned V16QI move (movdqu), wrapped in UNSPEC_MOVU.  Memory-to-
+;; memory is rejected by the insn condition.
+(define_insn "sse2_movdqu"
+  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+       (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
+                     UNSPEC_MOVU))]
+  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "movdqu\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "TI")])
+
+;; V4SF store from an SSE register to memory using movntps, wrapped in
+;; UNSPEC_MOVNT.
+(define_insn "sse_movntv4sf"
+  [(set (match_operand:V4SF 0 "memory_operand" "=m")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
+                    UNSPEC_MOVNT))]
+  "TARGET_SSE"
+  "movntps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF")])
+
+;; V2DF store from an SSE register to memory using movntpd, wrapped in
+;; UNSPEC_MOVNT.
+(define_insn "sse2_movntv2df"
+  [(set (match_operand:V2DF 0 "memory_operand" "=m")
+       (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
+                    UNSPEC_MOVNT))]
+  "TARGET_SSE2"
+  "movntpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; V2DI store from an SSE register to memory using movntdq, wrapped in
+;; UNSPEC_MOVNT.
+(define_insn "sse2_movntv2di"
+  [(set (match_operand:V2DI 0 "memory_operand" "=m")
+       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
+                    UNSPEC_MOVNT))]
+  "TARGET_SSE2"
+  "movntdq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; SImode store from a general register ("r") to memory using movnti,
+;; wrapped in UNSPEC_MOVNT.  The "mode" attribute is SI because both
+;; operands are SImode integers; no vector mode is involved.
+(define_insn "sse2_movntsi"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+                  UNSPEC_MOVNT))]
+  "TARGET_SSE2"
+  "movnti\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "SI")])
+
+;; SSE3 lddqu: V16QI load from memory into an SSE register, wrapped in
+;; UNSPEC_LDQQU.
+(define_insn "sse3_lddqu"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
+                     UNSPEC_LDQQU))]
+  "TARGET_SSE3"
+  "lddqu\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; V4SF negation; expansion is done by ix86_expand_fp_absneg_operator.
+(define_expand "negv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
+
+;; V4SF absolute value; expansion is done by
+;; ix86_expand_fp_absneg_operator.
+(define_expand "absv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
+
+;; V4SF addition expander.  The operands are first adjusted by
+;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the template
+;; above is then emitted with the adjusted operands.
+(define_expand "addv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
+
+;; Packed single-precision add (addps).  Operands 1 and 2 are marked
+;; commutative ("%"); operand 1 is tied to the destination.
+(define_insn "*addv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+  "addps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar single-precision add (addss): element 0 of the result is
+;; op1[0] + op2[0]; the other elements are copied from operand 1
+;; (vec_merge mask 1).
+;; Operand 1 must NOT be marked commutative: swapping it with operand 2
+;; would also retarget the (match_dup 1) that supplies the upper
+;; elements, changing the value the insn computes.  It is therefore a
+;; plain register tied to the destination, as in sse_vmsubv4sf3.
+(define_insn "sse_vmaddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+  "addss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+;; V4SF subtraction expander.  Operand 1 must be a register.  The
+;; operands are adjusted by ix86_fixup_binary_operands_no_copy and the
+;; template is then emitted.
+(define_expand "subv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
+
+;; Packed single-precision subtract (subps); operand 1 is tied to the
+;; destination.
+(define_insn "*subv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "subps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar single-precision subtract (subss): element 0 of the result is
+;; op1[0] - op2[0]; the other elements are copied from operand 1
+;; (vec_merge mask 1).
+(define_insn "sse_vmsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE"
+  "subss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+;; V4SF multiplication expander.  The operands are adjusted by
+;; ix86_fixup_binary_operands_no_copy and the template is then emitted.
+(define_expand "mulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
+
+;; Packed single-precision multiply (mulps).  Operands 1 and 2 are
+;; marked commutative ("%"); operand 1 is tied to the destination.
+(define_insn "*mulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
+  "mulps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar single-precision multiply (mulss): element 0 of the result is
+;; op1[0] * op2[0]; the other elements are copied from operand 1
+;; (vec_merge mask 1).
+;; Operand 1 must NOT be marked commutative: swapping it with operand 2
+;; would also retarget the (match_dup 1) that supplies the upper
+;; elements, changing the value the insn computes.  It is therefore a
+;; plain register tied to the destination.
+(define_insn "sse_vmmulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
+  "mulss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "SF")])
+
+;; V4SF division expander.  Operand 1 must be a register.  The operands
+;; are adjusted by ix86_fixup_binary_operands_no_copy and the template
+;; is then emitted.
+(define_expand "divv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (div:V4SF (match_operand:V4SF 1 "register_operand" "")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
+
+;; Packed single-precision divide (divps); operand 1 is tied to the
+;; destination.
+(define_insn "*divv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "divps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar single-precision divide (divss): element 0 of the result is
+;; op1[0] / op2[0]; the other elements are copied from operand 1
+;; (vec_merge mask 1).
+(define_insn "sse_vmdivv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE"
+  "divss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "mode" "SF")])
+
+;; Packed rcpps, modelled as UNSPEC_RCP of the single source operand.
+(define_insn "sse_rcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (unspec:V4SF
+        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+  "TARGET_SSE"
+  "rcpps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar rcpss: element 0 of the result is UNSPEC_RCP applied to
+;; operand 1; the other elements come from operand 2, which is tied to
+;; the destination (vec_merge mask 1).
+(define_insn "sse_vmrcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                      UNSPEC_RCP)
+         (match_operand:V4SF 2 "register_operand" "0")
+         (const_int 1)))]
+  "TARGET_SSE"
+  "rcpss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; Packed rsqrtps, modelled as UNSPEC_RSQRT of the single source
+;; operand.
+(define_insn "sse_rsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (unspec:V4SF
+         [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+  "TARGET_SSE"
+  "rsqrtps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar rsqrtss: element 0 of the result is UNSPEC_RSQRT applied to
+;; operand 1; the other elements come from operand 2, which is tied to
+;; the destination (vec_merge mask 1).
+(define_insn "sse_vmrsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                      UNSPEC_RSQRT)
+         (match_operand:V4SF 2 "register_operand" "0")
+         (const_int 1)))]
+  "TARGET_SSE"
+  "rsqrtss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; Packed single-precision square root (sqrtps).
+(define_insn "sqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "sqrtps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar square root (sqrtss): element 0 of the result is the square
+;; root of operand 1; the other elements come from operand 2, which is
+;; tied to the destination (vec_merge mask 1).
+(define_insn "sse_vmsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+         (match_operand:V4SF 2 "register_operand" "0")
+         (const_int 1)))]
+  "TARGET_SSE"
+  "sqrtss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; V4SF signed-maximum expander.  The operands are adjusted by
+;; ix86_fixup_binary_operands_no_copy and the template is then emitted.
+(define_expand "smaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);")
+
+;; Packed single-precision maximum (maxps).  Operands 1 and 2 are marked
+;; commutative ("%"); operand 1 is tied to the destination.
+(define_insn "*smaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
+  "maxps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar single-precision maximum (maxss): element 0 of the result is
+;; the maximum of op1[0] and op2[0]; the other elements are copied from
+;; operand 1 (vec_merge mask 1).
+;; Operand 1 must NOT be marked commutative: swapping it with operand 2
+;; would also retarget the (match_dup 1) that supplies the upper
+;; elements, changing the value the insn computes.  It is therefore a
+;; plain register tied to the destination.
+(define_insn "sse_vmsmaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+        (match_dup 1)
+        (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
+  "maxss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; V4SF signed-minimum expander.  The operands are adjusted by
+;; ix86_fixup_binary_operands_no_copy and the template is then emitted.
+(define_expand "sminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);")
+
+;; Packed single-precision minimum (minps).  Operands 1 and 2 are marked
+;; commutative ("%"); operand 1 is tied to the destination.
+(define_insn "*sminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
+  "minps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar single-precision minimum (minss): element 0 of the result is
+;; the minimum of op1[0] and op2[0]; the other elements are copied from
+;; operand 1 (vec_merge mask 1).
+;; Operand 1 must NOT be marked commutative: swapping it with operand 2
+;; would also retarget the (match_dup 1) that supplies the upper
+;; elements, changing the value the insn computes.  It is therefore a
+;; plain register tied to the destination.
+(define_insn "sse_vmsminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+        (match_dup 1)
+        (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
+  "minss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; SSE3 addsubps: subtract in elements 0 and 2, add in elements 1 and 3.
+;; In a vec_merge a set mask bit selects the element from the first
+;; vector (the sums here) and a clear bit selects it from the second
+;; (the differences), with bit 0 corresponding to element 0.  The sums
+;; therefore need bits 1 and 3, i.e. mask 10.
+(define_insn "sse3_addsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (plus:V4SF
+           (match_operand:V4SF 1 "register_operand" "0")
+           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (minus:V4SF (match_dup 1) (match_dup 2))
+         (const_int 10)))]
+  "TARGET_SSE3"
+  "addsubps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; SSE3 haddps, spelled out element by element.  With a = operand 1
+;; (tied to the destination) and b = operand 2, the result is
+;;   { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] }.
+(define_insn "sse3_haddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_concat:V4SF
+         (vec_concat:V2SF
+           (plus:SF
+             (vec_select:SF 
+               (match_operand:V4SF 1 "register_operand" "0")
+               (parallel [(const_int 0)]))
+             (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+           (plus:SF
+             (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+             (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+         (vec_concat:V2SF
+           (plus:SF
+             (vec_select:SF
+               (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+               (parallel [(const_int 0)]))
+             (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+           (plus:SF
+             (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+             (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+  "TARGET_SSE3"
+  "haddps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; SSE3 hsubps, spelled out element by element.  With a = operand 1
+;; (tied to the destination) and b = operand 2, the result is
+;;   { a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3] }.
+(define_insn "sse3_hsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_concat:V4SF
+         (vec_concat:V2SF
+           (minus:SF
+             (vec_select:SF 
+               (match_operand:V4SF 1 "register_operand" "0")
+               (parallel [(const_int 0)]))
+             (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+           (minus:SF
+             (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+             (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+         (vec_concat:V2SF
+           (minus:SF
+             (vec_select:SF
+               (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+               (parallel [(const_int 0)]))
+             (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+           (minus:SF
+             (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+             (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+  "TARGET_SSE3"
+  "hsubps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Packed single-precision compare.  Operator 3 may be any code accepted
+;; by sse_comparison_operator; the mnemonic is built from it via %D3
+;; (presumably the condition-name suffix -- confirm in print_operand).
+(define_insn "sse_maskcmpv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (match_operator:V4SF 3 "sse_comparison_operator"
+               [(match_operand:V4SF 1 "register_operand" "0")
+                (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
+  "TARGET_SSE"
+  "cmp%D3ps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar single-precision compare: element 0 of the result is the
+;; comparison of op1[0] with op2[0]; the other elements are copied from
+;; operand 1 (vec_merge mask 1).  Unlike the packed form, operand 2 is
+;; restricted to a register here.
+(define_insn "sse_vmmaskcmpv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (match_operator:V4SF 3 "sse_comparison_operator"
+               [(match_operand:V4SF 1 "register_operand" "0")
+                (match_operand:V4SF 2 "register_operand" "x")])
+        (match_dup 1)
+        (const_int 1)))]
+  "TARGET_SSE"
+  "cmp%D3ss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "SF")])
+
+;; comiss: compare element 0 of operand 0 with element 0 of operand 1
+;; and set the flags register in CCFP mode.
+(define_insn "sse_comi"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP
+         (vec_select:SF
+           (match_operand:V4SF 0 "register_operand" "x")
+           (parallel [(const_int 0)]))
+         (vec_select:SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE"
+  "comiss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "mode" "SF")])
+
+;; ucomiss: compare element 0 of operand 0 with element 0 of operand 1
+;; and set the flags register in CCFPU mode.
+(define_insn "sse_ucomi"
+  [(set (reg:CCFPU FLAGS_REG)
+       (compare:CCFPU
+         (vec_select:SF
+           (match_operand:V4SF 0 "register_operand" "x")
+           (parallel [(const_int 0)]))
+         (vec_select:SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE"
+  "ucomiss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "mode" "SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; V4SF bitwise-AND expander.  The operands are adjusted by
+;; ix86_fixup_binary_operands_no_copy and the template is then emitted.
+(define_expand "andv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
+
+;; Bitwise AND on V4SF (andps).  Operands 1 and 2 are marked commutative
+;; ("%"); operand 1 is tied to the destination.
+(define_insn "*andv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+  "andps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; andnps: (NOT operand 1) AND operand 2.  Despite the "nand" in the
+;; pattern name, only operand 1 is complemented.  Operand 1 is tied to
+;; the destination.
+(define_insn "sse_nandv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
+                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andnps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; Bitwise inclusive OR of two V4SF values; the expander fixes up the
+;; operands and then emits the SET pattern.
+(define_expand "iorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
+
+;; ORPS, commutative, operand 1 tied to the destination.
+(define_insn "*iorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
+  "orps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; Bitwise exclusive OR of two V4SF values; the expander fixes up the
+;; operands and then emits the SET pattern.
+(define_expand "xorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
+
+;; XORPS, commutative, operand 1 tied to the destination.
+(define_insn "*xorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
+  "xorps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; CVTPI2PS: convert the two SI elements of operand 2 (MMX register or
+;; memory) to SF and merge them into elements 0 and 1 (mask 3) of
+;; operand 1, which is tied to the destination; elements 2 and 3 are kept.
+(define_insn "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+         (match_operand:V4SF 1 "register_operand" "0")
+         (const_int 3)))]
+  "TARGET_SSE"
+  "cvtpi2ps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+;; CVTPS2PI: convert operand 1 to integers (modeled as the
+;; UNSPEC_FIX_NOTRUNC unspec rather than a truncating "fix") and keep
+;; elements 0 and 1 as a V2SI result in an MMX register.
+(define_insn "sse_cvtps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_select:V2SI
+         (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                      UNSPEC_FIX_NOTRUNC)
+         (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "cvtps2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DI")])
+
+;; CVTTPS2PI: as sse_cvtps2pi, but modeled with the truncating "fix"
+;; operator; elements 0 and 1 of the converted vector form the result.
+;; NOTE(review): the "mode" attribute is SF here but DI on sse_cvtps2pi
+;; above -- confirm which one is intended.
+(define_insn "sse_cvttps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_select:V2SI
+         (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "cvttps2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "SF")])
+
+;; CVTSI2SS: convert the SI operand 2 to SF and merge it into element 0
+;; (mask 1) of operand 1, which is tied to the destination.  The two
+;; alternatives (register / memory source) differ only in the
+;; athlon_decode attribute.
+(define_insn "sse_cvtsi2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+         (match_operand:V4SF 1 "register_operand" "0,0")
+         (const_int 1)))]
+  "TARGET_SSE"
+  "cvtsi2ss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "vector,double")
+   (set_attr "mode" "SF")])
+
+;; CVTSI2SSQ: DImode-source variant of sse_cvtsi2ss, available only when
+;; TARGET_64BIT.
+(define_insn "sse_cvtsi2ssq"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+         (match_operand:V4SF 1 "register_operand" "0,0")
+         (const_int 1)))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtsi2ssq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "vector,double")
+   (set_attr "mode" "SF")])
+
+;; CVTSS2SI: convert element 0 of operand 1 to an SI integer.  The
+;; conversion is modeled as UNSPEC_FIX_NOTRUNC rather than as "fix".
+(define_insn "sse_cvtss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (unspec:SI
+         [(vec_select:SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+            (parallel [(const_int 0)]))]
+         UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE"
+  "cvtss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "SI")])
+
+;; CVTSS2SIQ: DImode-result variant of sse_cvtss2si, available only when
+;; TARGET_64BIT.
+(define_insn "sse_cvtss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+       (unspec:DI
+         [(vec_select:SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+            (parallel [(const_int 0)]))]
+         UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "DI")])
+
+;; CVTTSS2SI: truncating ("fix") conversion of element 0 of operand 1 to
+;; an SI integer.
+(define_insn "sse_cvttss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (fix:SI
+         (vec_select:SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE"
+  "cvttss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "SI")])
+
+;; CVTTSS2SIQ: DImode-result variant of sse_cvttss2si, available only
+;; when TARGET_64BIT.
+(define_insn "sse_cvttss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+       (fix:DI
+         (vec_select:SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvttss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "DI")])
+
+;; CVTDQ2PS: convert the four SI elements of operand 1 to four SF
+;; elements.  The insn produces a V4SF value, so the "mode" attribute is
+;; V4SF (it was previously V2DF, which does not describe this insn).
+(define_insn "sse2_cvtdq2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvtdq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_cvtps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                    UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2"
+  "cvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse2_cvttps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Result = { op2[2], op2[3], op1[2], op1[3] }: the low half of the result
+;; is the HIGH half of operand 2 (elements 6 and 7 of the concatenation),
+;; and the high half of operand 1 is preserved.  Operand 1 is tied to the
+;; destination in every alternative:
+;;   alt 0: movhlps, register source.
+;;   alt 1: movlps loading from %H2 of an offsettable memory operand 2
+;;          (presumably the upper 8 bytes -- see print_operand 'H').
+;;   alt 2: movhps storing the high half of register operand 2 into the
+;;          memory destination.
+;; The previous pattern selected elements 4 and 5 (the low half of
+;; operand 2), which does not describe what these instructions do.
+(define_insn "sse_movhlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+           (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+         (parallel [(const_int 6)
+                    (const_int 7)
+                    (const_int 2)
+                    (const_int 3)])))]
+  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   movhlps\t{%2, %0|%0, %2}
+   movlps\t{%H2, %0|%0, %H2}
+   movhps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+;; Result = { op1[0], op1[1], op2[0], op2[1] }: the low half of operand 1
+;; (tied to the destination) is kept and the low half of operand 2 becomes
+;; the high half of the result.
+;;   alt 0: movlhps, register source.
+;;   alt 1: movhps loading from memory operand 2.
+;;   alt 2: movlps storing register operand 2 to %H0 of an offsettable
+;;          memory destination.
+(define_insn "sse_movlhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+           (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+         (parallel [(const_int 0)
+                    (const_int 1)
+                    (const_int 4)
+                    (const_int 5)])))]
+  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+  "@
+   movlhps\t{%2, %0|%0, %2}
+   movhps\t{%2, %0|%0, %2}
+   movlps\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+;; UNPCKHPS: interleave the high halves of operands 1 and 2.
+;; Result = { op1[2], op2[2], op1[3], op2[3] }.
+(define_insn "sse_unpckhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "register_operand" "0")
+           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 2) (const_int 6)
+                    (const_int 3) (const_int 7)])))]
+  "TARGET_SSE"
+  "unpckhps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; UNPCKLPS: interleave the low halves of operands 1 and 2.
+;; Result = { op1[0], op2[0], op1[1], op2[1] }.
+(define_insn "sse_unpcklps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "register_operand" "0")
+           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 0) (const_int 4)
+                    (const_int 1) (const_int 5)])))]
+  "TARGET_SSE"
+  "unpcklps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the new instructions
+;; MOVSHDUP: duplicate the odd elements of operand 1.  Operand 1 is
+;; concatenated with itself, so indices 1,1,7,7 select
+;; { op1[1], op1[1], op1[3], op1[3] }.
+(define_insn "sse3_movshdup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+           (match_dup 1))
+         (parallel [(const_int 1)
+                    (const_int 1)
+                    (const_int 7)
+                    (const_int 7)])))]
+  "TARGET_SSE3"
+  "movshdup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; MOVSLDUP: duplicate the even elements of operand 1.  Indices 0,0,6,6
+;; of the self-concatenation select { op1[0], op1[0], op1[2], op1[2] }.
+(define_insn "sse3_movsldup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+           (match_dup 1))
+         (parallel [(const_int 0)
+                    (const_int 0)
+                    (const_int 6)
+                    (const_int 6)])))]
+  "TARGET_SSE3"
+  "movsldup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; Expand a shufps given its 8-bit immediate (operand 3) into
+;; sse_shufps_1, which wants four explicit element indices into the
+;; 8-element concatenation of operands 1 and 2.
+(define_expand "sse_shufps"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:V4SF 1 "register_operand" "")
+   (match_operand:V4SF 2 "nonimmediate_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  /* Each 2-bit field of the mask is one selector.  The two low fields
+     index operand 1 (0..3); the two high fields index operand 2 and are
+     biased by 4 to address the second half of the concatenation.  */
+  int mask = INTVAL (operands[3]);
+  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
+                              GEN_INT ((mask >> 0) & 3),
+                              GEN_INT ((mask >> 2) & 3),
+                              GEN_INT (((mask >> 4) & 3) + 4),
+                              GEN_INT (((mask >> 6) & 3) + 4)));
+  DONE;
+})
+
+;; SHUFPS with the selectors spelled out as element indices into the
+;; concatenation of operands 1 and 2.  Operands 3 and 4 must be in 0..3
+;; (they pick from operand 1); operands 5 and 6 must be in 4..7 (they
+;; pick from operand 2).
+(define_insn "sse_shufps_1"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "register_operand" "0")
+           (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+         (parallel [(match_operand 3 "const_0_to_3_operand" "")
+                    (match_operand 4 "const_0_to_3_operand" "")
+                    (match_operand 5 "const_4_to_7_operand" "")
+                    (match_operand 6 "const_4_to_7_operand" "")])))]
+  "TARGET_SSE"
+{
+  /* Rebuild the 8-bit immediate from the four selectors, removing the
+     +4 bias from the two that index operand 2.  */
+  int mask = 0;
+  mask |= INTVAL (operands[3]) << 0;
+  mask |= INTVAL (operands[4]) << 2;
+  mask |= (INTVAL (operands[5]) - 4) << 4;
+  mask |= (INTVAL (operands[6]) - 4) << 6;
+  /* Operand 3 is overwritten so the template can print the immediate.  */
+  operands[3] = GEN_INT (mask);
+
+  return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; Extract the high half (elements 2 and 3) of operand 1 as a V2SF.
+;;   alt 0: movhps, register to memory.
+;;   alt 1: movhlps, register to register.
+;;   alt 2: movlps loading from %H1 of an offsettable memory operand.
+(define_insn "sse_storehps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+       (vec_select:V2SF
+         (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+         (parallel [(const_int 2) (const_int 3)])))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%1, %0|%0, %1}
+   movhlps\t{%1, %0|%0, %1}
+   movlps\t{%H1, %0|%0, %H1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+;; Replace the high half of operand 1 (tied to the destination) with the
+;; V2SF operand 2, keeping elements 0 and 1.
+;;   alt 0: movhps from memory.
+;;   alt 1: movlhps from a register.
+;;   alt 2: movlps storing a register to %H0 of an offsettable memory
+;;          destination.
+(define_insn "sse_loadhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+       (vec_concat:V4SF
+         (vec_select:V2SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+           (parallel [(const_int 0) (const_int 1)]))
+         (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%2, %0|%0, %2}
+   movlhps\t{%2, %0|%0, %2}
+   movlps\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+;; Extract the low half (elements 0 and 1) of operand 1 as a V2SF.
+;;   alt 0: movlps, register to memory.
+;;   alt 1: movaps, register to register (a full-register copy).
+;;   alt 2: movlps, memory to register.
+(define_insn "sse_storelps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+       (vec_select:V2SF
+         (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+         (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "@
+   movlps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}
+   movlps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+;; Build a V4SF whose low half is the V2SF operand 2 and whose high half
+;; is elements 2 and 3 of operand 1.
+;;   alt 0: operand 2 is already in the destination; shufps with
+;;          immediate 0xe4 keeps the destination's low half and takes
+;;          the high half from register operand 1.
+;;   alt 1: operand 1 is in the destination; movlps loads the low half
+;;          from memory.
+;;   alt 2: operand 1 is the memory destination; movlps stores the low
+;;          half from a register.
+(define_insn "sse_loadlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+       (vec_concat:V4SF
+         (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+         (vec_select:V2SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+           (parallel [(const_int 2) (const_int 3)]))))]
+  "TARGET_SSE"
+  "@
+   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+   movlps\t{%2, %0|%0, %2}
+   movlps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+;; Load a scalar SF into element 0 of a V4SF.  The remaining elements
+;; come from operand 2, which the preparation statement sets to the
+;; V4SF zero constant.
+(define_expand "sse_loadss"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF (match_operand:SF 1 "nonimmediate_operand" ""))
+         (match_dup 2)
+         (const_int 1)))]
+  "TARGET_SSE"
+  "operands[2] = CONST0_RTX (V4SFmode);")
+
+;; Merge the scalar SF operand 2 into element 0 (mask 1) of operand 1.
+;;   alt 0: movss from a register, operand 1 tied to the destination.
+;;   alt 1: movss from memory, operand 1 matches constraint "C".
+;;   alt 2: movd from a general register, operand 1 matches "C".
+;;   alt 3: memory destination tied to operand 1; emitted as "#", i.e.
+;;          this alternative must be split before output.
+(define_insn "sse_loadlss"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y ,m")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
+         (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+         (const_int 1)))]
+  "TARGET_SSE"
+  "@
+   movss\t{%2, %0|%0, %2}
+   movss\t{%2, %0|%0, %2}
+   movd\t{%2, %0|%0, %2}
+   #"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+;; After reload, replacing element 0 of a V4SF in memory with a
+;; non-memory scalar is just an SFmode store to the start of that memory
+;; (the (match_dup 0) requires source and destination vector to be the
+;; same memory operand).
+(define_split
+  [(set (match_operand:V4SF 0 "memory_operand" "")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (match_operand:SF 1 "nonmemory_operand" ""))
+         (match_dup 0)
+         (const_int 1)))]
+  "TARGET_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Re-address the vector memory as an SF at offset 0 and store there.  */
+  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+  DONE;
+})
+
+;; MOVSS register form: element 0 comes from operand 2 (mask 1), the
+;; other elements from operand 1, which is tied to the destination.
+(define_insn "sse_movss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+         (match_operand:V4SF 2 "register_operand" "x")
+         (match_operand:V4SF 1 "register_operand" "0")
+         (const_int 1)))]
+  "TARGET_SSE"
+  "movss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+;; Extract element 0 of a V4SF as an SF.  There is no output template
+;; ("#"): after reload the insn is always split into an ordinary SFmode
+;; move from the low part of operand 1.  Memory-to-memory is rejected by
+;; the insn condition.
+(define_insn_and_split "sse_storess"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+       (vec_select:SF
+         (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[0], gen_lowpart (SFmode, operands[1]));
+  DONE;
+})
+
+;; Set element operands[2] (a constant 0..3) of the V4SF register
+;; operand 0 to the SF value in operand 1, leaving the other elements
+;; unchanged.  Element 0 is a direct scalar insert; the other positions
+;; are built from a scalar insert plus one shufps.
+;;
+;; sse_shufps_1 takes indices into the concatenation of its two sources:
+;; the first two selectors must be 0..3 (first source) and the last two
+;; must be 4..7 (second source), hence the "+ 4" below.
+(define_expand "vec_setv4sf"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:SF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  rtx tmp, op0 = operands[0], op1 = operands[1];
+
+  /* Vectors in the comments are written high element first; X is the
+     new scalar.  */
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      emit_insn (gen_sse_loadlss (op0, op0, op1));
+      break;
+
+    case 1:
+      /* tmp = op0 = A B C D */
+      tmp = copy_to_reg (op0);
+
+      /* op0 = C C D D */
+      emit_insn (gen_sse_unpcklps (op0, op0, op0));
+
+      /* op0 = C C D X */
+      emit_insn (gen_sse_loadlss (op0, op0, op1));
+
+      /* op0 = A B X D: low half from op0 (swapped), high half from
+         elements 2 and 3 of tmp.  */
+      emit_insn (gen_sse_shufps_1 (op0, op0, tmp, GEN_INT (1), GEN_INT (0),
+                                  GEN_INT (2 + 4), GEN_INT (3 + 4)));
+      break;
+
+    case 2:
+      /* tmp = op0 = A B C D */
+      tmp = copy_to_reg (op0);
+
+      /* tmp = A B C X; op0 must stay intact because it still supplies
+         the low half of the result.  */
+      emit_insn (gen_sse_loadlss (tmp, tmp, op1));
+
+      /* op0 = A X C D: low half from op0, then tmp[0] and tmp[3].  */
+      emit_insn (gen_sse_shufps_1 (op0, op0, tmp, GEN_INT (0), GEN_INT (1),
+                                  GEN_INT (0 + 4), GEN_INT (3 + 4)));
+      break;
+
+    case 3:
+      /* tmp = op0 = A B C D */
+      tmp = copy_to_reg (op0);
+
+      /* tmp = A B C X */
+      emit_insn (gen_sse_loadlss (tmp, tmp, op1));
+
+      /* op0 = X B C D: low half from op0, then tmp[2] and tmp[0].  */
+      emit_insn (gen_sse_shufps_1 (op0, op0, tmp, GEN_INT (0), GEN_INT (1),
+                                  GEN_INT (2 + 4), GEN_INT (0 + 4)));
+      break;
+
+    default:
+      abort ();
+    }
+  DONE;
+})
+
+;; Extract element operands[2] (a constant 0..3) of the V4SF register
+;; operand 1 into the SF register operand 0.  The wanted element is first
+;; moved to position 0 of a temporary, then read out with sse_storess.
+;;
+;; sse_shufps_1 requires its last two selectors to be in 4..7 (indices
+;; into the second source); only element 0 of the shuffle result is used,
+;; so 6 and 7 simply keep the upper lanes where they were.
+(define_expand "vec_extractv4sf"
+  [(match_operand:SF 0 "register_operand" "")
+   (match_operand:V4SF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  rtx tmp, op0 = operands[0], op1 = operands[1];
+
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      /* Already in position 0; no shuffle needed.  */
+      tmp = op1;
+      break;
+
+    case 1:
+      tmp = copy_to_reg (op1);
+      emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, GEN_INT (1), GEN_INT (1),
+                                  GEN_INT (2 + 4), GEN_INT (3 + 4)));
+      break;
+
+    case 2:
+      /* unpckhps of a register with itself puts element 2 in lane 0.  */
+      tmp = copy_to_reg (op1);
+      emit_insn (gen_sse_unpckhps (tmp, tmp, tmp));
+      break;
+
+    case 3:
+      tmp = copy_to_reg (op1);
+      emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, GEN_INT (3), GEN_INT (1),
+                                  GEN_INT (2 + 4), GEN_INT (3 + 4)));
+      break;
+
+    default:
+      abort ();
+    }
+
+  /* Read lane 0 of the shuffled temporary, not of the original vector;
+     reading op1 here would always return element 0.  */
+  emit_insn (gen_sse_storess (op0, tmp));
+  DONE;
+})
+
+;; Initialize the V4SF register operand 0 from operand 1; all the work
+;; is delegated to ix86_expand_vector_init.
+(define_expand "vec_initv4sf"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand 1 "" "")]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_init (operands[0], operands[1]);
+  DONE;
+})
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; V2DF negation; expansion is done entirely by
+;; ix86_expand_fp_absneg_operator (the RTL template is not emitted
+;; because the preparation statement ends in DONE).
+(define_expand "negv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
+
+;; V2DF absolute value; expanded the same way as negv2df2, with ABS.
+(define_expand "absv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
+
+;; V2DF addition.  The expander fixes up the operands and then emits the
+;; SET pattern, which is matched by *addv2df3 below.
+(define_expand "addv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
+
+;; ADDPD, commutative, operand 1 tied to the destination.
+(define_insn "*addv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
+  "addpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; ADDSD: element 0 of the result is op1[0] + op2[0] (mask 1 takes lane 0
+;; from the PLUS); element 1 is copied from operand 1.  The insn
+;; condition checks the operands in V2DFmode, matching the mode of the
+;; pattern (it previously passed V4SFmode).
+(define_insn "sse2_vmaddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
+  "addsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; V2DF subtraction.  The expander fixes up the operands and then emits
+;; the SET pattern.
+(define_expand "subv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
+
+;; SUBPD.  Not commutative: operand 1 must be the register tied to the
+;; destination.
+(define_insn "*subv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "subpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; SUBSD: element 0 is op1[0] - op2[0]; element 1 is copied from
+;; operand 1.
+(define_insn "sse2_vmsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "subsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; V2DF multiplication.  The expander fixes up the operands and then
+;; emits the SET pattern.
+(define_expand "mulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
+
+;; MULPD, commutative, operand 1 tied to the destination.
+(define_insn "*mulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
+  "mulpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "V2DF")])
+
+;; MULSD: element 0 is op1[0] * op2[0]; element 1 is copied from
+;; operand 1.
+(define_insn "sse2_vmmulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
+  "mulsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "DF")])
+
+;; V2DF division.  The expander fixes up the operands and then emits the
+;; SET pattern; the dividend (operand 1) must already be a register.
+(define_expand "divv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (div:V2DF (match_operand:V2DF 1 "register_operand" "")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
+
+;; DIVPD.  Not commutative: operand 1 is tied to the destination.
+(define_insn "*divv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "divpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "mode" "V2DF")])
+
+;; DIVSD: element 0 is op1[0] / op2[0]; element 1 is copied from
+;; operand 1.
+(define_insn "sse2_vmdivv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "divsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "mode" "DF")])
+
+;; SQRTPD: element-wise square root of a V2DF register or memory operand.
+(define_insn "sqrtv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "sqrtpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V2DF")])
+
+;; SQRTSD: element 0 of the result is sqrt (op1[0]); element 1 is copied
+;; from operand 2, which is tied to the destination.
+;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
+;; its "xm" constraint, and the "mode" attribute is DF because this is a
+;; scalar double-precision operation (it was previously SF).
+(define_insn "sse2_vmsqrtv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+         (match_operand:V2DF 2 "register_operand" "0")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "sqrtsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "DF")])
+
+;; V2DF maximum.  The expander fixes up the operands and then emits the
+;; SET pattern.
+(define_expand "smaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);")
+
+;; MAXPD, declared commutative ("%"), operand 1 tied to the destination.
+(define_insn "*smaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
+  "maxpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; MAXSD: element 0 is the maximum of op1[0] and op2[0]; element 1 is
+;; copied from operand 1.
+(define_insn "sse2_vmsmaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
+  "maxsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; V2DF minimum.  The expander fixes up the operands and then emits the
+;; SET pattern.
+(define_expand "sminv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);")
+
+;; MINPD, declared commutative ("%"), operand 1 tied to the destination.
+(define_insn "*sminv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
+  "minpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; MINSD: element 0 is the minimum of op1[0] and op2[0]; element 1 is
+;; copied from operand 1.
+(define_insn "sse2_vmsminv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
+  "minsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; ADDSUBPD subtracts in element 0 and adds in element 1:
+;;   result = { op1[0] - op2[0], op1[1] + op2[1] }.
+;; vec_merge takes element i from its first arm when bit i of the mask is
+;; set, so with mask 1 the first arm must be the MINUS and the second the
+;; PLUS (the arms were previously the other way round).
+(define_insn "sse3_addsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (minus:V2DF
+           (match_operand:V2DF 1 "register_operand" "0")
+           (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (plus:V2DF (match_dup 1) (match_dup 2))
+         (const_int 1)))]
+  "TARGET_SSE3"
+  "addsubpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; HADDPD: horizontal add.
+;;   result = { op1[0] + op1[1], op2[0] + op2[1] }.
+(define_insn "sse3_haddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_concat:V2DF
+         (plus:DF
+           (vec_select:DF
+             (match_operand:V2DF 1 "register_operand" "0")
+             (parallel [(const_int 0)]))
+           (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+         (plus:DF
+           (vec_select:DF
+             (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+             (parallel [(const_int 0)]))
+           (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_SSE3"
+  "haddpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; HSUBPD: horizontal subtract.
+;;   result = { op1[0] - op1[1], op2[0] - op2[1] }.
+(define_insn "sse3_hsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_concat:V2DF
+         (minus:DF
+           (vec_select:DF
+             (match_operand:V2DF 1 "register_operand" "0")
+             (parallel [(const_int 0)]))
+           (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+         (minus:DF
+           (vec_select:DF
+             (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+             (parallel [(const_int 0)]))
+           (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_SSE3"
+  "hsubpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; CMPccPD: packed comparison.  Operand 3 is the comparison operator
+;; (anything accepted by sse_comparison_operator); the %D3 modifier
+;; prints the condition part of the mnemonic.
+(define_insn "sse2_maskcmpv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (match_operator:V2DF 3 "sse_comparison_operator"
+               [(match_operand:V2DF 1 "register_operand" "0")
+                (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
+  "TARGET_SSE2"
+  "cmp%D3pd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "V2DF")])
+
+;; CMPccSD: scalar form of the above.  Element 0 holds the comparison
+;; result (mask 1); element 1 is copied from operand 1.
+(define_insn "sse2_vmmaskcmpv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (match_operator:V2DF 3 "sse_comparison_operator"
+               [(match_operand:V2DF 1 "register_operand" "0")
+                (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
+         (match_dup 1)
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "cmp%D3sd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "DF")])
+
+;; COMISD: compare element 0 of operand 0 against element 0 of operand 1
+;; and set FLAGS_REG in CCFP mode.
+(define_insn "sse2_comi"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP
+         (vec_select:DF
+           (match_operand:V2DF 0 "register_operand" "x")
+           (parallel [(const_int 0)]))
+         (vec_select:DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE2"
+  "comisd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "mode" "DF")])
+
+;; UCOMISD: same operands and element selection as sse2_comi, but the
+;; flags are set in CCFPU mode.
+(define_insn "sse2_ucomi"
+  [(set (reg:CCFPU FLAGS_REG)
+       (compare:CCFPU
+         (vec_select:DF
+           (match_operand:V2DF 0 "register_operand" "x")
+           (parallel [(const_int 0)]))
+         (vec_select:DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "xm")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE2"
+  "ucomisd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "mode" "DF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Bitwise AND of two V2DF values.  The expander fixes up the operands
+;; and then emits the SET pattern, which is matched by *andv2df3 below.
+(define_expand "andv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
+
+;; ANDPD, commutative, operand 1 tied to the destination.  The insn
+;; condition checks the operands in V2DFmode, matching both the pattern
+;; and the expander above (it previously passed V4SFmode).
+(define_insn "*andv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
+  "andpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;; ANDNPD: (NOT operand 1) AND operand 2.  Only operand 1 is inverted, so
+;; the pattern is not commutative and operand 1 must be the register tied
+;; to the destination.
+(define_insn "sse2_nandv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
+                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "andnpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;; Bitwise inclusive OR of two V2DF values; the expander fixes up the
+;; operands and then emits the SET pattern.
+(define_expand "iorv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
+
+;; ORPD, commutative, operand 1 tied to the destination.
+(define_insn "*iorv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
+  "orpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;; Bitwise exclusive OR of two V2DF values; the expander fixes up the
+;; operands and then emits the SET pattern.
+(define_expand "xorv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
+
+;; XORPD, commutative, operand 1 tied to the destination.
+(define_insn "*xorv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
+  "xorpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Convert a V2SI operand (constraint "ym") to V2DF with (float ...),
+;; emitted as cvtpi2pd.
+(define_insn "sse2_cvtpi2pd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE2"
+  "cvtpi2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; V2DF to V2SI conversion modelled as UNSPEC_FIX_NOTRUNC rather than a
+;; plain (fix ...); the result goes to a "y" register.  Emitted as
+;; cvtpd2pi.
+(define_insn "sse2_cvtpd2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+                    UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2"
+  "cvtpd2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DI")])
+
+;; V2DF to V2SI conversion modelled as (fix ...), with the result in a
+;; "y" register.  Emitted as cvttpd2pi.
+(define_insn "sse2_cvttpd2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvttpd2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; Convert SImode operand 2 to DF and merge it into element 0 of V2DF
+;; operand 1 (vec_merge mask 1); operand 1 is tied to the destination.
+;; The two alternatives take operand 2 from a general register or from
+;; memory.  Emitted as cvtsi2sd.
+(define_insn "sse2_cvtsi2sd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF
+           (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+         (match_operand:V2DF 1 "register_operand" "0,0")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "cvtsi2sd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "double,direct")])
+
+;; DImode-source variant of the scalar int-to-double merge: converts
+;; operand 2 to DF and merges it into element 0 of operand 1 (vec_merge
+;; mask 1).  Only enabled when TARGET_64BIT.  Emitted as cvtsi2sdq.
+(define_insn "sse2_cvtsi2sdq"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF
+           (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
+         (match_operand:V2DF 1 "register_operand" "0,0")
+         (const_int 1)))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "cvtsi2sdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "double,direct")])
+
+;; Convert element 0 of V2DF operand 1 to SImode, modelled as
+;; UNSPEC_FIX_NOTRUNC.  Emitted as cvtsd2si.
+(define_insn "sse2_cvtsd2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (unspec:SI
+         [(vec_select:DF
+            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+            (parallel [(const_int 0)]))]
+         UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2"
+  "cvtsd2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "SI")])
+
+;; Convert element 0 of V2DF operand 1 to DImode, modelled as
+;; UNSPEC_FIX_NOTRUNC.  Only enabled when TARGET_64BIT.  Emitted as
+;; cvtsd2siq.
+(define_insn "sse2_cvtsd2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+       (unspec:DI
+         [(vec_select:DF
+            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+            (parallel [(const_int 0)]))]
+         UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "cvtsd2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "DI")])
+
+;; Convert element 0 of V2DF operand 1 to SImode with (fix ...).
+;; Emitted as cvttsd2si.
+(define_insn "sse2_cvttsd2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (fix:SI
+         (vec_select:DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE2"
+  "cvttsd2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "SI")
+   (set_attr "athlon_decode" "double,vector")])
+
+;; Convert element 0 of V2DF operand 1 to DImode with (fix ...).  Only
+;; enabled when TARGET_64BIT.  Emitted as cvttsd2siq.
+(define_insn "sse2_cvttsd2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+       (fix:DI
+         (vec_select:DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+           (parallel [(const_int 0)]))))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "cvttsd2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "double,vector")])
+
+;; Convert elements 0 and 1 of V4SI operand 1 to V2DF with (float ...).
+;; Emitted as cvtdq2pd.
+(define_insn "sse2_cvtdq2pd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (float:V2DF
+         (vec_select:V2SI
+           (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+           (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE2"
+  "cvtdq2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; Expander: the V4SI result is the concatenation of the
+;; UNSPEC_FIX_NOTRUNC conversion of V2DF operand 1 with operand 2, which
+;; the preparation statement sets to the V2SI zero vector.
+(define_expand "sse2_cvtpd2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "")
+       (vec_concat:V4SI
+         (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
+                      UNSPEC_FIX_NOTRUNC)
+         (match_dup 2)))]
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V2SImode);")
+
+;; Insn form: first V2SI half is the UNSPEC_FIX_NOTRUNC conversion of
+;; V2DF operand 1; the second half must satisfy const0_operand.
+;; Emitted as cvtpd2dq.
+(define_insn "*sse2_cvtpd2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (vec_concat:V4SI
+         (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+                      UNSPEC_FIX_NOTRUNC)
+         (match_operand:V2SI 2 "const0_operand" "")))]
+  "TARGET_SSE2"
+  "cvtpd2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; Expander: the V4SI result is the concatenation of (fix:V2SI operand 1)
+;; with operand 2, which the preparation statement sets to the V2SI zero
+;; vector.
+(define_expand "sse2_cvttpd2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "")
+       (vec_concat:V4SI
+         (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
+         (match_dup 2)))]
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V2SImode);")
+
+;; Insn form: first V2SI half is (fix:V2SI operand 1); the second half
+;; must satisfy const0_operand.  Emitted as cvttpd2dq.
+(define_insn "*sse2_cvttpd2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (vec_concat:V4SI
+         (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+         (match_operand:V2SI 2 "const0_operand" "")))]
+  "TARGET_SSE2"
+  "cvttpd2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; Merge into element 0 of V4SF operand 1 (vec_merge mask 1) a value
+;; written as vec_duplicate of the float_truncate of V2DF operand 2.
+;; Operand 1 is tied to the destination.  Emitted as cvtsd2ss.
+(define_insn "sse2_cvtsd2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (float_truncate:V2SF
+             (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
+         (match_operand:V4SF 1 "register_operand" "0,0")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "cvtsd2ss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "athlon_decode" "vector,double")
+   (set_attr "mode" "SF")])
+
+;; Merge into element 0 of V2DF operand 1 (vec_merge mask 1) the
+;; float_extend of elements 0 and 1 of V4SF operand 2.  Operand 1 is
+;; tied to the destination.  Emitted as cvtss2sd.
+(define_insn "sse2_cvtss2sd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_merge:V2DF
+         (float_extend:V2DF
+           (vec_select:V2SF
+             (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+             (parallel [(const_int 0) (const_int 1)])))
+         (match_operand:V2DF 1 "register_operand" "0")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "cvtss2sd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF")])
+
+;; Expander: the V4SF result is the concatenation of the float_truncate
+;; of V2DF operand 1 with operand 2, which the preparation statement sets
+;; to the V2SF zero vector.
+(define_expand "sse2_cvtpd2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+       (vec_concat:V4SF
+         (float_truncate:V2SF
+           (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+         (match_dup 2)))]
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V2SFmode);")
+
+;; Insn form: first V2SF half is the float_truncate of V2DF operand 1;
+;; the second half must satisfy const0_operand.  Emitted as cvtpd2ps.
+(define_insn "*sse2_cvtpd2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_concat:V4SF
+         (float_truncate:V2SF
+           (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+         (match_operand:V2SF 2 "const0_operand" "")))]
+  "TARGET_SSE2"
+  "cvtpd2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+;; float_extend elements 0 and 1 of V4SF operand 1 to V2DF.  Emitted as
+;; cvtps2pd.
+(define_insn "sse2_cvtps2pd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (float_extend:V2DF
+         (vec_select:V2SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+           (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE2"
+  "cvtps2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Select elements 1 and 3 of the V4DF concatenation of operands 1 and 2,
+;; i.e. element 1 of each input.  Three alternatives:
+;;   0: operand 1 tied to a register destination    -> unpckhpd
+;;   1: operand 1 in offsettable memory, operand 2 tied -> movlpd from %H1
+;;   2: memory destination tied to operand 2        -> movhpd store
+;; The condition rejects operands 1 and 2 both being memory.
+(define_insn "sse2_unpckhpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
+       (vec_select:V2DF
+         (vec_concat:V4DF
+           (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
+           (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
+         (parallel [(const_int 1)
+                    (const_int 3)])))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   unpckhpd\t{%2, %0|%0, %2}
+   movlpd\t{%H1, %0|%0, %H1}
+   movhpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+;; Duplicate element 0 of V2DF operand 1 into both elements of the
+;; result (elements 0 and 2 of the concatenation of operand 1 with
+;; itself).  Alternative 0 emits movddup; alternative 1 (register source,
+;; offsettable memory destination) emits "#" and is left to be split.
+;; The pattern has only operands 0 and 1, so the condition rejects the
+;; case where both of those are memory.
+(define_insn "sse3_movddup"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,o")
+       (vec_select:V2DF
+         (vec_concat:V4DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+           (match_dup 1))
+         (parallel [(const_int 0)
+                    (const_int 2)])))]
+  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   movddup\t{%1, %0|%0, %1}
+   #"
+  [(set_attr "type" "sselog,ssemov")
+   (set_attr "mode" "V2DF")])
+
+;; After reload, split "duplicate element 0 of register operand 1 into a
+;; V2DF memory destination" into two DFmode stores of the low part of
+;; operand 1, at byte offsets 0 and 8 of operand 0.
+(define_split
+  [(set (match_operand:V2DF 0 "memory_operand" "")
+       (vec_select:V2DF
+         (vec_concat:V4DF
+           (match_operand:V2DF 1 "register_operand" "")
+           (match_dup 1))
+         (parallel [(const_int 0)
+                    (const_int 2)])))]
+  "TARGET_SSE3 && reload_completed"
+  [(const_int 0)]
+{
+  rtx low = gen_lowpart (DFmode, operands[1]);
+  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+  DONE;
+})
+
+;; Select elements 0 and 2 of the V4DF concatenation of operands 1 and 2,
+;; i.e. element 0 of each input.  Operand 1 is always tied to the
+;; destination.  Three alternatives:
+;;   0: register operand 2                 -> unpcklpd
+;;   1: memory operand 2                   -> movhpd load
+;;   2: offsettable memory destination     -> movlpd store to %H0
+;; The condition rejects operands 1 and 2 both being memory.
+(define_insn "sse2_unpcklpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
+       (vec_select:V2DF
+         (vec_concat:V4DF
+           (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+           (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+         (parallel [(const_int 0)
+                    (const_int 2)])))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   unpcklpd\t{%2, %0|%0, %2}
+   movhpd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+;; Expander taking the 2-bit shuffle immediate in operand 3 and lowering
+;; it to sse2_shufpd_1: bit 0 becomes the first selector (0 or 1) and
+;; bit 1 becomes the second selector (2 or 3).
+(define_expand "sse2_shufpd"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand:V2DF 1 "register_operand" "")
+   (match_operand:V2DF 2 "nonimmediate_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  int imm = INTVAL (operands[3]);
+  /* Index into the concatenation {op1[0], op1[1], op2[0], op2[1]}.  */
+  rtx first_sel = GEN_INT (imm & 1);
+  rtx second_sel = GEN_INT ((imm & 2) != 0 ? 3 : 2);
+
+  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
+                               first_sel, second_sel));
+  DONE;
+})
+
+;; Select one element from operand 1 (selector 0..1, operand 3) and one
+;; from operand 2 (selector 2..3, operand 4) of their V4DF concatenation.
+;; The output code folds both selectors back into the 2-bit shufpd
+;; immediate and overwrites operands[3] with it before printing.
+(define_insn "sse2_shufpd_1"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_select:V2DF
+         (vec_concat:V4DF
+           (match_operand:V2DF 1 "register_operand" "0")
+           (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+         (parallel [(match_operand 3 "const_0_to_1_operand" "")
+                    (match_operand 4 "const_2_to_3_operand" "")])))]
+  "TARGET_SSE2"
+{
+  /* Bit 0 comes from selector 3, bit 1 from selector 4 rebased to 0..1.  */
+  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);
+
+  operands[3] = GEN_INT (imm);
+  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;; Extract element 1 of V2DF operand 1 into a DF destination.
+;; Alternatives:
+;;   0: memory destination, register source      -> movhpd store
+;;   1: register destination tied to operand 1   -> unpckhpd %0, %0
+;;   2: offsettable memory source                -> "#", left to be split
+;; The condition rejects a memory-to-memory form.
+(define_insn "sse2_storehpd"
+  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
+       (vec_select:DF
+         (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
+         (parallel [(const_int 1)])))]
+  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   movhpd\t{%1, %0|%0, %1}
+   unpckhpd\t%0, %0
+   #"
+  [(set_attr "type" "ssemov,sselog1,ssemov")
+   (set_attr "mode" "V1DF,V2DF,DF")])
+
+;; After reload, turn extraction of element 1 from a V2DF in memory into
+;; a plain DFmode move from byte offset 8 of that memory.
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+       (vec_select:DF
+         (match_operand:V2DF 1 "memory_operand" "")
+         (parallel [(const_int 1)])))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[1] = adjust_address (operands[1], DFmode, 8);
+})
+
+;; Extract element 0 of V2DF operand 1 into a DF destination.
+;; Alternative 0 (memory destination) emits movlpd; the two
+;; register-destination alternatives emit "#" and are left to be split.
+;; The condition rejects a memory-to-memory form.
+(define_insn "sse2_storelpd"
+  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
+       (vec_select:DF
+         (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   movlpd\t{%1, %0|%0, %1}
+   #
+   #"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V1DF,DF,DF")])
+
+;; After reload, turn extraction of element 0 of a V2DF into a plain
+;; DFmode move, taking the DFmode low part (gen_lowpart) of both
+;; operands.
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+       (vec_select:DF
+         (match_operand:V2DF 1 "nonimmediate_operand" "")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[0] = gen_lowpart (DFmode, operands[0]);
+  operands[1] = gen_lowpart (DFmode, operands[1]);
+})
+
+;; Build a V2DF from element 0 of V2DF operand 1 followed by DF
+;; operand 2, i.e. replace element 1 of operand 1 with operand 2.
+;; Alternatives:
+;;   0: operand 2 in memory, operand 1 tied       -> movhpd load
+;;   1: operand 2 in a register, operand 1 tied   -> unpcklpd
+;;   2: destination tied to operand 2             -> shufpd $1
+;;   3: offsettable memory destination            -> "#", left to be split
+;; The condition rejects operands 1 and 2 both being memory.
+;; NOTE(review): in alternative 2 the destination holds operand 2 and the
+;; shufpd source is operand 1; confirm that immediate 1 really yields
+;; (op1[0], op2) for that operand arrangement.
+(define_insn "sse2_loadhpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o")
+       (vec_concat:V2DF
+         (vec_select:DF
+           (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
+           (parallel [(const_int 0)]))
+         (match_operand:DF 2 "nonimmediate_operand"     " m,x,0,x*fr")))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   movhpd\t{%2, %0|%0, %2}
+   unpcklpd\t{%2, %0|%0, %2}
+   shufpd\t{$1, %1, %0|%0, %1, 1}
+   #"
+  [(set_attr "type" "ssemov,sselog,sselog,other")
+   (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
+
+;; After reload, replacing element 1 of a V2DF in memory with register
+;; operand 1 becomes a plain DFmode store at byte offset 8 of that
+;; memory.
+(define_split
+  [(set (match_operand:V2DF 0 "memory_operand" "")
+       (vec_concat:V2DF
+         (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+         (match_operand:DF 1 "register_operand" "")))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+;; Build a V2DF from DF operand 2 followed by element 1 of V2DF
+;; operand 1, i.e. replace element 0 of operand 1 with operand 2.
+;; Alternatives:
+;;   0: operand 2 in memory, operand 1 matches "C"  -> movsd load
+;;   1: operand 2 in memory, operand 1 tied         -> movlpd load
+;;   2: operand 2 in a register, operand 1 tied     -> movsd
+;;   3: destination tied to operand 2               -> shufpd $2
+;;   4: operand 1 in offsettable memory, op 2 tied  -> movhpd from %H1
+;;   5: memory destination tied to operand 1        -> "#", left to be split
+;; In alternative 3 the destination already holds operand 2, so shufpd
+;; takes operand 1 as its source: immediate 2 keeps element 0 of the
+;; destination and takes element 1 from the source.
+;; The condition rejects operands 1 and 2 both being memory.
+(define_insn "sse2_loadlpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
+       (vec_concat:V2DF
+         (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
+         (vec_select:DF
+           (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
+           (parallel [(const_int 1)]))))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   movsd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %0|%0, %2}
+   movsd\t{%2, %0|%0, %2}
+   shufpd\t{$2, %1, %0|%0, %1, 2}
+   movhpd\t{%H1, %0|%0, %H1}
+   #"
+  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
+   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
+
+;; After reload, replacing element 0 of a V2DF in memory with register
+;; operand 1 (element 1 is kept from the same memory) becomes a plain
+;; DFmode store.  Element 0 is the first DF of the vector, so the store
+;; goes to byte offset 0.
+(define_split
+  [(set (match_operand:V2DF 0 "memory_operand" "")
+       (vec_concat:V2DF
+         (match_operand:DF 1 "register_operand" "")
+         (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[0] = adjust_address (operands[0], DFmode, 0);
+})
+
+;; Expander: V2DF whose element 0 is DF operand 1 and whose element 1 is
+;; selected from operand 2, which the preparation statement sets to the
+;; V2DF zero vector.
+(define_expand "sse2_loadsd"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+       (vec_concat:V2DF
+         (match_operand:DF 1 "nonimmediate_operand" "")
+         (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V2DFmode);")
+
+;; vec_merge with mask 1: element 0 of the result comes from operand 2,
+;; element 1 from operand 1.  Alternatives:
+;;   0: register operand 2, operand 1 tied           -> movsd
+;;   1: memory operand 2, operand 1 tied             -> movlpd load
+;;   2: memory destination tied to operand 1         -> movlpd store
+;;   3: destination tied to operand 2, op 1 register -> shufpd $2
+;;   4: destination tied to operand 2, op 1 memory   -> movhps from %H1
+;;   5: memory destination tied to operand 2         -> movhps store to %H0
+;; In alternative 3 the destination already holds operand 2, so shufpd
+;; takes operand 1 as its source: immediate 2 keeps element 0 of the
+;; destination and takes element 1 from the source.
+(define_insn "sse2_movsd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
+       (vec_merge:V2DF
+         (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
+         (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   movsd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %0|%0, %2}
+   shufpd\t{$2, %1, %0|%0, %1, 2}
+   movhps\t{%H1, %0|%0, %H1}
+   movhps\t{%1, %H0|%H0, %1}"
+  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
+
+;; Duplicate DF operand 1 (register or memory) into both elements of a
+;; V2DF register.  Emitted as movddup.
+(define_insn "sse3_loadddup"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+       (vec_duplicate:V2DF
+         (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE3"
+  "movddup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF")])
+
+;; Set element operand 2 (a constant 0 or 1) of V2DF operand 0 to DF
+;; operand 1.  Index 0 goes through sse2_loadlpd, index 1 through
+;; sse2_loadhpd; any other index aborts.
+(define_expand "vec_setv2df"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand:DF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  HOST_WIDE_INT elt = INTVAL (operands[2]);
+
+  if (elt == 0)
+    emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1]));
+  else if (elt == 1)
+    emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1]));
+  else
+    abort ();
+  DONE;
+})
+
+;; Extract element operand 2 (a constant 0 or 1) of V2DF operand 1 into
+;; DF operand 0.  Index 0 goes through sse2_storelpd, index 1 through
+;; sse2_storehpd; any other index aborts.
+(define_expand "vec_extractv2df"
+  [(match_operand:DF 0 "register_operand" "")
+   (match_operand:V2DF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  HOST_WIDE_INT elt = INTVAL (operands[2]);
+
+  if (elt == 0)
+    emit_insn (gen_sse2_storelpd (operands[0], operands[1]));
+  else if (elt == 1)
+    emit_insn (gen_sse2_storehpd (operands[0], operands[1]));
+  else
+    abort ();
+  DONE;
+})
+
+;; Initialise V2DF operand 0 from operand 1 by delegating entirely to
+;; ix86_expand_vector_init.
+(define_expand "vec_initv2df"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand 1 "" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_vector_init (operands[0], operands[1]);
+  DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Integer vector negation for each SSEMODEI mode, expressed as
+;; (minus zero operand1).  The preparation statement forces the zero
+;; vector into a register and supplies it as operand 2.
+(define_expand "neg<mode>2"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+       (minus:SSEMODEI
+         (match_dup 2)
+         (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
+;; Expander for integer vector addition in each SSEMODEI mode.  The
+;; preparation statement hands the operands to
+;; ix86_fixup_binary_operands_no_copy with code PLUS.
+(define_expand "add<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+       (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
+
+;; Integer vector addition, emitted as padd with the <ssevecsize> suffix
+;; for the mode.  Operand 1 is commutative with operand 2 ("%") and tied
+;; to the destination.
+(define_insn "*add<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+       (plus:SSEMODEI
+         (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+         (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "padd<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Signed saturating addition (ss_plus) for each SSEMODE12 mode, emitted
+;; as padds with the <ssevecsize> suffix.  Operand 1 is commutative with
+;; operand 2 and tied to the destination.
+(define_insn "sse2_ssadd<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+       (ss_plus:SSEMODE12
+         (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
+         (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
+  "padds<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Unsigned saturating addition (us_plus) for each SSEMODE12 mode,
+;; emitted as paddus with the <ssevecsize> suffix.  Operand 1 is
+;; commutative with operand 2 and tied to the destination.
+(define_insn "sse2_usadd<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+       (us_plus:SSEMODE12
+         (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
+         (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
+  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Expander for integer vector subtraction in each SSEMODEI mode.
+;; Operand 1 must already be a register.  The preparation statement
+;; hands the operands to ix86_fixup_binary_operands_no_copy with code
+;; MINUS.
+(define_expand "sub<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+       (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
+                       (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+
+;; Integer vector subtraction, emitted as psub with the <ssevecsize>
+;; suffix.  Operand 1 is a register tied to the destination.
+(define_insn "*sub<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+       (minus:SSEMODEI
+         (match_operand:SSEMODEI 1 "register_operand" "0")
+         (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psub<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Signed saturating subtraction (ss_minus) for each SSEMODE12 mode,
+;; emitted as psubs with the <ssevecsize> suffix.  Operand 1 is a
+;; register tied to the destination.
+(define_insn "sse2_sssub<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+       (ss_minus:SSEMODE12
+         (match_operand:SSEMODE12 1 "register_operand" "0")
+         (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Unsigned saturating subtraction (us_minus) for each SSEMODE12 mode,
+;; emitted as psubus with the <ssevecsize> suffix.  Operand 1 is a
+;; register tied to the destination.
+(define_insn "sse2_ussub<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+       (us_minus:SSEMODE12
+         (match_operand:SSEMODE12 1 "register_operand" "0")
+         (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Expander for V8HI multiplication.  The preparation statement hands
+;; the operands to ix86_fixup_binary_operands_no_copy with code MULT.
+(define_expand "mulv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+       (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+;; V8HI multiplication with a V8HI result, emitted as pmullw.  Operand 1
+;; is commutative with operand 2 ("%") and tied to the destination.
+(define_insn "*mulv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+  "pmullw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Signed high-part multiply: sign-extend both V8HI operands to V8SI,
+;; multiply, shift right logically by 16 and truncate back to V8HI.
+;; Emitted as pmulhw.
+(define_insn "sse2_smulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (truncate:V8HI
+         (lshiftrt:V8SI
+           (mult:V8SI
+             (sign_extend:V8SI
+               (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+             (sign_extend:V8SI
+               (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+           (const_int 16))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+  "pmulhw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Unsigned high-part multiply: zero-extend both V8HI operands to V8SI,
+;; multiply, shift right logically by 16 and truncate back to V8HI.
+;; Emitted as pmulhuw.
+(define_insn "sse2_umulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (truncate:V8HI
+         (lshiftrt:V8SI
+           (mult:V8SI
+             (zero_extend:V8SI
+               (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+             (zero_extend:V8SI
+               (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+           (const_int 16))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+  "pmulhuw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Widening unsigned multiply: zero-extend elements 0 and 2 of each V4SI
+;; operand to V2DI and multiply them.  Emitted as pmuludq.  Operand 1 is
+;; commutative with operand 2 ("%") and tied to the destination.  The
+;; operand check uses V4SImode, the mode of operands 1 and 2.
+(define_insn "sse2_umulv2siv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+       (mult:V2DI
+         (zero_extend:V2DI
+           (vec_select:V2SI
+             (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+             (parallel [(const_int 0) (const_int 2)])))
+         (zero_extend:V2DI
+           (vec_select:V2SI
+             (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+             (parallel [(const_int 0) (const_int 2)])))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+  "pmuludq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Multiply-add: sign-extend the even-indexed (0,2,4,6) HI elements of
+;; operands 1 and 2 to V4SI and multiply them, do the same for the
+;; odd-indexed (1,3,5,7) elements, and add the two V4SI products.
+;; Emitted as pmaddwd.
+(define_insn "sse2_pmaddwd"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (plus:V4SI
+         (mult:V4SI
+           (sign_extend:V4SI
+             (vec_select:V4HI
+               (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+               (parallel [(const_int 0)
+                          (const_int 2)
+                          (const_int 4)
+                          (const_int 6)])))
+           (sign_extend:V4SI
+             (vec_select:V4HI
+               (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+               (parallel [(const_int 0)
+                          (const_int 2)
+                          (const_int 4)
+                          (const_int 6)]))))
+         (mult:V4SI
+           (sign_extend:V4SI
+             (vec_select:V4HI (match_dup 1)
+               (parallel [(const_int 1)
+                          (const_int 3)
+                          (const_int 5)
+                          (const_int 7)])))
+           (sign_extend:V4SI
+             (vec_select:V4HI (match_dup 2)
+               (parallel [(const_int 1)
+                          (const_int 3)
+                          (const_int 5)
+                          (const_int 7)]))))))]
+  "TARGET_SSE2"
+  "pmaddwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Arithmetic right shift of each SSEMODE24 vector by SImode operand 2,
+;; which may be a register or an immediate (constraint "xi").  Emitted as
+;; psra with the <ssevecsize> suffix.
+(define_insn "ashr<mode>3"
+  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+       (ashiftrt:SSEMODE24
+         (match_operand:SSEMODE24 1 "register_operand" "0")
+         (match_operand:SI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psra<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Logical right shift of each SSEMODE248 vector by SImode operand 2,
+;; which may be a register or an immediate (constraint "xi").  Emitted as
+;; psrl with the <ssevecsize> suffix.
+(define_insn "lshr<mode>3"
+  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+       (lshiftrt:SSEMODE248
+         (match_operand:SSEMODE248 1 "register_operand" "0")
+         (match_operand:SI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Left shift of each SSEMODE248 vector by SImode operand 2, which may be
+;; a register or an immediate (constraint "xi").  Emitted as psll with
+;; the <ssevecsize> suffix.
+(define_insn "ashl<mode>3"
+  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+       (ashift:SSEMODE248
+         (match_operand:SSEMODE248 1 "register_operand" "0")
+         (match_operand:SI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psll<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Left shift of a whole TImode register by a constant that must satisfy
+;; const_0_to_255_mul_8_operand.  The RTL count is in bits; the output
+;; code divides it by 8 before printing it as the pslldq immediate.
+(define_insn "sse2_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+       (ashift:TI (match_operand:TI 1 "register_operand" "0")
+                  (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "pslldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Logical right shift of a whole TImode register by a constant that must
+;; satisfy const_0_to_255_mul_8_operand.  The RTL count is in bits; the
+;; output code divides it by 8 before printing it as the psrldq
+;; immediate.
+(define_insn "sse2_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                    (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "psrldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Expander for unsigned V16QI maximum.  The preparation statement hands
+;; the operands to ix86_fixup_binary_operands_no_copy with code UMAX.
+(define_expand "umaxv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "")
+       (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+                   (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
+
+;; Unsigned V16QI maximum, emitted as pmaxub.  Operand 1 is commutative
+;; with operand 2 ("%") and tied to the destination.
+(define_insn "*umaxv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+                   (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
+  "pmaxub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Expander for signed V8HI maximum.  The preparation statement hands the
+;; operands to ix86_fixup_binary_operands_no_copy with code SMAX.
+(define_expand "smaxv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+       (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
+
+;; Signed V8HI maximum, emitted as pmaxsw.  Operand 1 is commutative with
+;; operand 2 ("%") and tied to the destination.
+(define_insn "*smaxv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
+  "pmaxsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Expander for unsigned V16QI minimum.  The preparation statement hands
+;; the operands to ix86_fixup_binary_operands_no_copy with code UMIN,
+;; the same code the pattern itself uses.
+(define_expand "uminv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "")
+       (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+                   (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
+
+;; Unsigned V16QI minimum, emitted as pminub.  Operand 1 is commutative
+;; with operand 2 ("%") and tied to the destination.
+(define_insn "*uminv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+                   (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
+  "pminub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Expander for signed V8HI minimum.  The preparation statement hands the
+;; operands to ix86_fixup_binary_operands_no_copy with code SMIN.
+(define_expand "sminv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+       (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
+
+;; Signed V8HI minimum, emitted as pminsw.  Operand 1 is commutative with
+;; operand 2 ("%") and tied to the destination.
+(define_insn "*sminv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
+  "pminsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Element-wise equality comparison (eq) for each SSEMODE124 mode,
+;; emitted as pcmpeq with the <ssevecsize> suffix.  Operand 1 is
+;; commutative with operand 2 ("%") and tied to the destination.
+(define_insn "sse2_eq<mode>3"
+  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+       (eq:SSEMODE124
+         (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
+         (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "TI")])
+
+;; Element-wise greater-than comparison (gt) for each SSEMODE124 mode,
+;; emitted as pcmpgt with the <ssevecsize> suffix.  Not commutative:
+;; operand 1 is a register tied to the destination.
+(define_insn "sse2_gt<mode>3"
+  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+       (gt:SSEMODE124
+         (match_operand:SSEMODE124 1 "register_operand" "0")
+         (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; One's complement for each SSEMODEI mode, expressed as XOR with an
+;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
+;; every element is constm1_rtx, forces it into a register and supplies
+;; it as operand 2.
+(define_expand "one_cmpl<mode>2"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+       (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+                     (match_dup 2)))]
+  "TARGET_SSE2"
+{
+  int nunits = GET_MODE_NUNITS (<MODE>mode);
+  rtvec all_ones = rtvec_alloc (nunits);
+  int idx;
+
+  /* Every element is the same constant, so fill order is irrelevant.  */
+  for (idx = nunits - 1; idx >= 0; --idx)
+    RTVEC_ELT (all_ones, idx) = constm1_rtx;
+
+  operands[2] = force_reg (<MODE>mode,
+                           gen_rtx_CONST_VECTOR (<MODE>mode, all_ones));
+})
+
+;; Expander for integer vector AND in each SSEMODEI mode.  The
+;; preparation statement hands the operands to
+;; ix86_fixup_binary_operands_no_copy with code AND.
+(define_expand "and<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+       (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+                     (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
+
+;; Integer vector AND, emitted as pand for every SSEMODEI mode.
+;; Operand 1 is commutative with operand 2 ("%") and tied to the
+;; destination.
+(define_insn "*and<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+       (and:SSEMODEI
+         (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+         (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+  "pand\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Integer vector AND-NOT: (not operand 1) AND operand 2, emitted as
+;; pandn.  Operand 1 must be a register tied to the destination.
+(define_insn "sse2_nand<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+       (and:SSEMODEI
+         (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
+         (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pandn\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Expander for integer vector inclusive OR in each SSEMODEI mode.  The
+;; preparation statement hands the operands to
+;; ix86_fixup_binary_operands_no_copy with code IOR.
+(define_expand "ior<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+       (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+                     (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
+
+;; Matcher that emits por.  Operand 1 is marked commutative ("%") and
+;; tied to the destination; operand 2 may be a register or memory.
+(define_insn "*ior<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+       (ior:SSEMODEI
+         (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+         (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
+  "por\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Expander for vector bitwise exclusive OR on the SSEMODEI modes.  The
+;; operands are passed through ix86_fixup_binary_operands_no_copy before
+;; the pattern is emitted.
+(define_expand "xor<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+       (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+                     (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
+
+;; Matcher that emits pxor.  Operand 1 is marked commutative ("%") and
+;; tied to the destination; operand 2 may be a register or memory.
+(define_insn "*xor<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+       (xor:SSEMODEI
+         (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+         (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+  "pxor\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Signed-saturating truncation of two V8HI vectors to V8QI each; the
+;; result is the concatenation with operand 1's elements first and
+;; operand 2's elements second.  Operand 1 is tied to the destination.
+(define_insn "sse2_packsswb"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (vec_concat:V16QI
+         (ss_truncate:V8QI
+           (match_operand:V8HI 1 "register_operand" "0"))
+         (ss_truncate:V8QI
+           (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+  "TARGET_SSE2"
+  "packsswb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Signed-saturating truncation of two V4SI vectors to V4HI each; the
+;; result is the concatenation with operand 1's elements first and
+;; operand 2's elements second.  Operand 1 is tied to the destination.
+(define_insn "sse2_packssdw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (vec_concat:V8HI
+         (ss_truncate:V4HI
+           (match_operand:V4SI 1 "register_operand" "0"))
+         (ss_truncate:V4HI
+           (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+  "TARGET_SSE2"
+  "packssdw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Unsigned-saturating truncation (us_truncate) of two V8HI vectors to
+;; V8QI each; the result is the concatenation with operand 1's elements
+;; first and operand 2's elements second.
+(define_insn "sse2_packuswb"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (vec_concat:V16QI
+         (us_truncate:V8QI
+           (match_operand:V8HI 1 "register_operand" "0"))
+         (us_truncate:V8QI
+           (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+  "TARGET_SSE2"
+  "packuswb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Interleave the upper eight bytes of operands 1 and 2.  In the
+;; 32-element concatenation, indices 8..15 belong to operand 1 and
+;; 24..31 to operand 2; the selection alternates between the two.
+(define_insn "sse2_punpckhbw"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (vec_select:V16QI
+         (vec_concat:V32QI
+           (match_operand:V16QI 1 "register_operand" "0")
+           (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 8)  (const_int 24)
+                    (const_int 9)  (const_int 25)
+                    (const_int 10) (const_int 26)
+                    (const_int 11) (const_int 27)
+                    (const_int 12) (const_int 28) 
+                    (const_int 13) (const_int 29)
+                    (const_int 14) (const_int 30)
+                    (const_int 15) (const_int 31)])))]
+  "TARGET_SSE2"
+  "punpckhbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Interleave the lower eight bytes of operands 1 and 2.  In the
+;; 32-element concatenation, indices 0..7 belong to operand 1 and
+;; 16..23 to operand 2; the selection alternates between the two.
+(define_insn "sse2_punpcklbw"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (vec_select:V16QI
+         (vec_concat:V32QI
+           (match_operand:V16QI 1 "register_operand" "0")
+           (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 0) (const_int 16)
+                    (const_int 1) (const_int 17)
+                    (const_int 2) (const_int 18)
+                    (const_int 3) (const_int 19)
+                    (const_int 4) (const_int 20)
+                    (const_int 5) (const_int 21)
+                    (const_int 6) (const_int 22)
+                    (const_int 7) (const_int 23)])))]
+  "TARGET_SSE2"
+  "punpcklbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Interleave the upper four 16-bit elements of operands 1 and 2
+;; (indices 4..7 of operand 1 alternating with 12..15, i.e. 4..7 of
+;; operand 2, in the 16-element concatenation).
+(define_insn "sse2_punpckhwd"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (vec_select:V8HI
+         (vec_concat:V16HI
+           (match_operand:V8HI 1 "register_operand" "0")
+           (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 4) (const_int 12)
+                    (const_int 5) (const_int 13)
+                    (const_int 6) (const_int 14)
+                    (const_int 7) (const_int 15)])))]
+  "TARGET_SSE2"
+  "punpckhwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Interleave the lower four 16-bit elements of operands 1 and 2
+;; (indices 0..3 of operand 1 alternating with 8..11, i.e. 0..3 of
+;; operand 2, in the 16-element concatenation).
+(define_insn "sse2_punpcklwd"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (vec_select:V8HI
+         (vec_concat:V16HI
+           (match_operand:V8HI 1 "register_operand" "0")
+           (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 0) (const_int 8)
+                    (const_int 1) (const_int 9)
+                    (const_int 2) (const_int 10)
+                    (const_int 3) (const_int 11)])))]
+  "TARGET_SSE2"
+  "punpcklwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Interleave the upper two 32-bit elements of operands 1 and 2
+;; (indices 2,3 of operand 1 alternating with 6,7, i.e. 2,3 of
+;; operand 2, in the 8-element concatenation).
+(define_insn "sse2_punpckhdq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (vec_select:V4SI
+         (vec_concat:V8SI
+           (match_operand:V4SI 1 "register_operand" "0")
+           (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 2) (const_int 6)
+                    (const_int 3) (const_int 7)])))]
+  "TARGET_SSE2"
+  "punpckhdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Interleave the lower two 32-bit elements of operands 1 and 2
+;; (indices 0,1 of operand 1 alternating with 4,5, i.e. 0,1 of
+;; operand 2, in the 8-element concatenation).
+(define_insn "sse2_punpckldq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (vec_select:V4SI
+         (vec_concat:V8SI
+           (match_operand:V4SI 1 "register_operand" "0")
+           (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 0) (const_int 4)
+                    (const_int 1) (const_int 5)])))]
+  "TARGET_SSE2"
+  "punpckldq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Result is { operand 1 element 1, operand 2 element 1 }: indices 1
+;; and 3 of the 4-element concatenation of the two V2DI operands.
+(define_insn "sse2_punpckhqdq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+       (vec_select:V2DI
+         (vec_concat:V4DI
+           (match_operand:V2DI 1 "register_operand" "0")
+           (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 1)
+                    (const_int 3)])))]
+  "TARGET_SSE2"
+  "punpckhqdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Result is { operand 1 element 0, operand 2 element 0 }: indices 0
+;; and 2 of the 4-element concatenation of the two V2DI operands.
+(define_insn "sse2_punpcklqdq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+       (vec_select:V2DI
+         (vec_concat:V4DI
+           (match_operand:V2DI 1 "register_operand" "0")
+           (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+         (parallel [(const_int 0)
+                    (const_int 2)])))]
+  "TARGET_SSE2"
+  "punpcklqdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Expander for inserting a 16-bit value into one element of a V8HI
+;; vector.  Callers pass the value as SImode (operand 2) and the element
+;; index 0..7 (operand 3); the preparation code rewrites both into the
+;; form the *sse2_pinsrw matcher expects.
+(define_expand "sse2_pinsrw"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+       (vec_merge:V8HI
+         (match_operand:V8HI 1 "register_operand" "")
+         (vec_duplicate:V8HI
+           (match_operand:SI 2 "nonimmediate_operand" ""))
+         (match_operand:SI 3 "const_0_to_7_operand" "")))]
+  "TARGET_SSE2"
+{
+  /* Narrow the inserted value to HImode.  */
+  operands[2] = gen_lowpart (HImode, operands[2]);
+  /* Turn the element index into a single-bit vec_merge mask.  */
+  operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
+})
+
+;; Matcher for pinsrw.  Operand 3 is a power-of-two vec_merge mask
+;; (1..128); the output code converts it back to an element index with
+;; exact_log2 so it can be printed as the instruction's immediate.
+;; Operand 2 is HImode but is printed with the %k modifier.
+(define_insn "*sse2_pinsrw"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (vec_merge:V8HI
+         (match_operand:V8HI 1 "register_operand" "0")
+         (vec_duplicate:V8HI
+           (match_operand:HI 2 "nonimmediate_operand" "rm"))
+         (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
+  "TARGET_SSE2"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Extract the 16-bit element of operand 1 selected by the constant
+;; index in operand 2 (0..7) and zero-extend it into a 32-bit general
+;; register.  The index is printed as an immediate, so it takes the "n"
+;; constraint; a matching constraint ("0") would tie the constant to the
+;; destination register, which can never be satisfied.
+(define_insn "sse2_pextrw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (zero_extend:SI
+         (vec_select:HI
+           (match_operand:V8HI 1 "register_operand" "x")
+           (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
+  "TARGET_SSE2"
+  "pextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Expander taking the shuffle control as a single integer (operand 2).
+;; It splits the low eight bits into four 2-bit element selectors and
+;; emits sse2_pshufd_1 with them; no RTL from this template is used
+;; because the preparation code ends with DONE.
+(define_expand "sse2_pshufd"
+  [(match_operand:V4SI 0 "register_operand" "")
+   (match_operand:V4SI 1 "nonimmediate_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  int mask = INTVAL (operands[2]);
+  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
+                               GEN_INT ((mask >> 0) & 3),
+                               GEN_INT ((mask >> 2) & 3),
+                               GEN_INT ((mask >> 4) & 3),
+                               GEN_INT ((mask >> 6) & 3)));
+  DONE;
+})
+
+;; Arbitrary selection of four V4SI elements from operand 1; operands
+;; 2..5 are the per-element source indices (each 0..3).  The output code
+;; packs the four indices back into one immediate, two bits each, and
+;; stores it in operands[2] for printing.
+(define_insn "sse2_pshufd_1"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (vec_select:V4SI
+         (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+         (parallel [(match_operand 2 "const_0_to_3_operand" "")
+                    (match_operand 3 "const_0_to_3_operand" "")
+                    (match_operand 4 "const_0_to_3_operand" "")
+                    (match_operand 5 "const_0_to_3_operand" "")])))]
+  "TARGET_SSE2"
+{
+  int mask = 0;
+  mask |= INTVAL (operands[2]) << 0;
+  mask |= INTVAL (operands[3]) << 2;
+  mask |= INTVAL (operands[4]) << 4;
+  mask |= INTVAL (operands[5]) << 6;
+  operands[2] = GEN_INT (mask);
+
+  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sselog1")
+   (set_attr "mode" "TI")])
+
+;; Expander taking the shuffle control as a single integer (operand 2).
+;; It splits the low eight bits into four 2-bit selectors and emits
+;; sse2_pshuflw_1 with them; the preparation code ends with DONE.
+(define_expand "sse2_pshuflw"
+  [(match_operand:V8HI 0 "register_operand" "")
+   (match_operand:V8HI 1 "nonimmediate_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  int mask = INTVAL (operands[2]);
+  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
+                                GEN_INT ((mask >> 0) & 3),
+                                GEN_INT ((mask >> 2) & 3),
+                                GEN_INT ((mask >> 4) & 3),
+                                GEN_INT ((mask >> 6) & 3)));
+  DONE;
+})
+
+;; Shuffle of the low four V8HI elements: result elements 0..3 are
+;; chosen by operands 2..5 (each 0..3), while elements 4..7 are copied
+;; from the same positions of operand 1.  The output code packs the four
+;; selectors into one immediate, two bits each.
+(define_insn "sse2_pshuflw_1"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (vec_select:V8HI
+         (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+         (parallel [(match_operand 2 "const_0_to_3_operand" "")
+                    (match_operand 3 "const_0_to_3_operand" "")
+                    (match_operand 4 "const_0_to_3_operand" "")
+                    (match_operand 5 "const_0_to_3_operand" "")
+                    (const_int 4)
+                    (const_int 5)
+                    (const_int 6)
+                    (const_int 7)])))]
+  "TARGET_SSE2"
+{
+  int mask = 0;
+  mask |= INTVAL (operands[2]) << 0;
+  mask |= INTVAL (operands[3]) << 2;
+  mask |= INTVAL (operands[4]) << 4;
+  mask |= INTVAL (operands[5]) << 6;
+  operands[2] = GEN_INT (mask);
+
+  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Expander taking the shuffle control as a single integer (operand 2).
+;; Each 2-bit selector is biased by 4 so that it names an element in the
+;; range 4..7, as sse2_pshufhw_1 requires; the preparation code ends
+;; with DONE.
+(define_expand "sse2_pshufhw"
+  [(match_operand:V8HI 0 "register_operand" "")
+   (match_operand:V8HI 1 "nonimmediate_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  int mask = INTVAL (operands[2]);
+  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
+                                GEN_INT (((mask >> 0) & 3) + 4),
+                                GEN_INT (((mask >> 2) & 3) + 4),
+                                GEN_INT (((mask >> 4) & 3) + 4),
+                                GEN_INT (((mask >> 6) & 3) + 4)));
+  DONE;
+})
+
+;; Shuffle of the high four V8HI elements: result elements 0..3 are
+;; copied from the same positions of operand 1, while elements 4..7 are
+;; chosen by operands 2..5 (each 4..7).  The output code removes the
+;; bias of 4 from each selector and packs them into one immediate.
+(define_insn "sse2_pshufhw_1"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (vec_select:V8HI
+         (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+         (parallel [(const_int 0)
+                    (const_int 1)
+                    (const_int 2)
+                    (const_int 3)
+                    (match_operand 2 "const_4_to_7_operand" "")
+                    (match_operand 3 "const_4_to_7_operand" "")
+                    (match_operand 4 "const_4_to_7_operand" "")
+                    (match_operand 5 "const_4_to_7_operand" "")])))]
+  "TARGET_SSE2"
+{
+  int mask = 0;
+  mask |= (INTVAL (operands[2]) - 4) << 0;
+  mask |= (INTVAL (operands[3]) - 4) << 2;
+  mask |= (INTVAL (operands[4]) - 4) << 4;
+  mask |= (INTVAL (operands[5]) - 4) << 6;
+  operands[2] = GEN_INT (mask);
+
+  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "TI")])
+
+;; Expander that places an SImode value in element 0 of a V4SI vector.
+;; The vec_merge mask of 1 takes element 0 from the duplicated scalar
+;; and the remaining elements from operand 2, which the preparation
+;; code sets to the V4SI zero constant.
+(define_expand "sse2_loadd"
+  [(set (match_operand:V4SI 0 "register_operand" "")
+       (vec_merge:V4SI
+         (vec_duplicate:V4SI
+           (match_operand:SI 1 "nonimmediate_operand" ""))
+         (match_dup 2)
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V4SImode);")
+
+;; Insert SImode operand 2 into element 0 of a V4SI vector whose other
+;; elements come from operand 1.  Note the operand numbering: here the
+;; scalar is operand 2 and the vector is operand 1.
+;;   Alternative 0: scalar in memory or a general register, operand 1
+;;     satisfying "C"; emitted as movd.
+;;     NOTE(review): "C" is presumably the all-zero vector constant --
+;;     confirm against the constraint definitions.
+;;   Alternative 1: scalar in an SSE register, operand 1 tied to the
+;;     destination; emitted as movss.
+(define_insn "sse2_loadld"
+  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+       (vec_merge:V4SI
+         (vec_duplicate:V4SI
+           (match_operand:SI 2 "nonimmediate_operand" "mr,x"))
+         (match_operand:V4SI 1 "vector_move_operand" "C,0")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   movd\t{%2, %0|%0, %2}
+   movss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "TI")])
+
+;; Extract element 0 of a V4SI register into an SImode destination.
+;; The insn has no assembler template of its own ("#"); once reload has
+;; completed it is split into a plain SImode move whose source is the
+;; SImode low part of operand 1.
+(define_insn_and_split "sse2_stored"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=mrx")
+       (vec_select:SI
+         (match_operand:V4SI 1 "register_operand" "x")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[1] = gen_lowpart (SImode, operands[1]);
+})
+
+;; Expander for extracting element 0 of a V2DI register into a DImode
+;; destination.  There is no preparation code; the emitted RTL is
+;; recognized by one of the *sse2_storeq matchers that follow.
+(define_expand "sse2_storeq"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+       (vec_select:DI
+         (match_operand:V2DI 1 "register_operand" "")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2"
+  "")
+
+;; Non-64-bit matcher for the element-0 DImode extract.  The template
+;; is "#", so the insn must be split before output (see the
+;; define_split on the same RTL shape that follows these matchers).
+(define_insn "*sse2_storeq"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=myx")
+       (vec_select:DI
+         (match_operand:V2DI 1 "register_operand" "x")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2 && !TARGET_64BIT"
+  "#")
+
+;; 64-bit matcher for the element-0 DImode extract.  Identical to the
+;; non-64-bit variant except that the destination constraint also
+;; includes "r".  The template is "#", so the insn must be split.
+(define_insn "*sse2_storeq_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=myxr")
+       (vec_select:DI
+         (match_operand:V2DI 1 "register_operand" "x")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "#")
+
+;; After reload, rewrite the element-0 extract from a V2DI register as
+;; a plain DImode move whose source is the DImode low part of operand 1.
+(define_split
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+       (vec_select:DI
+         (match_operand:V2DI 1 "register_operand" "")
+         (parallel [(const_int 0)])))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[1] = gen_lowpart (DImode, operands[1]);
+})
+
+;; Expander that places a DImode value in element 0 of a V2DI vector.
+;; The vec_merge mask of 1 takes element 0 from the duplicated scalar
+;; and element 1 from operand 2, which the preparation code sets to the
+;; V2DI zero constant.
+(define_expand "sse2_loadq"
+  [(set (match_operand:V2DI 0 "register_operand" "")
+       (vec_merge:V2DI
+         (vec_duplicate:V2DI
+           (match_operand:DI 1 "nonimmediate_operand" ""))
+         (match_dup 2)
+         (const_int 1)))]        
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V2DImode);")
+
+;; Non-64-bit matcher for inserting DImode operand 1 into element 0.
+;;   Alternative 0: scalar in memory, operand 2 satisfying "C"; movq.
+;;   Alternative 1: scalar in a "y" register, operand 2 satisfying "C";
+;;     movq2dq (disparaged with "?").
+;;   Alternative 2: scalar in an SSE register, operand 2 tied to the
+;;     destination; movq.
+;; NOTE(review): "C" is presumably the all-zero vector constant and "y"
+;; an MMX register -- confirm against the constraint definitions.
+(define_insn "*sse2_loadq"
+  [(set (match_operand:V2DI 0 "register_operand"     "=x,?x,x")
+       (vec_merge:V2DI
+         (vec_duplicate:V2DI
+           (match_operand:DI 1 "nonimmediate_operand" " m, y,x"))
+         (match_operand:V2DI 2 "vector_move_operand"  " C, C,0")
+         (const_int 1)))]
+  "TARGET_SSE2 && !TARGET_64BIT"
+  "@
+   movq\t{%1, %0|%0, %1}
+   movq2dq\t{%1, %0|%0, %1}
+   movq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "TI")])
+
+;; 64-bit matcher for inserting DImode operand 1 into element 0.  Same
+;; alternatives as the non-64-bit variant plus one more (alternative 2)
+;; that takes the scalar from a general register and is emitted as movd.
+(define_insn "*sse2_loadq_rex64"
+  [(set (match_operand:V2DI 0 "register_operand"       "=x,?x,?x,x")
+       (vec_merge:V2DI
+         (vec_duplicate:V2DI
+           (match_operand:DI 1 "nonimmediate_operand" " m, y, r,x"))
+         (match_operand:V2DI 2 "vector_move_operand"  " C, C, C,0")
+         (const_int 1)))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "@
+   movq\t{%1, %0|%0, %1}
+   movq2dq\t{%1, %0|%0, %1}
+   movd\t{%1, %0|%0, %1}
+   movq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Miscellaneous
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Unsigned rounding average of two V16QI vectors: each pair of bytes
+;; is zero-extended to 16 bits, summed together with 1, shifted right
+;; by one and truncated back to a byte.  The vector of ones is given
+;; the widened mode V16HI so that both operands of the outer plus have
+;; the same mode as the plus itself.  Operand 1 is commutative with
+;; operand 2 and tied to the destination.
+(define_insn "sse2_uavgv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+       (truncate:V16QI
+         (lshiftrt:V16HI
+           (plus:V16HI
+             (plus:V16HI
+               (zero_extend:V16HI
+                 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
+               (zero_extend:V16HI
+                 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
+             (const_vector:V16HI [(const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
+  "pavgb\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Unsigned rounding average of two V8HI vectors: each pair of 16-bit
+;; elements is zero-extended to 32 bits, summed together with 1,
+;; shifted right by one and truncated back to 16 bits.  The vector of
+;; ones is given the widened mode V8SI so that both operands of the
+;; outer plus have the same mode as the plus itself.  Operand 1 is
+;; commutative with operand 2 and tied to the destination.
+(define_insn "sse2_uavgv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+       (truncate:V8HI
+         (lshiftrt:V8SI
+           (plus:V8SI
+             (plus:V8SI
+               (zero_extend:V8SI
+                 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+               (zero_extend:V8SI
+                 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+             (const_vector:V8SI [(const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+  "pavgw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; The correct representation for this is absolutely enormous, and
+;; surely not generally useful.  The operation is therefore modelled as
+;; an opaque unspec (UNSPEC_PSADBW) of the two V16QI inputs producing a
+;; V2DI result; operand 1 is tied to the destination.
+(define_insn "sse2_psadbw"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+       (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
+                     (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+                    UNSPEC_PSADBW))]
+  "TARGET_SSE2"
+  "psadbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; movmskps: produces an SImode value in a general register from a V4SF
+;; SSE register.  Modelled as an opaque unspec (UNSPEC_MOVMSK).
+(define_insn "sse_movmskps"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
+                  UNSPEC_MOVMSK))]
+  "TARGET_SSE"
+  "movmskps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+;; movmskpd: produces an SImode value in a general register from a V2DF
+;; SSE register.  Modelled as an opaque unspec (UNSPEC_MOVMSK).
+(define_insn "sse2_movmskpd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
+                  UNSPEC_MOVMSK))]
+  "TARGET_SSE2"
+  "movmskpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; pmovmskb: produces an SImode value in a general register from a
+;; V16QI SSE register.  Modelled as an opaque unspec (UNSPEC_MOVMSK).
+;; NOTE(review): the "mode" attribute is V2DF although the input is an
+;; integer vector; this may be a copy from sse2_movmskpd or may be
+;; deliberate for attribute computations that key on the mode -- confirm
+;; before changing.
+(define_insn "sse2_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
+                  UNSPEC_MOVMSK))]
+  "TARGET_SSE2"
+  "pmovmskb\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; Expander for the masked byte store.  Memory operand 0 is both the
+;; destination and, via match_dup, an input of the unspec, so the old
+;; memory contents are part of the modelled operation.  There is no
+;; preparation code.
+(define_expand "sse2_maskmovdqu"
+  [(set (match_operand:V16QI 0 "memory_operand" "")
+       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+                      (match_operand:V16QI 2 "register_operand" "x")
+                      (match_dup 0)]
+                     UNSPEC_MASKMOV))]
+  "TARGET_SSE2"
+  "")
+
+;; Non-64-bit matcher for maskmovdqu.  The store address is an SImode
+;; register restricted by constraint "D"; it is not printed in the
+;; template, so the instruction uses it implicitly.
+(define_insn "*sse2_maskmovdqu"
+  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+                      (match_operand:V16QI 2 "register_operand" "x")
+                      (mem:V16QI (match_dup 0))]
+                     UNSPEC_MASKMOV))]
+  "TARGET_SSE2 && !TARGET_64BIT"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovdqu\t{%2, %1|%1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; 64-bit matcher for maskmovdqu.  Identical to the non-64-bit variant
+;; except that the implicit address register (constraint "D") is DImode.
+(define_insn "*sse2_maskmovdqu_rex64"
+  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
+       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+                      (match_operand:V16QI 2 "register_operand" "x")
+                      (mem:V16QI (match_dup 0))]
+                     UNSPEC_MASKMOV))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovdqu\t{%2, %1|%1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; clflush on the address given by operand 0.  Modelled as an
+;; unspec_volatile (UNSPECV_CLFLUSH); the "memory" attribute is set to
+;; "unknown".
+(define_insn "sse2_clflush"
+  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+                   UNSPECV_CLFLUSH)]
+  "TARGET_SSE2"
+  "clflush\t%a0"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+;; Expander for mfence.  Takes no operands from the caller; operand 0
+;; is created here as a volatile BLKmode MEM whose address is a SCRATCH,
+;; and the pattern sets that MEM from an unspec of itself.
+(define_expand "sse2_mfence"
+  [(set (match_dup 0)
+       (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+;; Matcher for the RTL produced by the sse2_mfence expander; operand 0
+;; has no predicate or constraint.  Emits mfence.
+(define_insn "*sse2_mfence"
+  [(set (match_operand:BLK 0 "" "")
+       (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_SSE2"
+  "mfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+;; Expander for lfence.  Takes no operands from the caller; operand 0
+;; is created here as a volatile BLKmode MEM whose address is a SCRATCH,
+;; and the pattern sets that MEM from an unspec of itself.
+(define_expand "sse2_lfence"
+  [(set (match_dup 0)
+       (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+;; Matcher for the RTL produced by the sse2_lfence expander; operand 0
+;; has no predicate or constraint.  Emits lfence.
+(define_insn "*sse2_lfence"
+  [(set (match_operand:BLK 0 "" "")
+       (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
+  "lfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+;; mwait, modelled as an unspec_volatile (UNSPECV_MWAIT) of two SImode
+;; register operands restricted by constraints "a" and "c".  The
+;; encoded length is given explicitly as 3.
+(define_insn "sse3_mwait"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+                    (match_operand:SI 1 "register_operand" "c")]
+                   UNSPECV_MWAIT)]
+  "TARGET_SSE3"
+  "mwait\t%0, %1"
+  [(set_attr "length" "3")])
+
+;; monitor, modelled as an unspec_volatile (UNSPECV_MONITOR) of three
+;; SImode register operands restricted by constraints "a", "c" and "d".
+;; The encoded length is given explicitly as 3.
+(define_insn "sse3_monitor"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+                    (match_operand:SI 1 "register_operand" "c")
+                    (match_operand:SI 2 "register_operand" "d")]
+                   UNSPECV_MONITOR)]
+  "TARGET_SSE3"
+  "monitor\t%0, %1, %2"
+  [(set_attr "length" "3")])
index 2973bfcb7d40c7262a580cc70fd017026c9f0ae3..bd05d6327d21a8049fdba343c3eec2b677aeb1ea 100644 (file)
@@ -1,3 +1,8 @@
+2005-01-08  Richard Henderson  <rth@redhat.com>
+
+       * lib/target-supports.exp (check_effective_target_vect_no_bitwise):
+       False for x86 and x86-64.
+
 2005-01-08  Diego Novillo  <dnovillo@redhat.com>
 
        PR tree-optimization/18241
index defb4d4e75db4c8dd2e8fbad6869d66f7e2c7333..1f966de88819b8f15648df232cf5ecdca571c0ed 100644 (file)
@@ -563,10 +563,6 @@ proc check_effective_target_vect_no_bitwise { } {
        verbose "check_effective_target_vect_no_bitwise: using cached result" 2
     } else {
        set et_vect_no_bitwise_saved 0
-       if { [istarget i?86-*-*]
-            || [istarget x86_64-*-*] } {
-           set et_vect_no_bitwise_saved 1
-       }
     }
     verbose "check_effective_target_vect_no_bitwise: returning $et_vect_no_bitwise_saved" 2
     return $et_vect_no_bitwise_saved