From ef719a44ef6afad4baa91dd3217e542a1a2f2683 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 8 Jan 2005 16:51:31 -0800 Subject: [PATCH] emmintrin.h (_mm_cvtsi128_si32): Move earlier. * config/i386/emmintrin.h (_mm_cvtsi128_si32): Move earlier. (_mm_cvtsi128_si64x): Likewise. (_mm_srl_epi64, _mm_srl_epi32, _mm_srl_epi16, _mm_sra_epi32, _mm_sra_epi16, _mm_sll_epi64, _mm_sll_epi32, _mm_sll_epi16): Use the _mm_{srl,sll}i_foo counterpart, and _mm_cvtsi128_si32. * config/i386/i386-modes.def: Add V16HI, V32QI, V4DF, V8SF. * config/i386/i386-protos.h: Update. * config/i386/i386.c (print_operand): Add 'H'. (ix86_fixup_binary_operands): Split out from ... (ix86_expand_binary_operator): ... here. (ix86_fixup_binary_operands_no_copy): New. (ix86_expand_fp_absneg_operator): Handle vector mode results. (bdesc_2arg): Update names for sse{,2,3}_ prefixes. (ix86_init_mmx_sse_builtins): Remove *maskncmp* special cases. (safe_vector_operand): Use CONST0_RTX. (ix86_expand_binop_builtin): Use ix86_fixup_binary_operands. (ix86_expand_builtin): Merge CODE_FOR_sse2_maskmovdqu_rex64 and CODE_FOR_sse2_maskmovdqu. Special case SSE version of MASKMOVDQU expansion. Update names for sse{,2,3}_ prefixes. Remove *maskncmp* special cases. * config/i386/i386.h (IX86_BUILTIN_CMPNGTSS): New. (IX86_BUILTIN_CMPNGESS): New. * config/i386/i386.md (UNSPEC_FIX_NOTRUNC): New. (attr type): Add sselog1. (attr unit, attr memory): Handle it. (movti, movti_internal, movti_rex64): Move near other integer moves. (movtf, movtf_internal): Move near other fp moves. 
(SSEMODE, SSEMODEI, vec_setv2df, vec_extractv2df, vec_initv2df, vec_setv4sf, vec_extractv4sf, vec_initv4sf, movv4sf, movv4sf_internal, movv2df, movv2df_internal, mov, mov_internal, movmisalign, sse_movups_1, sse_movmskps, sse_movntv4sf, sse_movhlps, sse_movlhps, sse_storehps, sse_loadhps, sse_storelps, sse_loadlps, sse_loadss, sse_loadss_1, sse_movss, sse_storess, sse_shufps, addv4sf3, vmaddv4sf3, subv4sf3, vmsubv4sf3, negv4sf2, mulv4sf3, vmmulv4sf3, divv4sf3, vmdivv4sf3, rcpv4sf2, vmrcpv4sf2, rsqrtv4sf2, vmrsqrtv4sf2, sqrtv4sf2, vmsqrtv4sf2, sse_andv4sf3, sse_nandv4sf3, sse_iorv4sf3, sse_xorv4sf3, sse2_andv2df3, sse2_nandv2df3, sse2_iorv2df3, sse2_xorv2df3, sse2_andv2di3, sse2_nandv2di3, sse2_iorv2di3, sse2_xorv2di3, maskcmpv4sf3, vmmaskcmpv4sf3, sse_comi, sse_ucomi, sse_unpckhps, sse_unpcklps, smaxv4sf3, vmsmaxv4sf3, sminv4sf3, vmsminv4sf3, cvtpi2ps, cvtps2pi, cvttps2pi, cvtsi2ss, cvtsi2ssq, cvtss2si, cvtss2siq, cvttss2si, cvttss2siq, addv2df3, vmaddv2df3, subv2df3, vmsubv2df3, mulv2df3, vmmulv2df3, divv2df3, vmdivv2df3, smaxv2df3, vmsmaxv2df3, sminv2df3, vmsminv2df3, sqrtv2df2, vmsqrtv2df2, maskcmpv2df3, vmmaskcmpv2df3, sse2_comi, sse2_ucomi, sse2_movmskpd, sse2_pmovmskb, sse2_maskmovdqu, sse2_maskmovdqu_rex64, sse2_movntv2df, sse2_movntv2di, sse2_movntsi, cvtdq2ps, cvtps2dq, cvttps2dq, cvtdq2pd, cvtpd2dq, cvttpd2dq, cvtpd2pi, cvttpd2pi, cvtpi2pd, cvtsd2si, cvtsd2siq, cvttsd2si, cvttsd2siq, cvtsi2sd, cvtsi2sdq, cvtsd2ss, cvtss2sd, cvtpd2ps, cvtps2pd, addv16qi3, addv8hi3, addv4si3, addv2di3, ssaddv16qi3, ssaddv8hi3, usaddv16qi3, usaddv8hi3, subv16qi3, subv8hi3, subv4si3, subv2di3, sssubv16qi3, sssubv8hi3, ussubv16qi3, ussubv8hi3, mulv8hi3, smulv8hi3_highpart, umulv8hi3_highpart, sse2_umulsidi3, sse2_umulv2siv2di3, sse2_pmaddwd, sse2_uavgv16qi3, sse2_uavgv8hi3, sse2_psadbw, sse2_pinsrw, sse2_pextrw, sse2_pshufd, sse2_pshuflw, sse2_pshufhw, eqv16qi3, eqv8hi3, eqv4si3, gtv16qi3, gtv8hi3, gtv4si3, umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3, ashrv8hi3, ashrv4si3, 
lshrv8hi3, lshrv4si3, lshrv2di3, ashlv8hi3, ashlv4si3, ashlv2di3, sse2_ashlti3, sse2_lshrti3, sse2_unpckhpd, sse2_unpcklpd, sse2_packsswb, sse2_packssdw, sse2_packuswb, sse2_punpckhbw, sse2_punpckhwd, sse2_punpckhdq, sse2_punpcklbw, sse2_punpcklwd, sse2_punpckldq, sse2_punpcklqdq, sse2_punpckhqdq, sse2_movupd, sse2_movdqu, sse2_movdq2q, sse2_movdq2q_rex64, sse2_movq2dq, sse2_movq2dq_rex64, sse2_loadd, sse2_stored, sse2_storehpd, sse2_loadhpd, sse2_storelpd, sse2_loadlpd, sse2_movsd, sse2_loadsd, sse2_loadsd_1, sse2_storesd, sse2_shufpd, sse2_clflush, sse2_mfence, mfence_insn, sse2_lfence, lfence_insn, mwait, monitor, addsubv4sf3, addsubv2df3, haddv4sf3, haddv2df3, hsubv4sf3, hsubv2df3, movshdup, movsldup, lddqu, loadddup, movddup): Move to sse.md. Any with non-optabs meanings renamed with an "sse{,2,3}_" prefix at the same time. (SSEPUSH, push): Remove. (MMXPUSH, push): Remove. (sse_movaps, sse_movaps_1, sse_movups): Remove. (sse2_movapd, sse2_movdqa, sse2_movq): Remove. (sse2_andti3, sse2_nandti3, sse2_iorti3, sse2_xorti3): Remove. (sse_clrv4sf, sse_clrv2df, sse2_clrti): Remove. (maskncmpv4sf3, vmmaskncmpv4sf3): Remove. (maskncmpv2df3, vmmaskncmpv2df3): Remove. (ashrv8hi3_ti, ashrv4si3_ti, lshrv8hi3_ti, lshrv4si3_ti): Remove. (lshrv2di3_ti, ashlv8hi3_ti, ashlv4si3_ti, ashlv2di3_ti): Remove. * config/i386/athlon.md (athlon_sselog_load): Handle sselog1. (athlon_sselog_load_k8, athlon_sselog, athlon_sselog_k8): Likewise. * config/i386/ppro.md (ppro_sse_div_V4SF_load): Fix memory attr. (ppro_sse_log_V4SF_load): Similarly. Handle sselog1. (ppro_sse_log_V4SF): Handle sselog1. * config/i386/predicates.md (const_0_to_1_operand): New. (const_0_to_255_mul_8_operand): New. (const_1_to_31_operand): Rename from const_int_1_31_operand. (const_2_to_3_operand, const_4_to_7_operand): New. * config/i386/sse.md: New file. (SSEMODE12, SSEMODE24, SSEMODE124, SSEMODE248, ssevecsize): New. (sse_movups): Rename from sse_movups_1. (sse_loadlss): Rename from sse_loadss_1. 
(andv4sf3, iorv4sf3, xorv4sf3, andv2df3): Remove the sse prefix from the name. (negv4sf2): Use ix86_expand_fp_absneg_operator. (absv4sf2, negv2df, absv2df): New. (addv4sf3): Add expander to call ix86_fixup_binary_operands_no_copy. (subv4sf3, mulv4sf3, divv4sf3, smaxv4sf3, sminv4sf3, andv4sf3, iorv4sf3, xorv4sf3, addv2df3, subv2df3, mulv2df3, divv2df3, smaxv2df3, sminv2df3, andv2df3, iorv2df3, xorv2df3, mulv8hi3, umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3): Likewise. (sse3_addsubv4sf3): Model correctly. (sse3_haddv4sf3, sse3_hsubv4sf3, sse3_addsubv2df3, sse3_haddv2df3, sse3_hsubv2df3, sse2_ashlti3, sse2_lshrti3): Likewise. (sse_movhlps): Model with vec_select+vec_concat. (sse_movlhps, sse_unpckhps, sse_unpcklps, sse3_movshdup, sse3_movsldup, sse_shufps, sse_shufps_1, sse2_unpckhpd, sse3_movddup, sse2_unpcklpd, sse2_shufpd, sse2_shufpd_1, sse2_punpckhbw, sse2_punpcklbw, sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq, sse2_punpckhqdq, sse2_punpcklqdq, sse2_pshufd, sse2_pshufd_1, sse2_pshuflw, sse2_pshuflw_1, sse2_pshufhw, sse2_pshufhw_1): Likewise. (neg2, one_cmpl2): New. (add3, sse2_ssadd3, sse2_usadd3, sub3, sse2_sssub3, sse2_ussub3, ashr3, lshr3, sse2_eq3, sse2_gt3, and3, sse_nand3, ior3, xor3): Macroize from existing patterns. (addv4sf3, sse_vmaddv4sf3, mulv4sf3, sse_vmmulv4sf3, smaxv4sf3, sse_vmsmaxv4sf3, sminv4sf3, sse_vmsminv4sf3, addv2df3, sse2_vmaddv2df3, mulv2df3, sse2_vmmulv2df3, smaxv2df3, sse2_vmsmaxv2df3, sminv2df3, sse2_vmsminv2df3, umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3): Mark commutative operands. Use ix86_binary_operator_ok. (sse_unpckhps, sse_unpcklps, sse2_packsswb, sse2_packssdw, sse2_packuswb, sse2_punpckhbw, sse2_punpcklbw, sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq, sse2_punpckhqdq, sse2_punpcklqdq): Allow operand2 in memory. (sse_movhlps, sse_movlhps, sse2_unpckhpd, sse2_unpcklpd, sse2_movsd): Add memory alternatives. (sse_storelps): Turn expander into an insn; split after reload. 
(sse_storess, sse2_loadhpd, sse2_loadlpd): Add non-xmm inputs. (sse2_storehpd, sse2_storelpd): Add non-xmm outputs. From-SVN: r93101 --- gcc/ChangeLog | 139 + gcc/config/i386/athlon.md | 8 +- gcc/config/i386/emmintrin.h | 123 +- gcc/config/i386/i386-modes.def | 4 + gcc/config/i386/i386-protos.h | 4 + gcc/config/i386/i386.c | 498 ++- gcc/config/i386/i386.h | 2 + gcc/config/i386/i386.md | 5656 ++++++------------------- gcc/config/i386/ppro.md | 8 +- gcc/config/i386/predicates.md | 35 +- gcc/config/i386/sse.md | 3111 ++++++++++++++ gcc/testsuite/ChangeLog | 5 + gcc/testsuite/lib/target-supports.exp | 4 - 13 files changed, 4911 insertions(+), 4686 deletions(-) create mode 100644 gcc/config/i386/sse.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 243d0eb2bb2..5238e1e3276 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,142 @@ +2005-01-08 Richard Henderson + + * config/i386/emmintrin.h (_mm_cvtsi128_si32): Move earlier. + (_mm_cvtsi128_si64x): Likewise. + (_mm_srl_epi64, _mm_srl_epi32, _mm_srl_epi16, _mm_sra_epi32, + _mm_sra_epi16, _mm_sll_epi64, _mm_sll_epi32, _mm_sll_epi16): Use + the _mm_{srl,sll}i_foo counterpart, and _mm_cvtsi128_si32. + * config/i386/i386-modes.def: Add V16HI, V32QI, V4DF, V8SF. + * config/i386/i386-protos.h: Update. + * config/i386/i386.c (print_operand): Add 'H'. + (ix86_fixup_binary_operands): Split out from ... + (ix86_expand_binary_operator): ... here. + (ix86_fixup_binary_operands_no_copy): New. + (ix86_expand_fp_absneg_operator): Handle vector mode results. + (bdesc_2arg): Update names for sse{,2,3}_ prefixes. + (ix86_init_mmx_sse_builtins): Remove *maskncmp* special cases. + (safe_vector_operand): Use CONST0_RTX. + (ix86_expand_binop_builtin): Use ix86_fixup_binary_operands. + (ix86_expand_builtin): Merge CODE_FOR_sse2_maskmovdqu_rex64 and + CODE_FOR_sse2_maskmovdqu. Special case SSE version of MASKMOVDQU + expansion. Update names for sse{,2,3}_ prefixes. Remove *maskncmp* + special cases. 
+ * config/i386/i386.h (IX86_BUILTIN_CMPNGTSS): New. + (IX86_BUILTIN_CMPNGESS): New. + * config/i386/i386.md (UNSPEC_FIX_NOTRUNC): New. + (attr type): Add sselog1. + (attr unit, attr memory): Handle it. + (movti, movti_internal, movti_rex64): Move near other integer moves. + (movtf, movtf_internal): Move near other fp moves. + (SSEMODE, SSEMODEI, vec_setv2df, vec_extractv2df, vec_initv2df, + vec_setv4sf, vec_extractv4sf, vec_initv4sf, movv4sf, movv4sf_internal, + movv2df, movv2df_internal, mov, mov_internal, + movmisalign, sse_movups_1, sse_movmskps, sse_movntv4sf, + sse_movhlps, sse_movlhps, sse_storehps, sse_loadhps, sse_storelps, + sse_loadlps, sse_loadss, sse_loadss_1, sse_movss, sse_storess, + sse_shufps, addv4sf3, vmaddv4sf3, subv4sf3, vmsubv4sf3, negv4sf2, + mulv4sf3, vmmulv4sf3, divv4sf3, vmdivv4sf3, rcpv4sf2, vmrcpv4sf2, + rsqrtv4sf2, vmrsqrtv4sf2, sqrtv4sf2, vmsqrtv4sf2, sse_andv4sf3, + sse_nandv4sf3, sse_iorv4sf3, sse_xorv4sf3, sse2_andv2df3, + sse2_nandv2df3, sse2_iorv2df3, sse2_xorv2df3, sse2_andv2di3, + sse2_nandv2di3, sse2_iorv2di3, sse2_xorv2di3, maskcmpv4sf3, + vmmaskcmpv4sf3, sse_comi, sse_ucomi, sse_unpckhps, sse_unpcklps, + smaxv4sf3, vmsmaxv4sf3, sminv4sf3, vmsminv4sf3, cvtpi2ps, cvtps2pi, + cvttps2pi, cvtsi2ss, cvtsi2ssq, cvtss2si, cvtss2siq, cvttss2si, + cvttss2siq, addv2df3, vmaddv2df3, subv2df3, vmsubv2df3, mulv2df3, + vmmulv2df3, divv2df3, vmdivv2df3, smaxv2df3, vmsmaxv2df3, sminv2df3, + vmsminv2df3, sqrtv2df2, vmsqrtv2df2, maskcmpv2df3, vmmaskcmpv2df3, + sse2_comi, sse2_ucomi, sse2_movmskpd, sse2_pmovmskb, sse2_maskmovdqu, + sse2_maskmovdqu_rex64, sse2_movntv2df, sse2_movntv2di, sse2_movntsi, + cvtdq2ps, cvtps2dq, cvttps2dq, cvtdq2pd, cvtpd2dq, cvttpd2dq, + cvtpd2pi, cvttpd2pi, cvtpi2pd, cvtsd2si, cvtsd2siq, cvttsd2si, + cvttsd2siq, cvtsi2sd, cvtsi2sdq, cvtsd2ss, cvtss2sd, cvtpd2ps, + cvtps2pd, addv16qi3, addv8hi3, addv4si3, addv2di3, ssaddv16qi3, + ssaddv8hi3, usaddv16qi3, usaddv8hi3, subv16qi3, subv8hi3, subv4si3, + subv2di3, 
sssubv16qi3, sssubv8hi3, ussubv16qi3, ussubv8hi3, mulv8hi3, + smulv8hi3_highpart, umulv8hi3_highpart, sse2_umulsidi3, + sse2_umulv2siv2di3, sse2_pmaddwd, sse2_uavgv16qi3, sse2_uavgv8hi3, + sse2_psadbw, sse2_pinsrw, sse2_pextrw, sse2_pshufd, sse2_pshuflw, + sse2_pshufhw, eqv16qi3, eqv8hi3, eqv4si3, gtv16qi3, gtv8hi3, + gtv4si3, umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3, ashrv8hi3, + ashrv4si3, lshrv8hi3, lshrv4si3, lshrv2di3, ashlv8hi3, ashlv4si3, + ashlv2di3, sse2_ashlti3, sse2_lshrti3, sse2_unpckhpd, sse2_unpcklpd, + sse2_packsswb, sse2_packssdw, sse2_packuswb, sse2_punpckhbw, + sse2_punpckhwd, sse2_punpckhdq, sse2_punpcklbw, sse2_punpcklwd, + sse2_punpckldq, sse2_punpcklqdq, sse2_punpckhqdq, sse2_movupd, + sse2_movdqu, sse2_movdq2q, sse2_movdq2q_rex64, sse2_movq2dq, + sse2_movq2dq_rex64, sse2_loadd, sse2_stored, sse2_storehpd, + sse2_loadhpd, sse2_storelpd, sse2_loadlpd, sse2_movsd, sse2_loadsd, + sse2_loadsd_1, sse2_storesd, sse2_shufpd, sse2_clflush, sse2_mfence, + mfence_insn, sse2_lfence, lfence_insn, mwait, monitor, addsubv4sf3, + addsubv2df3, haddv4sf3, haddv2df3, hsubv4sf3, hsubv2df3, movshdup, + movsldup, lddqu, loadddup, movddup): Move to sse.md. Any with + non-optabs meanings renamed with an "sse{,2,3}_" prefix at the + same time. + (SSEPUSH, push): Remove. + (MMXPUSH, push): Remove. + (sse_movaps, sse_movaps_1, sse_movups): Remove. + (sse2_movapd, sse2_movdqa, sse2_movq): Remove. + (sse2_andti3, sse2_nandti3, sse2_iorti3, sse2_xorti3): Remove. + (sse_clrv4sf, sse_clrv2df, sse2_clrti): Remove. + (maskncmpv4sf3, vmmaskncmpv4sf3): Remove. + (maskncmpv2df3, vmmaskncmpv2df3): Remove. + (ashrv8hi3_ti, ashrv4si3_ti, lshrv8hi3_ti, lshrv4si3_ti): Remove. + (lshrv2di3_ti, ashlv8hi3_ti, ashlv4si3_ti, ashlv2di3_ti): Remove. + * config/i386/athlon.md (athlon_sselog_load): Handle sselog1. + (athlon_sselog_load_k8, athlon_sselog, athlon_sselog_k8): Likewise. + * config/i386/ppro.md (ppro_sse_div_V4SF_load): Fix memory attr. + (ppro_sse_log_V4SF_load): Similarly. 
Handle sselog1. + (ppro_sse_log_V4SF): Handle sselog1. + * config/i386/predicates.md (const_0_to_1_operand): New. + (const_0_to_255_mul_8_operand): New. + (const_1_to_31_operand): Rename from const_int_1_31_operand. + (const_2_to_3_operand, const_4_to_7_operand): New. + * config/i386/sse.md: New file. + (SSEMODE12, SSEMODE24, SSEMODE124, SSEMODE248, ssevecsize): New. + (sse_movups): Rename from sse_movups_1. + (sse_loadlss): Rename from sse_loadss_1. + (andv4sf3, iorv4sf3, xorv4sf3, andv2df3): Remove the sse prefix + from the name. + (negv4sf2): Use ix86_expand_fp_absneg_operator. + (absv4sf2, negv2df, absv2df): New. + (addv4sf3): Add expander to call ix86_fixup_binary_operands_no_copy. + (subv4sf3, mulv4sf3, divv4sf3, smaxv4sf3, sminv4sf3, andv4sf3, + iorv4sf3, xorv4sf3, addv2df3, subv2df3, mulv2df3, divv2df3, + smaxv2df3, sminv2df3, andv2df3, iorv2df3, xorv2df3, mulv8hi3, + umaxv16qi3, smaxv8hi3, uminv16qi3, sminv8hi3): Likewise. + (sse3_addsubv4sf3): Model correctly. + sse3_haddv4sf3, sse3_hsubv4sf3, sse3_addsubv2df3, sse3_haddv2df3, + sse3_hsubv2df3, sse2_ashlti3, sse2_lshrti3): Likewise. + (sse_movhlps): Model with vec_select+vec_concat. + (sse_movlhps, sse_unpckhps, sse_unpcklps, sse3_movshdup, + sse3_movsldup, sse_shufps, sse_shufps_1, sse2_unpckhpd, sse3_movddup, + sse2_unpcklpd, sse2_shufpd, sse2_shufpd_1, sse2_punpckhbw, + sse2_punpcklbw, sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, + sse2_punpckldq, sse2_punpckhqdq, sse2_punpcklqdq, sse2_pshufd, + sse2_pshufd_1, sse2_pshuflw, sse2_pshuflw_1, sse2_pshufhw, + sse2_pshufhw_1): Likewise. + (neg2, one_cmpl2): New. + (add3, sse2_ssadd3, sse2_usadd3, + sub3, sse2_sssub3, sse2_ussub3, + ashr3, lshr3, sse2_eq3, + sse2_gt3, and3, sse_nand3, + ior3, xor3): Macroize from existing patterns. 
+ (addv4sf3, sse_vmaddv4sf3, mulv4sf3, sse_vmmulv4sf3, smaxv4sf3, + sse_vmsmaxv4sf3, sminv4sf3, sse_vmsminv4sf3, addv2df3, sse2_vmaddv2df3, + mulv2df3, sse2_vmmulv2df3, smaxv2df3, sse2_vmsmaxv2df3, sminv2df3, + sse2_vmsminv2df3, umaxv16qi3, smaxv8hi3, uminv16qi3 + sminv8hi3): Mark commutative + operands. Use ix86_binary_operator_ok. + (sse_unpckhps, sse_unpcklps, sse2_packsswb, sse2_packssdw, + sse2_packuswb, sse2_punpckhbw, sse2_punpcklbw, sse2_punpckhwd, + sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq, sse2_punpckhqdq, + sse2_punpcklqdq): Allow operand2 in memory. + (sse_movhlps, sse_movlhps, sse2_unpckhpd, sse2_unpcklpd + sse2_movsd): Add memory alternatives. + (sse_storelps): Turn expander into an insn; split after reload. + (sse_storess, sse2_loadhpd, sse2_loadlpd): Add non-xmm inputs. + (sse2_storehpd, sse2_storelpd): Add non-xmm outputs. + 2005-01-08 Eric Botcazou * configure.ac (DWARF-2 debug_line): Use objdump. diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md index 79c9e023fa6..1029a818196 100644 --- a/gcc/config/i386/athlon.md +++ b/gcc/config/i386/athlon.md @@ -565,21 +565,21 @@ (define_insn_reservation "athlon_sselog_load" 3 (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "sselog") + (and (eq_attr "type" "sselog,sselog1") (eq_attr "memory" "load"))) "athlon-vector,athlon-fpload2,(athlon-fmul*2)") (define_insn_reservation "athlon_sselog_load_k8" 5 (and (eq_attr "cpu" "k8") - (and (eq_attr "type" "sselog") + (and (eq_attr "type" "sselog,sselog1") (eq_attr "memory" "load"))) "athlon-double,athlon-fpload2k8,(athlon-fmul*2)") (define_insn_reservation "athlon_sselog" 3 (and (eq_attr "cpu" "athlon") - (eq_attr "type" "sselog")) + (eq_attr "type" "sselog,sselog1")) "athlon-vector,athlon-fpsched,athlon-fmul*2") (define_insn_reservation "athlon_sselog_k8" 3 (and (eq_attr "cpu" "k8") - (eq_attr "type" "sselog")) + (eq_attr "type" "sselog,sselog1")) "athlon-double,athlon-fpsched,athlon-fmul") ;; ??? 
pcmp executes in addmul, probably not worthwhile to bother about that. (define_insn_reservation "athlon_ssecmp_load" 2 diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 49c6a7f8147..2d2b710d734 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -177,6 +177,22 @@ _mm_storer_pd (double *__P, __m128d __A) __builtin_ia32_storeapd (__P, __tmp); } +static __inline int +_mm_cvtsi128_si32 (__m128i __A) +{ + int __tmp; + __builtin_ia32_stored (&__tmp, (__v4si)__A); + return __tmp; +} + +#ifdef __x86_64__ +static __inline long long +_mm_cvtsi128_si64x (__m128i __A) +{ + return __builtin_ia32_movdq2q ((__v2di)__A); +} +#endif + /* Sets the low DPFP value of A from the low value of B. */ static __inline __m128d _mm_move_sd (__m128d __A, __m128d __B) @@ -1157,115 +1173,118 @@ _mm_mul_epu32 (__m128i __A, __m128i __B) } static __inline __m128i -_mm_sll_epi16 (__m128i __A, __m128i __B) +_mm_slli_epi16 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); } static __inline __m128i -_mm_sll_epi32 (__m128i __A, __m128i __B) +_mm_slli_epi32 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); } static __inline __m128i -_mm_sll_epi64 (__m128i __A, __m128i __B) +_mm_slli_epi64 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); } static __inline __m128i -_mm_sra_epi16 (__m128i __A, __m128i __B) +_mm_srai_epi16 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); } static __inline __m128i -_mm_sra_epi32 (__m128i __A, __m128i __B) +_mm_srai_epi32 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_psrad128 
((__v4si)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); } -static __inline __m128i -_mm_srl_epi16 (__m128i __A, __m128i __B) +#if 0 +static __m128i __attribute__((__always_inline__)) +_mm_srli_si128 (__m128i __A, const int __B) { - return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B); + return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) } -static __inline __m128i -_mm_srl_epi32 (__m128i __A, __m128i __B) +static __m128i __attribute__((__always_inline__)) +_mm_srli_si128 (__m128i __A, const int __B) { - return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B); + return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) } +#else +#define _mm_srli_si128(__A, __B) \ + ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8)) +#define _mm_slli_si128(__A, __B) \ + ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8)) +#endif static __inline __m128i -_mm_srl_epi64 (__m128i __A, __m128i __B) +_mm_srli_epi16 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); } static __inline __m128i -_mm_slli_epi16 (__m128i __A, int __B) +_mm_srli_epi32 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); + return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); } static __inline __m128i -_mm_slli_epi32 (__m128i __A, int __B) +_mm_srli_epi64 (__m128i __A, int __B) { - return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); + return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); } static __inline __m128i -_mm_slli_epi64 (__m128i __A, int __B) +_mm_sll_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); + return _mm_slli_epi16 (__A, _mm_cvtsi128_si32 (__B)); } static __inline __m128i -_mm_srai_epi16 (__m128i __A, int __B) +_mm_sll_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); + return 
_mm_slli_epi32 (__A, _mm_cvtsi128_si32 (__B)); } static __inline __m128i -_mm_srai_epi32 (__m128i __A, int __B) +_mm_sll_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); + return _mm_slli_epi64 (__A, _mm_cvtsi128_si32 (__B)); } -#if 0 -static __m128i __attribute__((__always_inline__)) -_mm_srli_si128 (__m128i __A, const int __B) +static __inline __m128i +_mm_sra_epi16 (__m128i __A, __m128i __B) { - return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) + return _mm_srai_epi16 (__A, _mm_cvtsi128_si32 (__B)); } -static __m128i __attribute__((__always_inline__)) -_mm_srli_si128 (__m128i __A, const int __B) +static __inline __m128i +_mm_sra_epi32 (__m128i __A, __m128i __B) { - return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) + return _mm_srai_epi32 (__A, _mm_cvtsi128_si32 (__B)); } -#endif -#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) -#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) static __inline __m128i -_mm_srli_epi16 (__m128i __A, int __B) +_mm_srl_epi16 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); + return _mm_srli_epi16 (__A, _mm_cvtsi128_si32 (__B)); } static __inline __m128i -_mm_srli_epi32 (__m128i __A, int __B) +_mm_srl_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); + return _mm_srli_epi32 (__A, _mm_cvtsi128_si32 (__B)); } static __inline __m128i -_mm_srli_epi64 (__m128i __A, int __B) +_mm_srl_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); + return _mm_srli_epi64 (__A, _mm_cvtsi128_si32 (__B)); } static __inline __m128i @@ -1470,22 +1489,6 @@ _mm_cvtsi64x_si128 (long long __A) } #endif -static __inline int -_mm_cvtsi128_si32 (__m128i __A) -{ - int __tmp; - __builtin_ia32_stored (&__tmp, (__v4si)__A); - return __tmp; -} - -#ifdef __x86_64__ -static __inline long long -_mm_cvtsi128_si64x 
(__m128i __A) -{ - return __builtin_ia32_movdq2q ((__v2di)__A); -} -#endif - #endif /* __SSE2__ */ #endif /* _EMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index ea35a2a1768..6a6e68d8b1f 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -70,6 +70,10 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODE (INT, DI, 4); /* V4DI */ VECTOR_MODE (INT, SI, 8); /* V8SI */ +VECTOR_MODE (INT, HI, 16); /* V16HI */ +VECTOR_MODE (INT, QI, 32); /* V32QI */ +VECTOR_MODE (FLOAT, DF, 4); /* V4DF */ +VECTOR_MODE (FLOAT, SF, 8); /* V8SF */ /* The symbol Pmode stands for one of the above machine modes (usually SImode). The tm.h file specifies which one. It is not a distinct mode. */ diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 58e4e23471b..5920c9f1fdd 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -126,6 +126,10 @@ extern void ix86_expand_clear (rtx); extern void ix86_expand_move (enum machine_mode, rtx[]); extern void ix86_expand_vector_move (enum machine_mode, rtx[]); extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]); +extern rtx ix86_fixup_binary_operands (enum rtx_code, + enum machine_mode, rtx[]); +extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, + enum machine_mode, rtx[]); extern void ix86_expand_binary_operator (enum rtx_code, enum machine_mode, rtx[]); extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 00319592785..7edd97c3c99 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -6312,6 +6312,7 @@ get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) P -- if PIC, print an @PLT suffix. X -- don't print any sort of PIC '@' suffix for a symbol. & -- print some in-use local-dynamic symbol name. 
+ H -- print a memory address offset by 8; used for sse high-parts */ void @@ -6539,6 +6540,13 @@ print_operand (FILE *file, rtx x, int code) #endif put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); return; + + case 'H': + /* It doesn't actually matter what mode we use here, as we're + only going to use this for printing. */ + x = adjust_address_nv (x, DImode, 8); + break; + case '+': { rtx x; @@ -7714,16 +7722,16 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) } -/* Attempt to expand a binary operator. Make the expansion closer to the - actual machine, then just general_operand, which will allow 3 separate - memory references (one output, two input) in a single insn. */ +/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the + destination to use for the operation. If different from the true + destination in operands[0], a copy operation will be required. */ -void -ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, - rtx operands[]) +rtx +ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, + rtx operands[]) { int matching_memory; - rtx src1, src2, dst, op, clob; + rtx src1, src2, dst; dst = operands[0]; src1 = operands[1]; @@ -7780,7 +7788,37 @@ ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, src2 = force_reg (mode, src2); } - /* Emit the instruction. */ + src1 = operands[1] = src1; + src2 = operands[2] = src2; + return dst; +} + +/* Similarly, but assume that the destination has already been + set up properly. */ + +void +ix86_fixup_binary_operands_no_copy (enum rtx_code code, + enum machine_mode mode, rtx operands[]) +{ + rtx dst = ix86_fixup_binary_operands (code, mode, operands); + gcc_assert (dst == operands[0]); +} + +/* Attempt to expand a binary operator. Make the expansion closer to the + actual machine, then just general_operand, which will allow 3 separate + memory references (one output, two input) in a single insn. 
*/ + +void +ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, + rtx operands[]) +{ + rtx src1, src2, dst, op, clob; + + dst = ix86_fixup_binary_operands (code, mode, operands); + src1 = operands[1]; + src2 = operands[2]; + + /* Emit the instruction. */ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); if (reload_in_progress) @@ -7916,13 +7954,28 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, rtx mask, set, use, clob, dst, src; bool matching_memory; bool use_sse = false; + bool vector_mode = VECTOR_MODE_P (mode); + enum machine_mode elt_mode = mode; + enum machine_mode vec_mode = VOIDmode; + if (vector_mode) + { + elt_mode = GET_MODE_INNER (mode); + vec_mode = mode; + use_sse = true; + } if (TARGET_SSE_MATH) { if (mode == SFmode) - use_sse = true; + { + use_sse = true; + vec_mode = V4SFmode; + } else if (mode == DFmode && TARGET_SSE2) - use_sse = true; + { + use_sse = true; + vec_mode = V2DFmode; + } } /* NEG and ABS performed with SSE use bitwise mask operations. @@ -7931,9 +7984,10 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, { HOST_WIDE_INT hi, lo; int shift = 63; + rtvec v; /* Find the sign bit, sign extended to 2*HWI. */ - if (mode == SFmode) + if (elt_mode == SFmode) lo = 0x80000000, hi = lo < 0; else if (HOST_BITS_PER_WIDE_INT >= 64) lo = (HOST_WIDE_INT)1 << shift, hi = -1; @@ -7948,15 +8002,32 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, /* Force this value into the low part of a fp vector constant. */ mask = immed_double_const (lo, hi, mode == SFmode ? 
SImode : DImode); mask = gen_lowpart (mode, mask); - if (mode == SFmode) - mask = gen_rtx_CONST_VECTOR (V4SFmode, - gen_rtvec (4, mask, CONST0_RTX (SFmode), - CONST0_RTX (SFmode), - CONST0_RTX (SFmode))); - else - mask = gen_rtx_CONST_VECTOR (V2DFmode, - gen_rtvec (2, mask, CONST0_RTX (DFmode))); - mask = force_reg (GET_MODE (mask), mask); + + switch (mode) + { + case SFmode: + v = gen_rtvec (4, mask, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), CONST0_RTX (SFmode)); + break; + + case DFmode: + v = gen_rtvec (2, mask, CONST0_RTX (DFmode)); + break; + + case V4SFmode: + v = gen_rtvec (4, mask, mask, mask, mask); + break; + + case V4DFmode: + v = gen_rtvec (2, mask, mask); + break; + + default: + gcc_unreachable (); + } + + mask = gen_rtx_CONST_VECTOR (vec_mode, v); + mask = force_reg (vec_mode, mask); } else { @@ -7982,11 +8053,20 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, if (MEM_P (src) && !matching_memory) src = force_reg (mode, src); - set = gen_rtx_fmt_e (code, mode, src); - set = gen_rtx_SET (VOIDmode, dst, set); - use = gen_rtx_USE (VOIDmode, mask); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob))); + if (vector_mode) + { + set = gen_rtx_fmt_ee (code == NEG ? 
XOR : AND, mode, src, mask); + set = gen_rtx_SET (VOIDmode, dst, set); + emit_insn (set); + } + else + { + set = gen_rtx_fmt_e (code, mode, src); + set = gen_rtx_SET (VOIDmode, dst, set); + use = gen_rtx_USE (VOIDmode, mask); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob))); + } if (dst != operands[0]) emit_move_insn (operands[0], dst); @@ -12128,45 +12208,49 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, - - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, + { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, + + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, 
"__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, 
"__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, - { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, { MASK_SSE, CODE_FOR_smaxv4sf3,
"__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, - { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, + { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, + { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, + { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, @@ -12229,9 +12313,9 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, - { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -12260,45 
+12344,45 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, - { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, - { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, - { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, - - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, + { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, + + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, 
"__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, 
"__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 }, - { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, - { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", 
IX86_BUILTIN_ANDNPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, @@ -12314,32 +12398,32 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, - { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, - { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, - { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, - { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, - { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, - { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, - { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, - { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, + { 
MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, + { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, - { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, + { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, + { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, + { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, - { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, - { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, - { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, - { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, - { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", 
IX86_BUILTIN_PCMPGTW128, 0, 0 }, - { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, @@ -12359,45 +12443,37 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, - { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, - { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 }, { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, - { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 }, { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, - { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 }, { 
MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, - { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 }, { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, - { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 }, { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, - { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 }, { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, - { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 }, { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, - { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 }, { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, - { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, /* SSE3 MMX */ - { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, - { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, - { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, - { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, - { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, - { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } + { 
MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } }; static const struct builtin_description bdesc_1arg[] = @@ -12406,49 +12482,45 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, - { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, - { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, - { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, - { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, - { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, - { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, + { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, 
IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 }, { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, - - { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, - { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, - { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, - { MASK_SSE2, 
CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, /* SSE3 */ - { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, - { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, - { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 } + { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, + { MASK_SSE3, CODE_FOR_sse3_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 } }; void @@ -12857,16 +12929,12 @@ ix86_init_mmx_sse_builtins (void) } /* Override for comparisons. 
*/ - if (d->icode == CODE_FOR_maskcmpv4sf3 - || d->icode == CODE_FOR_maskncmpv4sf3 - || d->icode == CODE_FOR_vmmaskcmpv4sf3 - || d->icode == CODE_FOR_vmmaskncmpv4sf3) + if (d->icode == CODE_FOR_sse_maskcmpv4sf3 + || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) type = v4si_ftype_v4sf_v4sf; - if (d->icode == CODE_FOR_maskcmpv2df3 - || d->icode == CODE_FOR_maskncmpv2df3 - || d->icode == CODE_FOR_vmmaskcmpv2df3 - || d->icode == CODE_FOR_vmmaskncmpv2df3) + if (d->icode == CODE_FOR_sse2_maskcmpv2df3 + || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) type = v2di_ftype_v2df_v2df; def_builtin (d->mask, d->name, type, d->code); @@ -13118,17 +13186,8 @@ ix86_init_mmx_sse_builtins (void) static rtx safe_vector_operand (rtx x, enum machine_mode mode) { - if (x != const0_rtx) - return x; - x = gen_reg_rtx (mode); - - if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode)) - emit_insn (gen_mmx_clrdi (mode == DImode ? x - : gen_rtx_SUBREG (DImode, x, 0))); - else - emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? 
x - : gen_rtx_SUBREG (V4SFmode, x, 0), - CONST0_RTX (V4SFmode))); + if (x == const0_rtx) + x = CONST0_RTX (mode); return x; } @@ -13137,7 +13196,7 @@ safe_vector_operand (rtx x, enum machine_mode mode) static rtx ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) { - rtx pat; + rtx pat, xops[3]; tree arg0 = TREE_VALUE (arglist); tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -13169,20 +13228,17 @@ ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)) abort (); - if ((optimize && !register_operand (op0, mode0)) - || !(*insn_data[icode].operand[1].predicate) (op0, mode0)) + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); - /* In the commutative cases, both op0 and op1 are nonimmediate_operand, - yet one of the two must not be a memory. This is normally enforced - by expanders, but we didn't bother to create one here. */ - if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) - op0 = copy_to_mode_reg (mode0, op0); + xops[0] = target; + xops[1] = op0; + xops[2] = op1; + target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops); - pat = GEN_FCN (icode) (target, op0, op1); + pat = GEN_FCN (icode) (target, xops[1], xops[2]); if (! pat) return 0; emit_insn (pat); @@ -13495,8 +13551,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, icode = (fcode == IX86_BUILTIN_MASKMOVQ ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq) - : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64 - : CODE_FOR_sse2_maskmovdqu)); + : CODE_FOR_sse2_maskmovdqu); /* Note the arg order is different from the operand order. 
*/ arg1 = TREE_VALUE (arglist); arg2 = TREE_VALUE (TREE_CHAIN (arglist)); @@ -13508,6 +13563,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, mode1 = insn_data[icode].operand[1].mode; mode2 = insn_data[icode].operand[2].mode; + if (fcode == IX86_BUILTIN_MASKMOVDQU) + { + op0 = force_reg (Pmode, op0); + op0 = gen_rtx_MEM (V16QImode, op0); + } + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) @@ -13521,20 +13582,20 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return 0; case IX86_BUILTIN_SQRTSS: - return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target); + return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target); case IX86_BUILTIN_RSQRTSS: - return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target); + return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target); case IX86_BUILTIN_RCPSS: - return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target); + return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target); case IX86_BUILTIN_LOADAPS: - return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1); + return ix86_expand_unop_builtin (CODE_FOR_movv4sf, arglist, target, 1); case IX86_BUILTIN_LOADUPS: return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); case IX86_BUILTIN_STOREAPS: - return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist); + return ix86_expand_store_builtin (CODE_FOR_movv4sf, arglist); case IX86_BUILTIN_STOREUPS: return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); @@ -13794,9 +13855,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0); case IX86_BUILTIN_SSE_ZERO: - target = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_clrv4sf 
(target, CONST0_RTX (V4SFmode))); - return target; + return CONST0_RTX (V4SFmode); case IX86_BUILTIN_MMX_ZERO: target = gen_reg_rtx (DImode); @@ -13804,20 +13863,17 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return target; case IX86_BUILTIN_CLRTI: - target = gen_reg_rtx (V2DImode); - emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0))); - return target; - + return const0_rtx; case IX86_BUILTIN_SQRTSD: - return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target); + return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target); case IX86_BUILTIN_LOADAPD: - return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1); + return ix86_expand_unop_builtin (CODE_FOR_movv2df, arglist, target, 1); case IX86_BUILTIN_LOADUPD: return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1); case IX86_BUILTIN_STOREAPD: - return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist); case IX86_BUILTIN_STOREUPD: return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist); @@ -13825,7 +13881,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1); case IX86_BUILTIN_STORESD: - return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist); + return ix86_expand_store_builtin (CODE_FOR_sse2_storelpd, arglist); case IX86_BUILTIN_SETPD1: target = assign_386_stack_local (DFmode, 0); @@ -13846,11 +13902,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_move_insn (adjust_address (target, DFmode, 8), expand_expr (arg1, NULL_RTX, VOIDmode, 0)); op0 = gen_reg_rtx (V2DFmode); - emit_insn (gen_sse2_movapd (op0, target)); + emit_move_insn (op0, target); return op0; case IX86_BUILTIN_LOADRPD: - target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, + target = 
ix86_expand_unop_builtin (CODE_FOR_movv2df, arglist, gen_reg_rtx (V2DFmode), 1); emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx)); return target; @@ -13862,14 +13918,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return target; case IX86_BUILTIN_STOREPD1: - return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist); case IX86_BUILTIN_STORERPD: - return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist); + return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist); case IX86_BUILTIN_CLRPD: - target = gen_reg_rtx (V2DFmode); - emit_insn (gen_sse_clrv2df (target)); - return target; + return CONST0_RTX (V2DFmode); case IX86_BUILTIN_MFENCE: emit_insn (gen_sse2_mfence ()); @@ -13896,14 +13950,14 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist); case IX86_BUILTIN_LOADDQA: - return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1); + return ix86_expand_unop_builtin (CODE_FOR_movv2di, arglist, target, 1); case IX86_BUILTIN_LOADDQU: return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1); case IX86_BUILTIN_LOADD: return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1); case IX86_BUILTIN_STOREDQA: - return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist); + return ix86_expand_store_builtin (CODE_FOR_movv2di, arglist); case IX86_BUILTIN_STOREDQU: return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist); case IX86_BUILTIN_STORED: @@ -13922,7 +13976,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, op1 = copy_to_mode_reg (SImode, op1); if (!REG_P (op2)) op2 = copy_to_mode_reg (SImode, op2); - emit_insn (gen_monitor (op0, op1, op2)); + emit_insn (gen_sse3_monitor (op0, op1, op2)); return 0; case IX86_BUILTIN_MWAIT: @@ -13934,14 +13988,14 @@ ix86_expand_builtin 
(tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, op0 = copy_to_mode_reg (SImode, op0); if (!REG_P (op1)) op1 = copy_to_mode_reg (SImode, op1); - emit_insn (gen_mwait (op0, op1)); + emit_insn (gen_sse3_mwait (op0, op1)); return 0; case IX86_BUILTIN_LOADDDUP: - return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1); + return ix86_expand_unop_builtin (CODE_FOR_sse3_loadddup, arglist, target, 1); case IX86_BUILTIN_LDDQU: - return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, + return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, target, 1); default: @@ -13952,14 +14006,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (d->code == fcode) { /* Compares are treated specially. */ - if (d->icode == CODE_FOR_maskcmpv4sf3 - || d->icode == CODE_FOR_vmmaskcmpv4sf3 - || d->icode == CODE_FOR_maskncmpv4sf3 - || d->icode == CODE_FOR_vmmaskncmpv4sf3 - || d->icode == CODE_FOR_maskcmpv2df3 - || d->icode == CODE_FOR_vmmaskcmpv2df3 - || d->icode == CODE_FOR_maskncmpv2df3 - || d->icode == CODE_FOR_vmmaskncmpv2df3) + if (d->icode == CODE_FOR_sse_maskcmpv4sf3 + || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3 + || d->icode == CODE_FOR_sse2_maskcmpv2df3 + || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) return ix86_expand_sse_compare (d, arglist, target); return ix86_expand_binop_builtin (d->icode, arglist, target); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 47d7035d466..0a0db2e102f 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2062,6 +2062,8 @@ enum ix86_builtins IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS, + IX86_BUILTIN_CMPNGTSS, + IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS, IX86_BUILTIN_CMPUNORDSS, IX86_BUILTIN_CMPNESS, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0851dde2f80..08aa382fb9d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -84,6 +84,7 @@ ; For SSE/MMX support: (UNSPEC_FIX 30) + 
(UNSPEC_FIX_NOTRUNC 31) (UNSPEC_MASKMOV 32) (UNSPEC_MOVMSK 33) (UNSPEC_MOVNT 34) @@ -192,7 +193,7 @@ push,pop,call,callv,leave, str,cld, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint, - sselog,sseiadd,sseishft,sseimul, + sselog,sselog1,sseiadd,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -206,7 +207,7 @@ (define_attr "unit" "integer,i387,sse,mmx,unknown" (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint") (const_string "i387") - (eq_attr "type" "sselog,sseiadd,sseishft,sseimul, + (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") @@ -384,7 +385,7 @@ (if_then_else (match_operand 1 "constant_call_address_operand" "") (const_string "none") (const_string "load")) - (and (eq_attr "type" "alu1,negnot,ishift1") + (and (eq_attr "type" "alu1,negnot,ishift1,sselog1") (match_operand 1 "memory_operand" "")) (const_string "both") (and (match_operand 0 "memory_operand" "") @@ -398,7 +399,7 @@ "!alu1,negnot,ishift1, imov,imovx,icmp,test, fmov,fcmp,fsgn, - sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt, + sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1, mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") @@ -2122,6 +2123,112 @@ (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector")]) +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] + "TARGET_SSE || TARGET_64BIT" +{ + if (TARGET_64BIT) + ix86_expand_move (TImode, operands); + else + ix86_expand_vector_move (TImode, operands); + DONE; +}) + +(define_insn "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && 
!TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} + [(set_attr "type" "ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + + (eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) + +(define_insn "*movti_rex64" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} + [(set_attr "type" "*,*,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + 
(match_operand:TI 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + (define_expand "movsf" [(set (match_operand:SF 0 "nonimmediate_operand" "") (match_operand:SF 1 "general_operand" ""))] @@ -2907,6 +3014,67 @@ } [(set_attr "type" "fxch") (set_attr "mode" "XF")]) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_64BIT" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} + [(set_attr "type" "*,*,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") ;; Zero extension instructions @@ -4732,162 +4900,6 @@ ;; SSE extract/set expanders 
-(define_expand "vec_setv2df" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand:DF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE2" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1])); - break; - case 1: - emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1])); - break; - default: - abort (); - } - DONE; -}) - -(define_expand "vec_extractv2df" - [(match_operand:DF 0 "register_operand" "") - (match_operand:V2DF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE2" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_insn (gen_sse2_storelpd (operands[0], operands[1])); - break; - case 1: - emit_insn (gen_sse2_storehpd (operands[0], operands[1])); - break; - default: - abort (); - } - DONE; -}) - -(define_expand "vec_initv2df" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand 1 "" "")] - "TARGET_SSE2" -{ - ix86_expand_vector_init (operands[0], operands[1]); - DONE; -}) - -(define_expand "vec_setv4sf" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand:SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_insn (gen_sse_movss (operands[0], operands[0], - simplify_gen_subreg (V4SFmode, operands[1], - SFmode, 0))); - break; - case 1: - { - rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[0]); - emit_insn (gen_sse_unpcklps (operands[0], operands[0], operands[0])); - emit_insn (gen_sse_movss (operands[0], operands[0], op1)); - emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, - GEN_INT (1 + (0<<2) + (2<<4) + (3<<6)))); - } - break; - case 2: - { - rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[0]); - emit_insn 
(gen_sse_movss (tmp, tmp, op1)); - emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, - GEN_INT (0 + (1<<2) + (0<<4) + (3<<6)))); - } - break; - case 3: - { - rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[0]); - emit_insn (gen_sse_movss (tmp, tmp, op1)); - emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, - GEN_INT (0 + (1<<2) + (2<<4) + (0<<6)))); - } - break; - default: - abort (); - } - DONE; -}) - -(define_expand "vec_extractv4sf" - [(match_operand:SF 0 "register_operand" "") - (match_operand:V4SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_SSE" -{ - switch (INTVAL (operands[2])) - { - case 0: - emit_move_insn (operands[0], gen_lowpart (SFmode, operands[1])); - break; - case 1: - { - rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[1]); - emit_insn (gen_sse_shufps (op0, tmp, tmp, - const1_rtx)); - } - break; - case 2: - { - rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[1]); - emit_insn (gen_sse_unpckhps (op0, tmp, tmp)); - } - break; - case 3: - { - rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); - rtx tmp = gen_reg_rtx (V4SFmode); - - emit_move_insn (tmp, operands[1]); - emit_insn (gen_sse_shufps (op0, tmp, tmp, - GEN_INT (3))); - } - break; - default: - abort (); - } - DONE; -}) - -(define_expand "vec_initv4sf" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand 1 "" "")] - "TARGET_SSE" -{ - ix86_expand_vector_init (operands[0], operands[1]); - DONE; -}) ;; Add instructions @@ -10511,7 +10523,7 @@ [(set (reg FLAGS_REG) (compare (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set 
(match_operand:SI 0 "nonimmediate_operand" "=rm") (ashift:SI (match_dup 1) (match_dup 2)))] @@ -10549,7 +10561,7 @@ [(set (reg FLAGS_REG) (compare (ashift:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] @@ -10671,7 +10683,7 @@ [(set (reg FLAGS_REG) (compare (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashift:HI (match_dup 1) (match_dup 2)))] @@ -10832,7 +10844,7 @@ [(set (reg FLAGS_REG) (compare (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashift:QI (match_dup 1) (match_dup 2)))] @@ -11165,7 +11177,7 @@ [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] @@ -11179,7 +11191,7 @@ [(set (reg FLAGS_REG) (compare (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] @@ -11251,7 +11263,7 @@ [(set (reg FLAGS_REG) (compare (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) 
(const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] @@ -11351,7 +11363,7 @@ [(set (reg FLAGS_REG) (compare (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] @@ -11569,7 +11581,7 @@ [(set (reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] @@ -11583,7 +11595,7 @@ [(set (reg FLAGS_REG) (compare (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] @@ -11655,7 +11667,7 @@ [(set (reg FLAGS_REG) (compare (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] @@ -11754,7 +11766,7 @@ [(set (reg FLAGS_REG) (compare (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_31_operand" "I")) + (match_operand:QI 2 "const_1_to_31_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] @@ -19696,176 +19708,19 @@ RET; }) - ;; Pentium III SIMD instructions. +;; Pentium III SIMD instructions. ;; Moves for SSE/MMX regs. 
-(define_expand "movv4sf" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - (match_operand:V4SF 1 "nonimmediate_operand" ""))] - "TARGET_SSE" +;; 8 byte integral modes handled by MMX (and by extension, SSE) +(define_mode_macro MMXMODEI [V8QI V4HI V2SI]) + +(define_expand "mov" + [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") + (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] + "TARGET_MMX" { - ix86_expand_vector_move (V4SFmode, operands); - DONE; -}) - -(define_insn "*movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") - (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] - "TARGET_SSE" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) - -(define_split - [(set (match_operand:V4SF 0 "register_operand" "") - (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] - "TARGET_SSE && reload_completed" - [(set (match_dup 0) - (vec_merge:V4SF - (vec_duplicate:V4SF (match_dup 1)) - (match_dup 2) - (const_int 1)))] -{ - operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); - operands[2] = CONST0_RTX (V4SFmode); -}) - -(define_expand "movv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "") - (match_operand:V2DF 1 "nonimmediate_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move (V2DFmode, operands); - DONE; -}) - -(define_insn "*movv2df_internal" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") - (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "xorpd\t%0, %0"; - case 1: - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movapd\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "ssemov") - (set 
(attr "mode") - (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (const_string "V4SF") - (eq_attr "alternative" "0,1") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "V2DF")) - (eq_attr "alternative" "2") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "V2DF"))] - (const_string "V2DF")))]) - -(define_split - [(set (match_operand:V2DF 0 "register_operand" "") - (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] - "TARGET_SSE2 && reload_completed" - [(set (match_dup 0) - (vec_merge:V2DF - (vec_duplicate:V2DF (match_dup 1)) - (match_dup 2) - (const_int 1)))] -{ - operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); - operands[2] = CONST0_RTX (V2DFmode); -}) - -;; 16 byte integral modes handled by SSE, minus TImode, which gets -;; special-cased for TARGET_64BIT. -(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI]) - -(define_expand "mov" - [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "") - (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) - -(define_insn "*mov_internal" - [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 1: - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "ssemov") - (set (attr "mode") - (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (const_string "V4SF") - - (eq_attr "alternative" "0,1") - 
(if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "2") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "TI")))]) - -;; 8 byte integral modes handled by MMX (and by extension, SSE) -(define_mode_macro MMXMODEI [V8QI V4HI V2SI]) - -(define_expand "mov" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") - (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (mode, operands); + ix86_expand_vector_move (mode, operands); DONE; }) @@ -19966,319 +19821,40 @@ [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov") (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V2SF,V2SF")]) -(define_expand "movti" - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "nonimmediate_operand" ""))] - "TARGET_SSE || TARGET_64BIT" +;; All 8-byte vector modes handled by MMX +(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF]) + +(define_expand "movmisalign" + [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "") + (match_operand:MMXMODE 1 "nonimmediate_operand" ""))] + "TARGET_MMX" { - if (TARGET_64BIT) - ix86_expand_move (TImode, operands); - else - ix86_expand_vector_move (TImode, operands); + ix86_expand_vector_move (mode, operands); DONE; }) -(define_insn "*movti_internal" - [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] - "TARGET_SSE && !TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 1: - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, 
%1}"; - default: - abort (); - } -} - [(set_attr "type" "ssemov,ssemov,ssemov") - (set (attr "mode") - (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (const_string "V4SF") +;; SSE Strange Moves. - (eq_attr "alternative" "0,1") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "2") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI"))] - (const_string "TI")))]) +(define_insn "mmx_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] + UNSPEC_MOVMSK))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) -(define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") - (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] - "TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - case 1: - return "#"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 3: - case 4: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "4") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) -(define_expand "movtf" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "nonimmediate_operand" ""))] - 
"TARGET_64BIT" -{ - ix86_expand_move (TFmode, operands); - DONE; -}) - -(define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") - (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] - "TARGET_64BIT - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - case 1: - return "#"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "xorps\t%0, %0"; - else - return "pxor\t%0, %0"; - case 3: - case 4: - if (get_attr_mode (insn) == MODE_V4SF) - return "movaps\t{%1, %0|%0, %1}"; - else - return "movdqa\t{%1, %0|%0, %1}"; - default: - abort (); - } -} - [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set (attr "mode") - (cond [(eq_attr "alternative" "2,3") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")) - (eq_attr "alternative" "4") - (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") - (const_int 0)) - (ne (symbol_ref "optimize_size") - (const_int 0))) - (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) - -(define_mode_macro SSEPUSH [V16QI V8HI V4SI V2DI TI V4SF V2DF]) - -(define_insn "*push" - [(set (match_operand:SSEPUSH 0 "push_operand" "=<") - (match_operand:SSEPUSH 1 "register_operand" "x"))] - "TARGET_SSE" - "#") - -(define_mode_macro MMXPUSH [V8QI V4HI V2SI V2SF]) - -(define_insn "*push" - [(set (match_operand:MMXPUSH 0 "push_operand" "=<") - (match_operand:MMXPUSH 1 "register_operand" "xy"))] - "TARGET_MMX" - "#") - -(define_split - [(set (match_operand 0 "push_operand" "") - (match_operand 1 "register_operand" ""))] - "!TARGET_64BIT && reload_completed - && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" - [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 3))) - (set (match_dup 2) (match_dup 1))] - "operands[2] = change_address (operands[0], GET_MODE (operands[0]), - stack_pointer_rtx); - operands[3] = GEN_INT (-GET_MODE_SIZE 
(GET_MODE (operands[0])));") - -(define_split - [(set (match_operand 0 "push_operand" "") - (match_operand 1 "register_operand" ""))] - "TARGET_64BIT && reload_completed - && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" - [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 3))) - (set (match_dup 2) (match_dup 1))] - "operands[2] = change_address (operands[0], GET_MODE (operands[0]), - stack_pointer_rtx); - operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") - - -(define_split - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -(define_split - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "reload_completed && !SSE_REG_P (operands[0]) - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -;; All 16-byte vector modes handled by SSE -(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) - -(define_expand "movmisalign" - [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") - (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] - "TARGET_SSE" -{ - ix86_expand_vector_move_misalign (mode, operands); - DONE; -}) - -;; All 8-byte vector modes handled by MMX -(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF]) - -(define_expand "movmisalign" - [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "") - (match_operand:MMXMODE 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) - -;; These two patterns are useful for specifying exactly whether to use -;; movaps or movups -(define_expand "sse_movaps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] - UNSPEC_MOVA))] - "TARGET_SSE" -{ - if (GET_CODE (operands[0]) == MEM && GET_CODE 
(operands[1]) == MEM) - { - rtx tmp = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_movaps (tmp, operands[1])); - emit_move_insn (operands[0], tmp); - DONE; - } -}) - -(define_insn "*sse_movaps_1" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVA))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov,ssemov") - (set_attr "mode" "V4SF")]) - -(define_expand "sse_movups" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] - UNSPEC_MOVU))] - "TARGET_SSE" -{ - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - { - rtx tmp = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_movups (tmp, operands[1])); - emit_move_insn (operands[0], tmp); - DONE; - } -}) - -(define_insn "*sse_movups_1" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movups\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt,ssecvt") - (set_attr "mode" "V4SF")]) - -;; SSE Strange Moves. 
- -(define_insn "sse_movmskps" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE" - "movmskps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "mmx_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] - UNSPEC_MOVMSK))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - - -(define_insn "mmx_maskmovq" - [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] - UNSPEC_MASKMOV))] - "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) +(define_insn "mmx_maskmovq" + [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + UNSPEC_MASKMOV))] + "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovq\t{%2, %1|%1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_maskmovq_rex" [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) @@ -20291,15 +19867,6 @@ [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "sse_movntv4sf" - [(set (match_operand:V4SF 0 "memory_operand" "=m") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE" - "movntps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) - (define_insn "sse_movntdi" [(set (match_operand:DI 0 "memory_operand" "=m") (unspec:DI [(match_operand:DI 1 "register_operand" "y")] @@ -20309,3916 
+19876,1237 @@ [(set_attr "type" "mmxmov") (set_attr "mode" "DI")]) -(define_insn "sse_movhlps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 3) - (const_int 0) - (const_int 1)])) - (const_int 3)))] - "TARGET_SSE" - "movhlps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) +;; MMX insns -(define_insn "sse_movlhps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 3) - (const_int 0) - (const_int 1)])) - (const_int 12)))] - "TARGET_SSE" - "movlhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) +;; MMX arithmetic -;; Store the high V2SF of the source vector to the destination. -(define_insn "sse_storehps" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") - (parallel [(const_int 2) (const_int 3)])))] - "TARGET_SSE" - "@ - movhps\t{%1, %0|%0, %1} - movhlps\t{%1, %0|%0, %1} - #" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2SF")]) +(define_insn "addv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_split - [(set (match_operand:V2SF 0 "register_operand" "") - (vec_select:V2SF - (match_operand:V4SF 1 "memory_operand" "") - (parallel [(const_int 2) (const_int 3)])))] - "TARGET_SSE && reload_completed" - [(const_int 0)] -{ - emit_move_insn (operands[0], adjust_address (operands[1], V2SFmode, 8)); - DONE; -}) +(define_insn "addv4hi3" + [(set 
(match_operand:V4HI 0 "register_operand" "=y") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -;; Load the high V2SF of the target vector from the source vector. -(define_insn "sse_loadhps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") - (vec_concat:V4SF - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0") - (parallel [(const_int 0) (const_int 1)])) - (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))] - "TARGET_SSE" - "@ - movhps\t{%2, %0|%0, %2} - movlhps\t{%2, %0|%0, %2} - #" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2SF")]) +(define_insn "addv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_split - [(set (match_operand:V4SF 0 "memory_operand" "") - (vec_concat:V4SF - (vec_select:V2SF - (match_dup 0) - (parallel [(const_int 0) (const_int 1)])) - (match_operand:V2SF 1 "register_operand" "")))] - "TARGET_SSE && reload_completed" - [(const_int 0)] -{ - emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[1]); - DONE; -}) +(define_insn "mmx_adddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -;; Store the low V2SF of the source vector to the destination. 
-(define_expand "sse_storelps" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "") - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "") - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" -{ - operands[1] = gen_lowpart (V2SFmode, operands[1]); - emit_move_insn (operands[0], operands[1]); - DONE; -}) +(define_insn "ssaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddsb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -;; Load the low V2SF of the target vector from the source vector. -(define_insn "sse_loadlps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") - (vec_concat:V4SF - (match_operand:V2SF 2 "nonimmediate_operand" "m,0,x") - (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "0,x,0") - (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_SSE" -{ - static const char * const alt[] = { - "movlps\t{%2, %0|%0, %2}", - "shufps\t{%2, %1, %0|%0, %1, %2}", - "movlps\t{%2, %0|%0, %2}" - }; +(define_insn "ssaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) - if (which_alternative == 1) - operands[2] = GEN_INT (0xe4); +(define_insn "usaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddusb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) - return alt[which_alternative]; -} - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2SF")]) +(define_insn "usaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + 
(us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddusw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_expand "sse_loadss" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand:SF 1 "memory_operand" "")] - "TARGET_SSE" -{ - emit_insn (gen_sse_loadss_1 (operands[0], operands[1], - CONST0_RTX (V4SFmode))); - DONE; -}) +(define_insn "subv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "sse_loadss_1" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) - (match_operand:V4SF 2 "const0_operand" "X") - (const_int 1)))] - "TARGET_SSE" - "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) +(define_insn "subv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (minus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "sse_movss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x") - (const_int 14)))] - "TARGET_SSE" - "movss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) +(define_insn "subv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (minus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn 
"sse_storess" - [(set (match_operand:SF 0 "memory_operand" "=m") - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)])))] - "TARGET_SSE" - "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "SF")]) +(define_insn "mmx_subdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "sse_shufps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_int_operand" "n")] - UNSPEC_SHUFFLE))] - "TARGET_SSE" - "shufps\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) +(define_insn "sssubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubsb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) +(define_insn "sssubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -;; SSE arithmetic +(define_insn "ussubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubusb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "addv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (plus:V4SF (match_operand:V4SF 1 
"register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "addps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) +(define_insn "ussubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubusw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "vmaddv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "addss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) +(define_insn "mulv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pmullw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) -(define_insn "subv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "subps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) +(define_insn "smulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (sign_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "TARGET_MMX" + "pmulhw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) -(define_insn "vmsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - 
(match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "subss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) +(define_insn "umulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (zero_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (zero_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmulhuw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) -;; ??? Should probably be done by generic code instead. -(define_expand "negv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "") - (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "") - (match_dup 2)))] - "TARGET_SSE" -{ - rtx m0 = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode)); - rtx vm0 = gen_rtx_CONST_VECTOR (V4SFmode, gen_rtvec (4, m0, m0, m0, m0)); - operands[2] = force_reg (V4SFmode, vm0); -}) +(define_insn "mmx_pmaddwd" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2)]))) + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) (const_int 2)])))) + (mult:V2SI + (sign_extend:V2SI (vec_select:V2HI (match_dup 1) + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2SI (vec_select:V2HI (match_dup 2) + (parallel [(const_int 1) + (const_int 3)]))))))] + "TARGET_MMX" + "pmaddwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) -(define_insn "mulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "mulps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr 
"mode" "V4SF")]) +(define_insn "sse2_umulsidi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (mult:DI + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)]))) + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])))))] + "TARGET_SSE2" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) -(define_insn "vmmulv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "mulss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "SF")]) -(define_insn "divv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "divps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "V4SF")]) +;; MMX logical operations +;; Note we don't want to declare these as regular iordi3 insns to prevent +;; normal code that also wants to use the FPU from getting broken. +;; The UNSPECs are there to prevent the combiner from getting overly clever. 
+(define_insn "mmx_iordi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(ior:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "vmdivv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "divss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "SF")]) +(define_insn "mmx_xordi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(xor:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") + (set_attr "memory" "none")]) +;; Same as pxor, but don't show input operands so that we don't think +;; they are live. 
+(define_insn "mmx_clrdi" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(const_int 0)] UNSPEC_NOP))] + "TARGET_MMX" + "pxor\t{%0, %0|%0, %0}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") + (set_attr "memory" "none")]) -;; SSE square root/reciprocal +(define_insn "mmx_anddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(and:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "rcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] - "TARGET_SSE" - "rcpps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) +(define_insn "mmx_nanddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "vmrcpv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_RCP) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE" - "rcpss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) -(define_insn "rsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] - "TARGET_SSE" - "rsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) +;; MMX unsigned averages/sum of absolute differences -(define_insn "vmrsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 
"nonimmediate_operand" "xm")] - UNSPEC_RSQRT) - (match_operand:V4SF 2 "register_operand" "0") +(define_insn "mmx_uavgv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ashiftrt:V8QI + (plus:V8QI (plus:V8QI + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")) + (const_vector:V8QI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) (const_int 1)))] - "TARGET_SSE" - "rsqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_insn "sqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "sqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) + "TARGET_SSE || TARGET_3DNOW_A" + "pavgb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) -(define_insn "vmsqrtv4sf2" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (match_operand:V4SF 2 "register_operand" "0") +(define_insn "mmx_uavgv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashiftrt:V4HI + (plus:V4HI (plus:V4HI + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")) + (const_vector:V4HI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) (const_int 1)))] - "TARGET_SSE" - "sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) + "TARGET_SSE || TARGET_3DNOW_A" + "pavgw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) -;; SSE logical operations. 
+(define_insn "mmx_psadbw" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSADBW))] + "TARGET_SSE || TARGET_3DNOW_A" + "psadbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) -;; SSE defines logical operations on floating point values. This brings -;; interesting challenge to RTL representation where logicals are only valid -;; on integral types. We deal with this by representing the floating point -;; logical as logical on arguments casted to TImode as this is what hardware -;; really does. Unfortunately hardware requires the type information to be -;; present and thus we must avoid subregs from being simplified and eliminated -;; in later compilation phases. -;; -;; We have following variants from each instruction: -;; sse_andsf3 - the operation taking V4SF vector operands -;; and doing TImode cast on them -;; *sse_andsf3_memory - the operation taking one memory operand casted to -;; TImode, since backend insist on eliminating casts -;; on memory operands -;; sse_andti3_sf_1 - the operation taking SF scalar operands. -;; We cannot accept memory operand here as instruction reads -;; whole scalar. This is generated only post reload by GCC -;; scalar float operations that expands to logicals (fabs) -;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode -;; memory operand. Eventually combine can be able -;; to synthesize these using splitter. -;; sse2_anddf3, *sse2_anddf3_memory -;; -;; -;; These are not called andti3 etc. because we really really don't want -;; the compiler to widen DImode ands to TImode ands and then try to move -;; into DImode subregs of SSE registers, and them together, and move out -;; of DImode subregs again! 
-;; SSE1 single precision floating point logical operation -(define_expand "sse_andv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (and:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "") -(define_insn "*sse_andv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) +;; MMX insert/extract/shuffle -(define_expand "sse_nandv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "")) - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "") +(define_expand "mmx_pinsrw" + [(set (match_operand:V4HI 0 "register_operand" "") + (vec_merge:V4HI + (match_operand:V4HI 1 "register_operand" "") + (vec_duplicate:V4HI + (match_operand:SI 2 "nonimmediate_operand" "")) + (match_operand:SI 3 "const_0_to_3_operand" "")))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[2] = gen_lowpart (HImode, operands[2]); + operands[3] = GEN_INT (1 << INTVAL (operands[3])); +}) -(define_insn "*sse_nandv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0")) - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) +(define_insn "*mmx_pinsrw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (match_operand:V4HI 1 "register_operand" "0") + (vec_duplicate:V4HI + (match_operand:HI 2 "nonimmediate_operand" "rm")) + (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL 
(operands[3]))); + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; +} + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) -(define_expand "sse_iorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (ior:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "") +(define_insn "mmx_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))] + "TARGET_SSE || TARGET_3DNOW_A" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) -(define_insn "*sse_iorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) +(define_insn "mmx_pshufw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "nonimmediate_operand" "ym") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] + "TARGET_SSE || TARGET_3DNOW_A" + "pshufw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) -(define_expand "sse_xorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "") - (xor:V4SF (match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "nonimmediate_operand" "")))] - "TARGET_SSE" - "") -(define_insn "*sse_xorv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" 
"V4SF")]) +;; MMX mask-generating comparisons -;; SSE2 double precision floating point logical operation +(define_insn "eqv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (eq:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpeqb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) -(define_expand "sse2_andv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (and:V2DF (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") +(define_insn "eqv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (eq:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpeqw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) -(define_insn "*sse2_andv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "sse2_nandv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "")) - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") +(define_insn "eqv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (eq:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpeqd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) -(define_insn "*sse2_nandv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0")) - (match_operand:V2DF 2 
"nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "sse2_iorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (ior:V2DF (match_operand:V2DF 1 "register_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") +(define_insn "gtv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (gt:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgtb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) -(define_insn "*sse2_iorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_expand "sse2_xorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "") - (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "") - (match_operand:V2DF 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" - "") +(define_insn "gtv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (gt:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgtw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) -(define_insn "*sse2_xorv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -;; SSE2 integral logicals. 
These patterns must always come after floating -;; point ones since we don't want compiler to use integer opcodes on floating -;; point SSE values to avoid matching of subregs in the match_operand. -(define_insn "*sse2_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) +(define_insn "gtv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (gt:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgtd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) -(define_insn "sse2_andv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) -(define_insn "*sse2_nandti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) +;; MMX max/min insns -(define_insn "sse2_nandv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "register_operand" "0")) - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) +(define_insn "umaxv8qi3" + [(set (match_operand:V8QI 0 
"register_operand" "=y") + (umax:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmaxub\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "*sse2_iorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) +(define_insn "smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smax:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmaxsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "sse2_iorv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) +(define_insn "uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umin:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pminub\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "*sse2_xorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) +(define_insn 
"sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smin:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pminsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) -(define_insn "sse2_xorv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 - && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) -;; Use xor, but don't show input operands so they aren't live before -;; this insn. -(define_insn "sse_clrv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (match_operand:V4SF 1 "const0_operand" "X"))] - "TARGET_SSE" -{ - if (get_attr_mode (insn) == MODE_TI) - return "pxor\t{%0, %0|%0, %0}"; - else - return "xorps\t{%0, %0|%0, %0}"; -} - [(set_attr "type" "sselog") - (set_attr "memory" "none") - (set (attr "mode") - (if_then_else - (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") - (const_int 0)) - (ne (symbol_ref "TARGET_SSE2") - (const_int 0))) - (eq (symbol_ref "optimize_size") - (const_int 0))) - (const_string "TI") - (const_string "V4SF")))]) - -;; Use xor, but don't show input operands so they aren't live before -;; this insn. 
-(define_insn "sse_clrv2df" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(const_int 0)] UNSPEC_NOP))] - "TARGET_SSE2" - "xorpd\t{%0, %0|%0, %0}" - [(set_attr "type" "sselog") - (set_attr "memory" "none") - (set_attr "mode" "V4SF")]) +;; MMX shifts -;; SSE mask-generating compares +(define_insn "ashrv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psraw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) -(define_insn "maskcmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")]))] - "TARGET_SSE" - "cmp%D3ps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "V4SF")]) +(define_insn "ashrv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrad\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) -(define_insn "maskncmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (not:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")])))] - "TARGET_SSE" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordps\t{%2, %0|%0, %2}"; - else - return "cmpn%D3ps\t{%2, %0|%0, %2}"; -} - [(set_attr "type" "ssecmp") - (set_attr "mode" "V4SF")]) - -(define_insn "vmmaskcmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")]) - (subreg:V4SI (match_dup 1) 0) - (const_int 
1)))] - "TARGET_SSE" - "cmp%D3ss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) - -(define_insn "vmmaskncmpv4sf3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (not:V4SI - (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "register_operand" "x")])) - (subreg:V4SI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordss\t{%2, %0|%0, %2}"; - else - return "cmpn%D3ss\t{%2, %0|%0, %2}"; -} - [(set_attr "type" "ssecmp") - (set_attr "mode" "SF")]) - -(define_insn "sse_comi" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE" - "comiss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "SF")]) - -(define_insn "sse_ucomi" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU (vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE" - "ucomiss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "SF")]) - - -;; SSE unpack - -(define_insn "sse_unpckhps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "TARGET_SSE" - "unpckhps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse_unpcklps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - 
(vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "TARGET_SSE" - "unpcklps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - - -;; SSE min/max - -(define_insn "smaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "vmsmaxv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -(define_insn "sminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) - -(define_insn "vmsminv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -;; SSE <-> integer/MMX conversions - -(define_insn "cvtpi2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") - (vec_duplicate:V4SF - (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" 
"ym"))) - (const_int 12)))] - "TARGET_SSE" - "cvtpi2ps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "cvtps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvtps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) +(define_insn "lshrv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrlw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) -(define_insn "cvttps2pi" +(define_insn "lshrv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX) - (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvttps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "SF")]) - -(define_insn "cvtsi2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0,0") - (vec_duplicate:V4SF - (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,rm"))) - (const_int 14)))] - "TARGET_SSE" - "cvtsi2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "cvtsi2ssq" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0,0") - (vec_duplicate:V4SF - (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) - (const_int 14)))] - "TARGET_SSE && TARGET_64BIT" - "cvtsi2ssq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "cvtss2si" - [(set (match_operand:SI 0 
"register_operand" "=r,r") - (vec_select:SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "mode" "SI")]) - -(define_insn "cvtss2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (vec_select:DI - (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvtss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") + (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrld\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "cvttss2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (vec_select:SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] - UNSPEC_FIX) - (parallel [(const_int 0)])))] - "TARGET_SSE" - "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector")]) - -(define_insn "cvttss2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (vec_select:DI - (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] - UNSPEC_FIX) - (parallel [(const_int 0)])))] - "TARGET_SSE && TARGET_64BIT" - "cvttss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector")]) - - -;; MMX insns - -;; MMX arithmetic - -(define_insn "addv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] +;; See logical MMX insns. 
+(define_insn "mmx_lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi"))] + UNSPEC_NOP))] "TARGET_MMX" - "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "psrlq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "addv4hi3" +(define_insn "ashlv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" - "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "psllw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "addv2si3" +(define_insn "ashlv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" - "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "pslld\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_adddi3" +;; See logical MMX insns. +(define_insn "mmx_ashldi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(plus:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] + [(ashift:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi"))] UNSPEC_NOP))] "TARGET_MMX" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "psllq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "ssaddv8qi3" + +;; MMX pack/unpack insns. 
+ +(define_insn "mmx_packsswb" [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + (vec_concat:V8QI + (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) + (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] "TARGET_MMX" - "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "ssaddv4hi3" +(define_insn "mmx_packssdw" [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + (vec_concat:V4HI + (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0")) + (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] "TARGET_MMX" - "paddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "usaddv8qi3" +(define_insn "mmx_packuswb" [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "usaddv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + (vec_concat:V8QI + (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) + (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] "TARGET_MMX" - "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "subv8qi3" +(define_insn "mmx_punpckhbw" [(set (match_operand:V8QI 0 
"register_operand" "=y") - (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (const_int 85)))] "TARGET_MMX" - "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "subv4hi3" +(define_insn "mmx_punpckhwd" [(set (match_operand:V4HI 0 "register_operand" "=y") - (minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] "TARGET_MMX" - "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "subv2si3" +(define_insn "mmx_punpckhdq" [(set (match_operand:V2SI 0 "register_operand" "=y") - (minus:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_subdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(minus:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] + 
(vec_merge:V2SI + (match_operand:V2SI 1 "register_operand" "0") + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] "TARGET_MMX" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "sssubv8qi3" +(define_insn "mmx_punpcklbw" [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (const_int 85)))] "TARGET_MMX" - "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "sssubv4hi3" +(define_insn "mmx_punpcklwd" [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] "TARGET_MMX" - "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "ussubv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_minus:V8QI 
(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] +(define_insn "mmx_punpckldq" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 1) + (const_int 0)])) + (match_operand:V2SI 2 "register_operand" "y") + (const_int 1)))] "TARGET_MMX" - "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "ussubv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) -(define_insn "mulv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (mult:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) +;; Miscellaneous stuff -(define_insn "smulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] +(define_insn "emms" + [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber (reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] "TARGET_MMX" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr 
"mode" "DI")]) - -(define_insn "umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + "emms" + [(set_attr "type" "mmx") + (set_attr "memory" "unknown")]) -(define_insn "mmx_pmaddwd" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI - (mult:V2SI - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2)]))) - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0) (const_int 2)])))) - (mult:V2SI - (sign_extend:V2SI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2SI (vec_select:V2HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3)]))))))] - "TARGET_MMX" - "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) +(define_insn "ldmxcsr" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] + UNSPECV_LDMXCSR)] + "TARGET_SSE" + "ldmxcsr\t%0" + [(set_attr "type" "sse") + (set_attr "memory" "load")]) +(define_insn "stmxcsr" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] + "TARGET_SSE" + "stmxcsr\t%0" + [(set_attr "type" "sse") + (set_attr "memory" "store")]) -;; MMX logical operations -;; Note we don't want to declare these as regular iordi3 insns to prevent -;; normal code that also wants to use the FPU from getting broken. -;; The UNSPECs are there to prevent the combiner from getting overly clever. 
-(define_insn "mmx_iordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) +(define_expand "sfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) -(define_insn "mmx_xordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI") - (set_attr "memory" "none")]) +(define_insn "*sfence_insn" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] + "TARGET_SSE || TARGET_3DNOW_A" + "sfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) -;; Same as pxor, but don't show input operands so that we don't think -;; they are live. 
-(define_insn "mmx_clrdi" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(const_int 0)] UNSPEC_NOP))] - "TARGET_MMX" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI") - (set_attr "memory" "none")]) - -(define_insn "mmx_anddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_nanddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - - -;; MMX unsigned averages/sum of absolute differences - -(define_insn "mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ashiftrt:V8QI - (plus:V8QI (plus:V8QI - (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")) - (const_vector:V8QI [(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI - (plus:V4HI (plus:V4HI - (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")) - (const_vector:V4HI [(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_psadbw" - [(set (match_operand:DI 0 
"register_operand" "=y") - (unspec:DI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] - UNSPEC_PSADBW))] - "TARGET_SSE || TARGET_3DNOW_A" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - - -;; MMX insert/extract/shuffle - -(define_expand "mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "") - (vec_merge:V4HI - (match_operand:V4HI 1 "register_operand" "") - (vec_duplicate:V4HI - (match_operand:SI 2 "nonimmediate_operand" "")) - (match_operand:SI 3 "const_0_to_3_operand" "")))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[2] = gen_lowpart (HImode, operands[2]); - operands[3] = GEN_INT (1 << INTVAL (operands[3])); -}) +(define_expand "sse_prologue_save" + [(parallel [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "immediate_operand" "")) + (use (label_ref:DI (match_operand 3 "" "")))])] + "TARGET_64BIT" + "") -(define_insn "*mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (match_operand:V4HI 1 "register_operand" "0") - (vec_duplicate:V4HI - (match_operand:HI 2 "nonimmediate_operand" "rm")) - (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))] - "TARGET_SSE || TARGET_3DNOW_A" +(define_insn "*sse_prologue_save_insn" + [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") + (match_operand:DI 4 "const_int_operand" "n"))) + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "r")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (label_ref:DI (match_operand 3 "" "X")))] + "TARGET_64BIT + && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 + 
&& INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" + "* { - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; + int i; + operands[0] = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, operands[0], operands[4])); + output_asm_insn (\"jmp\\t%A1\", operands); + for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) + { + operands[4] = adjust_address (operands[0], DImode, i*16); + operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); + PUT_MODE (operands[4], TImode); + if (GET_CODE (XEXP (operands[0], 0)) != PLUS) + output_asm_insn (\"rex\", operands); + output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); + } + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", + CODE_LABEL_NUMBER (operands[3])); + RET; } - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pextrw" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") - (parallel - [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pshufw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (unspec:V4HI [(match_operand:V4HI 1 "nonimmediate_operand" "ym") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE || TARGET_3DNOW_A" - "pshufw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - - -;; MMX mask-generating comparisons - -(define_insn "eqv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (eq:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "eqv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (eq:V4HI (match_operand:V4HI 1 
"register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "eqv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (gt:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (gt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - - -;; MMX max/min insns - -(define_insn "umaxv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umax:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") + " + [(set_attr "type" "other") + (set_attr "length_immediate" "0") + (set_attr "length_address" "0") + (set_attr "length" "135") + (set_attr "memory" "store") + (set_attr "modrm" "0") (set_attr "mode" "DI")]) -(define_insn "smaxv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smax:V4HI 
(match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) +;; 3Dnow! instructions -(define_insn "uminv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umin:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "sminv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smin:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - - -;; MMX shifts - -(define_insn "ashrv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "lshrv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "lshrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - 
"psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -;; See logical MMX insns. -(define_insn "mmx_lshrdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashlv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashlv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -;; See logical MMX insns. -(define_insn "mmx_ashldi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - - -;; MMX pack/unpack insns. 
- -(define_insn "mmx_packsswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_packssdw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_concat:V4HI - (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0")) - (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_packuswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (const_int 85)))] - "TARGET_MMX" - "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - 
(parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "TARGET_MMX" - "punpckhwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhdq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (match_operand:V2SI 1 "register_operand" "0") - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 1)))] - "TARGET_MMX" - "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpcklbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (const_int 85)))] - "TARGET_MMX" - "punpcklbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpcklwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "TARGET_MMX" - "punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckldq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2SI 2 "register_operand" 
"y") - (const_int 1)))] - "TARGET_MMX" - "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - - -;; Miscellaneous stuff - -(define_insn "emms" - [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_MMX" - "emms" - [(set_attr "type" "mmx") - (set_attr "memory" "unknown")]) - -(define_insn "ldmxcsr" - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] - UNSPECV_LDMXCSR)] - "TARGET_SSE" - "ldmxcsr\t%0" - [(set_attr "type" "sse") - (set_attr "memory" "load")]) - -(define_insn "stmxcsr" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] - "TARGET_SSE" - "stmxcsr\t%0" - [(set_attr "type" "sse") - (set_attr "memory" "store")]) - -(define_expand "sfence" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; -}) - -(define_insn "*sfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] - "TARGET_SSE || TARGET_3DNOW_A" - "sfence" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) - -(define_expand "sse_prologue_save" - [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) - (use (match_operand:DI 1 "register_operand" "")) - (use (match_operand:DI 2 "immediate_operand" "")) - (use (label_ref:DI (match_operand 3 "" "")))])] - "TARGET_64BIT" - "") - 
-(define_insn "*sse_prologue_save_insn" - [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") - (match_operand:DI 4 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) - (use (match_operand:DI 1 "register_operand" "r")) - (use (match_operand:DI 2 "const_int_operand" "i")) - (use (label_ref:DI (match_operand 3 "" "X")))] - "TARGET_64BIT - && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 - && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" - "* -{ - int i; - operands[0] = gen_rtx_MEM (Pmode, - gen_rtx_PLUS (Pmode, operands[0], operands[4])); - output_asm_insn (\"jmp\\t%A1\", operands); - for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) - { - operands[4] = adjust_address (operands[0], DImode, i*16); - operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); - PUT_MODE (operands[4], TImode); - if (GET_CODE (XEXP (operands[0], 0)) != PLUS) - output_asm_insn (\"rex\", operands); - output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); - } - (*targetm.asm_out.internal_label) (asm_out_file, \"L\", - CODE_LABEL_NUMBER (operands[3])); - RET; -} - " - [(set_attr "type" "other") - (set_attr "length_immediate" "0") - (set_attr "length_address" "0") - (set_attr "length" "135") - (set_attr "memory" "store") - (set_attr "modrm" "0") - (set_attr "mode" "DI")]) - -;; 3Dnow! 
instructions - -(define_insn "addv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (plus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfadd\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "subv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfsub\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "subrv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym") - (match_operand:V2SF 1 "register_operand" "0")))] - "TARGET_3DNOW" - "pfsubr\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "gtv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpgt\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "gev2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpge\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "eqv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpeq\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "pfmaxv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (smax:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 
"nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmax\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfminv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (smin:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmin\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "mulv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (mult:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmul\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "V2SF")]) - -(define_insn "femms" - [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_3DNOW" - "femms" - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) - -(define_insn "pf2id" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pf2id\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "pf2iw" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (sign_extend:V2SI - (ss_truncate:V2HI - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] - "TARGET_3DNOW_A" - "pf2iw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "pfacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (plus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - 
(parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW" - "pfacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfnacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (minus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfpnacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfpnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pi2fw" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (float:V2SF - (vec_concat:V2SI - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])))) - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_dup 1) - (parallel [(const_int 1)])))))))] - "TARGET_3DNOW_A" - "pi2fw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "floatv2si2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - 
(float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pi2fd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -;; This insn is identical to pavgb in operation, but the opcode is -;; different. To avoid accidentally matching pavgb, use an unspec. - -(define_insn "pavgusb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (unspec:V8QI - [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] - UNSPEC_PAVGUSB))] - "TARGET_3DNOW" - "pavgusb\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "TI")]) - -;; 3DNow reciprocal and sqrt - -(define_insn "pfrcpv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] - UNSPEC_PFRCP))] - "TARGET_3DNOW" - "pfrcp\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrcpit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRCPIT1))] - "TARGET_3DNOW" - "pfrcpit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrcpit2v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRCPIT2))] - "TARGET_3DNOW" - "pfrcpit2\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrsqrtv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] - UNSPEC_PFRSQRT))] - "TARGET_3DNOW" - "pfrsqrt\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrsqit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 
"register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRSQIT1))] - "TARGET_3DNOW" - "pfrsqit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pmulhrwv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_vector:V4SI [(const_int 32768) - (const_int 32768) - (const_int 32768) - (const_int 32768)])) - (const_int 16))))] - "TARGET_3DNOW" - "pmulhrw\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "TI")]) - -(define_insn "pswapdv2si2" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "TI")]) - -(define_insn "pswapdv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "TI")]) - -(define_expand "prefetch" - [(prefetch (match_operand 0 "address_operand" "") - (match_operand:SI 1 "const_int_operand" "") - (match_operand:SI 2 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE || TARGET_3DNOW" -{ - int rw = INTVAL (operands[1]); - int locality = INTVAL (operands[2]); - - if (rw != 0 && rw != 1) - abort (); - if (locality < 0 || locality > 3) - abort (); - if (GET_MODE (operands[0]) != Pmode && GET_MODE (operands[0]) != VOIDmode) - abort (); - - /* Use 3dNOW prefetch in case we are asking for write prefetch not - suported by SSE counterpart or the SSE prefetch is not available - (K6 machines). 
Otherwise use SSE prefetch as it allows specifying - of locality. */ - if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) - operands[2] = GEN_INT (3); - else - operands[1] = const0_rtx; -}) - -(define_insn "*prefetch_sse" - [(prefetch (match_operand:SI 0 "address_operand" "p") - (const_int 0) - (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE && !TARGET_64BIT" -{ - static const char * const patterns[4] = { - "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" - }; - - int locality = INTVAL (operands[1]); - if (locality < 0 || locality > 3) - abort (); - - return patterns[locality]; -} - [(set_attr "type" "sse") - (set_attr "memory" "none")]) - -(define_insn "*prefetch_sse_rex" - [(prefetch (match_operand:DI 0 "address_operand" "p") - (const_int 0) - (match_operand:SI 1 "const_int_operand" ""))] - "TARGET_PREFETCH_SSE && TARGET_64BIT" -{ - static const char * const patterns[4] = { - "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" - }; - - int locality = INTVAL (operands[1]); - if (locality < 0 || locality > 3) - abort (); - - return patterns[locality]; -} - [(set_attr "type" "sse") - (set_attr "memory" "none")]) - -(define_insn "*prefetch_3dnow" - [(prefetch (match_operand:SI 0 "address_operand" "p") - (match_operand:SI 1 "const_int_operand" "n") - (const_int 3))] - "TARGET_3DNOW && !TARGET_64BIT" -{ - if (INTVAL (operands[1]) == 0) - return "prefetch\t%a0"; - else - return "prefetchw\t%a0"; -} - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) - -(define_insn "*prefetch_3dnow_rex" - [(prefetch (match_operand:DI 0 "address_operand" "p") - (match_operand:SI 1 "const_int_operand" "n") - (const_int 3))] - "TARGET_3DNOW && TARGET_64BIT" -{ - if (INTVAL (operands[1]) == 0) - return "prefetch\t%a0"; - else - return "prefetchw\t%a0"; -} - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) - -;; SSE2 support - -(define_insn "addv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - 
(plus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "addpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmaddv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "addsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "subv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "subpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "subsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "mulv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "mulpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "V2DF")]) - -(define_insn "vmmulv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "mulsd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssemul") - (set_attr "mode" "DF")]) - -(define_insn "divv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (div:V2DF (match_operand:V2DF 1 "register_operand" "0") 
- (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "divpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "V2DF")]) - -(define_insn "vmdivv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "divsd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssediv") - (set_attr "mode" "DF")]) - -;; SSE min/max - -(define_insn "smaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "maxpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsmaxv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "maxsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "sminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "minpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsminv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE2" - "minsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) -;; SSE2 square root. There doesn't appear to be an extension for the -;; reciprocal/rsqrt instructions if the Intel manual is to be believed. 
- -(define_insn "sqrtv2df2" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))] - "TARGET_SSE2" - "sqrtpd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V2DF")]) - -(define_insn "vmsqrtv2df2" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")) - (match_operand:V2DF 2 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE2" - "sqrtsd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "SF")]) - -;; SSE mask-generating compares - -(define_insn "maskcmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (match_operator:V2DI 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")]))] - "TARGET_SSE2" - "cmp%D3pd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "V2DF")]) - -(define_insn "maskncmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (not:V2DI - (match_operator:V2DI 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")])))] - "TARGET_SSE2" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordps\t{%2, %0|%0, %2}"; - else - return "cmpn%D3pd\t{%2, %0|%0, %2}"; -} - [(set_attr "type" "ssecmp") - (set_attr "mode" "V2DF")]) - -(define_insn "vmmaskcmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (match_operator:V2DI 3 "sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")]) - (subreg:V2DI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE2" - "cmp%D3sd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) - -(define_insn "vmmaskncmpv2df3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (not:V2DI - (match_operator:V2DI 3 
"sse_comparison_operator" - [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "x")])) - (subreg:V2DI (match_dup 1) 0) - (const_int 1)))] - "TARGET_SSE2" -{ - if (GET_CODE (operands[3]) == UNORDERED) - return "cmpordsd\t{%2, %0|%0, %2}"; - else - return "cmpn%D3sd\t{%2, %0|%0, %2}"; -} - [(set_attr "type" "ssecmp") - (set_attr "mode" "DF")]) - -(define_insn "sse2_comi" - [(set (reg:CCFP FLAGS_REG) - (compare:CCFP (vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "comisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "DF")]) - -(define_insn "sse2_ucomi" - [(set (reg:CCFPU FLAGS_REG) - (compare:CCFPU (vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "ucomisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecomi") - (set_attr "mode" "DF")]) - -;; SSE Strange Moves. 
- -(define_insn "sse2_movmskpd" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE2" - "movmskpd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] - UNSPEC_MOVMSK))] - "TARGET_SSE2" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_maskmovdqu" - [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "register_operand" "x")] - UNSPEC_MASKMOV))] - "TARGET_SSE2" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_maskmovdqu_rex64" - [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") - (match_operand:V16QI 2 "register_operand" "x")] - UNSPEC_MASKMOV))] - "TARGET_SSE2" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movntv2df" - [(set (match_operand:V2DF 0 "memory_operand" "=m") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movntpd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_movntv2di" - [(set (match_operand:V2DI 0 "memory_operand" "=m") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movntdq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_movntsi" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec:SI [(match_operand:SI 1 
"register_operand" "r")] - UNSPEC_MOVNT))] - "TARGET_SSE2" - "movnti\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -;; SSE <-> integer/MMX conversions - -;; Conversions between SI and SF - -(define_insn "cvtdq2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvtdq2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "cvtps2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvtps2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvttps2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX))] - "TARGET_SSE2" - "cvttps2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -;; Conversions between SI and DF - -(define_insn "cvtdq2pd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (float:V2DF (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel - [(const_int 0) - (const_int 1)]))))] - "TARGET_SSE2" - "cvtdq2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "cvtpd2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_concat:V4SI - (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_SSE2" - "cvtpd2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvttpd2dq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_concat:V4SI - (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_SSE2" - "cvttpd2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - 
(set_attr "mode" "TI")]) - -(define_insn "cvtpd2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "cvtpd2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvttpd2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] - UNSPEC_FIX))] - "TARGET_SSE2" - "cvttpd2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "cvtpi2pd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] - "TARGET_SSE2" - "cvtpi2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -;; Conversions between SI and DF - -(define_insn "cvtsd2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "cvtsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "mode" "SI")]) - -(define_insn "cvtsd2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m") - (parallel [(const_int 0)]))))] - "TARGET_SSE2 && TARGET_64BIT" - "cvtsd2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "athlon_decode" "double,vector") - (set_attr "mode" "DI")]) - -(define_insn "cvttsd2si" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") - (parallel [(const_int 0)]))] UNSPEC_FIX))] - "TARGET_SSE2" - "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SI") - (set_attr "athlon_decode" "double,vector")]) - -(define_insn "cvttsd2siq" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (unspec:DI [(vec_select:DF 
(match_operand:V2DF 1 "register_operand" "x,xm") - (parallel [(const_int 0)]))] UNSPEC_FIX))] - "TARGET_SSE2 && TARGET_64BIT" - "cvttsd2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DI") - (set_attr "athlon_decode" "double,vector")]) - -(define_insn "cvtsi2sd" - [(set (match_operand:V2DF 0 "register_operand" "=x,x") - (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") - (vec_duplicate:V2DF - (float:DF - (match_operand:SI 2 "nonimmediate_operand" "r,rm"))) - (const_int 2)))] - "TARGET_SSE2" - "cvtsi2sd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,direct")]) - -(define_insn "cvtsi2sdq" - [(set (match_operand:V2DF 0 "register_operand" "=x,x") - (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") - (vec_duplicate:V2DF - (float:DF - (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) - (const_int 2)))] - "TARGET_SSE2 && TARGET_64BIT" - "cvtsi2sdq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,direct")]) - -;; Conversions between SF and DF - -(define_insn "cvtsd2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0,0") - (vec_duplicate:V4SF - (float_truncate:V2SF - (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))) - (const_int 14)))] - "TARGET_SSE2" - "cvtsd2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "athlon_decode" "vector,double") - (set_attr "mode" "SF")]) - -(define_insn "cvtss2sd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") - (float_extend:V2DF - (vec_select:V2SF - (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))) - (const_int 2)))] - "TARGET_SSE2" - "cvtss2sd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) - -(define_insn "cvtpd2ps" - [(set 
(match_operand:V4SF 0 "register_operand" "=x") - (subreg:V4SF - (vec_concat:V4SI - (subreg:V2SI (float_truncate:V2SF - (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0) - (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))] - "TARGET_SSE2" - "cvtpd2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "cvtps2pd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (float_extend:V2DF - (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] - "TARGET_SSE2" - "cvtps2pd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -;; SSE2 variants of MMX insns - -;; MMX arithmetic - -(define_insn "addv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "addv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "addv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "addv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ssaddv16qi3" - [(set (match_operand:V16QI 0 "register_operand" 
"=x") - (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ssaddv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "usaddv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "usaddv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (minus:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (minus:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (minus:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubd\t{%2, %0|%0, %2}" - 
[(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "subv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (minus:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sssubv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sssubv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ussubv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "ussubv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "mulv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (mult:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "smulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - 
(lshiftrt:V8SI - (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) - (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_int 16))))] - "TARGET_SSE2" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "umulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - (lshiftrt:V8SI - (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) - (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_int 16))))] - "TARGET_SSE2" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "sse2_umulsidi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (mult:DI (zero_extend:DI (vec_select:SI - (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0)]))) - (zero_extend:DI (vec_select:SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])))))] - "TARGET_SSE2" - "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "sse2_umulv2siv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (mult:V2DI (zero_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2)]))) - (zero_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) (const_int 2)])))))] - "TARGET_SSE2" - "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pmaddwd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 
2) - (const_int 4) - (const_int 6)])))) - (mult:V4SI - (sign_extend:V4SI (vec_select:V4HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI (vec_select:V4HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))))] - "TARGET_SSE2" - "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -;; Same as pxor, but don't show input operands so that we don't think -;; they are live. -(define_insn "sse2_clrti" - [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))] - "TARGET_SSE2" -{ - if (get_attr_mode (insn) == MODE_TI) - return "pxor\t%0, %0"; - else - return "xorps\t%0, %0"; -} - [(set_attr "type" "ssemov") - (set_attr "memory" "none") - (set (attr "mode") - (if_then_else - (ne (symbol_ref "optimize_size") - (const_int 0)) - (const_string "V4SF") - (const_string "TI")))]) - -;; MMX unsigned averages/sum of absolute differences - -(define_insn "sse2_uavgv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (ashiftrt:V16QI - (plus:V16QI (plus:V16QI - (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")) - (const_vector:V16QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1)))] - "TARGET_SSE2" - "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sse2_uavgv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashiftrt:V8HI - (plus:V8HI (plus:V8HI - (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1)))] - 
"TARGET_SSE2" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -;; @@@ this isn't the right representation. -(define_insn "sse2_psadbw" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")] - UNSPEC_PSADBW))] - "TARGET_SSE2" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - - -;; MMX insert/extract/shuffle - -(define_expand "sse2_pinsrw" - [(set (match_operand:V8HI 0 "register_operand" "") - (vec_merge:V8HI - (match_operand:V8HI 1 "register_operand" "") - (vec_duplicate:V8HI - (match_operand:SI 2 "nonimmediate_operand" "")) - (match_operand:SI 3 "const_0_to_7_operand" "")))] - "TARGET_SSE2" -{ - operands[2] = gen_lowpart (HImode, operands[2]); - operands[3] = GEN_INT (1 << INTVAL (operands[3])); -}) - -(define_insn "*sse2_pinsrw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_merge:V8HI - (match_operand:V8HI 1 "register_operand" "0") - (vec_duplicate:V8HI - (match_operand:HI 2 "nonimmediate_operand" "rm")) - (match_operand:SI 3 "const_pow2_1_to_128_operand" "N")))] - "TARGET_SSE2" -{ - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; -} - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pextrw" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI - (vec_select:HI (match_operand:V8HI 1 "register_operand" "x") - (parallel - [(match_operand:SI 2 "const_0_to_7_operand" "N")]))))] - "TARGET_SSE2" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pshufd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (unspec:V4SI [(match_operand:V4SI 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE2" - "pshufd\t{%2, %1, %0|%0, %1, %2}" - 
[(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pshuflw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_PSHUFLW))] - "TARGET_SSE2" - "pshuflw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -(define_insn "sse2_pshufhw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_PSHUFHW))] - "TARGET_SSE2" - "pshufhw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) - -;; MMX mask-generating comparisons - -(define_insn "eqv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (eq:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "eqv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (eq:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "eqv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (eq:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "gtv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (gt:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "gtv8hi3" - [(set 
(match_operand:V8HI 0 "register_operand" "=x") - (gt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - -(define_insn "gtv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (gt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecmp") - (set_attr "mode" "TI")]) - - -;; MMX max/min insns - -(define_insn "umaxv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (umax:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "smaxv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (smax:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "uminv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (umin:V16QI (match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - -(define_insn "sminv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (smin:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" - "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "mode" "TI")]) - - -;; MMX shifts - -(define_insn "ashrv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 
"nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashrv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv2di3" - [(set (match_operand:V2DI 0 "register_operand" 
"=x") - (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] - "TARGET_SSE2" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashrv8hi3_ti" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashrv4si3_ti" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv8hi3_ti" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv4si3_ti" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "lshrv2di3_ti" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv8hi3_ti" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 
"nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv4si3_ti" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -(define_insn "ashlv2di3_ti" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] - "TARGET_SSE2" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) - -;; See logical MMX insns for the reason for the unspec. Strictly speaking -;; we wouldn't need here it since we never generate TImode arithmetic. - -;; There has to be some kind of prize for the weirdest new instruction... -(define_insn "sse2_ashlti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (unspec:TI - [(ashift:TI (match_operand:TI 1 "register_operand" "0") - (mult:SI (match_operand:SI 2 "immediate_operand" "i") - (const_int 8)))] UNSPEC_NOP))] - "TARGET_SSE2" - "pslldq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) +(define_insn "addv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (plus:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfadd\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_lshrti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (unspec:TI - [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0") - (mult:SI (match_operand:SI 2 "immediate_operand" "i") - (const_int 8)))] UNSPEC_NOP))] - "TARGET_SSE2" - "psrldq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseishft") - (set_attr "mode" "TI")]) +(define_insn 
"subv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (minus:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfsub\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -;; SSE unpack +(define_insn "subrv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym") + (match_operand:V2SF 1 "register_operand" "0")))] + "TARGET_3DNOW" + "pfsubr\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_unpckhpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (vec_select:DF (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 1)])) - (vec_select:DF (match_operand:V2DF 2 "register_operand" "x") - (parallel [(const_int 1)]))))] - "TARGET_SSE2" - "unpckhpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_unpcklpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_concat:V2DF - (vec_select:DF (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:DF (match_operand:V2DF 2 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE2" - "unpcklpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) +(define_insn "gtv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpgt\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) -;; MMX pack/unpack insns. 
+(define_insn "gev2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpge\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_packsswb" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_concat:V16QI - (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) - (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] - "TARGET_SSE2" - "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "eqv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (eq:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpeq\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_packssdw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_concat:V8HI - (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0")) - (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))] - "TARGET_SSE2" - "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pfmaxv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (smax:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfmax\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_packuswb" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_concat:V16QI - (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) - (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] - "TARGET_SSE2" - "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pfminv2sf3" + [(set (match_operand:V2SF 0 
"register_operand" "=y") + (smin:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfmin\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_punpckhbw" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_merge:V16QI - (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") - (parallel [(const_int 8) (const_int 0) - (const_int 9) (const_int 1) - (const_int 10) (const_int 2) - (const_int 11) (const_int 3) - (const_int 12) (const_int 4) - (const_int 13) (const_int 5) - (const_int 14) (const_int 6) - (const_int 15) (const_int 7)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x") - (parallel [(const_int 0) (const_int 8) - (const_int 1) (const_int 9) - (const_int 2) (const_int 10) - (const_int 3) (const_int 11) - (const_int 4) (const_int 12) - (const_int 5) (const_int 13) - (const_int 6) (const_int 14) - (const_int 7) (const_int 15)])) - (const_int 21845)))] - "TARGET_SSE2" - "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "mulv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (mult:V2SF (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfmul\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_punpckhwd" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_merge:V8HI - (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") - (parallel [(const_int 4) (const_int 0) - (const_int 5) (const_int 1) - (const_int 6) (const_int 2) - (const_int 7) (const_int 3)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5) - (const_int 2) (const_int 6) - (const_int 3) (const_int 7)])) - (const_int 85)))] - "TARGET_SSE2" - "punpckhwd\t{%2, %0|%0, %2}" 
- [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "femms" + [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber (reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] + "TARGET_3DNOW" + "femms" + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) -(define_insn "sse2_punpckhdq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") - (parallel [(const_int 2) (const_int 0) - (const_int 3) (const_int 1)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x") - (parallel [(const_int 0) (const_int 2) - (const_int 1) (const_int 3)])) - (const_int 5)))] - "TARGET_SSE2" - "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pf2id" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pf2id\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_punpcklbw" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (vec_merge:V16QI - (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 8) - (const_int 1) (const_int 9) - (const_int 2) (const_int 10) - (const_int 3) (const_int 11) - (const_int 4) (const_int 12) - (const_int 5) (const_int 13) - (const_int 6) (const_int 14) - (const_int 7) (const_int 15)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x") - (parallel [(const_int 8) (const_int 0) - (const_int 9) (const_int 1) - (const_int 10) (const_int 2) - (const_int 11) (const_int 3) - (const_int 12) 
(const_int 4) - (const_int 13) (const_int 5) - (const_int 14) (const_int 6) - (const_int 15) (const_int 7)])) - (const_int 21845)))] - "TARGET_SSE2" - "punpcklbw\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pf2iw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (sign_extend:V2SI + (ss_truncate:V2HI + (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] + "TARGET_3DNOW_A" + "pf2iw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_punpcklwd" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (vec_merge:V8HI - (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 4) - (const_int 1) (const_int 5) - (const_int 2) (const_int 6) - (const_int 3) (const_int 7)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") - (parallel [(const_int 4) (const_int 0) - (const_int 5) (const_int 1) - (const_int 6) (const_int 2) - (const_int 7) (const_int 3)])) - (const_int 85)))] - "TARGET_SSE2" - "punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pfacc" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (plus:SF + (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])))))] + "TARGET_3DNOW" + "pfacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_punpckldq" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2) - (const_int 1) (const_int 3)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" 
"x") - (parallel [(const_int 2) (const_int 0) - (const_int 3) (const_int 1)])) - (const_int 5)))] - "TARGET_SSE2" - "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pfnacc" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (minus:SF + (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)]))) + (minus:SF + (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])))))] + "TARGET_3DNOW_A" + "pfnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_punpcklqdq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2DI 1 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE2" - "punpcklqdq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pfpnacc" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (minus:SF + (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) + (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) + (parallel [(const_int 1)])))))] + "TARGET_3DNOW_A" + "pfpnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_punpckhqdq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_merge:V2DI - (match_operand:V2DI 1 "register_operand" "0") - (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 1)))] - "TARGET_SSE2" - "punpckhqdq\t{%2, 
%0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "pi2fw" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF + (vec_concat:V2SI + (sign_extend:SI + (truncate:HI + (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])))) + (sign_extend:SI + (truncate:HI + (vec_select:SI (match_dup 1) + (parallel [(const_int 1)])))))))] + "TARGET_3DNOW_A" + "pi2fw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) -;; SSE2 moves +(define_insn "floatv2si2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pi2fd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) -(define_insn "sse2_movapd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVA))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movapd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_movupd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movupd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) +;; This insn is identical to pavgb in operation, but the opcode is +;; different. To avoid accidentally matching pavgb, use an unspec. 
-(define_insn "sse2_movdqa" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVA))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movdqa\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") +(define_insn "pavgusb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI + [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PAVGUSB))] + "TARGET_3DNOW" + "pavgusb\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") (set_attr "mode" "TI")]) -(define_insn "sse2_movdqu" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] - UNSPEC_MOVU))] - "TARGET_SSE2 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "movdqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") +;; 3DNow reciprocal and sqrt + +(define_insn "pfrcpv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRCP))] + "TARGET_3DNOW" + "pfrcp\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") (set_attr "mode" "TI")]) -(define_insn "sse2_movdq2q" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") - (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") - (parallel [(const_int 0)])))] - "TARGET_SSE2 && !TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") +(define_insn "pfrcpit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT1))] + "TARGET_3DNOW" + "pfrcpit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") (set_attr "mode" "TI")]) -(define_insn "sse2_movdq2q_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") - 
(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") - (parallel [(const_int 0)])))] - "TARGET_SSE2 && TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") +(define_insn "pfrcpit2v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT2))] + "TARGET_3DNOW" + "pfrcpit2\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") (set_attr "mode" "TI")]) -(define_insn "sse2_movq2dq" - [(set (match_operand:V2DI 0 "register_operand" "=x,?x") - (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") - (const_int 0)))] - "TARGET_SSE2 && !TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt,ssemov") +(define_insn "pfrsqrtv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQRT))] + "TARGET_3DNOW" + "pfrsqrt\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") (set_attr "mode" "TI")]) - -(define_insn "sse2_movq2dq_rex64" - [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") - (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") - (const_int 0)))] - "TARGET_SSE2 && TARGET_64BIT" - "@ - movq\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt,ssemov,ssecvt") + +(define_insn "pfrsqit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQIT1))] + "TARGET_3DNOW" + "pfrsqit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") (set_attr "mode" "TI")]) -(define_insn "sse2_movq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (vec_concat:V2DI (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 
0)])) - (const_int 0)))] - "TARGET_SSE2" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") +(define_insn "pmulhrwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_vector:V4SI [(const_int 32768) + (const_int 32768) + (const_int 32768) + (const_int 32768)])) + (const_int 16))))] + "TARGET_3DNOW" + "pmulhrw\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") (set_attr "mode" "TI")]) -(define_insn "sse2_loadd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (vec_merge:V4SI - (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr")) - (const_vector:V4SI [(const_int 0) - (const_int 0) - (const_int 0) - (const_int 0)]) - (const_int 1)))] - "TARGET_SSE2" - "movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") +(define_insn "pswapdv2si2" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "TI")]) -(define_insn "sse2_stored" - [(set (match_operand:SI 0 "nonimmediate_operand" "=mr") - (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") - (parallel [(const_int 0)])))] - "TARGET_SSE2" - "movd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") +(define_insn "pswapdv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") (set_attr "mode" "TI")]) -;; Store the high double of the source vector into the double destination. 
-(define_insn "sse2_storehpd" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,Y,Y") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o") - (parallel [(const_int 1)])))] - "TARGET_SSE2" - "@ - movhpd\t{%1, %0|%0, %1} - unpckhpd\t%0, %0 - #" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (vec_select:DF - (match_operand:V2DF 1 "memory_operand" "") - (parallel [(const_int 1)])))] - "TARGET_SSE2 && reload_completed" - [(const_int 0)] +(define_expand "prefetch" + [(prefetch (match_operand 0 "address_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE || TARGET_3DNOW" { - emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8)); - DONE; -}) - -;; Load the high double of the target vector from the source scalar. -(define_insn "sse2_loadhpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,o") - (vec_concat:V2DF - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") - (parallel [(const_int 0)])) - (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y")))] - "TARGET_SSE2" - "@ - movhpd\t{%2, %0|%0, %2} - unpcklpd\t{%2, %0|%0, %2} - #" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) + int rw = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); -(define_split - [(set (match_operand:V2DF 0 "memory_operand" "") - (vec_concat:V2DF - (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) - (match_operand:DF 1 "register_operand" "")))] - "TARGET_SSE2 && reload_completed" - [(const_int 0)] -{ - emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]); - DONE; -}) + if (rw != 0 && rw != 1) + abort (); + if (locality < 0 || locality > 3) + abort (); + if (GET_MODE (operands[0]) != Pmode && GET_MODE (operands[0]) != VOIDmode) + abort (); -;; Store the low double of the source vector into the double destination. 
-(define_expand "sse2_storelpd" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "") - (parallel [(const_int 0)])))] - "TARGET_SSE2" -{ - operands[1] = gen_lowpart (DFmode, operands[1]); - emit_move_insn (operands[0], operands[1]); - DONE; + /* Use 3dNOW prefetch in case we are asking for write prefetch not + supported by SSE counterpart or the SSE prefetch is not available + (K6 machines). Otherwise use SSE prefetch as it allows specifying + of locality. */ + if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) + operands[2] = GEN_INT (3); + else + operands[1] = const0_rtx; }) -;; Load the low double of the target vector from the source scalar. -(define_insn "sse2_loadlpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m") - (vec_concat:V2DF - (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y") - (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") - (parallel [(const_int 1)]))))] - "TARGET_SSE2" - "@ - movlpd\t{%2, %0|%0, %2} - movsd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -;; Merge the low part of the source vector into the low part of the target.
-(define_insn "sse2_movsd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m") - (vec_merge:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0") - (match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y") - (const_int 2)))] - "TARGET_SSE2" - "@movsd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF,V2DF,V2DF")]) - -(define_expand "sse2_loadsd" - [(match_operand:V2DF 0 "register_operand" "") - (match_operand:DF 1 "memory_operand" "")] - "TARGET_SSE2" +(define_insn "*prefetch_sse" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && !TARGET_64BIT" { - emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], - CONST0_RTX (V2DFmode))); - DONE; -}) - -(define_insn "sse2_loadsd_1" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF - (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) - (match_operand:V2DF 2 "const0_operand" "X") - (const_int 1)))] - "TARGET_SSE2" - "movsd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) - -(define_insn "sse2_storesd" - [(set (match_operand:DF 0 "memory_operand" "=m") - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)])))] - "TARGET_SSE2" - "movsd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) - -(define_insn "sse2_shufpd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE2" - ;; @@@ check operand order for intel/nonintel syntax - "shufpd\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_clflush" - [(unspec_volatile [(match_operand 0 "address_operand" "p")] - UNSPECV_CLFLUSH)] - 
"TARGET_SSE2" - "clflush\t%a0" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; -(define_expand "sse2_mfence" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] - "TARGET_SSE2" -{ - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; -}) + int locality = INTVAL (operands[1]); + if (locality < 0 || locality > 3) + abort (); -(define_insn "*mfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] - "TARGET_SSE2" - "mfence" + return patterns[locality]; +} [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) + (set_attr "memory" "none")]) -(define_expand "sse2_lfence" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] - "TARGET_SSE2" +(define_insn "*prefetch_sse_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && TARGET_64BIT" { - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; -}) - -(define_insn "*lfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] - "TARGET_SSE2" - "lfence" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) - -;; SSE3 - -(define_insn "mwait" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") - (match_operand:SI 1 "register_operand" "c")] - UNSPECV_MWAIT)] - "TARGET_SSE3" - "mwait\t%0, %1" - [(set_attr "length" "3")]) - -(define_insn "monitor" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") - (match_operand:SI 1 "register_operand" "c") - (match_operand:SI 2 "register_operand" "d")] - UNSPECV_MONITOR)] - "TARGET_SSE3" - "monitor\t%0, %1, %2" - [(set_attr "length" "3")]) - -;; SSE3 arithmetic - -(define_insn "addsubv4sf3" - [(set (match_operand:V4SF 0 
"register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_ADDSUB))] - "TARGET_SSE3" - "addsubps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "addsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")] - UNSPEC_ADDSUB))] - "TARGET_SSE3" - "addsubpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "haddv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_HADD))] - "TARGET_SSE3" - "haddps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) - -(define_insn "haddv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")] - UNSPEC_HADD))] - "TARGET_SSE3" - "haddpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - -(define_insn "hsubv4sf3" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")] - UNSPEC_HSUB))] - "TARGET_SSE3" - "hsubps\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V4SF")]) + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; -(define_insn "hsubv2df3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "nonimmediate_operand" "xm")] - UNSPEC_HSUB))] - "TARGET_SSE3" - "hsubpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "V2DF")]) - 
-(define_insn "movshdup" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))] - "TARGET_SSE3" - "movshdup\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) + int locality = INTVAL (operands[1]); + if (locality < 0 || locality > 3) + abort (); -(define_insn "movsldup" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))] - "TARGET_SSE3" - "movsldup\t{%1, %0|%0, %1}" + return patterns[locality]; +} [(set_attr "type" "sse") - (set_attr "mode" "V4SF")]) + (set_attr "memory" "none")]) -(define_insn "lddqu" - [(set (match_operand:V16QI 0 "register_operand" "=x") - (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] - UNSPEC_LDQQU))] - "TARGET_SSE3" - "lddqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) +(define_insn "*prefetch_3dnow" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && !TARGET_64BIT" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) -(define_insn "loadddup" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))] - "TARGET_SSE3" - "movddup\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) +(define_insn "*prefetch_3dnow_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && TARGET_64BIT" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) -(define_insn "movddup" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_duplicate:V2DF - 
(vec_select:DF (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))))] - "TARGET_SSE3" - "movddup\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) +(include "sse.md") diff --git a/gcc/config/i386/ppro.md b/gcc/config/i386/ppro.md index ec82977d8d2..29dd4aafcb4 100644 --- a/gcc/config/i386/ppro.md +++ b/gcc/config/i386/ppro.md @@ -687,7 +687,7 @@ (define_insn_reservation "ppro_sse_div_V4SF_load" 48 (and (eq_attr "cpu" "pentiumpro") - (and (eq_attr "memory" "none") + (and (eq_attr "memory" "load") (and (eq_attr "mode" "V4SF") (eq_attr "type" "ssediv")))) "decoder0,(p2+p0)*2,p0*32") @@ -696,14 +696,14 @@ (and (eq_attr "cpu" "pentiumpro") (and (eq_attr "memory" "none") (and (eq_attr "mode" "V4SF") - (eq_attr "type" "sselog")))) + (eq_attr "type" "sselog,sselog1")))) "decodern,p1") (define_insn_reservation "ppro_sse_log_V4SF_load" 2 (and (eq_attr "cpu" "pentiumpro") - (and (eq_attr "memory" "none") + (and (eq_attr "memory" "load") (and (eq_attr "mode" "V4SF") - (eq_attr "type" "sselog")))) + (eq_attr "type" "sselog,sselog1")))) "decoder0,(p2+p1)") (define_insn_reservation "ppro_sse_mov_V4SF" 1 diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 5fa93fa05b9..fde85dd6d69 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -319,12 +319,6 @@ (and (match_operand 0 "const_double_operand") (match_test "GET_MODE_SIZE (mode) <= 8"))))) -;; Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand -;; for shift & compare patterns, as shifting by 0 does not change flags). -(define_predicate "const_int_1_31_operand" - (and (match_code "const_int") - (match_test "INTVAL (op) >= 1 && INTVAL (op) <= 31"))) - ;; Returns nonzero if OP is either a symbol reference or a sum of a symbol ;; reference and a constant. (define_predicate "symbolic_operand" @@ -521,6 +515,11 @@ return i == 2 || i == 4 || i == 8; }) +;; Match 0 or 1. 
+(define_predicate "const_0_to_1_operand" + (and (match_code "const_int") + (match_test "op == const0_rtx || op == const1_rtx"))) + ;; Match 0 to 3. (define_predicate "const_0_to_3_operand" (and (match_code "const_int") @@ -546,6 +545,30 @@ (and (match_code "const_int") (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 255"))) +;; Match (0 to 255) * 8 +(define_predicate "const_0_to_255_mul_8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT val = INTVAL (op); + return val <= 255*8 && val % 8 == 0; +}) + +;; Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand +;; for shift & compare patterns, as shifting by 0 does not change flags). +(define_predicate "const_1_to_31_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 1 && INTVAL (op) <= 31"))) + +;; Match 2 or 3. +(define_predicate "const_2_to_3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 3"))) + +;; Match 4 to 7. +(define_predicate "const_4_to_7_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 4 && INTVAL (op) <= 7"))) + ;; Match exactly one bit in 4-bit mask. (define_predicate "const_pow2_1_to_8_operand" (match_code "const_int") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md new file mode 100644 index 00000000000..ee90d0664d6 --- /dev/null +++ b/gcc/config/i386/sse.md @@ -0,0 +1,3111 @@ +;; GCC machine description for SSE instructions +;; Copyright (C) 2005 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + + +;; 16 byte integral modes handled by SSE, minus TImode, which gets +;; special-cased for TARGET_64BIT. +(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI]) + +;; All 16-byte vector modes handled by SSE +(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) + +;; Mix-n-match +(define_mode_macro SSEMODE12 [V16QI V8HI]) +(define_mode_macro SSEMODE24 [V8HI V4SI]) +(define_mode_macro SSEMODE124 [V16QI V8HI V4SI]) +(define_mode_macro SSEMODE248 [V8HI V4SI V2DI]) + +;; Mapping from integer vector mode to mnemonic suffix +(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) + +;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Move patterns +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; All of these patterns are enabled for SSE1 as well as SSE2. +;; This is essential for maintaining stable calling conventions. 
+ +(define_expand "mov<mode>" + [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "") + (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (<MODE>mode, operands); + DONE; +}) + +(define_insn "*mov<mode>_internal" + [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + + (eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) + +(define_expand "movv4sf" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V4SFmode, operands); + DONE; +}) + +(define_insn "*movv4sf_internal" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "mode" "V4SF")]) + +(define_split + [(set (match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE &&
reload_completed" + [(const_int 0)] +{ + rtx x = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); + emit_insn (gen_sse_loadss (operands[0], x)); + DONE; +}) + +(define_expand "movv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V2DFmode, operands); + DONE; +}) + +(define_insn "*movv2df_internal" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "xorpd\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movapd\t{%1, %0|%0, %1}"; + default: + abort (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set (attr "mode") + (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + (eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "V2DF")) + (eq_attr "alternative" "2") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "V2DF"))] + (const_string "V2DF")))]) + +(define_split + [(set (match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE2 && reload_completed" + [(const_int 0)] +{ + rtx x = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); + emit_insn (gen_sse2_loadsd (operands[0], x)); + DONE; +}) + +(define_expand "movmisalign<mode>" + [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") + (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move_misalign (<MODE>mode, operands); +
DONE; +}) + +(define_insn "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "movups\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_insn "sse2_movupd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "movupd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movdqu" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "movdqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse_movntv4sf" + [(set (match_operand:V4SF 0 "memory_operand" "=m") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE" + "movntps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_insn "sse2_movntv2df" + [(set (match_operand:V2DF 0 "memory_operand" "=m") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movntv2di" + [(set (match_operand:V2DI 0 "memory_operand" "=m") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movntsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movnti\t{%1,
%0|%0, %1}"
+  ;; This insn stores an SImode value from a general register, so its
+  ;; "mode" attribute is SI rather than a vector float mode.
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "SI")])
+
+;; lddqu: load a V16QI value from memory into an SSE register.
+(define_insn "sse3_lddqu"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
+                      UNSPEC_LDQQU))]
+  "TARGET_SSE3"
+  "lddqu\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Negation and absolute value are handed to
+;; ix86_expand_fp_absneg_operator, which emits the whole sequence (DONE).
+(define_expand "negv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
+
+(define_expand "absv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
+
+;; The expander calls ix86_fixup_binary_operands_no_copy on the operands
+;; and then emits its own pattern, which the anonymous insn below matches.
+(define_expand "addv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
+
+(define_insn "*addv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+  "addps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar form: vec_merge mask 1 takes element 0 from the sum and the
+;; remaining elements from operand 1.
+(define_insn "sse_vmaddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
+  "addss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+;; V4SF subtraction.  The expander calls
+;; ix86_fixup_binary_operands_no_copy; operand 1 must be a register and
+;; is tied to the destination ("0") in the insns.
+(define_expand "subv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
+                    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
+
+(define_insn "*subv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "subps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar form: element 0 from the difference, the rest from operand 1.
+(define_insn "sse_vmsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE"
+  "subss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+;; V4SF multiplication.  Operand 1 is marked commutative ("%0") in the
+;; insns, and their conditions check ix86_binary_operator_ok.
+(define_expand "mulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
+
+(define_insn "*mulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
+  "mulps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar form: element 0 from the product, the rest from operand 1.
+(define_insn "sse_vmmulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
+  "mulss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "SF")])
+
+;; V4SF division.  Like subtraction, operand 1 must be a register tied to
+;; the destination.
+(define_expand "divv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
+
+(define_insn "*divv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "divps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar form: element 0 from the quotient, the rest from operand 1.
+(define_insn "sse_vmdivv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE"
+  "divss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "mode" "SF")])
+
+;; rcpps / rcpss, represented as UNSPEC_RCP.  In the scalar form the
+;; elements other than 0 come from operand 2, which is tied to the
+;; destination.
+(define_insn "sse_rcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF
+          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+  "TARGET_SSE"
+  "rcpps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmrcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                       UNSPEC_RCP)
+          (match_operand:V4SF 2 "register_operand" "0")
+          (const_int 1)))]
+  "TARGET_SSE"
+  "rcpss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; rsqrtps / rsqrtss, represented as UNSPEC_RSQRT.
+(define_insn "sse_rsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF
+          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+  "TARGET_SSE"
+  "rsqrtps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmrsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                       UNSPEC_RSQRT)
+          (match_operand:V4SF 2 "register_operand" "0")
+          (const_int 1)))]
+  "TARGET_SSE"
+  "rsqrtss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; Packed and scalar square root.  In the scalar form the elements other
+;; than 0 come from operand 2, which is tied to the destination.
+(define_insn "sqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "sqrtps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+          (match_operand:V4SF 2 "register_operand" "0")
+          (const_int 1)))]
+  "TARGET_SSE"
+  "sqrtss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; V4SF maximum: expander, packed insn, and scalar (element 0 only) insn.
+(define_expand "smaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);")
+
+(define_insn "*smaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
+  "maxps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmsmaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
+  "maxss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; V4SF minimum: expander, packed insn, and scalar (element 0 only) insn.
+(define_expand "sminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);")
+
+(define_insn "*sminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
+  "minps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse_vmsminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
+  "minss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; addsubps subtracts in elements 0 and 2 and adds in elements 1 and 3.
+;; A set bit in the vec_merge mask selects the first (plus) arm, so the
+;; mask must be 0b1010 = 10; a mask of 5 would describe the opposite
+;; lane assignment.
+(define_insn "sse3_addsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (plus:V4SF
+            (match_operand:V4SF 1 "register_operand" "0")
+            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (minus:V4SF (match_dup 1) (match_dup 2))
+          (const_int 10)))]
+  "TARGET_SSE3"
+  "addsubps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; Horizontal add: the low V2SF half holds the pairwise sums of operand 1,
+;; the high half the pairwise sums of operand 2.
+(define_insn "sse3_haddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_concat:V4SF
+          (vec_concat:V2SF
+            (plus:SF
+              (vec_select:SF
+                (match_operand:V4SF 1 "register_operand" "0")
+                (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+            (plus:SF
+              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+          (vec_concat:V2SF
+            (plus:SF
+              (vec_select:SF
+                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+                (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+            (plus:SF
+              (vec_select:SF (match_dup 2) (parallel [(const_int
2)]))
+              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+  "TARGET_SSE3"
+  "haddps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;; Horizontal subtract: the low V2SF half holds the pairwise differences
+;; of operand 1, the high half the pairwise differences of operand 2.
+(define_insn "sse3_hsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_concat:V4SF
+          (vec_concat:V2SF
+            (minus:SF
+              (vec_select:SF
+                (match_operand:V4SF 1 "register_operand" "0")
+                (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+            (minus:SF
+              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+          (vec_concat:V2SF
+            (minus:SF
+              (vec_select:SF
+                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+                (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+            (minus:SF
+              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+  "TARGET_SSE3"
+  "hsubps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V4SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Mask-producing compare.  The comparison code is operand 3, matched by
+;; sse_comparison_operator and printed into the mnemonic with %D3.
+(define_insn "sse_maskcmpv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (match_operator:V4SF 3 "sse_comparison_operator"
+          [(match_operand:V4SF 1 "register_operand" "0")
+           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
+  "TARGET_SSE"
+  "cmp%D3ps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "V4SF")])
+
+;; Scalar form: element 0 from the comparison, the rest from operand 1.
+;; Operand 2 is restricted to a register here.
+(define_insn "sse_vmmaskcmpv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (match_operator:V4SF 3 "sse_comparison_operator"
+            [(match_operand:V4SF 1 "register_operand" "0")
+             (match_operand:V4SF 2 "register_operand" "x")])
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE"
+  "cmp%D3ss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "SF")])
+
+;; comiss / ucomiss: compare element 0 of both operands and set
+;; FLAGS_REG in CCFP (comiss) or CCFPU (ucomiss) mode.
+(define_insn "sse_comi"
+  [(set (reg:CCFP FLAGS_REG)
+        (compare:CCFP
+          (vec_select:SF
+            (match_operand:V4SF 0 "register_operand" "x")
+            (parallel [(const_int 0)]))
+          (vec_select:SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+            (parallel [(const_int 0)]))))]
+  "TARGET_SSE"
+  "comiss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "mode" "SF")])
+
+(define_insn "sse_ucomi"
+  [(set (reg:CCFPU FLAGS_REG)
+        (compare:CCFPU
+          (vec_select:SF
+            (match_operand:V4SF 0 "register_operand" "x")
+            (parallel [(const_int 0)]))
+          (vec_select:SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+            (parallel [(const_int 0)]))))]
+  "TARGET_SSE"
+  "ucomiss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "mode" "SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Bitwise AND on V4SF: expander plus matching insn.
+(define_expand "andv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
+
+(define_insn "*andv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+  "andps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; andnps: (NOT operand 1) AND operand 2.  Not commutative, so operand 1
+;; is a register tied to the destination.
+(define_insn "sse_nandv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andnps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; Bitwise OR on V4SF: expander plus matching insn.
+(define_expand "iorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
+
+(define_insn "*iorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
+  "orps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; Bitwise XOR on V4SF: expander plus matching insn.
+(define_expand "xorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
+
+(define_insn "*xorv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
+  "xorps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; cvtpi2ps: convert a V2SI ("y" register or memory) to float and merge it
+;; into elements 0 and 1 (mask 3) of operand 1.
+(define_insn "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (vec_duplicate:V4SF
+            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+          (match_operand:V4SF 1 "register_operand" "0")
+          (const_int 3)))]
+  "TARGET_SSE"
+  "cvtpi2ps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+;; cvtps2pi: convert elements 0 and 1 to integers in a "y" register.
+;; Modeled with UNSPEC_FIX_NOTRUNC rather than the `fix' rtx that the
+;; cvttps2pi pattern below uses.
+(define_insn "sse_cvtps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (vec_select:V2SI
+          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                       UNSPEC_FIX_NOTRUNC)
+          (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "cvtps2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DI")])
+
+(define_insn "sse_cvttps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (vec_select:V2SI
+          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+          (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "cvttps2pi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "SF")])
+
+;; cvtsi2ss: convert an SImode integer to SF and merge it into element 0
+;; of operand 1.  The "q" variant takes a DImode source and additionally
+;; requires TARGET_64BIT.
+(define_insn "sse_cvtsi2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+        (vec_merge:V4SF
+          (vec_duplicate:V4SF
+            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+          (match_operand:V4SF 1 "register_operand" "0,0")
+          (const_int 1)))]
+  "TARGET_SSE"
+  "cvtsi2ss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "vector,double")
+   (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtsi2ssq"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+        (vec_merge:V4SF
+          (vec_duplicate:V4SF
+            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+          (match_operand:V4SF 1 "register_operand" "0,0")
+          (const_int 1)))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtsi2ssq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "vector,double")
+   (set_attr "mode" "SF")])
+
+;; cvtss2si: convert element 0 to an integer in a general register,
+;; modeled as UNSPEC_FIX_NOTRUNC.  The "q" variant produces DImode and
+;; additionally requires TARGET_64BIT.
+(define_insn "sse_cvtss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+        (unspec:SI
+          [(vec_select:SF
+             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+             (parallel [(const_int 0)]))]
+          UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE"
+  "cvtss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "SI")])
+
+(define_insn "sse_cvtss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+        (unspec:DI
+          [(vec_select:SF
+             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+             (parallel [(const_int 0)]))]
+          UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+
(set_attr "mode" "DI")])
+
+;; cvttss2si: convert element 0 to an integer in a general register using
+;; the `fix' rtx.  The "q" variant produces DImode and additionally
+;; requires TARGET_64BIT.
+(define_insn "sse_cvttss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+        (fix:SI
+          (vec_select:SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+            (parallel [(const_int 0)]))))]
+  "TARGET_SSE"
+  "cvttss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "SI")])
+
+(define_insn "sse_cvttss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+        (fix:DI
+          (vec_select:SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+            (parallel [(const_int 0)]))))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvttss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "DI")])
+
+;; cvtdq2ps converts V4SI to V4SF, so the "mode" attribute is V4SF
+;; (it used to say V2DF, which matches neither operand).
+(define_insn "sse2_cvtdq2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvtdq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+;; V4SF -> V4SI: cvtps2dq is modeled as UNSPEC_FIX_NOTRUNC, cvttps2dq as
+;; the `fix' rtx.
+(define_insn "sse2_cvtps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+                     UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2"
+  "cvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse2_cvttps2dq"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "cvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The swizzles below select four elements out of the eight-element
+;; concatenation of operand 1 (indices 0-3) and operand 2 (indices 4-7).
+
+;; Result is { op2[0], op2[1], op1[2], op1[3] }.  The three alternatives
+;; emit movhlps (reg,reg), movlps from the high half of a memory operand
+;; 1 (%H1), and movhps to memory.
+(define_insn "sse_movhlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+        (vec_select:V4SF
+          (vec_concat:V8SF
+            (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
+            (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
+          (parallel [(const_int 4)
+                     (const_int 5)
+                     (const_int 2)
+                     (const_int 3)])))]
+  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   movhlps\t{%2, %0|%0, %2}
+   movlps\t{%H1, %0|%0, %H1}
+   movhps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+;; Result is { op1[0], op1[1], op2[0], op2[1] }.
+(define_insn "sse_movlhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+        (vec_select:V4SF
+          (vec_concat:V8SF
+            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+          (parallel [(const_int 0)
+                     (const_int 1)
+                     (const_int 4)
+                     (const_int 5)])))]
+  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+  "@
+   movlhps\t{%2, %0|%0, %2}
+   movhps\t{%2, %0|%0, %2}
+   movlps\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+;; Interleave the high halves: { op1[2], op2[2], op1[3], op2[3] }.
+(define_insn "sse_unpckhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_select:V4SF
+          (vec_concat:V8SF
+            (match_operand:V4SF 1 "register_operand" "0")
+            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (parallel [(const_int 2) (const_int 6)
+                     (const_int 3) (const_int 7)])))]
+  "TARGET_SSE"
+  "unpckhps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; Interleave the low halves: { op1[0], op2[0], op1[1], op2[1] }.
+(define_insn "sse_unpcklps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_select:V4SF
+          (vec_concat:V8SF
+            (match_operand:V4SF 1 "register_operand" "0")
+            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (parallel [(const_int 0) (const_int 4)
+                     (const_int 1) (const_int 5)])))]
+  "TARGET_SSE"
+  "unpcklps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the new instructions
+(define_insn "sse3_movshdup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_select:V4SF
+          (vec_concat:V8SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+            (match_dup
1))
+          (parallel [(const_int 1)
+                     (const_int 1)
+                     (const_int 7)
+                     (const_int 7)])))]
+  "TARGET_SSE3"
+  "movshdup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; movsldup: both halves of the concat are operand 1, so indices 0,0,6,6
+;; select elements 0,0,2,2 of it.
+(define_insn "sse3_movsldup"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_select:V4SF
+          (vec_concat:V8SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+            (match_dup 1))
+          (parallel [(const_int 0)
+                     (const_int 0)
+                     (const_int 6)
+                     (const_int 6)])))]
+  "TARGET_SSE3"
+  "movsldup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V4SF")])
+
+;; shufps with an 8-bit immediate (operand 3).  The expander splits the
+;; immediate into four 2-bit selectors and passes them to sse_shufps_1;
+;; the two high selectors are biased by 4 so that they index operand 2
+;; within the eight-element concatenation.
+(define_expand "sse_shufps"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:V4SF 1 "register_operand" "")
+   (match_operand:V4SF 2 "nonimmediate_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  int mask = INTVAL (operands[3]);
+  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
+                               GEN_INT ((mask >> 0) & 3),
+                               GEN_INT ((mask >> 2) & 3),
+                               GEN_INT (((mask >> 4) & 3) + 4),
+                               GEN_INT (((mask >> 6) & 3) + 4)));
+  DONE;
+})
+
+;; Operands 3 and 4 select from operand 1 (indices 0-3); operands 5 and 6
+;; select from operand 2 (indices 4-7).  The output code removes the bias
+;; of 4 and repacks the selectors into the immediate, reusing operands[3]
+;; to hold it.
+(define_insn "sse_shufps_1"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_select:V4SF
+          (vec_concat:V8SF
+            (match_operand:V4SF 1 "register_operand" "0")
+            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+          (parallel [(match_operand 3 "const_0_to_3_operand" "")
+                     (match_operand 4 "const_0_to_3_operand" "")
+                     (match_operand 5 "const_4_to_7_operand" "")
+                     (match_operand 6 "const_4_to_7_operand" "")])))]
+  "TARGET_SSE"
+{
+  int mask = 0;
+  mask |= INTVAL (operands[3]) << 0;
+  mask |= INTVAL (operands[4]) << 2;
+  mask |= (INTVAL (operands[5]) - 4) << 4;
+  mask |= (INTVAL (operands[6]) - 4) << 6;
+  operands[3] = GEN_INT (mask);
+
+  return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+;; Extract the high V2SF half (elements 2 and 3) of operand 1.
+(define_insn "sse_storehps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+        (vec_select:V2SF
+          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+          (parallel [(const_int 2) (const_int 3)])))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%1, %0|%0, %1}
+   movhlps\t{%1, %0|%0, %1}
+   movlps\t{%H1, %0|%0, %H1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+;; Replace the high half: the result is the low half of operand 1
+;; concatenated with the V2SF operand 2.
+(define_insn "sse_loadhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+        (vec_concat:V4SF
+          (vec_select:V2SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+            (parallel [(const_int 0) (const_int 1)]))
+          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%2, %0|%0, %2}
+   movlhps\t{%2, %0|%0, %2}
+   movlps\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+;; Extract the low V2SF half (elements 0 and 1) of operand 1.
+(define_insn "sse_storelps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+        (vec_select:V2SF
+          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+          (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+  "@
+   movlps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}
+   movlps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+;; Replace the low half: the result is the V2SF operand 2 concatenated
+;; with the high half of operand 1.
+(define_insn "sse_loadlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+        (vec_concat:V4SF
+          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+          (vec_select:V2SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+            (parallel [(const_int 2) (const_int 3)]))))]
+  "TARGET_SSE"
+  "@
+   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+   movlps\t{%2, %0|%0, %2}
+   movlps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+;; Load a scalar into element 0; the preparation statement sets
+;; operands[2] to the V4SF zero vector for the other elements.
+(define_expand "sse_loadss"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+        (vec_merge:V4SF
+          (vec_duplicate:V4SF (match_operand:SF 1 "nonimmediate_operand" ""))
+          (match_dup 2)
+          (const_int 1)))]
+  "TARGET_SSE"
+  "operands[2] = CONST0_RTX (V4SFmode);")
+
+;; Merge scalar operand 2 into element 0 of operand 1.
+(define_insn "sse_loadlss"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
+        (vec_merge:V4SF
+          (vec_duplicate:V4SF
+            (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+
(match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+          (const_int 1)))]
+  "TARGET_SSE"
+  "@
+   movss\t{%2, %0|%0, %2}
+   movss\t{%2, %0|%0, %2}
+   movd\t{%2, %0|%0, %2}
+   #"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+;; After reload, a merge of a scalar into element 0 of a vector that
+;; lives in memory becomes a plain SFmode store at offset 0.
+(define_split
+  [(set (match_operand:V4SF 0 "memory_operand" "")
+        (vec_merge:V4SF
+          (vec_duplicate:V4SF
+            (match_operand:SF 1 "nonmemory_operand" ""))
+          (match_dup 0)
+          (const_int 1)))]
+  "TARGET_SSE && reload_completed"
+  [(const_int 0)]
+{
+  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+  DONE;
+})
+
+;; movss reg,reg: element 0 from operand 2, the rest from operand 1.
+(define_insn "sse_movss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_merge:V4SF
+          (match_operand:V4SF 2 "register_operand" "x")
+          (match_operand:V4SF 1 "register_operand" "0")
+          (const_int 1)))]
+  "TARGET_SSE"
+  "movss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+;; Extract element 0.  Always split after reload into an SFmode move from
+;; the low part of operand 1.
+(define_insn_and_split "sse_storess"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+        (vec_select:SF
+          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+          (parallel [(const_int 0)])))]
+  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[0], gen_lowpart (SFmode, operands[1]));
+  DONE;
+})
+
+;; Set element operands[2] (a constant 0..3) of the V4SF operand 0 to the
+;; SF value in operand 1.
+;;
+;; The comments use the Intel high-to-low notation: "A B C D" means
+;; element 3 is A and element 0 is D; X is the new scalar.
+;;
+;; sse_shufps_1 takes its first two selectors (0..3) from its operand 1
+;; and its last two selectors (4..7) from its operand 2, so selectors that
+;; address the second source must be biased by 4.
+(define_expand "vec_setv4sf"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:SF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  rtx tmp, op0 = operands[0], op1 = operands[1];
+
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      emit_insn (gen_sse_loadlss (op0, op0, op1));
+      break;
+
+    case 1:
+      /* tmp = op0 = A B C D */
+      tmp = copy_to_reg (op0);
+
+      /* op0 = C C D D */
+      emit_insn (gen_sse_unpcklps (op0, op0, op0));
+
+      /* op0 = C C D X */
+      emit_insn (gen_sse_loadlss (op0, op0, op1));
+
+      /* op0 = A B X D; the high half comes from tmp, hence the + 4.  */
+      emit_insn (gen_sse_shufps_1 (op0, op0, tmp, GEN_INT (1), GEN_INT (0),
+                                   GEN_INT (2 + 4), GEN_INT (3 + 4)));
+      break;
+
+    case 2:
+      /* tmp = A B C X.  The scalar is merged into the copy so that the
+         low half of op0 (C D) is still intact for the shuffle.  */
+      tmp = copy_to_reg (op0);
+      emit_insn (gen_sse_loadlss (tmp, tmp, op1));
+
+      /* op0 = A X C D: low half from op0, high half = tmp[3], tmp[0].  */
+      emit_insn (gen_sse_shufps_1 (op0, op0, tmp, GEN_INT (0), GEN_INT (1),
+                                   GEN_INT (0 + 4), GEN_INT (3 + 4)));
+      break;
+
+    case 3:
+      /* tmp = A B C X */
+      tmp = copy_to_reg (op0);
+      emit_insn (gen_sse_loadlss (tmp, tmp, op1));
+
+      /* op0 = X B C D: low half from op0, high half = tmp[0], tmp[2].  */
+      emit_insn (gen_sse_shufps_1 (op0, op0, tmp, GEN_INT (0), GEN_INT (1),
+                                   GEN_INT (2 + 4), GEN_INT (0 + 4)));
+      break;
+
+    default:
+      abort ();
+    }
+  DONE;
+})
+
+;; Extract element operands[2] (a constant 0..3) of the V4SF operand 1
+;; into the SF operand 0.  For a nonzero index the wanted element is first
+;; moved to element 0 of a temporary copy; the final sse_storess must read
+;; that temporary, not the unshuffled input.
+(define_expand "vec_extractv4sf"
+  [(match_operand:SF 0 "register_operand" "")
+   (match_operand:V4SF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE"
+{
+  rtx tmp, op0 = operands[0], op1 = operands[1];
+
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      tmp = op1;
+      break;
+
+    case 1:
+      /* Element 1 -> element 0.  The last two selectors address the
+         second source of sse_shufps_1 and must lie in 4..7.  */
+      tmp = copy_to_reg (op1);
+      emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, GEN_INT (1), GEN_INT (1),
+                                   GEN_INT (2 + 4), GEN_INT (3 + 4)));
+      break;
+
+    case 2:
+      /* unpckhps of a vector with itself puts element 2 in element 0.  */
+      tmp = copy_to_reg (op1);
+      emit_insn (gen_sse_unpckhps (tmp, tmp, tmp));
+      break;
+
+    case 3:
+      /* Element 3 -> element 0.  */
+      tmp = copy_to_reg (op1);
+      emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, GEN_INT (3), GEN_INT (1),
+                                   GEN_INT (2 + 4), GEN_INT (3 + 4)));
+      break;
+
+    default:
+      abort ();
+    }
+
+  emit_insn (gen_sse_storess (op0, tmp));
+  DONE;
+})
+
+;; Vector initialization is delegated to ix86_expand_vector_init.
+(define_expand "vec_initv4sf"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand 1 "" "")]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_init (operands[0], operands[1]);
+  DONE;
+})
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Negation and absolute value are handed to
+;; ix86_expand_fp_absneg_operator, which emits the whole sequence (DONE).
+(define_expand "negv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
+
+(define_expand "absv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
+
+;; V2DF addition: expander, packed insn, and scalar (element 0 only) insn.
+(define_expand "addv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
+
+(define_insn "*addv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
+  "addpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; The operands are V2DF, so the operand check is made in V2DFmode like
+;; the other V2DF patterns (it used to pass V4SFmode).
+(define_insn "sse2_vmaddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+          (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
+  "addsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; V2DF subtraction: expander, packed insn, and scalar insn.  Operand 1 is
+;; tied to the destination in the insns.
+(define_expand "subv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
+
+(define_insn "*subv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "subpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmsubv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+          (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE2"
+  "subsd\t{%2,
%0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; V2DF multiplication: expander, packed insn, and scalar (element 0
+;; only) insn.  Operand 1 is marked commutative ("%0") in the insns.
+(define_expand "mulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
+
+(define_insn "*mulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
+  "mulpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmmulv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+          (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
+  "mulsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "mode" "DF")])
+
+;; V2DF division: expander, packed insn, and scalar insn.  Operand 1 must
+;; be a register and is tied to the destination in the insns.
+(define_expand "divv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (div:V2DF (match_operand:V2DF 1 "register_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
+
+(define_insn "*divv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "divpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_vmdivv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+          (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE2"
+  "divsd\t{%2, %0|%0, %2}"
+
[(set_attr "type" "ssediv")
+   (set_attr "mode" "DF")])
+
+;; Packed double-precision square root of operand 1 (sqrtpd).
+(define_insn "sqrtv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "sqrtpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "V2DF")])
+
+;; Scalar square root (sqrtsd).  The vec_merge mask (const_int 1) takes
+;; element 0 from the sqrt of operand 1 and the remaining element from
+;; operand 2, which is tied to the destination register.
+;; Operand 1 uses nonimmediate_operand so the "m" half of its "xm"
+;; constraint is actually reachable, and the insn mode is DF (as on the
+;; other scalar V2DF patterns here) because sqrtsd is double precision.
+(define_insn "sse2_vmsqrtv2df2"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+          (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+          (match_operand:V2DF 2 "register_operand" "0")
+          (const_int 1)))]
+  "TARGET_SSE2"
+  "sqrtsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "DF")])
+
+;; Packed maximum.  The expander lets ix86_fixup_binary_operands_no_copy
+;; legitimize the operands; the insn then requires
+;; ix86_binary_operator_ok and emits maxpd.
+(define_expand "smaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);")
+
+(define_insn "*smaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
+  "maxpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "V2DF")])
+
+;; Scalar maximum (maxsd): element 0 is the max, the other element is
+;; copied from operand 1 (match_dup 1 under mask const_int 1).
+(define_insn "sse2_vmsmaxv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_merge:V2DF
+          (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+          (match_dup 1)
+          (const_int 1)))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
+  "maxsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; Packed minimum expander; mirrors smaxv2df3 with SMIN.
+(define_expand "sminv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);")
+
+(define_insn
"*sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)" + "minpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmsminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "sse3_addsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (plus:V2DF + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (minus:V2DF (match_dup 1) (match_dup 2)) + (const_int 1)))] + "TARGET_SSE3" + "addsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse3_haddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (plus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) + (plus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3" + "haddpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "sse3_hsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_concat:V2DF + (minus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) + (minus:DF + 
(vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3" + "hsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel double-precision floating point comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_maskcmpv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (match_operator:V2DF 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE2" + "cmp%D3pd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_vmmaskcmpv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (match_operator:V2DF 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")]) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "cmp%D3sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "sse2_comi" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "comisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "mode" "DF")]) + +(define_insn "sse2_ucomi" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "ucomisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecomi") + (set_attr "mode" "DF")]) + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Bitwise AND of two V2DF values.  The expander legitimizes the operands
+;; with ix86_fixup_binary_operands_no_copy; the insn emits andpd.
+(define_expand "andv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
+
+;; The operand check is done in V2DFmode, the mode of this pattern and of
+;; its expander (it was written as V4SFmode, apparently copied from the
+;; single-precision variant).
+(define_insn "*andv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
+  "andpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;; AND-NOT (andnpd): operand 1 is complemented, is tied to the
+;; destination and must be a register, so the operation is not
+;; commutative.
+(define_insn "sse2_nandv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "andnpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;; Bitwise inclusive OR (orpd).
+(define_expand "iorv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
+
+(define_insn "*iorv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
+  "orpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
+;; Bitwise exclusive OR expander (the matching insn follows).
+(define_expand "xorv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
+                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);") + +(define_insn "*xorv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)" + "xorpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel double-precision floating point conversion operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_cvtpi2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] + "TARGET_SSE2" + "cvtpi2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_cvtpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2" + "cvtpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DI")]) + +(define_insn "sse2_cvttpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvttpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_cvtsi2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF + (vec_duplicate:V2DF + (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m"))) + (match_operand:V2DF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE2" + "cvtsi2sd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct")]) + +(define_insn "sse2_cvtsi2sdq" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF + (vec_duplicate:V2DF 
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m"))) + (match_operand:V2DF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct")]) + +(define_insn "sse2_cvtsd2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI + [(vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2" + "cvtsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "SI")]) + +(define_insn "sse2_cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI + [(vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "DI")]) + +(define_insn "sse2_cvttsd2si" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "cvttsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SI") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "sse2_cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "x,m") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "double,vector")]) + +(define_insn "sse2_cvtdq2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float:V2DF + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) 
(const_int 1)]))))] + "TARGET_SSE2" + "cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_cvtpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "") + (vec_concat:V4SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")] + UNSPEC_FIX_NOTRUNC) + (match_dup 2)))] + "TARGET_SSE2" + "operands[2] = CONST0_RTX (V2SImode);") + +(define_insn "*sse2_cvtpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX_NOTRUNC) + (match_operand:V2SI 2 "const0_operand" "")))] + "TARGET_SSE2" + "cvtpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_expand "sse2_cvttpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "") + (vec_concat:V4SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "")) + (match_dup 2)))] + "TARGET_SSE2" + "operands[2] = CONST0_RTX (V2SImode);") + +(define_insn "*sse2_cvttpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")) + (match_operand:V2SI 2 "const0_operand" "")))] + "TARGET_SSE2" + "cvttpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_cvtsd2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float_truncate:V2SF + (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))) + (match_operand:V4SF 1 "register_operand" "0,0") + (const_int 1)))] + "TARGET_SSE2" + "cvtsd2ss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,double") + (set_attr "mode" "SF")]) + +(define_insn "sse2_cvtss2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 1)]))) + 
(match_operand:V2DF 1 "register_operand" "0")
+          (const_int 1)))]
+  "TARGET_SSE2"
+  "cvtss2sd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF")])
+
+;; Convert two doubles to two floats (cvtpd2ps).  The expander supplies
+;; the zero V2SF upper half as operand 2; the insn matches it with
+;; const0_operand.
+(define_expand "sse2_cvtpd2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+        (vec_concat:V4SF
+          (float_truncate:V2SF
+            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+          (match_dup 2)))]
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V2SFmode);")
+
+(define_insn "*sse2_cvtpd2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (vec_concat:V4SF
+          (float_truncate:V2SF
+            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+          (match_operand:V2SF 2 "const0_operand" "")))]
+  "TARGET_SSE2"
+  "cvtpd2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+;; Extend elements 0 and 1 of a V4SF to V2DF (cvtps2pd).
+(define_insn "sse2_cvtps2pd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (float_extend:V2DF
+          (vec_select:V2SF
+            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+            (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE2"
+  "cvtps2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Select elements 1 and 3 of the concatenation (operand 1, operand 2),
+;; i.e. the high element of each input.  Three alternatives:
+;; register/register unpckhpd, a movlpd load from the high half (%H1) of
+;; a memory operand 1, and a movhpd store to memory.
+(define_insn "sse2_unpckhpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
+        (vec_select:V2DF
+          (vec_concat:V4DF
+            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
+            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
+          (parallel [(const_int 1)
+                     (const_int 3)])))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   unpckhpd\t{%2, %0|%0, %2}
+   movlpd\t{%H1, %0|%0, %H1}
+   movhpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+;; Duplicate element 0 of operand 1 into both result elements.  The
+;; register-destination alternative is movddup; the memory-destination
+;; alternative ("#") is split after reload by the define_split below.
+;; This pattern has only operands 0 and 1, so the mem/mem exclusion must
+;; test those two (it previously tested the nonexistent operands[2]).
+(define_insn "sse3_movddup"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
+        (vec_select:V2DF
+          (vec_concat:V4DF
+            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+            (match_dup 1))
+          (parallel [(const_int 0)
+                     (const_int 2)])))]
+  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   movddup\t{%1, %0|%0, %1}
+   #"
+  [(set_attr "type" "sselog,ssemov")
+   (set_attr "mode" "V2DF")])
+
+;; Memory-destination movddup: store the low DF of the source register
+;; to both halves (byte offsets 0 and 8) of the destination.
+(define_split
+  [(set (match_operand:V2DF 0 "memory_operand" "")
+        (vec_select:V2DF
+          (vec_concat:V4DF
+            (match_operand:V2DF 1 "register_operand" "")
+            (match_dup 1))
+          (parallel [(const_int 0)
+                     (const_int 2)])))]
+  "TARGET_SSE3 && reload_completed"
+  [(const_int 0)]
+{
+  rtx low = gen_lowpart (DFmode, operands[1]);
+  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+  DONE;
+})
+
+;; Select elements 0 and 2 of the concatenation, i.e. the low element of
+;; each input.  Alternatives: unpcklpd, a movhpd load of operand 2 from
+;; memory, and a movlpd store of operand 2 into the high half (%H0) of a
+;; memory destination.
+(define_insn "sse2_unpcklpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
+        (vec_select:V2DF
+          (vec_concat:V4DF
+            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+          (parallel [(const_int 0)
+                     (const_int 2)])))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   unpcklpd\t{%2, %0|%0, %2}
+   movhpd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %H0|%H0, %2}"
+  [(set_attr "type" "sselog,ssemov,ssemov")
+   (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+;; shufpd with an immediate mask: bit 0 picks the element of operand 1,
+;; bit 1 picks the element of operand 2 (encoded as index 2 or 3 of the
+;; concatenation) for sse2_shufpd_1.
+(define_expand "sse2_shufpd"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand:V2DF 1 "register_operand" "")
+   (match_operand:V2DF 2 "nonimmediate_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  int mask = INTVAL (operands[3]);
+  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
+                                GEN_INT (mask & 1),
+                                GEN_INT (mask & 2 ?
3 : 2))); + DONE; +}) + +(define_insn "sse2_shufpd_1" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (parallel [(match_operand 3 "const_0_to_1_operand" "") + (match_operand 4 "const_2_to_3_operand" "")])))] + "TARGET_SSE2" +{ + int mask; + mask = INTVAL (operands[3]); + mask |= (INTVAL (operands[4]) - 2) << 1; + operands[3] = GEN_INT (mask); + + return "shufpd\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_storehpd" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o") + (parallel [(const_int 1)])))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movhpd\t{%1, %0|%0, %1} + unpckhpd\t%0, %0 + #" + [(set_attr "type" "ssemov,sselog1,ssemov") + (set_attr "mode" "V1DF,V2DF,DF")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (vec_select:DF + (match_operand:V2DF 1 "memory_operand" "") + (parallel [(const_int 1)])))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = adjust_address (operands[1], DFmode, 8); +}) + +(define_insn "sse2_storelpd" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + movlpd\t{%1, %0|%0, %1} + # + #" + [(set_attr "type" "ssemov") + (set_attr "mode" "V1DF,DF,DF")]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[0] = gen_lowpart (DFmode, operands[0]); + operands[1] = 
gen_lowpart (DFmode, operands[1]);
+})
+
+;; Replace the high element of operand 1 with the DF operand 2, keeping
+;; element 0.  The memory-destination alternative ("#") is split below.
+(define_insn "sse2_loadhpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o")
+        (vec_concat:V2DF
+          (vec_select:DF
+            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
+            (parallel [(const_int 0)]))
+          (match_operand:DF 2 "nonimmediate_operand"     " m,x,0,x*fr")))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   movhpd\t{%2, %0|%0, %2}
+   unpcklpd\t{%2, %0|%0, %2}
+   shufpd\t{$1, %1, %0|%0, %1, 1}
+   #"
+  [(set_attr "type" "ssemov,sselog,sselog,other")
+   (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
+
+;; In-memory loadhpd: only the high element changes, so store the DF
+;; register to the upper half of the destination.
+(define_split
+  [(set (match_operand:V2DF 0 "memory_operand" "")
+        (vec_concat:V2DF
+          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+          (match_operand:DF 1 "register_operand" "")))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  /* The high element lives at byte offset 8.  */
+  operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+;; Replace the low element of operand 1 with the DF operand 2, keeping
+;; element 1.  The memory-destination alternative ("#") is split below.
+(define_insn "sse2_loadlpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
+        (vec_concat:V2DF
+          (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
+          (vec_select:DF
+            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
+            (parallel [(const_int 1)]))))]
+  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   movsd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %0|%0, %2}
+   movsd\t{%2, %0|%0, %2}
+   shufpd\t{$2, %2, %0|%0, %2, 2}
+   movhpd\t{%H1, %0|%0, %H1}
+   #"
+  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
+   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
+
+;; In-memory loadlpd: the pattern keeps element 1 of the destination and
+;; replaces element 0, so the DF register must be stored to the LOW half.
+(define_split
+  [(set (match_operand:V2DF 0 "memory_operand" "")
+        (vec_concat:V2DF
+          (match_operand:DF 1 "register_operand" "")
+          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+  "TARGET_SSE2 && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  /* The low element lives at byte offset 0; offset 8 would overwrite the
+     high element that this pattern is required to preserve.  */
+  operands[0] = adjust_address (operands[0], DFmode, 0);
+})
+
+;; Load a DF into element 0; element 1 is taken from the zero vector the
+;; expander supplies as operand 2.
+(define_expand "sse2_loadsd"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+        (vec_concat:V2DF
+          (match_operand:DF 1 "nonimmediate_operand" "")
+          (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "operands[2] = CONST0_RTX (V2DFmode);")
+
+;; Merge element 0 of operand 2 with the remaining element of operand 1.
+;; Each alternative's template must close its {att|intel} dialect group;
+;; the last two were missing the closing brace.
+(define_insn "sse2_movsd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
+        (vec_merge:V2DF
+          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
+          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
+          (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   movsd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %0|%0, %2}
+   shufpd\t{$2, %2, %0|%0, %2, 2}
+   movhps\t{%H1, %0|%0, %H1}
+   movhps\t{%1, %H0|%H0, %1}"
+  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
+
+;; Broadcast a DF scalar into both elements (movddup).
+(define_insn "sse3_loadddup"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_duplicate:V2DF
+          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE3"
+  "movddup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF")])
+
+;; Set element operands[2] (0 or 1) of V2DF operand 0 to DF operand 1,
+;; via sse2_loadlpd / sse2_loadhpd.  Any other index aborts.
+(define_expand "vec_setv2df"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand:DF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1]));
+      break;
+    case 1:
+      emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1]));
+      break;
+    default:
+      abort ();
+    }
+  DONE;
+})
+
+;; Extract element operands[2] (0 or 1) of V2DF operand 1 into DF
+;; operand 0, via sse2_storelpd / sse2_storehpd.  Any other index aborts.
+(define_expand "vec_extractv2df"
+  [(match_operand:DF 0 "register_operand" "")
+   (match_operand:V2DF 1 "register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  "TARGET_SSE2"
+{
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      emit_insn (gen_sse2_storelpd (operands[0], operands[1]));
+      break;
+    case 1:
+      emit_insn (gen_sse2_storehpd (operands[0], operands[1]));
+      break;
+    default:
+      abort ();
+    }
+  DONE;
+})
+
+;; Vector initialization is delegated to ix86_expand_vector_init.
+(define_expand "vec_initv2df"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand 1 "" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_vector_init
(operands[0], operands[1]);
+  DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; These patterns are instantiated once per mode of their iterator
+;; (SSEMODEI, SSEMODE12), so each name must contain <mode> to stay
+;; unique and the C fragments must use <MODE>mode; the angle-bracket
+;; substitutions had been stripped from this text.
+;; NOTE(review): <ssevecsize> is assumed to be the mode attribute giving
+;; the b/w/d/q mnemonic suffix; it is defined outside this hunk -- confirm.
+
+;; Negation is expanded as (0 - operand 1) with the zero vector forced
+;; into a register.
+(define_expand "neg<mode>2"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+        (minus:SSEMODEI
+          (match_dup 2)
+          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
+;; Element-wise addition.
+(define_expand "add<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+        (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+                       (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
+
+(define_insn "*add<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+        (plus:SSEMODEI
+          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "padd<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Signed saturating addition.
+(define_insn "sse2_ssadd<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+        (ss_plus:SSEMODE12
+          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
+          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
+  "padds<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Unsigned saturating addition.
+(define_insn "sse2_usadd<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+        (us_plus:SSEMODE12
+          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
+          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
+  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Element-wise subtraction expander (operands continue below).
+(define_expand "sub<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+        (minus:SSEMODEI
(match_operand:SSEMODEI 1 "register_operand" "")
+          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+
+;; The subtraction patterns below iterate over several modes, so their
+;; names carry <mode> and the C fragment above uses <MODE>mode; the
+;; angle-bracket substitutions had been stripped from this text.
+;; NOTE(review): <ssevecsize> is assumed to be the mode attribute giving
+;; the b/w/d/q mnemonic suffix; it is defined outside this hunk -- confirm.
+
+;; Element-wise subtraction; operand 1 is tied to the destination.
+(define_insn "*sub<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+        (minus:SSEMODEI
+          (match_operand:SSEMODEI 1 "register_operand" "0")
+          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psub<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Signed saturating subtraction.
+(define_insn "sse2_sssub<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+        (ss_minus:SSEMODE12
+          (match_operand:SSEMODE12 1 "register_operand" "0")
+          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Unsigned saturating subtraction.
+(define_insn "sse2_ussub<mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+        (us_minus:SSEMODE12
+          (match_operand:SSEMODE12 1 "register_operand" "0")
+          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; 16-bit element multiply, low half of each product (pmullw).
+(define_expand "mulv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*mulv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+  "pmullw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Signed 16x16 multiply keeping the high 16 bits of each product:
+;; sign-extend to V8SI, multiply, shift right by 16, truncate (pmulhw).
+(define_insn "sse2_smulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (truncate:V8HI
+          (lshiftrt:V8SI
+            (mult:V8SI
+              (sign_extend:V8SI
+                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+              (sign_extend:V8SI
+                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+            (const_int 16))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+  "pmulhw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Unsigned counterpart of the above (pmulhuw).
+(define_insn "sse2_umulv8hi3_highpart"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (truncate:V8HI
+          (lshiftrt:V8SI
+            (mult:V8SI
+              (zero_extend:V8SI
+                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+              (zero_extend:V8SI
+                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+            (const_int 16))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+  "pmulhuw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Unsigned widening multiply of elements 0 and 2 of two V4SI inputs
+;; into a V2DI (pmuludq).  The operands are V4SI, so the operand check
+;; uses V4SImode (it was written as V8HImode, apparently copied from the
+;; patterns above).
+(define_insn "sse2_umulv2siv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (mult:V2DI
+          (zero_extend:V2DI
+            (vec_select:V2SI
+              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+              (parallel [(const_int 0) (const_int 2)])))
+          (zero_extend:V2DI
+            (vec_select:V2SI
+              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+              (parallel [(const_int 0) (const_int 2)])))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+  "pmuludq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseimul")
+   (set_attr "mode" "TI")])
+
+;; Multiply-add (pmaddwd): products of the even-indexed 16-bit elements
+;; plus products of the odd-indexed elements, each sign-extended to
+;; 32 bits.
+(define_insn "sse2_pmaddwd"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (plus:V4SI
+          (mult:V4SI
+            (sign_extend:V4SI
+              (vec_select:V4HI
+                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+                (parallel [(const_int 0)
+                           (const_int 2)
+                           (const_int 4)
+                           (const_int 6)])))
+            (sign_extend:V4SI
+              (vec_select:V4HI
+                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+                (parallel [(const_int 0)
+                           (const_int 2)
+                           (const_int 4)
+                           (const_int 6)]))))
+          (mult:V4SI
+            (sign_extend:V4SI
+              (vec_select:V4HI (match_dup 1)
+                (parallel [(const_int 1)
+                           (const_int 3)
+                           (const_int 5)
+                           (const_int 7)])))
+            (sign_extend:V4SI
+              (vec_select:V4HI (match_dup 2)
+                (parallel [(const_int 1)
+                           (const_int 3) +
(const_int 5)
+                           (const_int 7)]))))))]
+  "TARGET_SSE2"
+  "pmaddwd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; The shift and compare patterns below iterate over several modes
+;; (SSEMODE24, SSEMODE248, SSEMODE124, SSEMODEI), so their names carry
+;; <mode> and C fragments use <MODE>mode; the angle-bracket
+;; substitutions had been stripped from this text.
+;; NOTE(review): <ssevecsize> is assumed to be the mode attribute giving
+;; the b/w/d/q mnemonic suffix; it is defined outside this hunk -- confirm.
+
+;; Arithmetic right shift of every element by operand 2 (register or
+;; immediate count).
+(define_insn "ashr<mode>3"
+  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+        (ashiftrt:SSEMODE24
+          (match_operand:SSEMODE24 1 "register_operand" "0")
+          (match_operand:SI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psra<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Logical right shift of every element.
+(define_insn "lshr<mode>3"
+  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+        (lshiftrt:SSEMODE248
+          (match_operand:SSEMODE248 1 "register_operand" "0")
+          (match_operand:SI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Left shift of every element.
+(define_insn "ashl<mode>3"
+  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+        (ashift:SSEMODE248
+          (match_operand:SSEMODE248 1 "register_operand" "0")
+          (match_operand:SI 2 "nonmemory_operand" "xi")))]
+  "TARGET_SSE2"
+  "psll<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Whole-register left shift.  The RTL count is in bits (a multiple of 8
+;; per the predicate); pslldq takes bytes, hence the division by 8.
+(define_insn "sse2_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (ashift:TI (match_operand:TI 1 "register_operand" "0")
+                   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "pslldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Whole-register logical right shift; same bit-to-byte conversion.
+(define_insn "sse2_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "psrldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Unsigned byte maximum (pmaxub).
+(define_expand "umaxv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "")
+        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
+
+(define_insn "*umaxv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
+  "pmaxub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Signed 16-bit maximum (pmaxsw).
+(define_expand "smaxv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
+
+(define_insn "*smaxv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
+  "pmaxsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Unsigned byte minimum (pminub).  The fixup call names UMIN, the code
+;; this expander and its insn actually use (it previously said UMAX).
+(define_expand "uminv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "")
+        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
+
+(define_insn "*uminv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "=x")
+        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
+  "pminub\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Signed 16-bit minimum (pminsw).
+(define_expand "sminv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
+
+(define_insn "*sminv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
+  "pminsw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Element-wise equality compare; commutative ("%0").
+(define_insn "sse2_eq<mode>3"
+  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+        (eq:SSEMODE124
+          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
+          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "TI")])
+
+;; Element-wise greater-than compare; operand 1 is tied to the
+;; destination.
+(define_insn "sse2_gt<mode>3"
+  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+        (gt:SSEMODE124
+          (match_operand:SSEMODE124 1 "register_operand" "0")
+          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2"
+  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; One's complement is expanded as XOR with a vector whose elements are
+;; all constm1_rtx, forced into a register.
+(define_expand "one_cmpl<mode>2"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+                      (match_dup 2)))]
+  "TARGET_SSE2"
+{
+  int i, n = GET_MODE_NUNITS (<MODE>mode);
+  rtvec v = rtvec_alloc (n);
+
+  for (i = 0; i < n; ++i)
+    RTVEC_ELT (v, i) = constm1_rtx;
+
+  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
+})
+
+;; Bitwise AND expander (operands continue below).
+(define_expand "and<mode>3"
+  [(set (match_operand:SSEMODEI 0 "register_operand" "")
+        (and:SSEMODEI
(match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (AND, mode, operands);") + +(define_insn "*and3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (and:SSEMODEI + (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (AND, mode, operands)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_nand3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (and:SSEMODEI + (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0")) + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "ior3" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (IOR, mode, operands);") + +(define_insn "*ior3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (ior:SSEMODEI + (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (IOR, mode, operands)" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "xor3" + [(set (match_operand:SSEMODEI 0 "register_operand" "") + (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") + (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (XOR, mode, operands);") + +(define_insn "*xor3" + [(set (match_operand:SSEMODEI 0 "register_operand" "=x") + (xor:SSEMODEI + (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") + 
(match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 && ix86_binary_operator_ok (XOR, mode, operands)" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_packsswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (ss_truncate:V8QI + (match_operand:V8HI 1 "register_operand" "0")) + (ss_truncate:V8QI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))] + "TARGET_SSE2" + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packssdw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (ss_truncate:V4HI + (match_operand:V4SI 1 "register_operand" "0")) + (ss_truncate:V4HI + (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] + "TARGET_SSE2" + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packuswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (us_truncate:V8QI + (match_operand:V8HI 1 "register_operand" "0")) + (us_truncate:V8QI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))] + "TARGET_SSE2" + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] + "TARGET_SSE2" + 
"punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 4) (const_int 20) + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] + "TARGET_SSE2" + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhwd" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_SSE2" + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklwd" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_SSE2" + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhdq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_SSE2" + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr 
"type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckldq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_SSE2" + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 1) + (const_int 3)])))] + "TARGET_SSE2" + "punpckhqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_select:V2DI + (vec_concat:V4DI + (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE2" + "punpcklqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "sse2_pinsrw" + [(set (match_operand:V8HI 0 "register_operand" "") + (vec_merge:V8HI + (match_operand:V8HI 1 "register_operand" "") + (vec_duplicate:V8HI + (match_operand:SI 2 "nonimmediate_operand" "")) + (match_operand:SI 3 "const_0_to_7_operand" "")))] + "TARGET_SSE2" +{ + operands[2] = gen_lowpart (HImode, operands[2]); + operands[3] = GEN_INT ((1 << INTVAL (operands[3]))); +}) + +(define_insn "*sse2_pinsrw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI + (match_operand:V8HI 1 "register_operand" "0") + (vec_duplicate:V8HI + (match_operand:HI 2 "nonimmediate_operand" "rm")) + (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))] + "TARGET_SSE2" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + return 
"pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "x") + (parallel [(match_operand:SI 2 "const_0_to_7_operand" "0")]))))] + "TARGET_SSE2" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "sse2_pshufd" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V4SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "sse2_pshufd_1" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_select:V4SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "")])))] + "TARGET_SSE2" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "pshufd\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_expand "sse2_pshuflw" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "sse2_pshuflw_1" + 
[(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "") + (const_int 4) + (const_int 5) + (const_int 6) + (const_int 7)])))] + "TARGET_SSE2" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "pshuflw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "sse2_pshufhw" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1], + GEN_INT (((mask >> 0) & 3) + 4), + GEN_INT (((mask >> 2) & 3) + 4), + GEN_INT (((mask >> 4) & 3) + 4), + GEN_INT (((mask >> 6) & 3) + 4))); + DONE; +}) + +(define_insn "sse2_pshufhw_1" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_select:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (match_operand 2 "const_4_to_7_operand" "") + (match_operand 3 "const_4_to_7_operand" "") + (match_operand 4 "const_4_to_7_operand" "") + (match_operand 5 "const_4_to_7_operand" "")])))] + "TARGET_SSE2" +{ + int mask = 0; + mask |= (INTVAL (operands[2]) - 4) << 0; + mask |= (INTVAL (operands[3]) - 4) << 2; + mask |= (INTVAL (operands[4]) - 4) << 4; + mask |= (INTVAL (operands[5]) - 4) << 6; + operands[2] = GEN_INT (mask); + + return "pshufhw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_expand "sse2_loadd" + [(set (match_operand:V4SI 0 
"register_operand" "") + (vec_merge:V4SI + (vec_duplicate:V4SI + (match_operand:SI 1 "nonimmediate_operand" "")) + (match_dup 2) + (const_int 1)))] + "TARGET_SSE2" + "operands[2] = CONST0_RTX (V4SImode);") + +(define_insn "sse2_loadld" + [(set (match_operand:V4SI 0 "register_operand" "=x,x") + (vec_merge:V4SI + (vec_duplicate:V4SI + (match_operand:SI 2 "nonimmediate_operand" "mr,x")) + (match_operand:V4SI 1 "vector_move_operand" "C,0") + (const_int 1)))] + "TARGET_SSE2" + "@ + movd\t{%2, %0|%0, %2} + movss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn_and_split "sse2_stored" + [(set (match_operand:SI 0 "nonimmediate_operand" "=mrx") + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE2" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = gen_lowpart (SImode, operands[1]); +}) + +(define_expand "sse2_storeq" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "") + (parallel [(const_int 0)])))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_storeq" + [(set (match_operand:DI 0 "nonimmediate_operand" "=myx") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && !TARGET_64BIT" + "#") + +(define_insn "*sse2_storeq_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=myxr") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && TARGET_64BIT" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +(define_expand "sse2_loadq" + [(set (match_operand:V2DI 0 "register_operand" "") + (vec_merge:V2DI + 
(vec_duplicate:V2DI + (match_operand:DI 1 "nonimmediate_operand" "")) + (match_dup 2) + (const_int 1)))] + "TARGET_SSE2" + "operands[2] = CONST0_RTX (V2DImode);") + +(define_insn "*sse2_loadq" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "nonimmediate_operand" " m, y,x")) + (match_operand:V2DI 2 "vector_move_operand" " C, C,0") + (const_int 1)))] + "TARGET_SSE2 && !TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_loadq_rex64" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x,x") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "nonimmediate_operand" " m, y, r,x")) + (match_operand:V2DI 2 "vector_move_operand" " C, C, C,0") + (const_int 1)))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Miscelaneous +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "sse2_uavgv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (truncate:V16QI + (lshiftrt:V16HI + (plus:V16HI + (plus:V16HI + (zero_extend:V16HI + (match_operand:V16QI 1 "nonimmediate_operand" "%0")) + (zero_extend:V16HI + (match_operand:V16QI 2 "nonimmediate_operand" "xm"))) + (const_vector:V16QI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)" + "pavgb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + 
+;; Rounded unsigned 16-bit average: both inputs are zero-extended,
+;; summed together with a vector of ones, shifted right by one and
+;; truncated back to 16 bits.
+;; NOTE(review): the vector of ones is written as const_vector:V8HI
+;; although it is an operand of plus:V8SI -- confirm the intended mode.
+(define_insn "sse2_uavgv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (truncate:V8HI
+          (lshiftrt:V8SI
+            (plus:V8SI
+              (plus:V8SI
+                (zero_extend:V8SI
+                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+                (zero_extend:V8SI
+                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+              (const_vector:V8HI [(const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)]))
+            (const_int 1))))]
+  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+  "pavgw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; The correct representation for this is absolutely enormous, and
+;; surely not generally useful.
+(define_insn "sse2_psadbw"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
+                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+                     UNSPEC_PSADBW))]
+  "TARGET_SSE2"
+  "psadbw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "mode" "TI")])
+
+;; Mask-extraction patterns: each moves an UNSPEC_MOVMSK of an SSE
+;; register into an SImode general register.
+(define_insn "sse_movmskps"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
+                   UNSPEC_MOVMSK))]
+  "TARGET_SSE"
+  "movmskps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "sse2_movmskpd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
+                   UNSPEC_MOVMSK))]
+  "TARGET_SSE2"
+  "movmskpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; NOTE(review): the "mode" attribute below is "V2DF" although the
+;; input operand is V16QI -- looks copied from sse2_movmskpd; confirm.
+(define_insn "sse2_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
+                   UNSPEC_MOVMSK))]
+  "TARGET_SSE2"
+  "pmovmskb\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; Masked byte store.  The expander takes a V16QI memory destination;
+;; the two insns below match the same unspec with the address held in
+;; an SImode (32-bit) or DImode (64-bit) register restricted by
+;; constraint "D".  The old memory contents appear as an unspec input.
+(define_expand "sse2_maskmovdqu"
+  [(set (match_operand:V16QI 0 "memory_operand" "")
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+                       (match_operand:V16QI 2 "register_operand" "x")
+                       (match_dup 0)]
+                      UNSPEC_MASKMOV))]
+  "TARGET_SSE2"
+  "")
+
+(define_insn "*sse2_maskmovdqu"
+  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+                       (match_operand:V16QI 2 "register_operand" "x")
+                       (mem:V16QI (match_dup 0))]
+                      UNSPEC_MASKMOV))]
+  "TARGET_SSE2 && !TARGET_64BIT"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovdqu\t{%2, %1|%1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+(define_insn "*sse2_maskmovdqu_rex64"
+  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+                       (match_operand:V16QI 2 "register_operand" "x")
+                       (mem:V16QI (match_dup 0))]
+                      UNSPEC_MASKMOV))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovdqu\t{%2, %1|%1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; Cache-line flush of the address in operand 0, modelled as a
+;; volatile unspec.
+(define_insn "sse2_clflush"
+  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+                    UNSPECV_CLFLUSH)]
+  "TARGET_SSE2"
+  "clflush\t%a0"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+;; Fences.  Each expander builds operand 0 itself: a volatile BLKmode
+;; MEM whose address is a SCRATCH, which the pattern both reads and
+;; sets.
+(define_expand "sse2_mfence"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_mfence"
+  [(set (match_operand:BLK 0 "" "")
+        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_SSE2"
+  "mfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+(define_expand "sse2_lfence"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_lfence"
+  [(set (match_operand:BLK 0 "" "")
+        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
+  "lfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+;; SSE3 monitor/mwait.  The operands are restricted to specific
+;; registers by the single-letter constraints "a", "c" and "d".
+(define_insn "sse3_mwait"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+                     (match_operand:SI 1 "register_operand" "c")]
+                    UNSPECV_MWAIT)]
+  "TARGET_SSE3"
+  "mwait\t%0, %1"
+  [(set_attr "length" "3")])
+
+(define_insn "sse3_monitor"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+                     (match_operand:SI 1 "register_operand" "c")
+                     (match_operand:SI 2 "register_operand" "d")]
+                    UNSPECV_MONITOR)]
+  "TARGET_SSE3"
+  "monitor\t%0, %1, %2"
+  [(set_attr "length" "3")])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2973bfcb7d4..bd05d6327d2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2005-01-08 Richard Henderson
+
+ * lib/target-supports.exp (check_effective_target_vect_no_bitwise):
+ False for x86 and x86-64.
+
 2005-01-08 Diego Novillo
 
 PR tree-optimization/18241
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index defb4d4e75d..1f966de8881 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -563,10 +563,6 @@ proc check_effective_target_vect_no_bitwise { } {
 verbose "check_effective_target_vect_no_bitwise: using cached result" 2
 } else {
 set et_vect_no_bitwise_saved 0
- if { [istarget i?86-*-*]
- || [istarget x86_64-*-*] } {
- set et_vect_no_bitwise_saved 1
- }
 }
 verbose "check_effective_target_vect_no_bitwise: returning $et_vect_no_bitwise_saved" 2
 return $et_vect_no_bitwise_saved
-- 
2.30.2