From 80e8bb90c4183cc54caf5c8e7078106b4c9a7750 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 9 Jan 2005 03:23:25 -0800 Subject: [PATCH] i386.c (bdesc_2arg): Update names for mmx_ prefixes. * config/i386/i386.c (bdesc_2arg): Update names for mmx_ prefixes. (ix86_expand_builtin): Likewise. Frob MASKMOVQ wrt the input mem just like MASKMOVDQU. Return plain zero for MMX_ZERO. * config/i386/i386.md (MMXMODEI, mov, mov_internal_rex64, mov_internal, movv2sf, movv2sf_internal_rex64, movv2sf_internal, MMXMODE, movmisalign, mmx_pmovmskb, mmx_maskmovq, mmx_maskmovq_rex, sse_movntdi, addv8qi3, addv4hi3, addv2si3, mmx_adddi3, ssaddv8qi3, ssaddv4hi3, usaddv8qi3, usaddv4hi3, subv8qi3, subv4hi3, subv2si3, mmx_subdi3, sssubv8qi3, sssubv4hi3, ussubv8qi3, ussubv4hi3, mulv4hi3, smulv4hi3_highpart, umulv4hi3_highpart, mmx_pmaddwd, sse2_umulsidi3, mmx_iordi3, mmx_xordi3, mmx_anddi3, mmx_nanddi3, mmx_uavgv8qi3, mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pinsrw, mmx_pextrw, mmx_pshufw, eqv8qi3, eqv4hi3, eqv2si3, gtv8qi3, gtv4hi3, gtv2si3, umaxv8qi3, smaxv4hi3, uminv8qi3, sminv4hi3, ashrv4hi3, ashrv2si3, lshrv4hi3, lshrv2si3, mmx_lshrdi3, ashlv4hi3, ashlv2si3, mmx_ashldi3, mmx_packsswb, mmx_packssdw, mmx_packuswb, mmx_punpckhbw, mmx_punpckhwd, mmx_punpckhdq, mmx_punpcklbw, mmx_punpcklwd, mmx_punpckldq, emms, addv2sf3, subv2sf3, subrv2sf3, gtv2sf3, gev2sf3, eqv2sf3, pfmaxv2sf3, pfminv2sf3, mulv2sf3, femms, pf2id, pf2iw, pfacc, pfnacc, pfpnacc, pi2fw, floatv2si2, pfrcpv2sf2, pfrcpit1v2sf3, pfrcpit2v2sf3, pfrsqrtv2sf2, pfrsqit1v2sf3, pmulhrwv4hi3, pswapdv2si2, pswapdv2sf2): Move to mmx.md; rename as necessary with leading mmx_ prefix. (mmx_clrdi, pavgusb): Remove. (ldmxcsr, stmxcsr, sfence, sfence_insn): Move to sse.md; rename with leading sse_ prefix. * config/i386/sse.md: Receive them. * config/i386/mmx.md: New file. (MMXMODE12, MMXMODE24, mmxvecsize): New. (subrv2sf3): Turn into expander for normal subtraction. (mmx_addv2sf3, mmx_mulv2sf3, mmx_smaxv2sf3, mmx_sminv2sf3, mmx_eqv2sf3, mmx_mulv4hi3, mmx_smulv4hi3_highpart, mmx_umulv4hi3_highpart, mmx_pmaddwd, mmx_pmulhrwv4hi3, sse2_umulsidi3, mmx_umaxv8qi3, mmx_smaxv4hi3, mmx_uminv8qi3, mmx_sminv4hi3): Mark commutative; use ix86_binary_operator_ok. (mmx_add3, mmx_ssadd3, mmx_usadd3, mmx_sub3, mmx_sssub3, mmx_ussub3 mmx_ashr3, mmx_lshr3, mmx_ashl3 mmx_eq3, mmx_gt3, mmx_and3, mmx_nand3, mmx_ior3, mmx_xor3): Macroize from existing patterns; use ix86_binary_operator_ok. (mmx_packsswb, mmx_packssdw, mmx_packuswb): Add memory alternative. (mmx_punpckhbw, mmx_punpcklbw, mmx_punpckhwd, mmx_punpcklwd, mmx_punpckhdq, mmx_punpckhdq, mmx_punpckldq): Likewise. Model with vec_select+vec_concat. (mmx_pshufw, mmx_pshufw_1): Likewise. (mmx_uavgv8qi3): Merge pavgusb. Model correcty. (mmx_uavgv4hi3): Model correctly. * config/i386/mmintrin.h (_mm_and_si64, _mm_andnot_si64, _mm_or_si64, _mm_xor_si64): Remove casts. From-SVN: r93107 --- gcc/ChangeLog | 54 ++ gcc/config/i386/i386.c | 160 +++-- gcc/config/i386/i386.md | 1249 +----------------------------------- gcc/config/i386/mmintrin.h | 8 +- gcc/config/i386/mmx.md | 1138 ++++++++++++++++++++++++++++++++ gcc/config/i386/sse.md | 33 + 6 files changed, 1307 insertions(+), 1335 deletions(-) create mode 100644 gcc/config/i386/mmx.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f6b14b2570..31d29e4f68f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,57 @@ +2005-01-09 Richard Henderson + + * config/i386/i386.c (bdesc_2arg): Update names for mmx_ prefixes. + (ix86_expand_builtin): Likewise. Frob MASKMOVQ wrt the input mem + just like MASKMOVDQU. Return plain zero for MMX_ZERO. + * config/i386/i386.md (MMXMODEI, mov, + mov_internal_rex64, mov_internal, movv2sf, + movv2sf_internal_rex64, movv2sf_internal, MMXMODE, + movmisalign, mmx_pmovmskb, mmx_maskmovq, mmx_maskmovq_rex, + sse_movntdi, addv8qi3, addv4hi3, addv2si3, mmx_adddi3, ssaddv8qi3, + ssaddv4hi3, usaddv8qi3, usaddv4hi3, subv8qi3, subv4hi3, subv2si3, + mmx_subdi3, sssubv8qi3, sssubv4hi3, ussubv8qi3, ussubv4hi3, + mulv4hi3, smulv4hi3_highpart, umulv4hi3_highpart, mmx_pmaddwd, + sse2_umulsidi3, mmx_iordi3, mmx_xordi3, mmx_anddi3, mmx_nanddi3, + mmx_uavgv8qi3, mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pinsrw, + mmx_pextrw, mmx_pshufw, eqv8qi3, eqv4hi3, eqv2si3, gtv8qi3, gtv4hi3, + gtv2si3, umaxv8qi3, smaxv4hi3, uminv8qi3, sminv4hi3, ashrv4hi3, + ashrv2si3, lshrv4hi3, lshrv2si3, mmx_lshrdi3, ashlv4hi3, ashlv2si3, + mmx_ashldi3, mmx_packsswb, mmx_packssdw, mmx_packuswb, mmx_punpckhbw, + mmx_punpckhwd, mmx_punpckhdq, mmx_punpcklbw, mmx_punpcklwd, + mmx_punpckldq, emms, addv2sf3, subv2sf3, subrv2sf3, gtv2sf3, gev2sf3, + eqv2sf3, pfmaxv2sf3, pfminv2sf3, mulv2sf3, femms, pf2id, pf2iw, + pfacc, pfnacc, pfpnacc, pi2fw, floatv2si2, pfrcpv2sf2, pfrcpit1v2sf3, + pfrcpit2v2sf3, pfrsqrtv2sf2, pfrsqit1v2sf3, pmulhrwv4hi3, pswapdv2si2, + pswapdv2sf2): Move to mmx.md; rename as necessary with leading + mmx_ prefix. + (mmx_clrdi, pavgusb): Remove. + (ldmxcsr, stmxcsr, sfence, sfence_insn): Move to sse.md; rename + with leading sse_ prefix. + * config/i386/sse.md: Receive them. + * config/i386/mmx.md: New file. + (MMXMODE12, MMXMODE24, mmxvecsize): New. + (subrv2sf3): Turn into expander for normal subtraction. + (mmx_addv2sf3, mmx_mulv2sf3, mmx_smaxv2sf3, mmx_sminv2sf3, + mmx_eqv2sf3, mmx_mulv4hi3, mmx_smulv4hi3_highpart, + mmx_umulv4hi3_highpart, mmx_pmaddwd, mmx_pmulhrwv4hi3, sse2_umulsidi3, + mmx_umaxv8qi3, mmx_smaxv4hi3, mmx_uminv8qi3, mmx_sminv4hi3): Mark + commutative; use ix86_binary_operator_ok. + (mmx_add3, mmx_ssadd3, mmx_usadd3, + mmx_sub3, mmx_sssub3, mmx_ussub3 + mmx_ashr3, mmx_lshr3, mmx_ashl3 + mmx_eq3, mmx_gt3, mmx_and3, + mmx_nand3, mmx_ior3, mmx_xor3): + Macroize from existing patterns; use ix86_binary_operator_ok. + (mmx_packsswb, mmx_packssdw, mmx_packuswb): Add memory alternative. + (mmx_punpckhbw, mmx_punpcklbw, mmx_punpckhwd, mmx_punpcklwd, + mmx_punpckhdq, mmx_punpckhdq, mmx_punpckldq): Likewise. Model + with vec_select+vec_concat. + (mmx_pshufw, mmx_pshufw_1): Likewise. + (mmx_uavgv8qi3): Merge pavgusb. Model correcty. + (mmx_uavgv4hi3): Model correctly. + * config/i386/mmintrin.h (_mm_and_si64, _mm_andnot_si64, _mm_or_si64, + _mm_xor_si64): Remove casts. + 2005-01-09 Zdenek Dvorak PR tree-optimization/19224 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7edd97c3c99..91bbac34c0d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12259,47 +12259,47 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, /* MMX */ - { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, - { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, - { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, - { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, - { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, - { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, - { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, - { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, - { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, - { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, - { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, - { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, - { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, - { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, - { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, - { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, - { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, - { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, - { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, - { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, - { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, - { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, - { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, @@ -12317,24 +12317,24 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, - { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, - { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, - { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, - { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, - { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, - { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, - { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, - { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, - { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, - { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, - { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, - { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, @@ -13471,11 +13471,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, switch (fcode) { case IX86_BUILTIN_EMMS: - emit_insn (gen_emms ()); + emit_insn (gen_mmx_emms ()); return 0; case IX86_BUILTIN_SFENCE: - emit_insn (gen_sfence ()); + emit_insn (gen_sse_sfence ()); return 0; case IX86_BUILTIN_PEXTRW: @@ -13549,8 +13549,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case IX86_BUILTIN_MASKMOVQ: case IX86_BUILTIN_MASKMOVDQU: icode = (fcode == IX86_BUILTIN_MASKMOVQ - ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex - : CODE_FOR_mmx_maskmovq) + ? CODE_FOR_mmx_maskmovq : CODE_FOR_sse2_maskmovdqu); /* Note the arg order is different from the operand order. */ arg1 = TREE_VALUE (arglist); @@ -13563,11 +13562,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, mode1 = insn_data[icode].operand[1].mode; mode2 = insn_data[icode].operand[2].mode; - if (fcode == IX86_BUILTIN_MASKMOVDQU) - { - op0 = force_reg (Pmode, op0); - op0 = gen_rtx_MEM (V16QImode, op0); - } + op0 = force_reg (Pmode, op0); + op0 = gen_rtx_MEM (mode1, op0); if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); @@ -13667,12 +13663,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0); target = assign_386_stack_local (SImode, 0); emit_move_insn (target, op0); - emit_insn (gen_ldmxcsr (target)); + emit_insn (gen_sse_ldmxcsr (target)); return 0; case IX86_BUILTIN_STMXCSR: target = assign_386_stack_local (SImode, 0); - emit_insn (gen_stmxcsr (target)); + emit_insn (gen_sse_stmxcsr (target)); return copy_to_mode_reg (SImode, target); case IX86_BUILTIN_SHUFPS: @@ -13776,91 +13772,89 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return target; case IX86_BUILTIN_FEMMS: - emit_insn (gen_femms ()); + emit_insn (gen_mmx_femms ()); return NULL_RTX; case IX86_BUILTIN_PAVGUSB: - return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target); case IX86_BUILTIN_PF2ID: - return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0); case IX86_BUILTIN_PFACC: - return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target); case IX86_BUILTIN_PFADD: - return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target); case IX86_BUILTIN_PFCMPEQ: - return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target); case IX86_BUILTIN_PFCMPGE: - return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target); case IX86_BUILTIN_PFCMPGT: - return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target); case IX86_BUILTIN_PFMAX: - return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target); case IX86_BUILTIN_PFMIN: - return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target); case IX86_BUILTIN_PFMUL: - return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target); case IX86_BUILTIN_PFRCP: - return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0); case IX86_BUILTIN_PFRCPIT1: - return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target); case IX86_BUILTIN_PFRCPIT2: - return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target); case IX86_BUILTIN_PFRSQIT1: - return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target); case IX86_BUILTIN_PFRSQRT: - return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0); case IX86_BUILTIN_PFSUB: - return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target); case IX86_BUILTIN_PFSUBR: - return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target); case IX86_BUILTIN_PI2FD: - return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0); case IX86_BUILTIN_PMULHRW: - return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target); case IX86_BUILTIN_PF2IW: - return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0); case IX86_BUILTIN_PFNACC: - return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target); case IX86_BUILTIN_PFPNACC: - return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target); + return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target); case IX86_BUILTIN_PI2FW: - return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0); case IX86_BUILTIN_PSWAPDSI: - return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0); case IX86_BUILTIN_PSWAPDSF: - return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0); + return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0); case IX86_BUILTIN_SSE_ZERO: return CONST0_RTX (V4SFmode); case IX86_BUILTIN_MMX_ZERO: - target = gen_reg_rtx (DImode); - emit_insn (gen_mmx_clrdi (target)); - return target; + return const0_rtx; case IX86_BUILTIN_CLRTI: return const0_rtx; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c582c3596bf..194c218c163 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -19708,949 +19708,6 @@ RET; }) -;; Pentium III SIMD instructions. - -;; Moves for SSE/MMX regs. - -;; 8 byte integral modes handled by MMX (and by extension, SSE) -(define_mode_macro MMXMODEI [V8QI V4HI V2SI]) - -(define_expand "mov" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") - (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) - -(define_insn "*mov_internal_rex64" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" - "=rm,r,*y,*y ,m ,*y,Y ,x,x ,m,r,x") - (match_operand:MMXMODEI 1 "vector_move_operand" - "Cr ,m,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))] - "TARGET_64BIT && TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DI")]) - -(define_insn "*mov_internal" - [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" - "=*y,*y ,m ,*y,*Y,*Y,*Y ,m ,*x,*x,*x,m") - (match_operand:MMXMODEI 1 "vector_move_operand" - "C ,*ym,*y,*Y,*y,C ,*Ym,*Y,C ,*x,m ,*x"))] - "TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF")]) - -(define_expand "movv2sf" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "") - (match_operand:V2SF 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (V2SFmode, operands); - DONE; -}) - -(define_insn "*movv2sf_internal_rex64" - [(set (match_operand:V2SF 0 "nonimmediate_operand" - "=rm,r,*y ,*y ,m ,*y,Y ,x,x ,m,r,x") - (match_operand:V2SF 1 "vector_move_operand" - "Cr ,m ,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))] - "TARGET_64BIT && TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - xorps\t%0, %0 - movlps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1} - movd\t{%1, %0|%0, %1}" - [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V2SF,V2SF,DI,DI")]) - -(define_insn "*movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" - "=*y,*y ,m,*y,*Y,*x,*x ,m") - (match_operand:V2SF 1 "vector_move_operand" - "C ,*ym,*y,*Y,*y,C ,*xm,*x"))] - "TARGET_MMX - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - pxor\t%0, %0 - movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1} - movdq2q\t{%1, %0|%0, %1} - movq2dq\t{%1, %0|%0, %1} - xorps\t%0, %0 - movlps\t{%1, %0|%0, %1} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov") - (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V2SF,V2SF")]) - -;; All 8-byte vector modes handled by MMX -(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF]) - -(define_expand "movmisalign" - [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "") - (match_operand:MMXMODE 1 "nonimmediate_operand" ""))] - "TARGET_MMX" -{ - ix86_expand_vector_move (mode, operands); - DONE; -}) - -;; SSE Strange Moves. - -(define_insn "mmx_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] - UNSPEC_MOVMSK))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - - -(define_insn "mmx_maskmovq" - [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] - UNSPEC_MASKMOV))] - "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_maskmovq_rex" - [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] - UNSPEC_MASKMOV))] - "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT" - ;; @@@ check ordering of operands in intel/nonintel syntax - "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "sse_movntdi" - [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "register_operand" "y")] - UNSPEC_MOVNT))] - "TARGET_SSE || TARGET_3DNOW_A" - "movntq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov") - (set_attr "mode" "DI")]) - -;; MMX insns - -;; MMX arithmetic - -(define_insn "addv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "addv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "addv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_adddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(plus:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "paddq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "ssaddv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "ssaddv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "usaddv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "usaddv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "subv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "subv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "subv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (minus:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_subdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(minus:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psubq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "sssubv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "sssubv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "ussubv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "ussubv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mulv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (mult:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "smulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] - "TARGET_MMX" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_int 16))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pmaddwd" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI - (mult:V2SI - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) (const_int 2)]))) - (sign_extend:V2SI - (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0) (const_int 2)])))) - (mult:V2SI - (sign_extend:V2SI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2SI (vec_select:V2HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3)]))))))] - "TARGET_MMX" - "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - -(define_insn "sse2_umulsidi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (mult:DI - (zero_extend:DI - (vec_select:SI - (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0)]))) - (zero_extend:DI - (vec_select:SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])))))] - "TARGET_SSE2" - "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) - - -;; MMX logical operations -;; Note we don't want to declare these as regular iordi3 insns to prevent -;; normal code that also wants to use the FPU from getting broken. -;; The UNSPECs are there to prevent the combiner from getting overly clever. -(define_insn "mmx_iordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_xordi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI") - (set_attr "memory" "none")]) - -;; Same as pxor, but don't show input operands so that we don't think -;; they are live. -(define_insn "mmx_clrdi" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(const_int 0)] UNSPEC_NOP))] - "TARGET_MMX" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI") - (set_attr "memory" "none")]) - -(define_insn "mmx_anddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "%0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "mmx_nanddi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) - (match_operand:DI 2 "nonimmediate_operand" "ym"))] - UNSPEC_NOP))] - "TARGET_MMX" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - - -;; MMX unsigned averages/sum of absolute differences - -(define_insn "mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (ashiftrt:V8QI - (plus:V8QI (plus:V8QI - (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")) - (const_vector:V8QI [(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI - (plus:V4HI (plus:V4HI - (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")) - (const_vector:V4HI [(const_int 1) - (const_int 1) - (const_int 1) - (const_int 1)])) - (const_int 1)))] - "TARGET_SSE || TARGET_3DNOW_A" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_psadbw" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] - UNSPEC_PSADBW))] - "TARGET_SSE || TARGET_3DNOW_A" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - - -;; MMX insert/extract/shuffle - -(define_expand "mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "") - (vec_merge:V4HI - (match_operand:V4HI 1 "register_operand" "") - (vec_duplicate:V4HI - (match_operand:SI 2 "nonimmediate_operand" "")) - (match_operand:SI 3 "const_0_to_3_operand" "")))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[2] = gen_lowpart (HImode, operands[2]); - operands[3] = GEN_INT (1 << INTVAL (operands[3])); -}) - -(define_insn "*mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (match_operand:V4HI 1 "register_operand" "0") - (vec_duplicate:V4HI - (match_operand:HI 2 "nonimmediate_operand" "rm")) - (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); - return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; -} - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pextrw" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") - (parallel - [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))] - "TARGET_SSE || TARGET_3DNOW_A" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_pshufw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (unspec:V4HI [(match_operand:V4HI 1 "nonimmediate_operand" "ym") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_SHUFFLE))] - "TARGET_SSE || TARGET_3DNOW_A" - "pshufw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - - -;; MMX mask-generating comparisons - -(define_insn "eqv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (eq:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "eqv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (eq:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "eqv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (gt:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (gt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - -(define_insn "gtv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) - - -;; MMX max/min insns - -(define_insn "umaxv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umax:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "smaxv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smax:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "uminv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (umin:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - -(define_insn "sminv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (smin:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_SSE || TARGET_3DNOW_A" - "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) - - -;; MMX shifts - -(define_insn "ashrv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "lshrv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "lshrv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -;; See logical MMX insns. -(define_insn "mmx_lshrdi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashlv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "ashlv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi")))] - "TARGET_MMX" - "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -;; See logical MMX insns. -(define_insn "mmx_ashldi3" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI - [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] - UNSPEC_NOP))] - "TARGET_MMX" - "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - - -;; MMX pack/unpack insns. - -(define_insn "mmx_packsswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_packssdw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_concat:V4HI - (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0")) - (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_packuswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) - (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] - "TARGET_MMX" - "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (const_int 85)))] - "TARGET_MMX" - "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "TARGET_MMX" - "punpckhwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckhdq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (match_operand:V2SI 1 "register_operand" "0") - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 1)))] - "TARGET_MMX" - "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpcklbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_merge:V8QI - (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (const_int 85)))] - "TARGET_MMX" - "punpcklbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpcklwd" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_merge:V4HI - (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "TARGET_MMX" - "punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - -(define_insn "mmx_punpckldq" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2SI 2 "register_operand" "y") - (const_int 1)))] - "TARGET_MMX" - "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) - - -;; Miscellaneous stuff - -(define_insn "emms" - [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_MMX" - "emms" - [(set_attr "type" "mmx") - (set_attr "memory" "unknown")]) - -(define_insn "ldmxcsr" - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] - UNSPECV_LDMXCSR)] - "TARGET_SSE" - "ldmxcsr\t%0" - [(set_attr "type" "sse") - (set_attr "memory" "load")]) - -(define_insn "stmxcsr" - [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] - "TARGET_SSE" - "stmxcsr\t%0" - [(set_attr "type" "sse") - (set_attr "memory" "store")]) - -(define_expand "sfence" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] - "TARGET_SSE || TARGET_3DNOW_A" -{ - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; -}) - -(define_insn "*sfence_insn" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] - "TARGET_SSE || TARGET_3DNOW_A" - "sfence" - [(set_attr "type" "sse") - (set_attr "memory" "unknown")]) - (define_expand "sse_prologue_save" [(parallel [(set (match_operand:BLK 0 "" "") (unspec:BLK [(reg:DI 21) @@ -20712,311 +19769,6 @@ (set_attr "modrm" "0") (set_attr "mode" "DI")]) -;; 3Dnow! instructions - -(define_insn "addv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (plus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfadd\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "subv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfsub\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "subrv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym") - (match_operand:V2SF 1 "register_operand" "0")))] - "TARGET_3DNOW" - "pfsubr\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "gtv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpgt\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "gev2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpge\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "eqv2sf3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (eq:V2SI (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfcmpeq\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "V2SF")]) - -(define_insn "pfmaxv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (smax:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmax\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfminv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (smin:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmin\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "mulv2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (mult:V2SF (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pfmul\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "V2SF")]) - -(define_insn "femms" - [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_3DNOW" - "femms" - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) - -(define_insn "pf2id" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pf2id\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "pf2iw" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (sign_extend:V2SI - (ss_truncate:V2HI - (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] - "TARGET_3DNOW_A" - "pf2iw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "pfacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (plus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW" - "pfacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfnacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (minus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pfpnacc" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_concat:V2SF - (minus:SF - (vec_select:SF (match_operand:V2SF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 1) - (parallel [(const_int 1)]))) - (plus:SF - (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y") - (parallel [(const_int 0)])) - (vec_select:SF (match_dup 2) - (parallel [(const_int 1)])))))] - "TARGET_3DNOW_A" - "pfpnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "V2SF")]) - -(define_insn "pi2fw" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (float:V2SF - (vec_concat:V2SI - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])))) - (sign_extend:SI - (truncate:HI - (vec_select:SI (match_dup 1) - (parallel [(const_int 1)])))))))] - "TARGET_3DNOW_A" - "pi2fw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -(define_insn "floatv2si2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW" - "pi2fd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "V2SF")]) - -;; This insn is identical to pavgb in operation, but the opcode is -;; different. To avoid accidentally matching pavgb, use an unspec. - -(define_insn "pavgusb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (unspec:V8QI - [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] - UNSPEC_PAVGUSB))] - "TARGET_3DNOW" - "pavgusb\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "TI")]) - -;; 3DNow reciprocal and sqrt - -(define_insn "pfrcpv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] - UNSPEC_PFRCP))] - "TARGET_3DNOW" - "pfrcp\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrcpit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRCPIT1))] - "TARGET_3DNOW" - "pfrcpit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrcpit2v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRCPIT2))] - "TARGET_3DNOW" - "pfrcpit2\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrsqrtv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] - UNSPEC_PFRSQRT))] - "TARGET_3DNOW" - "pfrsqrt\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pfrsqit1v2sf3" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] - UNSPEC_PFRSQIT1))] - "TARGET_3DNOW" - "pfrsqit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") - (set_attr "mode" "TI")]) - -(define_insn "pmulhrwv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_vector:V4SI [(const_int 32768) - (const_int 32768) - (const_int 32768) - (const_int 32768)])) - (const_int 16))))] - "TARGET_3DNOW" - "pmulhrw\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "TI")]) - -(define_insn "pswapdv2si2" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "TI")]) - -(define_insn "pswapdv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "TI")]) - (define_expand "prefetch" [(prefetch (match_operand 0 "address_operand" "") (match_operand:SI 1 "const_int_operand" "") @@ -21110,3 +19862,4 @@ (set_attr "memory" "none")]) (include "sse.md") +(include "mmx.md") diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index e0be004dc24..ab0eb852f84 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -687,7 +687,7 @@ _m_psrlqi (__m64 __m, int __count) static __inline __m64 _mm_and_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); + return __builtin_ia32_pand (__m1, __m2); } static __inline __m64 @@ -701,7 +701,7 @@ _m_pand (__m64 __m1, __m64 __m2) static __inline __m64 _mm_andnot_si64 (__m64 __m1, __m64 __m2) { - return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); + return __builtin_ia32_pandn (__m1, __m2); } static __inline __m64 @@ -714,7 +714,7 @@ _m_pandn (__m64 __m1, __m64 __m2) static __inline __m64 _mm_or_si64 (__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); + return __builtin_ia32_por (__m1, __m2); } static __inline __m64 @@ -727,7 +727,7 @@ _m_por (__m64 __m1, __m64 __m2) static __inline __m64 _mm_xor_si64 (__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); + return __builtin_ia32_pxor (__m1, __m2); } static __inline __m64 diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md new file mode 100644 index 00000000000..7d16488c232 --- /dev/null +++ b/gcc/config/i386/mmx.md @@ -0,0 +1,1138 @@ +;; GCC machine description for MMX and 3dNOW! instructions +;; Copyright (C) 2005 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;; The MMX and 3dNOW! patterns are in the same file because they use +;; the same register file, and 3dNOW! adds a number of extensions to +;; the base integer MMX isa. + +;; Note! Except for the basic move instructions, *all* of these +;; patterns are outside the normal optabs namespace. This is because +;; use of these registers requires the insertion of emms or femms +;; instructions to return to normal fpu mode. The compiler doesn't +;; know how to do that itself, which means it's up to the user. Which +;; means that we should never use any of these patterns except at the +;; direction of the user via a builtin. + +;; 8 byte integral modes handled by MMX (and by extension, SSE) +(define_mode_macro MMXMODEI [V8QI V4HI V2SI]) + +;; All 8-byte vector modes handled by MMX +(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF]) + +;; Mix-n-match +(define_mode_macro MMXMODE12 [V8QI V4HI]) +(define_mode_macro MMXMODE24 [V4HI V2SI]) + +;; Mapping from integer vector mode to mnemonic suffix +(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (DI "q")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Move patterns +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; All of these patterns are enabled for MMX as well as 3dNOW. +;; This is essential for maintaining stable calling conventions. + +(define_expand "mov" + [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "") + (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))] + "TARGET_MMX" +{ + ix86_expand_vector_move (mode, operands); + DONE; +}) + +(define_insn "*mov_internal_rex64" + [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + "=rm,r,*y,*y ,m ,*y,Y ,x,x ,m,r,x") + (match_operand:MMXMODEI 1 "vector_move_operand" + "Cr ,m,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))] + "TARGET_64BIT && TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov") + (set_attr "mode" "DI")]) + +(define_insn "*mov_internal" + [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" + "=*y,*y ,m ,*y,*Y,*Y,*Y ,m ,*x,*x,*x,m") + (match_operand:MMXMODEI 1 "vector_move_operand" + "C ,*ym,*y,*Y,*y,C ,*Ym,*Y,C ,*x,m ,*x"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov,ssemov,ssemov") + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF")]) + +(define_expand "movv2sf" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] + "TARGET_MMX" +{ + ix86_expand_vector_move (V2SFmode, operands); + DONE; +}) + +(define_insn "*movv2sf_internal_rex64" + [(set (match_operand:V2SF 0 "nonimmediate_operand" + "=rm,r,*y ,*y ,m ,*y,Y ,x,x ,m,r,x") + (match_operand:V2SF 1 "vector_move_operand" + "Cr ,m ,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))] + "TARGET_64BIT && TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov") + (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V2SF,V2SF,DI,DI")]) + +(define_insn "*movv2sf_internal" + [(set (match_operand:V2SF 0 "nonimmediate_operand" + "=*y,*y ,m,*y,*Y,*x,*x ,m") + (match_operand:V2SF 1 "vector_move_operand" + "C ,*ym,*y,*Y,*y,C ,*xm,*x"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov") + (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V2SF,V2SF")]) + +(define_expand "movmisalign" + [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "") + (match_operand:MMXMODE 1 "nonimmediate_operand" ""))] + "TARGET_MMX" +{ + ix86_expand_vector_move (mode, operands); + DONE; +}) + +(define_insn "sse_movntdi" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "register_operand" "y")] + UNSPEC_MOVNT))] + "TARGET_SSE || TARGET_3DNOW_A" + "movntq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_addv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)" + "pfadd\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_subv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y,y") + (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym") + (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))] + "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + pfsub\\t{%2, %0|%0, %2} + pfsubr\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_expand "mmx_subrv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "") + (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" "")))] + "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "") + +(define_insn "mmx_mulv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)" + "pfmul\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_smaxv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (smax:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (SMAX, V2SFmode, operands)" + "pfmax\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_sminv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (smin:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (SMIN, V2SFmode, operands)" + "pfmin\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rcpv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRCP))] + "TARGET_3DNOW" + "pfrcp\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rcpit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT1))] + "TARGET_3DNOW" + "pfrcpit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rcpit2v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT2))] + "TARGET_3DNOW" + "pfrcpit2\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rsqrtv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQRT))] + "TARGET_3DNOW" + "pfrsqrt\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_rsqit1v2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQIT1))] + "TARGET_3DNOW" + "pfrsqit1\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_haddv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (plus:SF + (vec_select:SF + (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) + (plus:SF + (vec_select:SF + (match_operand:V2SF 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_3DNOW" + "pfacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_hsubv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_concat:V2SF + (minus:SF + (vec_select:SF + (match_operand:V2SF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) + (minus:SF + (vec_select:SF + (match_operand:V2SF 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_3DNOW_A" + "pfnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_addsubv2sf3" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_merge:V2SF + (plus:V2SF + (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")) + (minus:V2SF (match_dup 1) (match_dup 2)) + (const_int 1)))] + "TARGET_3DNOW_A" + "pfpnacc\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_gtv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (gt:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpgt\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_gev2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ge:V2SI (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pfcmpge\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_eqv2sf3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)" + "pfcmpeq\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point conversion operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_pf2id" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pf2id\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_pf2iw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (sign_extend:V2SI + (ss_truncate:V2HI + (fix:V2SI + (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] + "TARGET_3DNOW_A" + "pf2iw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_pi2fw" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF + (sign_extend:V2SI + (truncate:V2HI + (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))] + "TARGET_3DNOW_A" + "pi2fw\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +(define_insn "mmx_floatv2si2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] + "TARGET_3DNOW" + "pi2fd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel single-precision floating point element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_pswapdv2sf2" + [(set (match_operand:V2SF 0 "register_operand" "=y") + (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral arithmetic +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_add3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (plus:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (PLUS, mode, operands)" + "padd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_adddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX && ix86_binary_operator_ok (PLUS, DImode, operands)" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ssadd3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (ss_plus:MMXMODE12 + (match_operand:MMXMODE12 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "padds\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_usadd3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (us_plus:MMXMODE12 + (match_operand:MMXMODE12 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddus\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_sub3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (minus:MMXMODEI + (match_operand:MMXMODEI 1 "register_operand" "0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psub\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_subdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_sssub3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (ss_minus:MMXMODE12 + (match_operand:MMXMODE12 1 "register_operand" "0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubs\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ussub3" + [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + (us_minus:MMXMODE12 + (match_operand:MMXMODE12 1 "register_operand" "0") + (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubus\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_mulv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmullw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_smulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (sign_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_umulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (zero_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (zero_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhuw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pmaddwd" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (sign_extend:V2SI + (vec_select:V2HI + (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) (const_int 2)]))) + (sign_extend:V2SI + (vec_select:V2HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) (const_int 2)])))) + (mult:V2SI + (sign_extend:V2SI + (vec_select:V2HI (match_dup 1) + (parallel [(const_int 1) (const_int 3)]))) + (sign_extend:V2SI + (vec_select:V2HI (match_dup 2) + (parallel [(const_int 1) (const_int 3)]))))))] + "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmaddwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pmulhrwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_vector:V4SI [(const_int 32768) (const_int 32768) + (const_int 32768) (const_int 32768)])) + (const_int 16))))] + "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhrw\\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "sse2_umulsidi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (mult:DI + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0)]))) + (zero_extend:DI + (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])))))] + "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + +(define_insn "mmx_umaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (UMAX, V8QImode, operands)" + "pmaxub\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (SMAX, V4HImode, operands)" + "pmaxsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (UMIN, V8QImode, operands)" + "pminub\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (SMIN, V4HImode, operands)" + "pminsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ashr3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + (ashiftrt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psra\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_lshr3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + (lshiftrt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrl\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psrlq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ashl3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + (ashift:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psll\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ashldi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(ashift:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psllq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral comparisons +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_eq3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (eq:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (EQ, mode, operands)" + "pcmpeq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) + +(define_insn "mmx_gt3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (gt:MMXMODEI + (match_operand:MMXMODEI 1 "register_operand" "0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgt\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral logical operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_and3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (and:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (AND, mode, operands)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_nand3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (and:MMXMODEI + (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0")) + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_ior3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (ior:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (IOR, mode, operands)" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_xor3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (xor:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX && ix86_binary_operator_ok (XOR, mode, operands)" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") + (set_attr "memory" "none")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Parallel integral element swizzling +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_packsswb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (ss_truncate:V4QI + (match_operand:V4HI 1 "register_operand" "0")) + (ss_truncate:V4QI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))] + "TARGET_MMX" + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_packssdw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (ss_truncate:V2HI + (match_operand:V2SI 1 "register_operand" "0")) + (ss_truncate:V2HI + (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))] + "TARGET_MMX" + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_packuswb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI + (match_operand:V4HI 1 "register_operand" "0")) + (us_truncate:V4QI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))] + "TARGET_MMX" + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckhbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_MMX" + "punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpcklbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_MMX" + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckhwd" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_MMX" + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpcklwd" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_MMX" + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckhdq" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 1) + (const_int 3)])))] + "TARGET_MMX" + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_punpckldq" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_MMX" + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_expand "mmx_pinsrw" + [(set (match_operand:V4HI 0 "register_operand" "") + (vec_merge:V4HI + (match_operand:V4HI 1 "register_operand" "") + (vec_duplicate:V4HI + (match_operand:SI 2 "nonimmediate_operand" "")) + (match_operand:SI 3 "const_0_to_3_operand" "")))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[2] = gen_lowpart (HImode, operands[2]); + operands[3] = GEN_INT (1 << INTVAL (operands[3])); +}) + +(define_insn "*mmx_pinsrw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (match_operand:V4HI 1 "register_operand" "0") + (vec_duplicate:V4HI + (match_operand:HI 2 "nonimmediate_operand" "rm")) + (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; +} + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "y") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))] + "TARGET_SSE || TARGET_3DNOW_A" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + + +(define_expand "mmx_pshufw" + [(match_operand:V4HI 0 "register_operand" "") + (match_operand:V4HI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_SSE || TARGET_3DNOW_A" +{ + int mask = INTVAL (operands[2]); + emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1], + GEN_INT ((mask >> 0) & 3), + GEN_INT ((mask >> 2) & 3), + GEN_INT ((mask >> 4) & 3), + GEN_INT ((mask >> 6) & 3))); + DONE; +}) + +(define_insn "mmx_pshufw_1" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (match_operand:V4HI 1 "nonimmediate_operand" "ym") + (parallel [(match_operand 2 "const_0_to_3_operand" "") + (match_operand 3 "const_0_to_3_operand" "") + (match_operand 4 "const_0_to_3_operand" "") + (match_operand 5 "const_0_to_3_operand" "")])))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + return "pshufw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pswapdv2si2" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (match_operand:V2SI 1 "nonimmediate_operand" "ym") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A" + "pswapd\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Miscelaneous +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "mmx_uavgv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI + (zero_extend:V8HI + (match_operand:V8QI 1 "nonimmediate_operand" "%0")) + (zero_extend:V8HI + (match_operand:V8QI 2 "nonimmediate_operand" "ym"))) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "(TARGET_SSE || TARGET_3DNOW) + && ix86_binary_operator_ok (PLUS, V8QImode, operands)" +{ + /* These two instructions have the same operation, but their encoding + is different. Prefer the one that is defacto standard. */ + if (TARGET_SSE || TARGET_3DNOW_A) + return "pavgb\t{%2, %0|%0, %2}"; + else + return "pavgusb\\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_uavgv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (plus:V4SI + (zero_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (zero_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_vector:V4SI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "(TARGET_SSE || TARGET_3DNOW_A) + && ix86_binary_operator_ok (PLUS, V4HImode, operands)" + "pavgw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_psadbw" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSADBW))] + "TARGET_SSE || TARGET_3DNOW_A" + "psadbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +(define_insn "mmx_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] + UNSPEC_MOVMSK))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + +(define_expand "mmx_maskmovq" + [(set (match_operand:V8QI 0 "memory_operand" "") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y") + (match_dup 0)] + UNSPEC_MASKMOV))] + "TARGET_SSE || TARGET_3DNOW_A" + "") + +(define_insn "*mmx_maskmovq" + [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y") + (mem:V8QI (match_dup 0))] + UNSPEC_MASKMOV))] + "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovq\t{%2, %1|%1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "*mmx_maskmovq_rex" + [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y") + (mem:V8QI (match_dup 0))] + UNSPEC_MASKMOV))] + "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovq\t{%2, %1|%1, %2}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +(define_insn "mmx_emms" + [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber (reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] + "TARGET_MMX" + "emms" + [(set_attr "type" "mmx") + (set_attr "memory" "unknown")]) + +(define_insn "mmx_femms" + [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber (reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] + "TARGET_3DNOW" + "femms" + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ee90d0664d6..7821744b526 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3051,6 +3051,39 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) +(define_insn "sse_ldmxcsr" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] + UNSPECV_LDMXCSR)] + "TARGET_SSE" + "ldmxcsr\t%0" + [(set_attr "type" "sse") + (set_attr "memory" "load")]) + +(define_insn "sse_stmxcsr" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] + "TARGET_SSE" + "stmxcsr\t%0" + [(set_attr "type" "sse") + (set_attr "memory" "store")]) + +(define_expand "sse_sfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*sse_sfence" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] + "TARGET_SSE || TARGET_3DNOW_A" + "sfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + (define_insn "sse2_clflush" [(unspec_volatile [(match_operand 0 "address_operand" "p")] UNSPECV_CLFLUSH)] -- 2.30.2