From 077084dd677b85ea2feb1475c0f7499fd10b8d58 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 22 Oct 2002 19:08:42 +0200 Subject: [PATCH] i386.c (builtin_description): Add IX86_BUILTIN_PUNPCKHQDQ128. * i386.c (builtin_description): Add IX86_BUILTIN_PUNPCKHQDQ128. (ix86_expand_builtin): Fix MASKMOVDQU expansion. * i386.h (ix86_builtins): Add IX86_BUILTIN_PUNPCKHQDQ128. * i386.md (mmx_punpck?dq): Simplify. (sse2_punpcklqdq): Fix. (sse2_punpckhqdq): New. * xmmintrin.h (_mm_unpackhi_epi64): New. * xmmintrin.h (_mm_cvt*, _mm_stream_pd): Fix prototypes. (_mm_shufflehi_epi16, _mm_shufflelo_epi16): Fix typo. From-SVN: r58412 --- gcc/ChangeLog | 13 ++++++++++ gcc/config/i386/i386.c | 2 ++ gcc/config/i386/i386.h | 1 + gcc/config/i386/i386.md | 23 +++++++++++----- gcc/config/i386/xmmintrin.h | 52 +++++++++++++++++++++---------------- 5 files changed, 61 insertions(+), 30 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2d85ff97c87..c2f7fbd151d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +Tue Oct 22 19:07:03 CEST 2002 Jan Hubicka + + * i386.c (builtin_description): Add IX86_BUILTIN_PUNPCKHQDQ128. + (ix86_expand_builtin): Fix MASKMOVDQU expansion. + * i386.h (ix86_builtins): Add IX86_BUILTIN_PUNPCKHQDQ128. + * i386.md (mmx_punpck?dq): Simplify. + (sse2_punpcklqdq): Fix. + (sse2_punpckhqdq): New. + * xmmintrin.h (_mm_unpackhi_epi64): New. + + * xmmintrin.h (_mm_cvt*, _mm_stream_pd): Fix prototypes. + (_mm_shufflehi_epi16, _mm_shufflelo_epi16): Fix typo. 
+ 2002-10-22 Nathan Sidwell PR c++/7209 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2b857bf9898..537aae82d68 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12115,6 +12115,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, @@ -13145,6 +13146,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return target; case IX86_BUILTIN_MASKMOVQ: + case IX86_BUILTIN_MASKMOVDQU: icode = (fcode == IX86_BUILTIN_MASKMOVQ ? (TARGET_64BIT ? 
CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq) : CODE_FOR_sse2_maskmovdqu); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f832e1088a6..aabf3e531df 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2427,6 +2427,7 @@ enum ix86_builtins IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHWD128, IX86_BUILTIN_PUNPCKHDQ128, + IX86_BUILTIN_PUNPCKHQDQ128, IX86_BUILTIN_PUNPCKLBW128, IX86_BUILTIN_PUNPCKLWD128, IX86_BUILTIN_PUNPCKLDQ128, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a4991313d5b..13a8c215428 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -19694,9 +19694,7 @@ (define_insn "mmx_punpckhdq" [(set (match_operand:V2SI 0 "register_operand" "=y") (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 1)])) + (match_operand:V2SI 1 "register_operand" "0") (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1) (const_int 0)])) @@ -19758,9 +19756,7 @@ (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") (parallel [(const_int 1) (const_int 0)])) - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 1)])) + (match_operand:V2SI 2 "register_operand" "y") (const_int 1)))] "TARGET_MMX" "punpckldq\t{%2, %0|%0, %2}" @@ -21548,16 +21544,29 @@ (define_insn "sse2_punpcklqdq" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_merge:V2DI - (match_operand:V2DI 1 "register_operand" "0") (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") (parallel [(const_int 1) (const_int 0)])) + (match_operand:V2DI 1 "register_operand" "0") (const_int 1)))] "TARGET_SSE2" "punpcklqdq\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) +(define_insn "sse2_punpckhqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (match_operand:V2DI 1 "register_operand" "0") + (vec_select:V2DI (match_operand:V2DI 2 
"register_operand" "x") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] + "TARGET_SSE2" + "punpckhqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + ;; SSE2 moves (define_insn "sse2_movapd" diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 08ef76ad1a0..14fdcd5bee8 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -1827,16 +1827,16 @@ _mm_cvtepi32_pd (__m128i __A) return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); } -static __inline __m128d +static __inline __m128 _mm_cvtepi32_ps (__m128i __A) { - return (__m128d)__builtin_ia32_cvtdq2ps ((__v4si) __A); + return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); } -static __inline __m128d +static __inline __m128i _mm_cvtpd_epi32 (__m128d __A) { - return (__m128d)__builtin_ia32_cvtpd2dq ((__v2df) __A); + return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); } static __inline __m64 @@ -1845,16 +1845,16 @@ _mm_cvtpd_pi32 (__m128d __A) return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); } -static __inline __m128d +static __inline __m128 _mm_cvtpd_ps (__m128d __A) { - return (__m128d)__builtin_ia32_cvtpd2ps ((__v2df) __A); + return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); } -static __inline __m128d +static __inline __m128i _mm_cvttpd_epi32 (__m128d __A) { - return (__m128d)__builtin_ia32_cvttpd2dq ((__v2df) __A); + return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); } static __inline __m64 @@ -1869,20 +1869,20 @@ _mm_cvtpi32_pd (__m64 __A) return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); } -static __inline __m128d -_mm_cvtps_epi32 (__m128d __A) +static __inline __m128i +_mm_cvtps_epi32 (__m128 __A) { - return (__m128d)__builtin_ia32_cvtps2dq ((__v4sf) __A); + return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); } -static __inline __m128d -_mm_cvttps_epi32 (__m128d __A) +static __inline __m128i +_mm_cvttps_epi32 (__m128 __A) { - return (__m128d)__builtin_ia32_cvttps2dq ((__v4sf) __A); + return 
(__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); } static __inline __m128d -_mm_cvtps_pd (__m128d __A) +_mm_cvtps_pd (__m128 __A) { return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); } @@ -1899,10 +1899,10 @@ _mm_cvttsd_si32 (__m128d __A) return __builtin_ia32_cvttsd2si ((__v2df) __A); } -static __inline __m128d -_mm_cvtsd_ss (__m128d __A, __m128d __B) +static __inline __m128 +_mm_cvtsd_ss (__m128 __A, __m128d __B) { - return (__m128d)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); + return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); } static __inline __m128d @@ -1912,7 +1912,7 @@ _mm_cvtsi32_sd (__m128d __A, int __B) } static __inline __m128d -_mm_cvtss_sd (__m128d __A, __m128d __B) +_mm_cvtss_sd (__m128d __A, __m128 __B) { return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); } @@ -1997,6 +1997,12 @@ _mm_unpackhi_epi32 (__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); } +static __inline __m128i +_mm_unpackhi_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); +} + static __inline __m128i _mm_unpacklo_epi8 (__m128i __A, __m128i __B) { @@ -2359,8 +2365,8 @@ _mm_mulhi_epu16 (__m128i __A, __m128i __B) return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); } -#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw128 ((__v8hi)__A, __B)) -#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw128 ((__v8hi)__A, __B)) +#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B)) +#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B)) #define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) static __inline void @@ -2400,9 +2406,9 @@ _mm_stream_si128 (__m128i *__A, __m128i __B) } static __inline void -_mm_stream_pd (__m128d *__A, __m128d __B) +_mm_stream_pd (double *__A, __m128d __B) 
{ - __builtin_ia32_movntpd (__A, (__v2df)__B); + __builtin_ia32_movntpd ((__m128d *)__A, (__v2df)__B); } static __inline __m128i -- 2.30.2