+2008-03-13 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/34000
+ PR target/35553
+ * config/i386/xmmintrin.h: Change all static inline functions to
+ extern inline and add __gnu_inline__ attribute.
+ * config/i386/bmintrin.h: Ditto.
+ * config/i386/smmintrin.h: Ditto.
+ * config/i386/tmmintrin.h: Ditto.
+ * config/i386/mmintrin-common.h: Ditto.
+ * config/i386/ammintrin.h: Ditto.
+ * config/i386/emmintrin.h: Ditto.
+ * config/i386/pmmintrin.h: Ditto.
+ * config/i386/mmintrin.h: Ditto.
+ * config/i386/mm3dnow.h: Ditto.
+
2008-03-13 Jakub Jelinek <jakub@redhat.com>
PR middle-end/35185
/* We need definitions from the SSE3, SSE2 and SSE header files*/
#include <pmmintrin.h>
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_sd (double * __P, __m128d __Y)
{
__builtin_ia32_movntsd (__P, (__v2df) __Y);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_ss (float * __P, __m128 __Y)
{
__builtin_ia32_movntss (__P, (__v4sf) __Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_si64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
{
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
(unsigned int)(I), (unsigned int)(L)))
#endif
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_si64 (__m128i __X,__m128i __Y)
{
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
{
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
#include <mmintrin-common.h>
/* Floating point multiply/add type instructions */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_fnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_fnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
/* Integer multiply/add intructions. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
}
/* Packed Integer Horizontal Add and Subtract */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddw_epi8(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddbw ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epi8(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddbd ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epi8(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddbq ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epi16(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddwd ((__v8hi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epi16(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddwq ((__v8hi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epi32(__m128i __A)
{
return (__m128i) __builtin_ia32_phadddq ((__v4si)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddw_epu8(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddubw ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epu8(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddubd ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epu8(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddubq ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epu16(__m128i __A)
{
return (__m128i) __builtin_ia32_phadduwd ((__v8hi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epu16(__m128i __A)
{
return (__m128i) __builtin_ia32_phadduwq ((__v8hi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epu32(__m128i __A)
{
return (__m128i) __builtin_ia32_phaddudq ((__v4si)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubw_epi8(__m128i __A)
{
return (__m128i) __builtin_ia32_phsubbw ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubd_epi16(__m128i __A)
{
return (__m128i) __builtin_ia32_phsubwd ((__v8hi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubq_epi32(__m128i __A)
{
return (__m128i) __builtin_ia32_phsubdq ((__v4si)__A);
}
/* Vector conditional move and permute */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pcmov (__A, __B, __C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_pperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_perm_ps(__m128 __A, __m128 __B, __m128i __C)
{
return (__m128) __builtin_ia32_permps ((__m128)__A, (__m128)__B, (__v16qi)__C);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_perm_pd(__m128d __A, __m128d __B, __m128i __C)
{
return (__m128d) __builtin_ia32_permpd ((__m128d)__A, (__m128d)__B, (__v16qi)__C);
/* Packed Integer Rotates and Shifts */
/* Rotates - Non-Immediate form */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protq ((__v2di)__A, (__v2di)__B);
/* Rotates - Immediate form */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi8(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi16(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi32(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi64(__m128i __A, const int __B)
{
return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
/* pshl */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshlb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshlw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshld ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshlq ((__v2di)__A, (__v2di)__B);
}
/* psha */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshab ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshaw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshad ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshaq ((__v2di)__A, (__v2di)__B);
/* Compare and Predicate Generation */
/* com (floating point, packed single) */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comeqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comltps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comleps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comunord_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunordps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comuneqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnlt_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunltps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnle_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunleps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comord_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comordps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comueq_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comueqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnge_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comngt_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comfalseps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comoneq_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comtrueps ((__v4sf)__A, (__v4sf)__B);
/* com (floating point, packed double) */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comeqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comltpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comlepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comunord_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunordpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comuneqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnlt_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunltpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnle_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunlepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comord_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comordpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comueq_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comueqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnge_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comngt_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comfalsepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comoneq_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comneqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comtruepd ((__v2df)__A, (__v2df)__B);
}
/* com (floating point, scalar single) */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comeqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comltss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comless ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comunord_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunordss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comuneqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnlt_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunltss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnle_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunless ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comord_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comordss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comueq_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comueqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnge_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungess ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comngt_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungtss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comfalsess ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comoneq_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comneqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgess ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgtss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comtruess ((__v4sf)__A, (__v4sf)__B);
/* com (floating point, scalar double) */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comeqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comltsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comlesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comunord_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunordsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comuneqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnlt_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunltsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnle_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunlesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comord_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comordsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comueq_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comueqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comnge_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comngt_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungtsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comfalsesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comoneq_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comneqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgtsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comtruesd ((__v2df)__A, (__v2df)__B);
/*pcom (integer, unsinged bytes) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltub ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleub ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtub ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeub ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseub ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueub ((__v16qi)__A, (__v16qi)__B);
/*pcom (integer, unsinged words) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltuw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleuw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtuw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeuw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseuw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueuw ((__v8hi)__A, (__v8hi)__B);
/*pcom (integer, unsinged double words) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltud ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleud ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtud ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeud ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseud ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueud ((__v4si)__A, (__v4si)__B);
/*pcom (integer, unsinged quad words) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltuq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleuq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtuq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeuq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseuq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueuq ((__v2di)__A, (__v2di)__B);
/*pcom (integer, signed bytes) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueb ((__v16qi)__A, (__v16qi)__B);
/*pcom (integer, signed words) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomlew ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgew ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalsew ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtruew ((__v8hi)__A, (__v8hi)__B);
/*pcom (integer, signed double words) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomled ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomged ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalsed ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrued ((__v4si)__A, (__v4si)__B);
/*pcom (integer, signed quad words) */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseq ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueq ((__v2di)__A, (__v2di)__B);
}
/* FRCZ */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_ps (__m128 __A)
{
return (__m128) __builtin_ia32_frczps ((__v4sf)__A);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_pd (__m128d __A)
{
return (__m128d) __builtin_ia32_frczpd ((__v2df)__A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_frczss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_frczsd ((__v2df)__A, (__v2df)__B);
(((fp1) << 1) | (fp0))
/* Create a vector with element 0 as F and the rest zero. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sd (double __F)
{
return __extension__ (__m128d){ __F, 0.0 };
}
/* Create a vector with both elements equal to F. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pd (double __F)
{
return __extension__ (__m128d){ __F, __F };
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd1 (double __F)
{
return _mm_set1_pd (__F);
}
/* Create a vector with the lower value X and upper value W. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd (double __W, double __X)
{
return __extension__ (__m128d){ __X, __W };
}
/* Create a vector with the lower value W and upper value X. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pd (double __W, double __X)
{
return __extension__ (__m128d){ __W, __X };
}
/* Create a vector of zeros. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_pd (void)
{
return __extension__ (__m128d){ 0.0, 0.0 };
}
/* Sets the low DPFP value of A from the low value of B. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
}
/* Load two DPFP values from P. The address must be 16-byte aligned. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_pd (double const *__P)
{
return *(__m128d *)__P;
}
/* Load two DPFP values from P. The address need not be 16-byte aligned. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_pd (double const *__P)
{
return __builtin_ia32_loadupd (__P);
}
/* Create a vector with all two elements equal to *P. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load1_pd (double const *__P)
{
return _mm_set1_pd (*__P);
}
/* Create a vector with element 0 as *P and the rest zero. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sd (double const *__P)
{
return _mm_set_sd (*__P);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_pd1 (double const *__P)
{
return _mm_load1_pd (__P);
}
/* Load two DPFP values in reverse order. The address must be aligned. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadr_pd (double const *__P)
{
__m128d __tmp = _mm_load_pd (__P);
}
/* Store two DPFP values. The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_pd (double *__P, __m128d __A)
{
*(__m128d *)__P = __A;
}
/* Store two DPFP values. The address need not be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_pd (double *__P, __m128d __A)
{
__builtin_ia32_storeupd (__P, __A);
}
/* Stores the lower DPFP value. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sd (double *__P, __m128d __A)
{
*__P = __builtin_ia32_vec_ext_v2df (__A, 0);
}
-static __inline double __attribute__((__always_inline__, __artificial__))
+extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_f64 (__m128d __A)
{
return __builtin_ia32_vec_ext_v2df (__A, 0);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pd (double *__P, __m128d __A)
{
_mm_store_sd (__P, __A);
}
/* Stores the upper DPFP value. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pd (double *__P, __m128d __A)
{
*__P = __builtin_ia32_vec_ext_v2df (__A, 1);
/* Store the lower DPFP value across two words.
The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store1_pd (double *__P, __m128d __A)
{
_mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_pd1 (double *__P, __m128d __A)
{
_mm_store1_pd (__P, __A);
}
/* Store two DPFP values in reverse order. The address must be aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storer_pd (double *__P, __m128d __A)
{
_mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si32 (__m128i __A)
{
return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64 (__m128i __A)
{
return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64x (__m128i __A)
{
return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
}
#endif
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_pd (__m128d __A)
{
return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
}
/* Return pair {sqrt (A[0), B[1]}. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_sd (__m128d __A, __m128d __B)
{
__v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
__A));
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
__A));
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
__A));
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
__A));
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
/* Create a vector of Qi, where i is the element number. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64x (long long __q1, long long __q0)
{
return __extension__ (__m128i)(__v2di){ __q0, __q1 };
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64 (__m64 __q1, __m64 __q0)
{
return _mm_set_epi64x ((long long)__q1, (long long)__q0);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
{
return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
short __q3, short __q2, short __q1, short __q0)
{
__q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
char __q11, char __q10, char __q09, char __q08,
char __q07, char __q06, char __q05, char __q04,
/* Set all of the elements of the vector to A. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64x (long long __A)
{
return _mm_set_epi64x (__A, __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64 (__m64 __A)
{
return _mm_set_epi64 (__A, __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi32 (int __A)
{
return _mm_set_epi32 (__A, __A, __A, __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi16 (short __A)
{
return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi8 (char __A)
{
return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
/* Create a vector of Qi, where i is the element number.
The parameter order is reversed from the _mm_set_epi* functions. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi64 (__m64 __q0, __m64 __q1)
{
return _mm_set_epi64 (__q1, __q0);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
{
return _mm_set_epi32 (__q3, __q2, __q1, __q0);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
short __q4, short __q5, short __q6, short __q7)
{
return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
char __q04, char __q05, char __q06, char __q07,
char __q08, char __q09, char __q10, char __q11,
/* Create a vector with element 0 as *P and the rest zero. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_si128 (__m128i const *__P)
{
return *__P;
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_si128 (__m128i const *__P)
{
return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_epi64 (__m128i const *__P)
{
return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_si128 (__m128i *__P, __m128i __B)
{
*__P = __B;
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_si128 (__m128i *__P, __m128i __B)
{
__builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_epi64 (__m128i *__P, __m128i __B)
{
*(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_pi64 (__m128i __B)
{
return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movpi64_epi64 (__m64 __A)
{
return _mm_set_epi64 ((__m64)0LL, __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_epi64 (__m128i __A)
{
return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A));
}
/* Create a vector of zeros. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si128 (void)
{
return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_pd (__m128i __A)
{
return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ps (__m128i __A)
{
return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epi32 (__m128d __A)
{
return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_pi32 (__m128d __A)
{
return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_ps (__m128d __A)
{
return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epi32 (__m128d __A)
{
return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_pi32 (__m128d __A)
{
return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32_pd (__m64 __A)
{
return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi32 (__m128 __A)
{
return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi32 (__m128 __A)
{
return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pd (__m128 __A)
{
return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si32 (__m128d __A)
{
return __builtin_ia32_cvtsd2si ((__v2df) __A);
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64 (__m128d __A)
{
return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64x (__m128d __A)
{
return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
#endif
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si32 (__m128d __A)
{
return __builtin_ia32_cvttsd2si ((__v2df) __A);
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64 (__m128d __A)
{
return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64x (__m128d __A)
{
return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
#endif
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_sd (__m128d __A, int __B)
{
return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_sd (__m128d __A, long long __B)
{
return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
/* Microsoft intrinsic. */
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_sd (__m128d __A, long long __B)
{
return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
#endif
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
}
#ifdef __OPTIMIZE__
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
(__v2df)(__m128d)(B), (int)(N)))
#endif
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd (__m128d __A, double const *__B)
{
return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pd (__m128d __A)
{
return __builtin_ia32_movmskpd ((__v2df)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_su32 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epu32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi16 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi32 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi64 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi16 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi32 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi16 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi32 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi64 (__m128i __A, int __B)
{
return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
}
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
(int)(D), (int)(N)))
#endif
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_epi8 (__m128i __A)
{
return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
{
__builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sad_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
__builtin_ia32_movnti (__A, __B);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si128 (__m128i *__A, __m128i __B)
{
__builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pd (double *__A, __m128d __B)
{
__builtin_ia32_movntpd (__A, (__v2df)__B);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
__builtin_ia32_clflush (__A);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
__builtin_ia32_lfence ();
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
__builtin_ia32_mfence ();
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si128 (int __A)
{
return _mm_set_epi32 (0, 0, 0, __A);
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si128 (long long __A)
{
return _mm_set_epi64x (0, __A);
}
/* Microsoft intrinsic. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si128 (long long __A)
{
return _mm_set_epi64x (0, __A);
/* Casts between various SP, DP, INT vector types. Note that these do no
conversion of values, they just change the type. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ps(__m128d __A)
{
return (__m128) __A;
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_si128(__m128d __A)
{
return (__m128i) __A;
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_pd(__m128 __A)
{
return (__m128d) __A;
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_si128(__m128 __A)
{
return (__m128i) __A;
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ps(__m128i __A)
{
return (__m128) __A;
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_pd(__m128i __A)
{
return (__m128d) __A;
/* Internal data types for implementing the intrinsics. */
typedef float __v2sf __attribute__ ((__vector_size__ (8)));
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms (void)
{
__builtin_ia32_femms();
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgusb (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2id (__m64 __A)
{
return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfadd (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpeq (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpge (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpgt (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmax (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmin (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmul (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcp (__m64 __A)
{
return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit2 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqrt (__m64 __A)
{
return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsub (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsubr (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fd (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhrw (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetch (void *__P)
{
__builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchw (void *__P)
{
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_float (float __A)
{
return __extension__ (__m64)(__v2sf){ __A, 0.0f };
}
-static __inline float __attribute__((__always_inline__, __artificial__))
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_float (__m64 __A)
{
union { __v2sf v; float a[2]; } __tmp;
#ifdef __3dNOW_A__
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2iw (__m64 __A)
{
return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfpnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fw (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pswapd (__m64 __A)
{
return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
/* Test Instruction */
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & __M) == 0. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & ~__M) == 0. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & __M) != 0 && (__V & ~__M) != 0. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
/* Packed/scalar double precision floating point rounding. */
#ifdef __OPTIMIZE__
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_pd (__m128d __V, const int __M)
{
return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_sd(__m128d __D, __m128d __V, const int __M)
{
return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
/* Packed/scalar single precision floating point rounding. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ps (__m128 __V, const int __M)
{
return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ss (__m128 __D, __m128 __V, const int __M)
{
return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
/* Empty the multimedia state. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
__builtin_ia32_emms ();
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
_mm_empty ();
}
/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64 (int __i)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int (int __i)
{
return _mm_cvtsi32_si64 (__i);
/* Convert I to a __m64 object. */
/* Intel intrinsic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64 (long long __i)
{
return (__m64) __i;
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64 (long long __i)
{
return (__m64) __i;
}
/* Microsoft intrinsic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64 (long long __i)
{
return (__m64) __i;
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x (long long __i)
{
return (__m64) __i;
#endif
/* Convert the lower 32 bits of the __m64 object into an integer. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32 (__m64 __i)
{
return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int (__m64 __i)
{
return _mm_cvtsi64_si32 (__i);
/* Convert the __m64 object to a 64bit integer. */
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int64 (__m64 __i)
{
return (long long)__i;
}
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtm64_si64 (__m64 __i)
{
return (long long)__i;
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si64x (__m64 __i)
{
return (long long)__i;
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with signed saturation. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packsswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi16 (__m1, __m2);
/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
the result, and the two 32-bit values from M2 into the upper two 16-bit
values of the result, all with signed saturation. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packssdw (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi32 (__m1, __m2);
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with unsigned saturation. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packuswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pu16 (__m1, __m2);
/* Interleave the four 8-bit values from the high half of M1 with the four
8-bit values from the high half of M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhbw (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi8 (__m1, __m2);
/* Interleave the two 16-bit values from the high half of M1 with the two
16-bit values from the high half of M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhwd (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi16 (__m1, __m2);
/* Interleave the 32-bit value from the high half of M1 with the 32-bit
value from the high half of M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi32 (__m1, __m2);
/* Interleave the four 8-bit values from the low half of M1 with the four
8-bit values from the low half of M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklbw (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi8 (__m1, __m2);
/* Interleave the two 16-bit values from the low half of M1 with the two
16-bit values from the low half of M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklwd (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi16 (__m1, __m2);
/* Interleave the 32-bit value from the low half of M1 with the 32-bit
value from the low half of M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckldq (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi32 (__m1, __m2);
}
/* Add the 8-bit values in M1 to the 8-bit values in M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddb (__m64 __m1, __m64 __m2)
{
return _mm_add_pi8 (__m1, __m2);
}
/* Add the 16-bit values in M1 to the 16-bit values in M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddw (__m64 __m1, __m64 __m2)
{
return _mm_add_pi16 (__m1, __m2);
}
/* Add the 32-bit values in M1 to the 32-bit values in M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddd (__m64 __m1, __m64 __m2)
{
return _mm_add_pi32 (__m1, __m2);
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifdef __SSE2__
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi8 (__m1, __m2);
/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi16 (__m1, __m2);
/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu8 (__m1, __m2);
/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu16 (__m1, __m2);
}
/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubb (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi8 (__m1, __m2);
}
/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubw (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi16 (__m1, __m2);
}
/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubd (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi32 (__m1, __m2);
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifdef __SSE2__
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi8 (__m1, __m2);
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
signed saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi16 (__m1, __m2);
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
unsigned saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu8 (__m1, __m2);
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
unsigned saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu16 (__m1, __m2);
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
four 32-bit intermediate results, which are then summed by pairs to
produce two 32-bit results. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaddwd (__m64 __m1, __m64 __m2)
{
return _mm_madd_pi16 (__m1, __m2);
/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
M2 and produce the high 16 bits of the 32-bit results. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhw (__m64 __m1, __m64 __m2)
{
return _mm_mulhi_pi16 (__m1, __m2);
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
the low 16 bits of the results. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmullw (__m64 __m1, __m64 __m2)
{
return _mm_mullo_pi16 (__m1, __m2);
}
/* Shift four 16-bit values in M left by COUNT. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllw (__m64 __m, __m64 __count)
{
return _mm_sll_pi16 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllwi (__m64 __m, int __count)
{
return _mm_slli_pi16 (__m, __count);
}
/* Shift two 32-bit values in M left by COUNT. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslld (__m64 __m, __m64 __count)
{
return _mm_sll_pi32 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslldi (__m64 __m, int __count)
{
return _mm_slli_pi32 (__m, __count);
}
/* Shift the 64-bit value in M left by COUNT. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllq (__m64 __m, __m64 __count)
{
return _mm_sll_si64 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllqi (__m64 __m, int __count)
{
return _mm_slli_si64 (__m, __count);
}
/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psraw (__m64 __m, __m64 __count)
{
return _mm_sra_pi16 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrawi (__m64 __m, int __count)
{
return _mm_srai_pi16 (__m, __count);
}
/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrad (__m64 __m, __m64 __count)
{
return _mm_sra_pi32 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psradi (__m64 __m, int __count)
{
return _mm_srai_pi32 (__m, __count);
}
/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlw (__m64 __m, __m64 __count)
{
return _mm_srl_pi16 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlwi (__m64 __m, int __count)
{
return _mm_srli_pi16 (__m, __count);
}
/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrld (__m64 __m, __m64 __count)
{
return _mm_srl_pi32 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrldi (__m64 __m, int __count)
{
return _mm_srli_pi32 (__m, __count);
}
/* Shift the 64-bit value in M left by COUNT; shift in zeros. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlq (__m64 __m, __m64 __count)
{
return _mm_srl_si64 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlqi (__m64 __m, int __count)
{
return _mm_srli_si64 (__m, __count);
}
/* Bit-wise AND the 64-bit values in M1 and M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pand (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pand (__m64 __m1, __m64 __m2)
{
return _mm_and_si64 (__m1, __m2);
/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
64-bit value in M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pandn (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pandn (__m64 __m1, __m64 __m2)
{
return _mm_andnot_si64 (__m1, __m2);
}
/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_por (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_por (__m64 __m1, __m64 __m2)
{
return _mm_or_si64 (__m1, __m2);
}
/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pxor (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pxor (__m64 __m1, __m64 __m2)
{
return _mm_xor_si64 (__m1, __m2);
/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
test is true and zero if false. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqb (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi8 (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtb (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi8 (__m1, __m2);
/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
the test is true and zero if false. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqw (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi16 (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtw (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi16 (__m1, __m2);
/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
the test is true and zero if false. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqd (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi32 (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtd (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi32 (__m1, __m2);
}
/* Creates a 64-bit zero. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64 (void)
{
return (__m64)0LL;
}
/* Creates a vector of two 32-bit values; I0 is least significant. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32 (int __i1, int __i0)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
}
/* Creates a vector of four 16-bit values; W0 is least significant. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
{
return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
}
/* Creates a vector of eight 8-bit values; B0 is least significant. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
char __b3, char __b2, char __b1, char __b0)
{
}
/* Similar, but with the arguments in reverse order. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi32 (int __i0, int __i1)
{
return _mm_set_pi32 (__i1, __i0);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16 (__w3, __w2, __w1, __w0);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
char __b4, char __b5, char __b6, char __b7)
{
}
/* Creates a vector of two 32-bit values, both elements containing I. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi32 (int __i)
{
return _mm_set_pi32 (__i, __i);
}
/* Creates a vector of four 16-bit values, all elements containing W. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi16 (short __w)
{
return _mm_set_pi16 (__w, __w, __w, __w);
}
/* Creates a vector of eight 8-bit values, all elements containing B. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi8 (char __b)
{
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
#define _MM_GET_DENORMALS_ZERO_MODE() \
(_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
return _mm_load1_pd (__P);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lddqu_si128 (__m128i const *__P)
{
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
__builtin_ia32_monitor (__P, __E, __H);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwait (unsigned int __E, unsigned int __H)
{
__builtin_ia32_mwait (__E, __H);
constant/variable mask. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
(__v8hi)(__m128i)(Y), (int)(M)))
#endif
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
{
return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
from 2 sources using constant/variable mask. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
{
return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
(__v4sf)(__m128)(Y), (int)(M)))
#endif
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
{
return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
from 2 sources using constant/variable mask. */
#ifdef __OPTIMIZE__
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
{
return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
(__v2df)(__m128d)(Y), (int)(M)))
#endif
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
{
return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
of result. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
{
return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
__M);
}
-static __inline __m128d __attribute__((__always_inline__, __artificial__))
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
{
return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
/* Min/max packed integer instructions. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
/* Packed integer 32-bit multiplication with truncation of upper
halves of results. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
/* Packed integer 32-bit multiplication of 2 pairs of operands
with two 64-bit results. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
zeroing mask for D. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
{
return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
single precision array element of X selected by index N. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_ps (__m128 __X, const int __N)
{
union { int i; float f; } __tmp;
selected by index N. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi8 (__m128i __D, int __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
__S, __N);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32 (__m128i __D, int __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
}
#ifdef __x86_64__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
index N. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
}
#ifdef __x86_64__
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi64 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
/* Return horizontal packed word minimum and its index in bits [15:0]
and bits [18:16] respectively. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_minpos_epu16 (__m128i __X)
{
return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
/* Packed integer sign-extension. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
/* Packed integer zero-extension. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
/* Pack 8 double words from 2 operands into 8 words of result with
unsigned saturation. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
operands are determined by the 3rd mask operand. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
#endif
/* Load double quadword using non-temporal aligned hint. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_load_si128 (__m128i *__X)
{
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
/* Intrinsics for text/string processing. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
__M);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
EFlags. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
__M);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
}
/* Calculate a number of bits set to 1. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u32 (unsigned int __X)
{
return __builtin_popcount (__X);
}
#ifdef __x86_64__
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u64 (unsigned long long __X)
{
return __builtin_popcountll (__X);
#endif
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u8 (unsigned int __C, unsigned char __V)
{
return __builtin_ia32_crc32qi (__C, __V);
}
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u16 (unsigned int __C, unsigned short __V)
{
return __builtin_ia32_crc32hi (__C, __V);
}
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u32 (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V);
}
#ifdef __x86_64__
-static __inline unsigned long long __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
{
return __builtin_ia32_crc32di (__C, __V);
/* We need definitions from the SSE3, SSE2 and SSE header files*/
#include <pmmintrin.h>
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
{
return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
(__v2di)__Y, __N * 8);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
{
return (__m64) __builtin_ia32_palignr ((long long)__X,
(int)(N) * 8))
#endif
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__, __artificial__))
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
#define _MM_FLUSH_ZERO_OFF 0x0000
/* Create a vector of zeros. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ps (void)
{
return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
floating-point) values of A and B; the upper three SPFP values are
passed through from A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ss (__m128 __A)
{
return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ss (__m128 __A)
{
return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ss (__m128 __A)
{
return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
/* Perform the respective operation on the four SPFP values in A and B. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ps (__m128 __A)
{
return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ps (__m128 __A)
{
return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ps (__m128 __A)
{
return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
/* Perform logical bit-wise operations on 128-bit values. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_andps (__A, __B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_andnps (__A, __B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_orps (__A, __B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_xorps (__A, __B);
comparison is true, place a mask of all ones in the result, otherwise a
mask of zeros. The upper three SPFP values are passed through from A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
__A));
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
__A));
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
__A));
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
__A));
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
element, if the comparison is true, place a mask of all ones in the
result, otherwise a mask of zeros. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
/* Compare the lower SPFP values of A and B and return 1 if true
and 0 if false. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
/* Convert the lower SPFP value to a 32-bit integer according to the current
rounding mode. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si32 (__m128 __A)
{
return __builtin_ia32_cvtss2si ((__v4sf) __A);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_ss2si (__m128 __A)
{
return _mm_cvtss_si32 (__A);
current rounding mode. */
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64 (__m128 __A)
{
return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64x (__m128 __A)
{
return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
/* Convert the two lower SPFP values to 32-bit integers according to the
current rounding mode. Return the integers in packed form. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi32 (__m128 __A)
{
return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_ps2pi (__m128 __A)
{
return _mm_cvtps_pi32 (__A);
}
/* Truncate the lower SPFP value to a 32-bit integer. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si32 (__m128 __A)
{
return __builtin_ia32_cvttss2si ((__v4sf) __A);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_ss2si (__m128 __A)
{
return _mm_cvttss_si32 (__A);
/* Truncate the lower SPFP value to a 32-bit integer. */
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si64 (__m128 __A)
{
return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__, __artificial__))
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si64x (__m128 __A)
{
return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
/* Truncate the two lower SPFP values to 32-bit integers. Return the
integers in packed form. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_pi32 (__m128 __A)
{
return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_ps2pi (__m128 __A)
{
return _mm_cvttps_pi32 (__A);
}
/* Convert B to a SPFP value and insert it as element zero in A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_ss (__m128 __A, int __B)
{
return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_si2ss (__m128 __A, int __B)
{
return _mm_cvtsi32_ss (__A, __B);
/* Convert B to a SPFP value and insert it as element zero in A. */
/* Intel intrinsic. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_ss (__m128 __A, long long __B)
{
return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
}
/* Microsoft intrinsic. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_ss (__m128 __A, long long __B)
{
return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
/* Convert the two 32-bit values in B to SPFP form and insert them
as the two lower elements in A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32_ps (__m128 __A, __m64 __B)
{
return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_pi2ps (__m128 __A, __m64 __B)
{
return _mm_cvtpi32_ps (__A, __B);
}
/* Convert the four signed 16-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi16_ps (__m64 __A)
{
__v4hi __sign;
}
/* Convert the four unsigned 16-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpu16_ps (__m64 __A)
{
__v2si __hisi, __losi;
}
/* Convert the low four signed 8-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi8_ps (__m64 __A)
{
__v8qi __sign;
}
/* Convert the low four unsigned 8-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpu8_ps(__m64 __A)
{
__A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
}
/* Convert the four signed 32-bit values in A and B to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
{
__v4sf __zero = (__v4sf) _mm_setzero_ps ();
}
/* Convert the four SPFP values in A to four signed 16-bit integers. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi16(__m128 __A)
{
__v4sf __hisf = (__v4sf)__A;
}
/* Convert the four SPFP values in A to four signed 8-bit integers. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi8(__m128 __A)
{
__v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
/* Selects four specific SPFP values from A and B based on MASK. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
{
return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
#endif
/* Selects and interleaves the upper two SPFP values from A and B. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
}
/* Selects and interleaves the lower two SPFP values from A and B. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
/* Sets the upper two SPFP values with 64-bits of data loaded from P;
the lower two values are passed through from A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pi (__m128 __A, __m64 const *__P)
{
return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P);
}
/* Stores the upper two SPFP values of A into P. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pi (__m64 *__P, __m128 __A)
{
__builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A);
}
/* Moves the upper two values of B into the lower two values of A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehl_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
}
/* Moves the lower two values of B into the upper two values of A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movelh_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
/* Sets the lower two SPFP values with 64-bits of data loaded from P;
the upper two values are passed through from A. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pi (__m128 __A, __m64 const *__P)
{
return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P);
}
/* Stores the lower two SPFP values of A into P. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pi (__m64 *__P, __m128 __A)
{
__builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A);
}
/* Creates a 4-bit mask from the most significant bits of the SPFP values. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_ps (__m128 __A)
{
return __builtin_ia32_movmskps ((__v4sf)__A);
}
/* Return the contents of the control register. */
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_getcsr (void)
{
return __builtin_ia32_stmxcsr ();
}
/* Read exception bits from the control register. */
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_EXCEPTION_STATE (void)
{
return _mm_getcsr() & _MM_EXCEPT_MASK;
}
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_EXCEPTION_MASK (void)
{
return _mm_getcsr() & _MM_MASK_MASK;
}
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_ROUNDING_MODE (void)
{
return _mm_getcsr() & _MM_ROUND_MASK;
}
-static __inline unsigned int __attribute__((__always_inline__, __artificial__))
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_FLUSH_ZERO_MODE (void)
{
return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
}
/* Set the control register to I. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setcsr (unsigned int __I)
{
__builtin_ia32_ldmxcsr (__I);
}
/* Set exception bits in the control register. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_EXCEPTION_STATE(unsigned int __mask)
{
_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_EXCEPTION_MASK (unsigned int __mask)
{
_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_ROUNDING_MODE (unsigned int __mode)
{
_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
{
_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
}
/* Create a vector with element 0 as F and the rest zero. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ss (float __F)
{
return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
}
/* Create a vector with all four elements equal to F. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ps (float __F)
{
return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ps1 (float __F)
{
return _mm_set1_ps (__F);
}
/* Create a vector with element 0 as *P and the rest zero. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ss (float const *__P)
{
return _mm_set_ss (*__P);
}
/* Create a vector with all four elements equal to *P. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load1_ps (float const *__P)
{
return _mm_set1_ps (*__P);
}
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ps1 (float const *__P)
{
return _mm_load1_ps (__P);
}
/* Load four SPFP values from P. The address must be 16-byte aligned. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ps (float const *__P)
{
return (__m128) *(__v4sf *)__P;
}
/* Load four SPFP values from P. The address need not be 16-byte aligned. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ps (float const *__P)
{
return (__m128) __builtin_ia32_loadups (__P);
}
/* Load four SPFP values in reverse order. The address must be aligned. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadr_ps (float const *__P)
{
__v4sf __tmp = *(__v4sf *)__P;
}
/* Create the vector [Z Y X W]. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
{
return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
}
/* Create the vector [W X Y Z]. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ps (float __Z, float __Y, float __X, float __W)
{
return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
}
/* Stores the lower SPFP value. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ss (float *__P, __m128 __A)
{
*__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
}
-static __inline float __attribute__((__always_inline__, __artificial__))
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_f32 (__m128 __A)
{
return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
}
/* Store four SPFP values. The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ps (float *__P, __m128 __A)
{
*(__v4sf *)__P = (__v4sf)__A;
}
/* Store four SPFP values. The address need not be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ps (float *__P, __m128 __A)
{
__builtin_ia32_storeups (__P, (__v4sf)__A);
}
/* Store the lower SPFP value across four words. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store1_ps (float *__P, __m128 __A)
{
__v4sf __va = (__v4sf)__A;
_mm_storeu_ps (__P, __tmp);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ps1 (float *__P, __m128 __A)
{
_mm_store1_ps (__P, __A);
}
/* Store four SPFP values in reverse order. The address must be aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storer_ps (float *__P, __m128 __A)
{
__v4sf __va = (__v4sf)__A;
}
/* Sets the low SPFP value of A from the low value of B. */
-static __inline __m128 __attribute__((__always_inline__, __artificial__))
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
/* Extracts one of the four words of A. The selector N must be immediate. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_pi16 (__m64 const __A, int const __N)
{
return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pextrw (__m64 const __A, int const __N)
{
return _mm_extract_pi16 (__A, __N);
/* Inserts word D into one of four words of A. The selector N must be
immediate. */
#ifdef __OPTIMIZE__
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
{
return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pinsrw (__m64 const __A, int const __D, int const __N)
{
return _mm_insert_pi16 (__A, __D, __N);
#endif
/* Compute the element-wise maximum of signed 16-bit values. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pi16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaxsw (__m64 __A, __m64 __B)
{
return _mm_max_pi16 (__A, __B);
}
/* Compute the element-wise maximum of unsigned 8-bit values. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaxub (__m64 __A, __m64 __B)
{
return _mm_max_pu8 (__A, __B);
}
/* Compute the element-wise minimum of signed 16-bit values. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pi16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pminsw (__m64 __A, __m64 __B)
{
return _mm_min_pi16 (__A, __B);
}
/* Compute the element-wise minimum of unsigned 8-bit values. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pminub (__m64 __A, __m64 __B)
{
return _mm_min_pu8 (__A, __B);
}
/* Create an 8-bit mask of the signs of 8-bit values. */
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A)
{
return __builtin_ia32_pmovmskb ((__v8qi)__A);
}
-static __inline int __attribute__((__always_inline__, __artificial__))
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmovmskb (__m64 __A)
{
return _mm_movemask_pi8 (__A);
/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
in B and produce the high 16 bits of the 32-bit results. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pu16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhuw (__m64 __A, __m64 __B)
{
return _mm_mulhi_pu16 (__A, __B);
/* Return a combination of the four 16-bit values in A. The selector
must be an immediate. */
#ifdef __OPTIMIZE__
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi16 (__m64 __A, int const __N)
{
return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pshufw (__m64 __A, int const __N)
{
return _mm_shuffle_pi16 (__A, __N);
/* Conditionally store byte elements of A into P. The high bit of each
byte in the selector N determines whether the corresponding byte from
A is stored. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
{
__builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
}
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_maskmovq (__m64 __A, __m64 __N, char *__P)
{
_mm_maskmove_si64 (__A, __N, __P);
}
/* Compute the rounded averages of the unsigned 8-bit values in A and B. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgb (__m64 __A, __m64 __B)
{
return _mm_avg_pu8 (__A, __B);
}
/* Compute the rounded averages of the unsigned 16-bit values in A and B. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_pu16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgw (__m64 __A, __m64 __B)
{
return _mm_avg_pu16 (__A, __B);
/* Compute the sum of the absolute differences of the unsigned 8-bit
values in A and B. Return the value in the lower 16-bit word; the
upper words are cleared. */
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sad_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__, __artificial__))
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psadbw (__m64 __A, __m64 __B)
{
return _mm_sad_pu8 (__A, __B);
/* Loads one cache line from address P to a location "closer" to the
processor. The selector I specifies the type of prefetch operation. */
#ifdef __OPTIMIZE__
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_prefetch (const void *__P, enum _mm_hint __I)
{
__builtin_prefetch (__P, 0, __I);
#endif
/* Stores the data in A to the address P without polluting the caches. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pi (__m64 *__P, __m64 __A)
{
__builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
}
/* Likewise. The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_ps (float *__P, __m128 __A)
{
__builtin_ia32_movntps (__P, (__v4sf)__A);
/* Guarantees that every preceding store is globally visible before
any subsequent store. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sfence (void)
{
__builtin_ia32_sfence ();
/* The execution of the next instruction is delayed by an implementation
specific amount of time. The instruction does not modify the
architectural state. */
-static __inline void __attribute__((__always_inline__, __artificial__))
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_pause (void)
{
__asm__ __volatile__ ("rep; nop" : : );
+2008-03-13 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/34000
+ PR target/35553
+ * g++.dg/other/i386-3.C: New test.
+ * gcc.target/i386/sse-13.c: Redefine extern instead of static.
+ * gcc.target/i386/sse-14.c: Ditto.
+ * gcc.target/i386/mmx-1.c: Ditto.
+ * gcc.target/i386/mmx-2.c: Ditto.
+ * gcc.target/i386/3dnow-1.c: Ditto.
+ * gcc.target/i386/3dnow-2.c: Ditto.
+ * gcc.target/i386/3dnowA-1.c: Ditto.
+ * gcc.target/i386/3dnowA-2.c: Ditto.
+
2008-03-13 Paolo Bonzini <bonzini@gnu.org>
PR tree-opt/35422
--- /dev/null
+/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
+ usable with -O -fkeep-inline-functions. */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -msse4 -msse5" } */
+
+#include <bmmintrin.h>
+#include <smmintrin.h>
+#include <mm3dnow.h>
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
- builtin functions. Defining away "static" and "__inline" results in
+ builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
#include <mm3dnow.h>
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
- builtin functions. Defining away "static" and "__inline" results in
+ builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
#include <mm3dnow.h>
/* { dg-do assemble } */
-/* { dg-require-effective-target ilp32 } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -m3dnow -march=athlon" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow" } */
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
- builtin functions. Defining away "static" and "__inline" results in
+ builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
#include <mm3dnow.h>
/* { dg-do assemble } */
-/* { dg-require-effective-target ilp32 } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -m3dnow -march=athlon" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow" } */
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
- builtin functions. Defining away "static" and "__inline" results in
+ builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
#include <mm3dnow.h>
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
- builtin functions. Defining away "static" and "__inline" results in
+ builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
#include <mmintrin.h>
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in mmintrin.h that reference the proper
- builtin functions. Defining away "static" and "__inline" results in
+ builtin functions. Defining away "extern" and "__inline" results in
all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
#include <mmintrin.h>
/* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
- that reference the proper builtin functions. Defining away "static" and
+ that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
/* Following intrinsics require immediate arguments. */
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
- that reference the proper builtin functions. Defining away "static" and
+ that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */
-#define static
+#define extern
#define __inline
#include <bmmintrin.h>