+2018-12-06 Paul A. Clarke <pc@us.ibm.com>
+
+ PR target/88316
+ * config/rs6000/mmintrin.h (_mm_unpackhi_pi8): Fix for big-endian.
+ (_mm_unpacklo_pi8): Likewise.
+ (_mm_mulhi_pi16): Likewise.
+ (_mm_packs_pi16): Fix for big-endian. Use preferred API.
+ (_mm_packs_pi32): Likewise.
+ (_mm_packs_pu16): Likewise.
+ * config/rs6000/xmmintrin.h (_mm_cvtss_si32): Fix for big-endian.
+ (_mm_cvtss_si64): Likewise.
+ (_mm_cvtpi32x2_ps): Likewise.
+ (_mm_shuffle_ps): Likewise.
+ (_mm_movemask_pi8): Likewise.
+ (_mm_mulhi_pu16): Likewise.
+ (_mm_sad_pu8): Likewise.
+ (_mm_cvtpu16_ps): Fix for big-endian. Use preferred API.
+ (_mm_cvtpu8_ps): Likewise.
+ (_mm_movemask_ps): Better #else case for big-endian (no functional
+ change).
+ (_mm_shuffle_pi16): Likewise.
+ * config/rs6000/emmintrin.h (_mm_movemask_pd): Fix for big-endian.
+ Better #else case for big-endian (no functional change).
+ (_mm_movemask_epi8): Likewise.
+ (_mm_shufflehi_epi16): Likewise.
+ (_mm_shufflelo_epi16): Likewise.
+ (_mm_shuffle_epi32): Likewise.
+ (_mm_mul_epu32): Fix for big-endian.
+ (_mm_bsrli_si128): Likewise.
+ (_mm_cvtps_pd): Better #else case for big-endian.
+ (_mm_mulhi_epi16): Likewise.
+ (_mm_mul_epu32): Likewise.
+ (_mm_slli_si128): Likewise.
+ (_mm_sll_epi16): Likewise.
+ (_mm_sll_epi32): Likewise.
+ (_mm_sra_epi16): Likewise.
+ (_mm_sra_epi32): Likewise.
+ (_mm_srl_epi16): Likewise.
+ (_mm_srl_epi32): Likewise.
+ (_mm_mulhi_epu16): Likewise.
+ (_mm_sad_epu8): Likewise.
+ * config/rs6000/pmmintrin.h (_mm_hadd_ps): Fix for big-endian.
+ (_mm_hsub_ps): Likewise.
+ * config/rs6000/mmintrin.h (_mm_cmpeq_pi8): Fix for 32-bit mode.
+ * config/rs6000/tmmintrin.h (_mm_alignr_epi8): Use ENDIAN
+ macros consistently (no functional changes).
+ (_mm_alignr_pi8): Likewise.
+
2018-12-06 Iain Sandoe <iain@sandoe.co.uk>
PR c++/87380
lined up. */
temp = __builtin_vsx_xxsldwi (a, a, 3);
temp = __builtin_vsx_xxsldwi (a, temp, 2);
-#elif __BIG_ENDIAN__
+#else
/* The input float values are in elements {[0], [1]} but the convert
instruction needs them in elements {[0], [2]}, so we use two
shift left double vector word immediates to get the elements
{
#ifdef __LITTLE_ENDIAN__
0x80800040, 0x80808080, 0x80808080, 0x80808080
-#elif __BIG_ENDIAN__
- 0x80808080, 0x80808080, 0x80808080, 0x80800040
+#else
+ 0x80808080, 0x80808080, 0x80808080, 0x80804000
#endif
};
#ifdef __LITTLE_ENDIAN__
return result[1];
-#elif __BIG_ENDIAN__
+#else
return result[0];
#endif
}
#ifdef __LITTLE_ENDIAN__
0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
-#elif __BIG_ENDIAN__
+#else
0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
#endif
: "=v" (result)
: "v" (__A), "v" (__B)
: );
-#elif __BIG_ENDIAN__
+#else
/* VMX Vector Multiply Even Unsigned Word. */
__asm__(
"vmuleuw %0,%1,%2"
#endif
return (__m128i) result;
#else
-#ifdef __LITTLE_ENDIAN__
return (__m128i) vec_mule ((__v4su)__A, (__v4su)__B);
-#elif __BIG_ENDIAN__
- return (__m128i) vec_mulo ((__v4su)__A, (__v4su)__B);
-#endif
#endif
}
const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
if (__N < 16)
+#ifdef __LITTLE_ENDIAN__
if (__builtin_constant_p(__N))
/* Would like to use Vector Shift Left Double by Octet
Immediate here to use the immediate form and avoid
load of __N * 8 value into a separate VR. */
result = vec_sld (zeros, (__v16qu) __A, (16 - __N));
else
+#endif
{
__v16qu shift = vec_splats((unsigned char)(__N*8));
+#ifdef __LITTLE_ENDIAN__
result = vec_sro ((__v16qu)__A, shift);
+#else
+ result = vec_slo ((__v16qu)__A, shift);
+#endif
}
else
result = zeros;
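
Both branches above must implement the same value-level shift; as a reminder, the Intel behavior of _mm_bsrli_si128 can be sketched with a scalar model (bsrli_si128_ref is an illustrative name, not from the headers):

/* Scalar model of _mm_bsrli_si128: the 128-bit value is shifted right by
   __N bytes, so byte element i of the result is byte element i + __N of
   the source, or zero once i + __N reaches 16, on either endianness.  */
static void
bsrli_si128_ref (const unsigned char src[16], unsigned char dst[16], int n)
{
  int i;
  for (i = 0; i < 16; i++)
    dst[i] = (n >= 0 && i + n < 16) ? src[i + n] : 0;
}
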
if (_imm5 < 16)
#ifdef __LITTLE_ENDIAN__
result = vec_sld ((__v16qu) __A, zeros, _imm5);
-#elif __BIG_ENDIAN__
+#else
result = vec_sld (zeros, (__v16qu) __A, (16 - _imm5));
#endif
else
#ifdef __LITTLE_ENDIAN__
lshift = vec_splat ((__v8hu) __B, 0);
-#elif __BIG_ENDIAN__
+#else
lshift = vec_splat ((__v8hu) __B, 3);
#endif
shmask = vec_cmple (lshift, shmax);
__v4su result;
#ifdef __LITTLE_ENDIAN__
lshift = vec_splat ((__v4su) __B, 0);
-#elif __BIG_ENDIAN__
+#else
lshift = vec_splat ((__v4su) __B, 1);
#endif
shmask = vec_cmplt (lshift, shmax);
#ifdef __LITTLE_ENDIAN__
rshift = vec_splat ((__v8hu)__B, 0);
-#elif __BIG_ENDIAN__
+#else
rshift = vec_splat ((__v8hu)__B, 3);
#endif
rshift = vec_min (rshift, rshmax);
#ifdef __LITTLE_ENDIAN__
rshift = vec_splat ((__v4su)__B, 0);
-#elif __BIG_ENDIAN__
+#else
rshift = vec_splat ((__v4su)__B, 1);
#endif
rshift = vec_min (rshift, rshmax);
#ifdef __LITTLE_ENDIAN__
rshift = vec_splat ((__v8hu) __B, 0);
-#elif __BIG_ENDIAN__
+#else
rshift = vec_splat ((__v8hu) __B, 3);
#endif
shmask = vec_cmple (rshift, shmax);
#ifdef __LITTLE_ENDIAN__
rshift = vec_splat ((__v4su) __B, 0);
-#elif __BIG_ENDIAN__
+#else
rshift = vec_splat ((__v4su) __B, 1);
#endif
shmask = vec_cmplt (rshift, shmax);
__vector unsigned long long result;
static const __vector unsigned char perm_mask =
{
-#ifdef __LITTLE_ENDIAN__
0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00
-#elif __BIG_ENDIAN__
- 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38,
- 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78
-#endif
};
result = ((__vector unsigned long long)
#ifdef __LITTLE_ENDIAN__
return result[1];
-#elif __BIG_ENDIAN__
+#else
return result[0];
#endif
}
#ifdef __LITTLE_ENDIAN__
0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
-#elif __BIG_ENDIAN__
+#else
0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
#endif
{
#ifdef __LITTLE_ENDIAN__
0x0908, 0x0B0A, 0x0D0C, 0x0F0E
-#elif __BIG_ENDIAN__
- 0x0607, 0x0405, 0x0203, 0x0001
+#else
+ 0x0809, 0x0A0B, 0x0C0D, 0x0E0F
#endif
};
__v2du pmask =
#ifdef __LITTLE_ENDIAN__
- { 0x1716151413121110UL, 0x1f1e1d1c1b1a1918UL};
-#elif __BIG_ENDIAN__
- { 0x1011121314151617UL, 0x18191a1b1c1d1e1fUL};
+ { 0x1716151413121110UL, 0UL};
+#else
+ { 0x1011121314151617UL, 0UL};
#endif
__m64_union t;
__v2du a, r;
-#ifdef __LITTLE_ENDIAN__
t.as_short[0] = permute_selectors[element_selector_98];
t.as_short[1] = permute_selectors[element_selector_BA];
t.as_short[2] = permute_selectors[element_selector_DC];
t.as_short[3] = permute_selectors[element_selector_FE];
-#elif __BIG_ENDIAN__
- t.as_short[3] = permute_selectors[element_selector_98];
- t.as_short[2] = permute_selectors[element_selector_BA];
- t.as_short[1] = permute_selectors[element_selector_DC];
- t.as_short[0] = permute_selectors[element_selector_FE];
-#endif
-#ifdef __LITTLE_ENDIAN__
pmask[1] = t.as_m64;
-#elif __BIG_ENDIAN__
- pmask[0] = t.as_m64;
-#endif
a = (__v2du)__A;
r = vec_perm (a, a, (__vector unsigned char)pmask);
return (__m128i) r;
{
#ifdef __LITTLE_ENDIAN__
0x0100, 0x0302, 0x0504, 0x0706
-#elif __BIG_ENDIAN__
- 0x0e0f, 0x0c0d, 0x0a0b, 0x0809
+#else
+ 0x0001, 0x0203, 0x0405, 0x0607
#endif
};
- __v2du pmask = { 0x1011121314151617UL, 0x1f1e1d1c1b1a1918UL};
+ __v2du pmask =
+#ifdef __LITTLE_ENDIAN__
+ { 0UL, 0x1f1e1d1c1b1a1918UL};
+#else
+ { 0UL, 0x18191a1b1c1d1e1fUL};
+#endif
__m64_union t;
__v2du a, r;
-
-#ifdef __LITTLE_ENDIAN__
t.as_short[0] = permute_selectors[element_selector_10];
t.as_short[1] = permute_selectors[element_selector_32];
t.as_short[2] = permute_selectors[element_selector_54];
t.as_short[3] = permute_selectors[element_selector_76];
-#elif __BIG_ENDIAN__
- t.as_short[3] = permute_selectors[element_selector_10];
- t.as_short[2] = permute_selectors[element_selector_32];
- t.as_short[1] = permute_selectors[element_selector_54];
- t.as_short[0] = permute_selectors[element_selector_76];
-#endif
-#ifdef __LITTLE_ENDIAN__
pmask[0] = t.as_m64;
-#elif __BIG_ENDIAN__
- pmask[1] = t.as_m64;
-#endif
a = (__v2du)__A;
r = vec_perm (a, a, (__vector unsigned char)pmask);
return (__m128i) r;
{
#ifdef __LITTLE_ENDIAN__
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
-#elif __BIG_ENDIAN__
- 0x0C0D0E0F, 0x08090A0B, 0x04050607, 0x00010203
+#else
+ 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
#endif
};
__v4su t;
-#ifdef __LITTLE_ENDIAN__
t[0] = permute_selectors[element_selector_10];
t[1] = permute_selectors[element_selector_32];
t[2] = permute_selectors[element_selector_54] + 0x10101010;
t[3] = permute_selectors[element_selector_76] + 0x10101010;
-#elif __BIG_ENDIAN__
- t[3] = permute_selectors[element_selector_10] + 0x10101010;
- t[2] = permute_selectors[element_selector_32] + 0x10101010;
- t[1] = permute_selectors[element_selector_54];
- t[0] = permute_selectors[element_selector_76];
-#endif
return (__m128i)vec_perm ((__v4si) __A, (__v4si)__A, (__vector unsigned char)t);
}
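
For reference, the selector tables above encode the Intel immediate semantics; a scalar sketch of what _mm_shuffle_epi32 must compute (shuffle_epi32_ref is an illustrative name) is:

/* Scalar model of _mm_shuffle_epi32: result word i is source word
   ((imm >> (2 * i)) & 3).  An immediate of 0x1B therefore reverses the
   four 32-bit elements, a handy sanity check on either endianness.  */
static void
shuffle_epi32_ref (const unsigned int src[4], unsigned int dst[4],
                   unsigned int imm)
{
  int i;
  for (i = 0; i < 4; i++)
    dst[i] = src[(imm >> (2 * i)) & 3];
}
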
/* Rotate the sums into the correct position. */
#ifdef __LITTLE_ENDIAN__
result = vec_sld (result, result, 4);
-#elif __BIG_ENDIAN__
+#else
result = vec_sld (result, result, 6);
#endif
/* Rotate the sums into the correct position. */
__vector signed short vm1;
__vector signed char vresult;
- vm1 = (__vector signed short) (__vector unsigned long long) { __m2, __m1 };
- vresult = vec_vpkshss (vm1, vm1);
+ vm1 = (__vector signed short) (__vector unsigned long long)
+#ifdef __LITTLE_ENDIAN__
+ { __m1, __m2 };
+#else
+ { __m2, __m1 };
+#endif
+ vresult = vec_packs (vm1, vm1);
return (__m64) ((__vector long long) vresult)[0];
}
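
The packs fixes above mainly change which doubleword of the vector receives __m1 and __m2, so a value-based check against the Intel definition is a quick way to confirm endian independence. The test below is hypothetical, not part of the patch; it assumes a VSX-enabled Power target, -DNO_WARN_X86_INTRINSICS, and the compatibility headers' integer definition of __m64.

/* Hypothetical self-check for _mm_packs_pi16: byte element i of the result
   (bits 8*i .. 8*i+7 of the __m64 value) must be the signed-saturated
   narrowing of word element i of __m1 for i < 4 and of __m2 for i >= 4.  */
#include <mmintrin.h>
#include <stdio.h>

static signed char
sat8 (short v)
{
  return v > 127 ? 127 : v < -128 ? -128 : (signed char) v;
}

static __m64
pack4_pi16 (const short *s)
{
  unsigned long long v = 0;
  int i;
  for (i = 0; i < 4; i++)
    v |= (unsigned long long) (unsigned short) s[i] << (16 * i);
  return (__m64) v;
}

int
main (void)
{
  short a[4] = { 300, -300, 5, -5 };
  short b[4] = { 1000, -1000, 7, -7 };
  unsigned long long r
    = (unsigned long long) _mm_packs_pi16 (pack4_pi16 (a), pack4_pi16 (b));
  int i;
  for (i = 0; i < 8; i++)
    {
      signed char got = (signed char) ((r >> (8 * i)) & 0xff);
      signed char want = (i < 4) ? sat8 (a[i]) : sat8 (b[i - 4]);
      if (got != want)
        {
          printf ("byte %d: got %d, want %d\n", i, got, want);
          return 1;
        }
    }
  return 0;
}
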
__vector signed int vm1;
__vector signed short vresult;
- vm1 = (__vector signed int) (__vector unsigned long long) { __m2, __m1 };
- vresult = vec_vpkswss (vm1, vm1);
+ vm1 = (__vector signed int) (__vector unsigned long long)
+#ifdef __LITTLE_ENDIAN__
+ { __m1, __m2 };
+#else
+ { __m2, __m1 };
+#endif
+ vresult = vec_packs (vm1, vm1);
return (__m64) ((__vector long long) vresult)[0];
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
- __vector signed short vm1;
- __vector unsigned char vresult;
-
- vm1 = (__vector signed short) (__vector unsigned long long) { __m2, __m1 };
- vresult = vec_vpkshus (vm1, vm1);
- return (__m64) ((__vector long long) vresult)[0];
+ __vector unsigned char r;
+ __vector signed short vm1 = (__vector signed short) (__vector long long)
+#ifdef __LITTLE_ENDIAN__
+ { __m1, __m2 };
+#else
+ { __m2, __m1 };
+#endif
+ const __vector signed short __zero = { 0 };
+ __vector __bool short __select = vec_cmplt (vm1, __zero);
+ r = vec_packs ((__vector unsigned short) vm1, (__vector unsigned short) vm1);
+ __vector __bool char packsel = vec_pack (__select, __select);
+ r = vec_sel (r, (const __vector unsigned char) __zero, packsel);
+ return (__m64) ((__vector long long) r)[0];
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
a = (__vector unsigned char)vec_splats (__m1);
b = (__vector unsigned char)vec_splats (__m2);
c = vec_mergel (a, b);
- return (__m64) ((__vector long long) c)[0];
+ return (__m64) ((__vector long long) c)[1];
#else
__m64_union m1, m2, res;
a = (__vector unsigned char)vec_splats (__m1);
b = (__vector unsigned char)vec_splats (__m2);
c = vec_mergel (a, b);
- return (__m64) ((__vector long long) c)[1];
+ return (__m64) ((__vector long long) c)[0];
#else
__m64_union m1, m2, res;
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
{
-#ifdef _ARCH_PWR6
+#if defined(_ARCH_PWR6) && defined(__powerpc64__)
__m64 res;
__asm__(
"cmpb %0,%1,%2;\n"
__vector signed short c;
__vector signed int w0, w1;
__vector unsigned char xform1 = {
+#ifdef __LITTLE_ENDIAN__
0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
+#else
+ 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
+ 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15
+#endif
};
a = (__vector signed short)vec_splats (__m1);
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
__vector unsigned char xform2 = {
- #ifdef __LITTLE_ENDIAN__
- 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B
- #elif __BIG_ENDIAN__
- 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F, 0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F
- #endif
+ 0x00, 0x01, 0x02, 0x03,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x18, 0x19, 0x1A, 0x1B
};
__vector unsigned char xform1 = {
- #ifdef __LITTLE_ENDIAN__
- 0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F
- #elif __BIG_ENDIAN__
- 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B
- #endif
+ 0x04, 0x05, 0x06, 0x07,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x1C, 0x1D, 0x1E, 0x1F
};
return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
__vector unsigned char xform2 = {
- #ifdef __LITTLE_ENDIAN__
- 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B
- #elif __BIG_ENDIAN__
- 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F, 0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F
- #endif
+ 0x00, 0x01, 0x02, 0x03,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x18, 0x19, 0x1A, 0x1B
};
__vector unsigned char xform1 = {
- #ifdef __LITTLE_ENDIAN__
- 0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F
- #elif __BIG_ENDIAN__
- 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B
- #endif
+ 0x04, 0x05, 0x06, 0x07,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x1C, 0x1D, 0x1E, 0x1F
};
return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
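
The horizontal add/sub results do not depend on byte order once the permute masks are right; the intended values, as a scalar sketch (hadd_hsub_ps_ref is an illustrative name):

/* Scalar model of _mm_hadd_ps and _mm_hsub_ps: adjacent pairs of __X feed
   the low half of the result and adjacent pairs of __Y the high half.  */
static void
hadd_hsub_ps_ref (const float x[4], const float y[4],
                  float add[4], float sub[4])
{
  add[0] = x[0] + x[1];  sub[0] = x[0] - x[1];
  add[1] = x[2] + x[3];  sub[1] = x[2] - x[3];
  add[2] = y[0] + y[1];  sub[2] = y[0] - y[1];
  add[3] = y[2] + y[3];  sub[3] = y[2] - y[3];
}
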
{
if (__builtin_constant_p (__count) && __count < 16)
{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#ifdef __LITTLE_ENDIAN__
__A = (__m128i) vec_reve ((__v16qu) __A);
__B = (__m128i) vec_reve ((__v16qu) __B);
#endif
__A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count);
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#ifdef __LITTLE_ENDIAN__
__A = (__m128i) vec_reve ((__v16qu) __A);
#endif
return __A;
{
const __v16qu __shift =
vec_splats ((unsigned char) ((__count - 16) * 8));
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#ifdef __LITTLE_ENDIAN__
return (__m128i) vec_sro ((__v16qu) __A, __shift);
#else
return (__m128i) vec_slo ((__v16qu) __A, __shift);
const __v16qu __shiftA =
vec_splats ((unsigned char) ((16 - __count) * 8));
const __v16qu __shiftB = vec_splats ((unsigned char) (__count * 8));
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#ifdef __LITTLE_ENDIAN__
__A = (__m128i) vec_slo ((__v16qu) __A, __shiftA);
__B = (__m128i) vec_sro ((__v16qu) __B, __shiftB);
#else
if (__count < 16)
{
__v2du __C = { __B, __A };
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#ifdef __LITTLE_ENDIAN__
const __v4su __shift = { __count << 3, 0, 0, 0 };
__C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift);
#else
{
__m64 res = 0;
#ifdef _ARCH_PWR8
- __m128 vtmp;
double dtmp;
__asm__(
- "xxsldwi %x1,%x3,%x3,3;\n"
- "xscvspdp %x2,%x1;\n"
+#ifdef __LITTLE_ENDIAN__
+ "xxsldwi %x0,%x0,%x0,3;\n"
+#endif
+ "xscvspdp %x2,%x0;\n"
"fctiw %2,%2;\n"
- "mfvsrd %0,%x2;\n"
- : "=r" (res),
- "=&wa" (vtmp),
+ "mfvsrd %1,%x2;\n"
+ : "+wa" (__A),
+ "=r" (res),
"=f" (dtmp)
- : "wa" (__A)
: );
#else
res = __builtin_rint(__A[0]);
{
__m64 res = 0;
#ifdef _ARCH_PWR8
- __m128 vtmp;
double dtmp;
__asm__(
- "xxsldwi %x1,%x3,%x3,3;\n"
- "xscvspdp %x2,%x1;\n"
+#ifdef __LITTLE_ENDIAN__
+ "xxsldwi %x0,%x0,%x0,3;\n"
+#endif
+ "xscvspdp %x2,%x0;\n"
"fctid %2,%2;\n"
- "mfvsrd %0,%x2;\n"
- : "=r" (res),
- "=&wa" (vtmp),
+ "mfvsrd %1,%x2;\n"
+ : "+wa" (__A),
+ "=r" (res),
"=f" (dtmp)
- : "wa" (__A)
: );
#else
res = __builtin_llrint(__A[0]);
__vector float vf1;
vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
- vi4 = (__vector unsigned int) vec_vmrglh (vs8, zero);
+ vi4 = (__vector unsigned int) vec_mergel
+#ifdef __LITTLE_ENDIAN__
+ (vs8, zero);
+#else
+ (zero, vs8);
+#endif
vf1 = (__vector float) vec_ctf (vi4, 0);
return (__m128) vf1;
__vector float vf1;
vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
- vs8 = (__vector unsigned short) vec_vmrglb (vc16, zero);
- vi4 = (__vector unsigned int) vec_vmrghh (vs8,
+#ifdef __LITTLE_ENDIAN__
+ vs8 = (__vector unsigned short) vec_mergel (vc16, zero);
+ vi4 = (__vector unsigned int) vec_mergeh (vs8,
(__vector unsigned short) zero);
+#else
+ vs8 = (__vector unsigned short) vec_mergel (zero, vc16);
+ vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) zero,
+ vs8);
+#endif
vf1 = (__vector float) vec_ctf (vi4, 0);
return (__m128) vf1;
__vector signed int vi4;
__vector float vf4;
- vi4 = (__vector signed int) (__vector unsigned long long) { __B, __A };
+ vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B };
vf4 = (__vector float) vec_ctf (vi4, 0);
return (__m128) vf4;
}
{
#ifdef __LITTLE_ENDIAN__
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
-#elif __BIG_ENDIAN__
- 0x0C0D0E0F, 0x08090A0B, 0x04050607, 0x00010203
+#else
+ 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
#endif
};
__vector unsigned int t;
-#ifdef __LITTLE_ENDIAN__
t[0] = permute_selectors[element_selector_10];
t[1] = permute_selectors[element_selector_32];
t[2] = permute_selectors[element_selector_54] + 0x10101010;
t[3] = permute_selectors[element_selector_76] + 0x10101010;
-#elif __BIG_ENDIAN__
- t[3] = permute_selectors[element_selector_10] + 0x10101010;
- t[2] = permute_selectors[element_selector_32] + 0x10101010;
- t[1] = permute_selectors[element_selector_54];
- t[0] = permute_selectors[element_selector_76];
-#endif
return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)t);
}
{
#ifdef __LITTLE_ENDIAN__
0x00204060, 0x80808080, 0x80808080, 0x80808080
-#elif __BIG_ENDIAN__
+#else
0x80808080, 0x80808080, 0x80808080, 0x00204060
#endif
};
#ifdef __LITTLE_ENDIAN__
return result[1];
-#elif __BIG_ENDIAN__
+#else
return result[0];
#endif
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A)
{
- unsigned long long p = 0x0008101820283038UL; // permute control for sign bits
-
+ unsigned long long p =
+#ifdef __LITTLE_ENDIAN__
+ 0x0008101820283038UL; // permute control for sign bits
+#else
+ 0x3830282018100800UL; // permute control for sign bits
+#endif
return __builtin_bpermd (p, __A);
}
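
The two permute controls must make the bit-permute gather the same eight sign bits on either endianness; the value they have to reproduce is the Intel one, sketched here (movemask_pi8_ref is an illustrative name):

/* Scalar model of _mm_movemask_pi8: bit i of the result is the most
   significant bit of byte element i, i.e. bit 8*i+7 of the 64-bit value.  */
static int
movemask_pi8_ref (unsigned long long m)
{
  int i, mask = 0;
  for (i = 0; i < 8; i++)
    mask |= (int) ((m >> (8 * i + 7)) & 1) << i;
  return mask;
}
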
__vector unsigned short c;
__vector unsigned int w0, w1;
__vector unsigned char xform1 = {
+#ifdef __LITTLE_ENDIAN__
0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
+#else
+ 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
+ 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15
+#endif
};
a = (__vector unsigned short)vec_splats (__A);
{
#ifdef __LITTLE_ENDIAN__
0x0908, 0x0B0A, 0x0D0C, 0x0F0E
-#elif __BIG_ENDIAN__
+#else
0x0607, 0x0405, 0x0203, 0x0001
#endif
};
t.as_short[1] = permute_selectors[element_selector_32];
t.as_short[2] = permute_selectors[element_selector_54];
t.as_short[3] = permute_selectors[element_selector_76];
-#elif __BIG_ENDIAN__
+#else
t.as_short[3] = permute_selectors[element_selector_10];
t.as_short[2] = permute_selectors[element_selector_32];
t.as_short[1] = permute_selectors[element_selector_54];
__vector signed int vsum;
const __vector unsigned int zero =
{ 0, 0, 0, 0 };
- unsigned short result;
+ __m64_union result = {0};
a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
vsum = vec_sums (vsum, (__vector signed int) zero);
/* The sum is in the right most 32-bits of the vector result.
Transfer to a GPR and truncate to 16 bits. */
- result = vsum[3];
- return (result);
+ result.as_short[0] = vsum[3];
+ return result.as_m64;
}
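
Only the position of the 16-bit sum inside the vector is endian-specific here; the value returned follows the Intel definition, sketched below (sad_pu8_ref is an illustrative name):

/* Scalar model of _mm_sad_pu8: the low 16 bits of the result hold the sum
   of absolute differences of the eight unsigned byte elements; the upper
   48 bits are zero.  */
static unsigned long long
sad_pu8_ref (unsigned long long a, unsigned long long b)
{
  unsigned int sum = 0;
  int i;
  for (i = 0; i < 8; i++)
    {
      int da = (int) ((a >> (8 * i)) & 0xff);
      int db = (int) ((b >> (8 * i)) & 0xff);
      sum += (unsigned int) (da > db ? da - db : db - da);
    }
  return sum;
}
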
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))