+2018-12-06 Paul A. Clarke <pc@us.ibm.com>
+
+ PR target/88316
+ * config/rs6000/smmintrin.h: New file.
+ * config.gcc: Add smmintrin.h to extra_headers for powerpc*-*-*.
+
2018-12-06 Paul A. Clarke <pc@us.ibm.com>
PR target/88316
extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
extra_headers="${extra_headers} mmintrin.h x86intrin.h"
- extra_headers="${extra_headers} pmmintrin.h tmmintrin.h"
+ extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h"
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in
--- /dev/null
+/* Copyright (C) 2018 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0.
+
+ NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */
+
+#ifndef NO_WARN_X86_INTRINSICS
+/* This header is distributed to simplify porting x86_64 code that
+ makes explicit use of Intel intrinsics to powerpc64le.
+ It is the user's responsibility to determine if the results are
+ acceptable and make additional changes as necessary.
+ Note that much code that uses Intel intrinsics can be rewritten in
+ standard C or GNU C extensions, which are more portable and better
+ optimized across multiple targets. */
+#endif
+
+#ifndef SMMINTRIN_H_
+#define SMMINTRIN_H_
+
+#include <altivec.h>
+#include <tmmintrin.h>
+
+/* Return the byte of __X selected by the low four bits of __N,
+   zero-extended to int.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi8 (__m128i __X, const int __N)
+{
+  const __v16qi __bytes = (__v16qi) __X;
+  /* Go through unsigned char so the value is not sign-extended.  */
+  const unsigned char __byte = (unsigned char) __bytes[__N & 15];
+  return __byte;
+}
+
+/* Return the 32-bit element of __X selected by the low two bits
+   of __N.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi32 (__m128i __X, const int __N)
+{
+  const __v4si __words = (__v4si) __X;
+  return __words[__N & 3];
+}
+
+/* Return the 64-bit element of __X selected by the low bit of __N.
+   The result type is long long so that the upper 32 bits of the
+   selected element are not discarded on return.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi64 (__m128i __X, const int __N)
+{
+  return ((__v2di)__X)[__N & 1];
+}
+
+/* Return, as an int, the 32-bit element of __X selected by the low
+   two bits of __N.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_ps (__m128 __X, const int __N)
+{
+  /* The vector cast reinterprets the bits of each float; no
+     float-to-integer value conversion takes place.  */
+  const __v4si __bits = (__v4si) __X;
+  return __bits[__N & 3];
+}
+
+#endif
+2018-12-06 Paul A. Clarke <pc@us.ibm.com>
+
+ PR target/88316
+ * gcc.target/powerpc/mmx-packssdw-1.c: Fixes for big-endian.
+ * gcc.target/powerpc/mmx-packsswb-1.c: Likewise.
+ * gcc.target/powerpc/mmx-packuswb-1.c: Likewise.
+ * gcc.target/powerpc/mmx-pmulhw-1.c: Likewise.
+ * gcc.target/powerpc/sse-cvtpi32x2ps-1.c: Likewise.
+ * gcc.target/powerpc/sse-cvtpu16ps-1.c: Likewise.
+ * gcc.target/powerpc/sse-cvtss2si-1.c: Likewise.
+ * gcc.target/powerpc/sse-cvtss2si-2.c: Likewise.
+ * gcc.target/powerpc/sse2-pshufhw-1.c: Likewise.
+ * gcc.target/powerpc/sse2-pshuflw-1.c: Likewise.
+
2018-12-06 Marek Polacek <polacek@redhat.com>
PR c++/88373 - wrong parse error with ~.
return _mm_packs_pi32 (s1, s2);
}
+/* Clamp VAL to the range of a signed 16-bit integer,
+   [-32768, 32767].  */
+static short
+saturate (int val)
+{
+  const int lo = -32768;
+  const int hi = 32767;
+
+  if (val < lo)
+    return lo;
+  return val > hi ? hi : val;
+}
+
+/* Return the 32-bit half of B selected by the low bit of IMM8.
+   On big-endian targets the selection is mirrored before the
+   64-bit value is shifted.  */
+static inline int
+l_mm_extract_pi32 (__m64 b, int imm8)
+{
+#ifdef __BIG_ENDIAN__
+  const unsigned int word = 1 - (imm8 & 0x1);
+#else
+  const unsigned int word = imm8 & 0x1;
+#endif
+  const long long bits = (long long) b;
+
+  /* Shift the chosen half down to bits 0..31 and mask off the rest.  */
+  return (bits >> (word * 32)) & 0xffffffff;
+}
+
static void
TEST (void)
{
__m64_union s1, s2;
__m64_union u;
__m64_union e;
- int i;
+ int start, end, inc;
s1.as_m64 = _mm_set_pi32 (2134, -128);
s2.as_m64 = _mm_set_pi32 (41124, 234);
u.as_m64 = test (s1.as_m64, s2.as_m64);
- for (i = 0; i < 2; i++)
- {
- if (s1.as_int[i] > 32767)
- e.as_short[i] = 32767;
- else if (s1.as_int[i] < -32768)
- e.as_short[i] = -32768;
- else
- e.as_short[i] = s1.as_int[i];
- }
-
- for (i = 0; i < 2; i++)
- {
- if (s2.as_int[i] > 32767)
- e.as_short[i+2] = 32767;
- else if (s2.as_int[i] < -32768)
- e.as_short[i+2] = -32768;
- else
- e.as_short[i+2] = s2.as_int[i];
- }
+#ifdef __LITTLE_ENDIAN__
+ e.as_m64 = _mm_set_pi16 (saturate (l_mm_extract_pi32 (s2.as_m64, 1)),
+ saturate (l_mm_extract_pi32 (s2.as_m64, 0)),
+ saturate (l_mm_extract_pi32 (s1.as_m64, 1)),
+ saturate (l_mm_extract_pi32 (s1.as_m64, 0)));
+#else
+ e.as_m64 = _mm_set_pi16 (saturate (l_mm_extract_pi32 (s1.as_m64, 1)),
+ saturate (l_mm_extract_pi32 (s1.as_m64, 0)),
+ saturate (l_mm_extract_pi32 (s2.as_m64, 1)),
+ saturate (l_mm_extract_pi32 (s2.as_m64, 0)));
+#endif
if (u.as_m64 != e.as_m64)
abort ();
#include CHECK_H
#include <mmintrin.h>
+#include <xmmintrin.h>
static __m64
__attribute__((noinline, unused))
return _mm_packs_pi16 (s1, s2);
}
+/* Clamp VAL to the range of a signed 8-bit integer, [-128, 127].  */
+static signed char
+saturate (signed short val)
+{
+  if (val >= -128 && val <= 127)
+    return val;
+
+  /* Out of range: pick the bound on the side VAL overflowed.  */
+  return val > 127 ? 127 : -128;
+}
+
static void
TEST (void)
{
s2.as_m64 = _mm_set_pi16 (41124, 234, 2344, 2354);
u.as_m64 = test (s1.as_m64, s2.as_m64);
- for (i = 0; i < 4; i++)
- {
- if (s1.as_short[i] > 127)
- e.as_char[i] = 127;
- else if (s1.as_short[i] < -128)
- e.as_char[i] = -128;
- else
- e.as_char[i] = s1.as_short[i];
- }
-
- for (i = 0; i < 4; i++)
- {
- if (s2.as_short[i] > 127)
- e.as_char[i+4] = 127;
- else if (s2.as_short[i] < -128)
- e.as_char[i+4] = -128;
- else
- e.as_char[i+4] = s2.as_short[i];
- }
+#ifdef __LITTLE_ENDIAN__
+ e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s2.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 0)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 0)));
+#else
+ e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s1.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 0)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 0)));
+#endif
if (u.as_m64 != e.as_m64)
abort ();
#include CHECK_H
#include <mmintrin.h>
+#include <xmmintrin.h>
static __m64
__attribute__((noinline, unused))
return _mm_packs_pu16 (s1, s2);
}
+/* Clamp VAL to the range of an unsigned 8-bit integer, [0, 255].  */
+static unsigned char
+saturate (signed short val)
+{
+  if (val < 0)
+    return 0;
+  if (val > 255)
+    return 255;
+  return val;
+}
+
static void
TEST (void)
{
s2.as_m64 = _mm_set_pi16 (-9, -10, -11, -12);
u.as_m64 = test (s1.as_m64, s2.as_m64);
- for (i=0; i<4; i++)
- {
- tmp = s1.as_short[i]<0 ? 0 : s1.as_short[i];
- tmp = tmp>255 ? 255 : tmp;
- e.as_char[i] = tmp;
-
- tmp = s2.as_short[i]<0 ? 0 : s2.as_short[i];
- tmp = tmp>255 ? 255 : tmp;
- e.as_char[i+4] = tmp;
- }
+#ifdef __LITTLE_ENDIAN__
+ e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s2.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 0)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 0)));
+#else
+ e.as_m64 = _mm_set_pi8 (saturate (_mm_extract_pi16 (s1.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s1.as_m64, 0)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 3)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 2)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 1)),
+ saturate (_mm_extract_pi16 (s2.as_m64, 0)));
+#endif
if (u.as_m64 != e.as_m64)
abort ();
s2.as_m64 = _mm_set_pi16 (11, 9834, 7444, -10222);
u.as_m64 = test (s1.as_m64, s2.as_m64);
- for (i = 0; i < 4; i++)
- {
- tmp = s1.as_short[i] * s2.as_short[i];
-
- e.as_short[i] = (tmp & 0xffff0000)>>16;
- }
+ e.as_m64 = _mm_set_pi16 (
+ ((s1.as_short[3] * s2.as_short[3]) & 0xffff0000) >> 16,
+ ((s1.as_short[2] * s2.as_short[2]) & 0xffff0000) >> 16,
+ ((s1.as_short[1] * s2.as_short[1]) & 0xffff0000) >> 16,
+ ((s1.as_short[0] * s2.as_short[0]) & 0xffff0000) >> 16);
if (u.as_m64 != e.as_m64)
abort ();
TEST (void)
{
__m64_union s1, s2;
- union128 u;
- float e[4] = {1000.0, -20000.0, 43.0, 546.0};
+ union128 u, e;
+ e.x = _mm_set_ps (546.0, 43.0, -20000.0, 1000.0);
/* input signed in {1000, -20000, 43, 546}. */
s1.as_m64 = _mm_setr_pi32 (1000, -20000);
u.x = test (s1.as_m64, s2.as_m64);
- if (check_union128 (u, e))
+ if (check_union128 (u, e.a))
abort ();
}
TEST (void)
{
__m64_union s1;
- union128 u;
- float e[4] = {1000.0, 45536.0, 45.0, 65535.0};
+ union128 u, e;
+ e.x = _mm_set_ps (65535.0, 45.0, 45536.0, 1000.0);
/* input unsigned short {1000, 45536, 45, 65535}. */
s1.as_m64 = _mm_setr_pi16 (1000, -20000, 45, -1);
u.x = test (s1.as_m64);
- if (check_union128 (u, e))
+ if (check_union128 (u, e.a))
abort ();
}
#endif
#include <xmmintrin.h>
+#include <smmintrin.h>
static int
__attribute__((noinline, unused))
{
union128 s1;
int d;
- int e;
+ union {
+ float f;
+ int i;
+ } e;
s1.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46);
d = test (s1.x);
- e = (int)s1.a[0];
- if (e != d)
+ e.i = _mm_extract_ps (s1.x, 0);
+ e.i = e.f;
+
+ if (e.i != d)
abort ();
}
#endif
#include <xmmintrin.h>
+#include <smmintrin.h>
static long long
__attribute__((noinline, unused))
union128 s1;
long long d;
long long e;
+ union {
+ float f;
+ int i;
+ } u;
s1.x = _mm_set_ps (344.4, 68.346, 43.35, 429496729501.4);
d = test (s1.x);
- e = (long long)s1.a[0];
+
+ u.i = _mm_extract_ps (s1.x, 0);
+ e = u.f;
if (e != d)
abort ();
static void
TEST (void)
{
- union128i_q s1;
- union128i_w u;
+ union128i_w s1, u;
short e[8] = { 0 };
int i;
int m1[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 };
int m2[4];
- s1.x = _mm_set_epi64x (0xabcde,0xef58a234);
+ s1.x = _mm_set_epi16 (0, 0, 0xa, 0xbcde, 0, 0, 0xef58, 0xa234);
u.x = test (s1.x);
for (i = 0; i < 4; i++)
- e[i] = (s1.a[0]>>(16 * i)) & 0xffff;
+ e[i] = s1.a[i];
- for (i = 0; i < 4; i++)
- m2[i] = (N & m1[i])>>(2*i);
+ for (i = 0; i < 4; i++) {
+ int i2 = i;
+#ifdef __LITTLE_ENDIAN__
+ i2 = 3 - i;
+#endif
+ m2[i2] = (N & m1[i2]) >> (2 * i2);
+ }
for (i = 0; i < 4; i++)
- e[i+4] = (s1.a[1] >> (16 * m2[i])) & 0xffff;
+ e[i + 4] = s1.a[m2[i] + 4];
if (check_union128i_w(u, e))
{
static void
TEST (void)
{
- union128i_q s1;
- union128i_w u;
+ union128i_w s1, u;
short e[8] = { 0 };
int i;
int m1[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 };
int m2[4];
- s1.x = _mm_set_epi64x (0xabcde,0xef58a234);
+ s1.x = _mm_set_epi16 (0, 0, 0xa, 0xbcde, 0, 0, 0xef58, 0xa234);
u.x = test (s1.x);
for (i = 0; i < 4; i++)
- e[i+4] = (s1.a[1]>>(16 * i)) & 0xffff;
+ e[i + 4] = s1.a[i + 4];
- for (i = 0; i < 4; i++)
- m2[i] = (N & m1[i])>>(2*i);
+ for (i = 0; i < 4; i++) {
+ int i2 = i;
+#ifdef __LITTLE_ENDIAN__
+ i2 = 3 - i;
+#endif
+ m2[i2] = (N & m1[i2]) >> (2 * i2);
+ }
for (i = 0; i < 4; i++)
- e[i] = (s1.a[0] >> (16 * m2[i])) & 0xffff;
+ e[i] = s1.a[m2[i]];
if (check_union128i_w(u, e))
{