+2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/89021
+ * gcc.target/i386/mmx-vals.h: New file.
+ * gcc.target/i386/sse2-mmx-2.c: Likewise.
+ * gcc.target/i386/sse2-mmx-3.c: Likewise.
+ * gcc.target/i386/sse2-mmx-4.c: Likewise.
+ * gcc.target/i386/sse2-mmx-5.c: Likewise.
+ * gcc.target/i386/sse2-mmx-6.c: Likewise.
+ * gcc.target/i386/sse2-mmx-7.c: Likewise.
+ * gcc.target/i386/sse2-mmx-8.c: Likewise.
+ * gcc.target/i386/sse2-mmx-9.c: Likewise.
+ * gcc.target/i386/sse2-mmx-10.c: Likewise.
+ * gcc.target/i386/sse2-mmx-11.c: Likewise.
+ * gcc.target/i386/sse2-mmx-12.c: Likewise.
+ * gcc.target/i386/sse2-mmx-13.c: Likewise.
+ * gcc.target/i386/sse2-mmx-14.c: Likewise.
+ * gcc.target/i386/sse2-mmx-15.c: Likewise.
+ * gcc.target/i386/sse2-mmx-16.c: Likewise.
+ * gcc.target/i386/sse2-mmx-17.c: Likewise.
+ * gcc.target/i386/sse2-mmx-18a.c: Likewise.
+ * gcc.target/i386/sse2-mmx-18b.c: Likewise.
+ * gcc.target/i386/sse2-mmx-18c.c: Likewise.
+ * gcc.target/i386/sse2-mmx-19a.c: Likewise.
+ * gcc.target/i386/sse2-mmx-18b.c: Likewise.
+ * gcc.target/i386/sse2-mmx-19c.c: Likewise.
+ * gcc.target/i386/sse2-mmx-19d.c: Likewise.
+ * gcc.target/i386/sse2-mmx-19e.c: Likewise.
+ * gcc.target/i386/sse2-mmx-20.c: Likewise.
+ * gcc.target/i386/sse2-mmx-21.c: Likewise.
+ * gcc.target/i386/sse2-mmx-22.c: Likewise.
+ * gcc.target/i386/sse2-mmx-cvtpi2ps.c: Likewise.
+ * gcc.target/i386/sse2-mmx-cvtps2pi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-cvttps2pi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-maskmovq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-packssdw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-packsswb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-packuswb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddd.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddsb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddsw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddusb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddusw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-paddw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pand.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pandn.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pavgb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pavgw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pcmpeqb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pcmpeqd.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pcmpeqw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pcmpgtb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pcmpgtd.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pcmpgtw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pextrw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pinsrw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmaddwd.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmaxsw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmaxub.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pminsw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pminub.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmovmskb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmulhuw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmulhw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmullw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pmuludq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-por.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psadbw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pshufw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pslld.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pslldi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psllq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psllqi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psllw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psllwi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrad.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psradi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psraw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrawi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrld.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrldi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrlq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrlqi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrlw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psrlwi.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psubb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psubd.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psubq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psubusb.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psubusw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-psubw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-punpckhbw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-punpckhdq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-punpckhwd.c: Likewise.
+ * gcc.target/i386/sse2-mmx-punpcklbw.c: Likewise.
+ * gcc.target/i386/sse2-mmx-punpckldq.c: Likewise.
+ * gcc.target/i386/sse2-mmx-punpcklwd.c: Likewise.
+ * gcc.target/i386/sse2-mmx-pxor.c: Likewise.
+
2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
PR target/89021
--- /dev/null
+/* Routine to check correctness of the results */
+
+__attribute__((unused))
+static int
+saturate_b (int i)
+{
+ if (i > 127)
+ i = 127;
+ else if (i < -128)
+ i = -128;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_w (int i)
+{
+ if (i > 32767)
+ i = 32767;
+ else if (i < -32768)
+ i = -32768;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_ub (int i)
+{
+ if (i > 255)
+ i = 255;
+ else if (i < 0)
+ i = 0;
+ return i;
+}
+
+__attribute__((unused))
+static int
+saturate_uw (int i)
+{
+ if (i > 65535)
+ i = 65535;
+ else if (i < 0)
+ i = 0;
+ return i;
+}
+
+static long long MMXops[] =
+{
+ 0x3467512347612976LL, 0x000000000000000eLL,
+ 0x3467512347612976LL, 0x0000000000000014LL,
+ 0x3467512347612976LL, 0x000000000000003cLL,
+ 0x0000000000000000LL, 0xFFFFFFFFFFFFFFFFLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x0000000000000000LL,
+ 0x0000000000000001LL, 0x1000000000000000LL,
+ 0x1000000000000000LL, 0x0000000000000001LL,
+ 0xFF00FF00FF00FF00LL, 0x00FF00FF00FF00FFLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x0101010101010101LL,
+ 0x0101010101010101LL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x0123456789ABCDEFLL, 0x0123456789ABCDEFLL,
+ 0x3467512347612976LL, 0x1839876340879234LL,
+ 0x0000000000000000LL, 0x0000000000000000LL,
+ 0xFFFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x7F7F7F7F7F7F7F7FLL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x0101010101010101LL,
+ 0x7F7F7F7F7F7F7F7FLL, 0x4782082349761237LL,
+ 0x0000000000000000LL, 0x7F7F7F7F7F7F7F7FLL,
+ 0x8080808080808080LL, 0x8080808080808080LL,
+ 0x0101010101010101LL, 0x8080808080808080LL,
+ 0x8080808080808080LL, 0x0000000000000000LL,
+ 0x2372347120982458LL, 0x8080808080808080LL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x8080808080808080LL,
+ 0x7F7F7F7F7F7F7F7FLL, 0xFFFFFFFFFFFFFFFFLL,
+ 0x8080808080808080LL, 0x7F7F7F7F7F7F7F7FLL,
+ 0xFFFFFFFFFFFFFFFFLL, 0x7F7F7F7F7F7F7F7FLL
+};
+
+#define MMX_num_ops (sizeof (MMXops) / sizeof (MMXops[0]))
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_int (long long *ll1, long long *r)
+{
+ int i1 = *(int *) ll1;
+ *(__m64 *) r = _m_from_int (i1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ int *res = (int *) r;
+ res[0] = *(int *) ll1;
+ res[1] = 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_from_int (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_from_long_long (long long *ll1, long long *r)
+{
+ *(__m64 *) r = _mm_cvtsi64_m64 (*ll1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ *r = *ll1;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_from_long_long (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_int (long long *ll1, long long *r)
+{
+ __m64 m = *(__m64 *) ll1;
+ *(int *) r = _m_to_int (m);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ int *i1 = (int *) ll1;
+ *(int *) r = *i1;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r = 0, ck = 0;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_to_int (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2 -mno-mmx" } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_to_long_long (long long *ll1, long long *r)
+{
+ __m64 m = *(__m64 *) ll1;
+ *r = _mm_cvtm64_si64 (m);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *ll1, long long *r)
+{
+ *r = *ll1;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_to_long_long (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_setzero (long long *r)
+{
+ *(__m64 *) r = _mm_setzero_si64 ();
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *r)
+{
+ *r = 0x0LL;
+}
+
+static void
+sse2_test (void)
+{
+ long long r, ck;
+
+ /* Run the MMX tests */
+ test_setzero (&r);
+ compute_correct_result (&ck);
+ if (ck != r)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int x, int y, long long *r)
+{
+ *(__m64 *) r = _mm_set_pi32 (x, y);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (int x, int y, long long *res_p)
+{
+ int *res = (int *) res_p;
+ res[0] = y;
+ res[1] = x;
+}
+
+static void
+sse2_test (void)
+{
+ int x, y;
+ long long r, ck;
+
+ /* Run the MMX tests */
+ x = 0x0badbeef;
+ y = 0x0badfeed;
+ test_set (x, y, &r);
+ compute_correct_result (x, y, &ck);
+ if (ck != r)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (int i0, int i1, int i2, int i3, long long *r)
+{
+ *(__m64 *) r = _mm_set_pi16 (i0, i1, i2, i3);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (int i0, int i1, int i2, int i3, long long *res_p)
+{
+ short *res = (short *) res_p;
+ res[0] = i3;
+ res[1] = i2;
+ res[2] = i1;
+ res[3] = i0;
+}
+
+static void
+sse2_test (void)
+{
+ short i0, i1, i2, i3;
+ long long r, ck;
+
+ /* Run the MMX tests */
+ i0 = 0x0bad;
+ i1 = 0xbeef;
+ i2 = 0x0bad;
+ i3 = 0xfeed;
+ test_set (i0, i1, i2, i3, &r);
+ compute_correct_result (i0, i1, i2, i3, &ck);
+ if (ck != r)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_set (char i0, char i1, char i2, char i3,
+ char i4, char i5, char i6, char i7, long long *r)
+{
+ *(__m64 *) r = _mm_set_pi8 (i0, i1, i2, i3, i4, i5, i6, i7);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (char i0, char i1, char i2, char i3,
+ char i4, char i5, char i6, char i7,
+ long long *res_p)
+{
+ char *res = (char *) res_p;
+ res[0] = i7;
+ res[1] = i6;
+ res[2] = i5;
+ res[3] = i4;
+ res[4] = i3;
+ res[5] = i2;
+ res[6] = i1;
+ res[7] = i0;
+}
+
+static void
+sse2_test (void)
+{
+ char i0, i1, i2, i3, i4, i5, i6, i7;
+ long long r, ck;
+
+ /* Run the MMX tests */
+ i0 = 0x12;
+ i1 = 0x34;
+ i2 = 0x56;
+ i3 = 0x78;
+ i4 = 0x90;
+ i5 = 0xab;
+ i6 = 0xcd;
+ i7 = 0xef;
+ test_set (i0, i1, i2, i3, i4, i5, i6, i7, &r);
+ compute_correct_result (i0, i1, i2, i3, i4, i5, i6, i7, &ck);
+ if (ck != r)
+ abort ();
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx512vl" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (int i)
+{
+ __v2si x = { i, i };
+ return (__m64) x;
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "pbroadcastd" 1 } } */
+/* { dg-final { scan-assembler-not "movd" } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-18a.c"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-18a.c"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-avx -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include <mmintrin.h>
+
+__m64
+foo (short i)
+{
+ __v4hi x = { i, i, i, i };
+ return (__m64) x;
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-not "movd" } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx -mno-avx2 -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pshuflw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx512bw -mno-avx512vl -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-mmx -mavx2 -mno-avx512f -mtune=intel" } */
+/* { dg-final { scan-assembler-times "pbroadcastw" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "movl" } } */
+
+#include "sse2-mmx-19a.c"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%xmm" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+float
+foo (__m64 x)
+{
+ return ((__v2sf) x)[0];
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+int
+foo (__m64 x)
+{
+ return ((__v2si) x)[0];
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
+/* { dg-final { scan-assembler-times "movd" 1 } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <mmintrin.h>
+
+int
+foo (__m64 x)
+{
+ return ((__v2si) x)[1];
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <mmintrin.h>
+
+void
+foo (void)
+{
+ _mm_empty ();
+}
+
+/* { dg-final { scan-assembler-times "emms" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "emms" { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "cvtpi2ps" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m128
+foo (__m128 i1, __m64 i2)
+{
+ return _mm_cvtpi32_ps (i1, i2);
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-4.c"
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+int
+foo (__m64 i)
+{
+ return _m_pextrw (i, 2);
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+__m64
+foo (__m64 i, int w)
+{
+ return _m_pinsrw (i, w, 2);
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler "movnti" } } */
+/* { dg-final { scan-assembler-not "movntq" } } */
+/* { dg-final { scan-assembler-not "%mm" } } */
+
+#include <xmmintrin.h>
+
+void
+foo (__m64 *p, __m64 i)
+{
+ _mm_stream_pi (p, i);
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+
+#include "mmx-8.c"
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#include <string.h>
+
+#define FLOAT_X 2.3456
+#define FLOAT_Y -4.5987
+
+static float expected_x = FLOAT_X;
+static float expected_y = FLOAT_Y;
+static __v2sf expected1 = { FLOAT_X, FLOAT_Y };
+static __v2sf expected2 = { FLOAT_X, 0 };
+static __v2sf expected3 = { FLOAT_X, FLOAT_X };
+
+float
+__attribute__((noinline, noclone))
+foo1 (__m64 x)
+{
+ return ((__v2sf) x)[0];
+}
+
+float
+__attribute__((noinline, noclone))
+foo2 (__m64 x)
+{
+ return ((__v2sf) x)[1];
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo3 (float x)
+{
+ return __extension__ (__m64) (__v2sf) { x, 0 };
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo4 (float x)
+{
+ return __extension__ (__m64) (__v2sf) { x, x };
+}
+
+__m64
+__attribute__((noinline, noclone))
+foo5 (float x, float y)
+{
+ return __extension__ (__m64) (__v2sf) { x, y };
+}
+
+void
+__attribute__((noinline))
+sse2_test (void)
+{
+ __m64 res;
+ float x;
+
+ x = foo1 ((__m64) expected1);
+ if (x != expected_x)
+ abort ();
+
+ x = foo2 ((__m64) expected1);
+ if (x != expected_y)
+ abort ();
+
+ res = foo3 (FLOAT_X);
+ if (memcmp (&res, &expected2, sizeof (res)))
+ abort ();
+
+ res = foo4 (FLOAT_X);
+ if (memcmp (&res, &expected3, sizeof (res)))
+ abort ();
+
+ res = foo5 (FLOAT_X, FLOAT_Y);
+ if (memcmp (&res, &expected1, sizeof (res)))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvtpi32_ps (__m128 *i1, __m64 *i2, __m128 *r)
+{
+ *(__m128 *) r = _mm_cvtpi32_ps (*i1, *i2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m128 *dst_p, __m64 *src_p, __m128 *res_p)
+{
+ int *src = (int *) src_p;
+ float *res = (float *) res_p;
+ *res_p = *dst_p;
+ int i;
+ __m128 r;
+ for (i = 0; i < 2; i++)
+ {
+ r = _mm_cvt_si2ss (*dst_p, src[i]);
+ res[i] = ((__v4sf) r)[0];
+ }
+}
+
+static void
+sse2_test (void)
+{
+ __m128 r, ck;
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+ __v2si y = { 30, -39 };
+
+ /* Run the MMX tests */
+ test_cvtpi32_ps ((__m128 *) &x, (__m64 *) &y, &r);
+ compute_correct_result ((__m128 *) &x, (__m64 *) &y, &ck);
+ if (memcmp (&ck, &r, sizeof (r)))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvtps_pi32 (__m128 *src_p, long long *r)
+{
+ *(__m64 *) r = _mm_cvtps_pi32 (*src_p);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+ __v4sf *src = (__v4sf *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = _mm_cvt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void)
+{
+ long long r, ck;
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+ /* Run the MMX tests */
+ test_cvtps_pi32 ((__m128 *) &x, &r);
+ compute_correct_result ((__m128 *) &x, &ck);
+ if (ck != r)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_cvttps_pi32 (__m128 *src_p, long long *r)
+{
+ *(__m64 *) r = _mm_cvttps_pi32 (*src_p);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m128 *src_p, long long *res_p)
+{
+ __v4sf *src = (__v4sf *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = _mm_cvtt_ss2si (_mm_set_ss ((*src)[i]));
+}
+
+static void
+sse2_test (void)
+{
+ long long r, ck;
+ __v4sf x = { 1.99f, -3.9f, -4.9f, 3.8f };
+
+ /* Run the MMX tests */
+ test_cvttps_pi32 ((__m128 *) &x, &r);
+ compute_correct_result ((__m128 *) &x, &ck);
+ if (ck != r)
+ abort ();
+}
--- /dev/null
+/* { dg-do run { target mmap } } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_maskmovq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ _m_maskmovq (t1, t2, (char *) r);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ if ((src[i] & 0x80) != 0)
+ res[i] = dst[i];
+}
+
+static void
+do_maskmovq_test (long long *r)
+{
+ int i;
+ long long ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ r[0] = -1LL;
+ ck = -1LL;
+ test_maskmovq (&MMXops[i], &MMXops[i], r);
+ compute_correct_result (&MMXops[i], &MMXops[i], &ck);
+ if (*r != ck)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
+
+static void
+sse2_test (void)
+{
+ char *buf;
+ long long *r;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+
+ buf = mmap (0, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buf == MAP_FAILED)
+ {
+ perror ("mmap");
+ abort ();
+ }
+
+ if (mprotect (buf, page_size, PROT_NONE))
+ {
+ perror ("mprotect");
+ abort ();
+ }
+
+ if (mprotect (buf + 2 * page_size, page_size, PROT_NONE))
+ {
+ perror ("mprotect");
+ abort ();
+ }
+
+ r = (long long *) (buf + page_size);
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + page_size + 3);
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + page_size + 11);
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + 2 * page_size - 16);
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + 2 * page_size - 16 + 3);
+ do_maskmovq_test (r);
+
+ r = (long long *) (buf + 2 * page_size - 16 + 8);
+ do_maskmovq_test (r);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_packssdw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_packssdw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ short *res = (short *) res_p;
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ res[i] = saturate_w (dst[i]);
+ res[i + 2] = saturate_w (src[i]);
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_packssdw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_packsswb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_packsswb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ char *res = (char *) res_p;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ res[i] = saturate_b (dst[i]);
+ res[i + 4] = saturate_b (src[i]);
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_packsswb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_packuswb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_packuswb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ res[i] = saturate_ub (dst[i]);
+ res[i + 4] = saturate_ub (src[i]);
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_packuswb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] + src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] + src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _mm_add_si64 (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ res_p[0] = dst_p[0] + src_p[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddsb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddsb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = saturate_b (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddsb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddsw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddsw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = saturate_w (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddsw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddusb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddusb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = saturate_ub (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddusb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddusw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddusw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = saturate_uw (dst[i] + src[i]);
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddusw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_paddw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_paddw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] + src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_paddw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pand (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pand (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = dst[0] & src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pand (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pandn (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pandn (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = ~dst[0] & src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pandn (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pavgb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pavgb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ unsigned int tmp;
+ for (i = 0; i < 8; i++)
+ {
+ tmp = dst[i] + src[i] + 1;
+ res[i] = tmp >> 1;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pavgb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pavgw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pavgw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ unsigned int tmp;
+ for (i = 0; i < 4; i++)
+ {
+ tmp = dst[i] + src[i] + 1;
+ res[i] = tmp >> 1;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pavgw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpeqb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpeqb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpeqd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpeqd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpeqw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpeqw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] == src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpeqw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpgtb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpgtb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpgtd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpgtd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pcmpgtw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pcmpgtw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] > src[i] ? -1 : 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pcmpgtw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pextrw (__m64 *i, unsigned int imm, int *r)
+{
+ switch (imm)
+ {
+ case 0:
+ *r = _m_pextrw (*i, 0);
+ break;
+ case 1:
+ *r = _m_pextrw (*i, 1);
+ break;
+ case 2:
+ *r = _m_pextrw (*i, 2);
+ break;
+ case 3:
+ *r = _m_pextrw (*i, 3);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m64 *src_p, unsigned int imm, int *res_p)
+{
+ short *src = (short *) src_p;
+ if (imm < 4)
+ *res_p = src[imm];
+}
+
+static void
+sse2_test (void)
+{
+ int r, ck;
+ int i;
+ int failed = 0;
+ __v4hi y = { 3320, -3339, 48, 4392 };
+
+ /* Run the MMX tests */
+ for (i = 0; i < 4; i++)
+ {
+ test_pextrw ((__m64 *) &y, i, &r);
+ compute_correct_result ((__m64 *) &y, i, &ck);
+ if (r != ck)
+ failed++;
+ }
+
+ if (failed)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include <string.h>
+#include "sse2-check.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pinsrw (__m64 *i, int val, unsigned int imm, int *r)
+{
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_pinsrw (*i, val, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_pinsrw (*i, val, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_pinsrw (*i, val, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_pinsrw (*i, val, 3);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (__m64 *src_p, int val, unsigned int imm,
+ int *res_p)
+{
+ short *res = (short *) res_p;
+ *(__m64 *) res_p = *src_p;
+ if (imm < 4)
+ res[imm] = val;
+}
+
+static void
+sse2_test (void)
+{
+ int r, ck;
+ int i;
+ int failed = 0;
+ __v4hi y = { 3320, -3339, 48, 4392 };
+
+ /* Run the MMX tests */
+ for (i = 0; i < 4; i++)
+ {
+ test_pinsrw ((__m64 *) &y, 0x1234, i, &r);
+ compute_correct_result ((__m64 *) &y, 0x1234, i, &ck);
+ if (r != ck)
+ failed++;
+ }
+
+ if (failed)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaddwd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmaddwd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ int *res = (int *) res_p;
+ res[0] = dst[0] * src[0] + dst[1] * src[1];
+ res[1] = dst[2] * src[2] + dst[3] * src[3];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmaddwd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxsw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmaxsw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] > src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmaxsw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmaxub (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmaxub (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] > src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmaxub (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminsw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pminsw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] < src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pminsw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pminub (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pminub (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] < src[i] ? dst[i] : src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pminub (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmovmskb (long long *ll1, int *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ *r = _m_pmovmskb (t1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, int *res_p)
+{
+ char *src = (char *) src_p;
+ int i;
+ int res = 0;
+ for (i = 0; i < 8; i++)
+ res |= ((src[i] & 0x80) >> 7) << i;
+ *res_p = res;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ int r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ test_pmovmskb (&MMXops[i], &r);
+ compute_correct_result (&MMXops[i], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhuw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmulhuw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ unsigned int t = dst[i] * src[i];
+ res[i] = t >> 16;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmulhuw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmulhw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmulhw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int tmp;
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ tmp = dst[i] * src[i];
+ tmp >>= 16;
+ res[i] = tmp;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmulhw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmullw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pmullw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int tmp;
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ tmp = dst[i] * src[i];
+ res[i] = tmp;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmullw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pmuludq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _mm_mul_su32 (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned int *dst = (unsigned int*) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned long long *res = (unsigned long long *) res_p;
+ res[0] = dst[0];
+ res[0] *= src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pmuludq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_por (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_por (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = dst[0] | src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_por (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psadbw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psadbw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ int tmp;
+ unsigned int sum = 0;
+ for (i = 0; i < 8; i++)
+ {
+ tmp = dst[i] - src[i];
+ if (tmp < 0)
+ tmp = -tmp;
+ sum += tmp;
+ }
+ res[0] = sum;
+ for (i = 1; i < 4; i++)
+ res[i] = 0;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psadbw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pshufw (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_pshufw (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_pshufw (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_pshufw (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_pshufw (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_pshufw (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_pshufw (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_pshufw (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_pshufw (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_pshufw (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_pshufw (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_pshufw (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_pshufw (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_pshufw (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_pshufw (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_pshufw (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_pshufw (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_pshufw (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_pshufw (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_pshufw (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_pshufw (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_pshufw (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_pshufw (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_pshufw (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_pshufw (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_pshufw (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_pshufw (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_pshufw (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_pshufw (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_pshufw (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_pshufw (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_pshufw (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_pshufw (t1, 31);
+ break;
+ case 32:
+ *(__m64 *) r = _m_pshufw (t1, 32);
+ break;
+ case 33:
+ *(__m64 *) r = _m_pshufw (t1, 33);
+ break;
+ case 34:
+ *(__m64 *) r = _m_pshufw (t1, 34);
+ break;
+ case 35:
+ *(__m64 *) r = _m_pshufw (t1, 35);
+ break;
+ case 36:
+ *(__m64 *) r = _m_pshufw (t1, 36);
+ break;
+ case 37:
+ *(__m64 *) r = _m_pshufw (t1, 37);
+ break;
+ case 38:
+ *(__m64 *) r = _m_pshufw (t1, 38);
+ break;
+ case 39:
+ *(__m64 *) r = _m_pshufw (t1, 39);
+ break;
+ case 40:
+ *(__m64 *) r = _m_pshufw (t1, 40);
+ break;
+ case 41:
+ *(__m64 *) r = _m_pshufw (t1, 41);
+ break;
+ case 42:
+ *(__m64 *) r = _m_pshufw (t1, 42);
+ break;
+ case 43:
+ *(__m64 *) r = _m_pshufw (t1, 43);
+ break;
+ case 44:
+ *(__m64 *) r = _m_pshufw (t1, 44);
+ break;
+ case 45:
+ *(__m64 *) r = _m_pshufw (t1, 45);
+ break;
+ case 46:
+ *(__m64 *) r = _m_pshufw (t1, 46);
+ break;
+ case 47:
+ *(__m64 *) r = _m_pshufw (t1, 47);
+ break;
+ case 48:
+ *(__m64 *) r = _m_pshufw (t1, 48);
+ break;
+ case 49:
+ *(__m64 *) r = _m_pshufw (t1, 49);
+ break;
+ case 50:
+ *(__m64 *) r = _m_pshufw (t1, 50);
+ break;
+ case 51:
+ *(__m64 *) r = _m_pshufw (t1, 51);
+ break;
+ case 52:
+ *(__m64 *) r = _m_pshufw (t1, 52);
+ break;
+ case 53:
+ *(__m64 *) r = _m_pshufw (t1, 53);
+ break;
+ case 54:
+ *(__m64 *) r = _m_pshufw (t1, 54);
+ break;
+ case 55:
+ *(__m64 *) r = _m_pshufw (t1, 55);
+ break;
+ case 56:
+ *(__m64 *) r = _m_pshufw (t1, 56);
+ break;
+ case 57:
+ *(__m64 *) r = _m_pshufw (t1, 57);
+ break;
+ case 58:
+ *(__m64 *) r = _m_pshufw (t1, 58);
+ break;
+ case 59:
+ *(__m64 *) r = _m_pshufw (t1, 59);
+ break;
+ case 60:
+ *(__m64 *) r = _m_pshufw (t1, 60);
+ break;
+ case 61:
+ *(__m64 *) r = _m_pshufw (t1, 61);
+ break;
+ case 62:
+ *(__m64 *) r = _m_pshufw (t1, 62);
+ break;
+ case 63:
+ *(__m64 *) r = _m_pshufw (t1, 63);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ unsigned long long src = *(unsigned long long *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ unsigned int shift;
+ for (i = 0; i < 4; i++)
+ {
+ shift = ((imm >> (2 * i)) & 0x3) * 16;
+ res[i] = (src >> shift) & 0xffff;
+ }
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ if (i > 63)
+ break;
+ test_pshufw (&MMXops[i], i, &r);
+ compute_correct_result (&MMXops[i], i, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pslld (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pslld (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned int *dst = (unsigned int *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned int *res = (unsigned int *) res_p;
+ int i;
+ if (src[1] || src[0] > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] << src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pslld (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_pslldi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_pslldi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_pslldi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_pslldi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_pslldi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_pslldi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_pslldi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_pslldi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_pslldi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_pslldi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_pslldi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_pslldi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_pslldi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_pslldi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_pslldi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_pslldi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_pslldi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_pslldi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_pslldi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_pslldi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_pslldi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_pslldi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_pslldi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_pslldi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_pslldi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_pslldi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_pslldi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_pslldi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_pslldi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_pslldi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_pslldi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_pslldi (t1, 31);
+ break;
+ default:
+ *(__m64 *) r = _m_pslldi (t1, 32);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned int *res = (unsigned int *) res_p;
+ int i;
+ if (imm > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = src[i] << imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psllwi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psllq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ if (src[0] > 63)
+ res[0] = 0;
+ else
+ res[0] = dst[0] << src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psllq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_psllqi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psllqi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psllqi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psllqi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psllqi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psllqi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psllqi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psllqi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psllqi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psllqi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psllqi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psllqi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psllqi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psllqi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psllqi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psllqi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_psllqi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_psllqi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_psllqi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_psllqi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_psllqi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_psllqi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_psllqi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_psllqi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_psllqi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_psllqi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_psllqi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_psllqi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_psllqi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_psllqi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_psllqi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_psllqi (t1, 31);
+ break;
+ case 32:
+ *(__m64 *) r = _m_psllqi (t1, 32);
+ break;
+ case 33:
+ *(__m64 *) r = _m_psllqi (t1, 33);
+ break;
+ case 34:
+ *(__m64 *) r = _m_psllqi (t1, 34);
+ break;
+ case 35:
+ *(__m64 *) r = _m_psllqi (t1, 35);
+ break;
+ case 36:
+ *(__m64 *) r = _m_psllqi (t1, 36);
+ break;
+ case 37:
+ *(__m64 *) r = _m_psllqi (t1, 37);
+ break;
+ case 38:
+ *(__m64 *) r = _m_psllqi (t1, 38);
+ break;
+ case 39:
+ *(__m64 *) r = _m_psllqi (t1, 39);
+ break;
+ case 40:
+ *(__m64 *) r = _m_psllqi (t1, 40);
+ break;
+ case 41:
+ *(__m64 *) r = _m_psllqi (t1, 41);
+ break;
+ case 42:
+ *(__m64 *) r = _m_psllqi (t1, 42);
+ break;
+ case 43:
+ *(__m64 *) r = _m_psllqi (t1, 43);
+ break;
+ case 44:
+ *(__m64 *) r = _m_psllqi (t1, 44);
+ break;
+ case 45:
+ *(__m64 *) r = _m_psllqi (t1, 45);
+ break;
+ case 46:
+ *(__m64 *) r = _m_psllqi (t1, 46);
+ break;
+ case 47:
+ *(__m64 *) r = _m_psllqi (t1, 47);
+ break;
+ case 48:
+ *(__m64 *) r = _m_psllqi (t1, 48);
+ break;
+ case 49:
+ *(__m64 *) r = _m_psllqi (t1, 49);
+ break;
+ case 50:
+ *(__m64 *) r = _m_psllqi (t1, 50);
+ break;
+ case 51:
+ *(__m64 *) r = _m_psllqi (t1, 51);
+ break;
+ case 52:
+ *(__m64 *) r = _m_psllqi (t1, 52);
+ break;
+ case 53:
+ *(__m64 *) r = _m_psllqi (t1, 53);
+ break;
+ case 54:
+ *(__m64 *) r = _m_psllqi (t1, 54);
+ break;
+ case 55:
+ *(__m64 *) r = _m_psllqi (t1, 55);
+ break;
+ case 56:
+ *(__m64 *) r = _m_psllqi (t1, 56);
+ break;
+ case 57:
+ *(__m64 *) r = _m_psllqi (t1, 57);
+ break;
+ case 58:
+ *(__m64 *) r = _m_psllqi (t1, 58);
+ break;
+ case 59:
+ *(__m64 *) r = _m_psllqi (t1, 59);
+ break;
+ case 60:
+ *(__m64 *) r = _m_psllqi (t1, 60);
+ break;
+ case 61:
+ *(__m64 *) r = _m_psllqi (t1, 61);
+ break;
+ case 62:
+ *(__m64 *) r = _m_psllqi (t1, 62);
+ break;
+ case 63:
+ *(__m64 *) r = _m_psllqi (t1, 63);
+ break;
+ default:
+ *(__m64 *) r = _m_psllqi (t1, 64);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+ unsigned long long *res)
+{
+ int i;
+ if (imm > 63)
+ res[0] = 0;
+ else
+ res[0] = src[0] << imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psllwi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psllw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ if (src[1] || src[0] > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] << src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psllw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_psllwi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psllwi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psllwi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psllwi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psllwi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psllwi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psllwi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psllwi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psllwi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psllwi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psllwi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psllwi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psllwi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psllwi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psllwi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psllwi (t1, 15);
+ break;
+ default:
+ *(__m64 *) r = _m_psllwi (t1, 16);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ if (imm > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] << imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psllwi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrad (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrad (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ if (src[1] || src[0] > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psrad (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psradi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_psradi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psradi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psradi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psradi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psradi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psradi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psradi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psradi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psradi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psradi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psradi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psradi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psradi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psradi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psradi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psradi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_psradi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_psradi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_psradi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_psradi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_psradi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_psradi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_psradi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_psradi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_psradi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_psradi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_psradi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_psradi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_psradi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_psradi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_psradi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_psradi (t1, 31);
+ break;
+ default:
+ *(__m64 *) r = _m_psradi (t1, 32);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ if (imm > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = src[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psradi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psraw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psraw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ if (src[1] || src[0] > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psraw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrawi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_psrawi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrawi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrawi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrawi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrawi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrawi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrawi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrawi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrawi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrawi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrawi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrawi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrawi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrawi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrawi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrawi (t1, 15);
+ break;
+ default:
+ *(__m64 *) r = _m_psrawi (t1, 16);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ if (imm > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] < 0 ? -1 : 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psrawi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrld (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrld (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ if (src[1] || src[0] > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psrld (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrldi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_psrldi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrldi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrldi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrldi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrldi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrldi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrldi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrldi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrldi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrldi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrldi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrldi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrldi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrldi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrldi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrldi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_psrldi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_psrldi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_psrldi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_psrldi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_psrldi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_psrldi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_psrldi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_psrldi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_psrldi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_psrldi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_psrldi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_psrldi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_psrldi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_psrldi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_psrldi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_psrldi (t1, 31);
+ break;
+ default:
+ *(__m64 *) r = _m_psrldi (t1, 32);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ if (imm > 31)
+ for (i = 0; i < 2; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 2; i++)
+ res[i] = src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psrldi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrlq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ if (src[0] > 63)
+ res[0] = 0;
+ else
+ res[0] = dst[0] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psrlq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psllwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_psrlqi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrlqi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrlqi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrlqi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrlqi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrlqi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrlqi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrlqi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrlqi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrlqi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrlqi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrlqi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrlqi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrlqi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrlqi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrlqi (t1, 15);
+ break;
+ case 16:
+ *(__m64 *) r = _m_psrlqi (t1, 16);
+ break;
+ case 17:
+ *(__m64 *) r = _m_psrlqi (t1, 17);
+ break;
+ case 18:
+ *(__m64 *) r = _m_psrlqi (t1, 18);
+ break;
+ case 19:
+ *(__m64 *) r = _m_psrlqi (t1, 19);
+ break;
+ case 20:
+ *(__m64 *) r = _m_psrlqi (t1, 20);
+ break;
+ case 21:
+ *(__m64 *) r = _m_psrlqi (t1, 21);
+ break;
+ case 22:
+ *(__m64 *) r = _m_psrlqi (t1, 22);
+ break;
+ case 23:
+ *(__m64 *) r = _m_psrlqi (t1, 23);
+ break;
+ case 24:
+ *(__m64 *) r = _m_psrlqi (t1, 24);
+ break;
+ case 25:
+ *(__m64 *) r = _m_psrlqi (t1, 25);
+ break;
+ case 26:
+ *(__m64 *) r = _m_psrlqi (t1, 26);
+ break;
+ case 27:
+ *(__m64 *) r = _m_psrlqi (t1, 27);
+ break;
+ case 28:
+ *(__m64 *) r = _m_psrlqi (t1, 28);
+ break;
+ case 29:
+ *(__m64 *) r = _m_psrlqi (t1, 29);
+ break;
+ case 30:
+ *(__m64 *) r = _m_psrlqi (t1, 30);
+ break;
+ case 31:
+ *(__m64 *) r = _m_psrlqi (t1, 31);
+ break;
+ case 32:
+ *(__m64 *) r = _m_psrlqi (t1, 32);
+ break;
+ case 33:
+ *(__m64 *) r = _m_psrlqi (t1, 33);
+ break;
+ case 34:
+ *(__m64 *) r = _m_psrlqi (t1, 34);
+ break;
+ case 35:
+ *(__m64 *) r = _m_psrlqi (t1, 35);
+ break;
+ case 36:
+ *(__m64 *) r = _m_psrlqi (t1, 36);
+ break;
+ case 37:
+ *(__m64 *) r = _m_psrlqi (t1, 37);
+ break;
+ case 38:
+ *(__m64 *) r = _m_psrlqi (t1, 38);
+ break;
+ case 39:
+ *(__m64 *) r = _m_psrlqi (t1, 39);
+ break;
+ case 40:
+ *(__m64 *) r = _m_psrlqi (t1, 40);
+ break;
+ case 41:
+ *(__m64 *) r = _m_psrlqi (t1, 41);
+ break;
+ case 42:
+ *(__m64 *) r = _m_psrlqi (t1, 42);
+ break;
+ case 43:
+ *(__m64 *) r = _m_psrlqi (t1, 43);
+ break;
+ case 44:
+ *(__m64 *) r = _m_psrlqi (t1, 44);
+ break;
+ case 45:
+ *(__m64 *) r = _m_psrlqi (t1, 45);
+ break;
+ case 46:
+ *(__m64 *) r = _m_psrlqi (t1, 46);
+ break;
+ case 47:
+ *(__m64 *) r = _m_psrlqi (t1, 47);
+ break;
+ case 48:
+ *(__m64 *) r = _m_psrlqi (t1, 48);
+ break;
+ case 49:
+ *(__m64 *) r = _m_psrlqi (t1, 49);
+ break;
+ case 50:
+ *(__m64 *) r = _m_psrlqi (t1, 50);
+ break;
+ case 51:
+ *(__m64 *) r = _m_psrlqi (t1, 51);
+ break;
+ case 52:
+ *(__m64 *) r = _m_psrlqi (t1, 52);
+ break;
+ case 53:
+ *(__m64 *) r = _m_psrlqi (t1, 53);
+ break;
+ case 54:
+ *(__m64 *) r = _m_psrlqi (t1, 54);
+ break;
+ case 55:
+ *(__m64 *) r = _m_psrlqi (t1, 55);
+ break;
+ case 56:
+ *(__m64 *) r = _m_psrlqi (t1, 56);
+ break;
+ case 57:
+ *(__m64 *) r = _m_psrlqi (t1, 57);
+ break;
+ case 58:
+ *(__m64 *) r = _m_psrlqi (t1, 58);
+ break;
+ case 59:
+ *(__m64 *) r = _m_psrlqi (t1, 59);
+ break;
+ case 60:
+ *(__m64 *) r = _m_psrlqi (t1, 60);
+ break;
+ case 61:
+ *(__m64 *) r = _m_psrlqi (t1, 61);
+ break;
+ case 62:
+ *(__m64 *) r = _m_psrlqi (t1, 62);
+ break;
+ case 63:
+ *(__m64 *) r = _m_psrlqi (t1, 63);
+ break;
+ default:
+ *(__m64 *) r = _m_psrlqi (t1, 64);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *src, unsigned int imm,
+ unsigned long long *res)
+{
+ int i;
+ if (imm > 63)
+ res[0] = 0;
+ else
+ res[0] = src[0] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psllwi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psrlw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ unsigned int *src = (unsigned int *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ if (src[1] || src[0] > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] >> src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psrlw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psrlwi (long long *ll1, unsigned int imm, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ switch (imm)
+ {
+ case 0:
+ *(__m64 *) r = _m_psrlwi (t1, 0);
+ break;
+ case 1:
+ *(__m64 *) r = _m_psrlwi (t1, 1);
+ break;
+ case 2:
+ *(__m64 *) r = _m_psrlwi (t1, 2);
+ break;
+ case 3:
+ *(__m64 *) r = _m_psrlwi (t1, 3);
+ break;
+ case 4:
+ *(__m64 *) r = _m_psrlwi (t1, 4);
+ break;
+ case 5:
+ *(__m64 *) r = _m_psrlwi (t1, 5);
+ break;
+ case 6:
+ *(__m64 *) r = _m_psrlwi (t1, 6);
+ break;
+ case 7:
+ *(__m64 *) r = _m_psrlwi (t1, 7);
+ break;
+ case 8:
+ *(__m64 *) r = _m_psrlwi (t1, 8);
+ break;
+ case 9:
+ *(__m64 *) r = _m_psrlwi (t1, 9);
+ break;
+ case 10:
+ *(__m64 *) r = _m_psrlwi (t1, 10);
+ break;
+ case 11:
+ *(__m64 *) r = _m_psrlwi (t1, 11);
+ break;
+ case 12:
+ *(__m64 *) r = _m_psrlwi (t1, 12);
+ break;
+ case 13:
+ *(__m64 *) r = _m_psrlwi (t1, 13);
+ break;
+ case 14:
+ *(__m64 *) r = _m_psrlwi (t1, 14);
+ break;
+ case 15:
+ *(__m64 *) r = _m_psrlwi (t1, 15);
+ break;
+ default:
+ *(__m64 *) r = _m_psrlwi (t1, 16);
+ break;
+ }
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *src_p, unsigned int imm,
+ long long *res_p)
+{
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ if (imm > 15)
+ for (i = 0; i < 4; i++)
+ res[i] = 0;
+ else
+ for (i = 0; i < 4; i++)
+ res[i] = src[i] >> imm;
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ unsigned int count;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i++)
+ {
+ count = MMXops[i];
+ test_psrlwi (&MMXops[i], count, &r);
+ compute_correct_result (&MMXops[i], count, &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = dst[i] - src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psubb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ int i;
+ for (i = 0; i < 2; i++)
+ res[i] = dst[i] - src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psubd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _mm_sub_si64 (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ res_p[0] = dst_p[0] - src_p[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psubq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusb (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubusb (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned char *dst = (unsigned char *) dst_p;
+ unsigned char *src = (unsigned char *) src_p;
+ unsigned char *res = (unsigned char *) res_p;
+ int i;
+ for (i = 0; i < 8; i++)
+ res[i] = saturate_ub (dst[i] - src[i]);
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psubusb (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubusw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubusw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ unsigned short *dst = (unsigned short *) dst_p;
+ unsigned short *src = (unsigned short *) src_p;
+ unsigned short *res = (unsigned short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = saturate_uw (dst[i] - src[i]);
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psubusw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_psubw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_psubw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ int i;
+ for (i = 0; i < 4; i++)
+ res[i] = dst[i] - src[i];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_psubw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhbw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckhbw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ res[0] = dst[4];
+ res[1] = src[4];
+ res[2] = dst[5];
+ res[3] = src[5];
+ res[4] = dst[6];
+ res[5] = src[6];
+ res[6] = dst[7];
+ res[7] = src[7];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckhbw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhdq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckhdq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ res[0] = dst[1];
+ res[1] = src[1];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckhdq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckhwd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckhwd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ res[0] = dst[2];
+ res[1] = src[2];
+ res[2] = dst[3];
+ res[3] = src[3];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckhwd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklbw (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpcklbw (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ char *dst = (char *) dst_p;
+ char *src = (char *) src_p;
+ char *res = (char *) res_p;
+ res[0] = dst[0];
+ res[1] = src[0];
+ res[2] = dst[1];
+ res[3] = src[1];
+ res[4] = dst[2];
+ res[5] = src[2];
+ res[6] = dst[3];
+ res[7] = src[3];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpcklbw (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpckldq (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpckldq (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ int *dst = (int *) dst_p;
+ int *src = (int *) src_p;
+ int *res = (int *) res_p;
+ res[0] = dst[0];
+ res[1] = src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpckldq (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_punpcklwd (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_punpcklwd (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (long long *dst_p, long long *src_p,
+ long long *res_p)
+{
+ short *dst = (short *) dst_p;
+ short *src = (short *) src_p;
+ short *res = (short *) res_p;
+ res[0] = dst[0];
+ res[1] = src[0];
+ res[2] = dst[1];
+ res[3] = src[1];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_punpcklwd (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-strict-aliasing -msse2" } */
+/* { dg-additional-options "-mno-mmx" { target { ! ia32 } } } */
+
+#include "sse2-check.h"
+#include "mmx-vals.h"
+
+__attribute__((noinline, noclone))
+static void
+test_pxor (long long *ll1, long long *ll2, long long *r)
+{
+ __m64 t1 = *(__m64 *) ll1;
+ __m64 t2 = *(__m64 *) ll2;
+ *(__m64 *) r = _m_pxor (t1, t2);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (unsigned long long *dst,
+ unsigned long long *src,
+ unsigned long long *res)
+{
+ res[0] = dst[0] ^ src[0];
+}
+
+static void
+sse2_test (void)
+{
+ int i;
+ long long r, ck;
+ int fail = 0;
+
+ /* Run the MMX tests */
+ for (i = 0; i < MMX_num_ops; i += 2)
+ {
+ test_pxor (&MMXops[i], &MMXops[i + 1], &r);
+ compute_correct_result (&MMXops[i], &MMXops[i + 1], &ck);
+ if (ck != r)
+ fail++;
+ }
+
+ if (fail != 0)
+ abort ();
+}