+2017-10-30 Steven Munroe <munroesj@gcc.gnu.org>
+
+ * sse2-check.h: New file.
+ * sse2-addpd-1.c: New file.
+ * sse2-addsd-1.c: New file.
+ * sse2-andnpd-1.c: New file.
+ * sse2-andpd-1.c: New file.
+ * sse2-cmppd-1.c: New file.
+ * sse2-cmpsd-1.c: New file.
+ * sse2-comisd-1.c: New file.
+ * sse2-comisd-2.c: New file.
+ * sse2-comisd-3.c: New file.
+ * sse2-comisd-4.c: New file.
+ * sse2-comisd-5.c: New file.
+ * sse2-comisd-6.c: New file.
+ * sse2-cvtdq2pd-1.c: New file.
+ * sse2-cvtdq2ps-1.c: New file.
+ * sse2-cvtpd2dq-1.c: New file.
+ * sse2-cvtpd2ps-1.c: New file.
+ * sse2-cvtps2dq-1.c: New file.
+ * sse2-cvtps2pd-1.c: New file.
+ * sse2-cvtsd2si-1.c: New file.
+ * sse2-cvtsd2si-2.c: New file.
+ * sse2-cvtsd2ss-1.c: New file.
+ * sse2-cvtsi2sd-1.c: New file.
+ * sse2-cvtsi2sd-2.c: New file.
+ * sse2-cvtss2sd-1.c: New file.
+ * sse2-cvttpd2dq-1.c: New file.
+ * sse2-cvttps2dq-1.c: New file.
+ * sse2-cvttsd2si-1.c: New file.
+ * sse2-cvttsd2si-2.c: New file.
+ * sse2-divpd-1.c: New file.
+ * sse2-divsd-1.c: New file.
+ * sse2-maxpd-1.c: New file.
+ * sse2-maxsd-1.c: New file.
+ * sse2-minpd-1.c: New file.
+ * sse2-minsd-1.c: New file.
+ * sse2-mmx.c: New file.
+ * sse2-movhpd-1.c: New file.
+ * sse2-movhpd-2.c: New file.
+ * sse2-movlpd-1.c: New file.
+ * sse2-movlpd-2.c: New file.
+ * sse2-movmskpd-1.c: New file.
+ * sse2-movq-1.c: New file.
+ * sse2-movq-2.c: New file.
+ * sse2-movq-3.c: New file.
+ * sse2-movsd-1.c: New file.
+ * sse2-movsd-2.c: New file.
+ * sse2-movsd-3.c: New file.
+ * sse2-mulpd-1.c: New file.
+ * sse2-mulsd-1.c: New file.
+ * sse2-orpd-1.c: New file.
+ * sse2-packssdw-1.c: New file.
+ * sse2-packsswb-1.c: New file.
+ * sse2-packuswb-1.c: New file.
+ * sse2-paddb-1.c: New file.
+ * sse2-paddd-1.c: New file.
+ * sse2-paddq-1.c: New file.
+ * sse2-paddsb-1.c: New file.
+ * sse2-paddsw-1.c: New file.
+ * sse2-paddusb-1.c: New file.
+ * sse2-paddusw-1.c: New file.
+ * sse2-paddw-1.c: New file.
+ * sse2-pavgb-1.c: New file.
+ * sse2-pavgw-1.c: New file.
+ * sse2-pcmpeqb-1.c: New file.
+ * sse2-pcmpeqd-1.c: New file.
+ * sse2-pcmpeqw-1.c: New file.
+ * sse2-pcmpgtb-1.c: New file.
+ * sse2-pcmpgtd-1.c: New file.
+ * sse2-pcmpgtw-1.c: New file.
+ * sse2-pextrw.c: New file.
+ * sse2-pinsrw.c: New file.
+ * sse2-pmaddwd-1.c: New file.
+ * sse2-pmaxsw-1.c: New file.
+ * sse2-pmaxub-1.c: New file.
+ * sse2-pminsw-1.c: New file.
+ * sse2-pminub-1.c: New file.
+ * sse2-pmovmskb-1.c: New file.
+ * sse2-pmulhuw-1.c: New file.
+ * sse2-pmulhw-1.c: New file.
+ * sse2-pmullw-1.c: New file.
+ * sse2-pmuludq-1.c: New file.
+ * sse2-psadbw-1.c: New file.
+ * sse2-pshufd-1.c: New file.
+ * sse2-pshufhw-1.c: New file.
+ * sse2-pshuflw-1.c: New file.
+ * sse2-pslld-1.c: New file.
+ * sse2-pslld-2.c: New file.
+ * sse2-pslldq-1.c: New file.
+ * sse2-psllq-1.c: New file.
+ * sse2-psllq-2.c: New file.
+ * sse2-psllw-1.c: New file.
+ * sse2-psllw-2.c: New file.
+ * sse2-psrad-1.c: New file.
+ * sse2-psrad-2.c: New file.
+ * sse2-psraw-1.c: New file.
+ * sse2-psraw-2.c: New file.
+ * sse2-psrld-1.c: New file.
+ * sse2-psrld-2.c: New file.
+ * sse2-psrldq-1.c: New file.
+ * sse2-psrlq-1.c: New file.
+ * sse2-psrlq-2.c: New file.
+ * sse2-psrlw-1.c: New file.
+ * sse2-psrlw-2.c: New file.
+ * sse2-psubb-1.c: New file.
+ * sse2-psubd-1.c: New file.
+
2017-10-30 Will Schmidt <will_schmidt@vnet.ibm.com>
* gcc.target/powerpc/fold-vec-perm-longlong.c: Update to use long long
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_addpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_add_pd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  __m128d sum;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  sum = _mm_add_pd (s1, s2);
+  return sum;
+}
+
+/* Add two constant vectors through test () and abort if
+   check_union128d returns nonzero against the per-lane scalar sums.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs, result;
+  double expected[2];
+  int lane;
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  for (lane = 0; lane < 2; lane++)
+    expected[lane] = lhs.a[lane] + rhs.a[lane];
+
+  result.x = test (lhs.x, rhs.x);
+
+  if (check_union128d (result, expected))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+
+#include <stdint.h>
+#include <stdio.h>
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_addsd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_add_sd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  __m128d sum;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  sum = _mm_add_sd (s1, s2);
+  return sum;
+}
+
+/* Check _mm_add_sd: element 0 is expected to be the sum of the two
+   element-0 inputs, element 1 a copy of the first operand's element 1.
+   On mismatch abort, or print the operands when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs, result;
+  double expected[2];
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  expected[1] = lhs.a[1];
+  expected[0] = lhs.a[0] + rhs.a[0];
+
+  result.x = test (lhs.x, rhs.x);
+
+  if (!check_union128d (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_addsd_1; check_union128d failed\n");
+  printf ("\t [%f,%f] + [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1],
+	  rhs.a[0], rhs.a[1], result.a[0], result.a[1]);
+  printf ("\t expect [%f,%f]\n", expected[0], expected[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_andnpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_andnot_pd.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  __m128d masked = _mm_andnot_pd (s1, s2);
+
+  return masked;
+}
+
+/* Check _mm_andnot_pd on two 64-bit integer patterns loaded as
+   doubles; the expected bits are (~first) & second per element.  */
+static void
+TEST (void)
+{
+  union128d result, lhs, rhs;
+  long long bits1[2]={34545, 95567};
+  long long bits2[2]={674, 57897};
+  long long want[2];
+  int i;
+
+  lhs.x = _mm_loadu_pd ((double *)bits1);
+  rhs.x = _mm_loadu_pd ((double *)bits2);
+
+  for (i = 0; i < 2; i++)
+    want[i] = ~bits1[i] & bits2[i];
+
+  result.x = test (lhs.x, rhs.x);
+
+  if (check_union128d (result, (double *)want))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_andpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_and_pd.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  __m128d masked = _mm_and_pd (s1, s2);
+
+  return masked;
+}
+
+/* Check _mm_and_pd: store both inputs to memory, AND their 64-bit
+   images element by element, and compare that with the intrinsic
+   result.  */
+static void
+TEST (void)
+{
+  union128d result, lhs, rhs;
+  union
+  {
+    double d[2];
+    long long ll[2];
+  } bits1, bits2, want;
+  int i;
+
+  lhs.x = _mm_set_pd (34545, 95567);
+  rhs.x = _mm_set_pd (674, 57897);
+
+  _mm_storeu_pd (bits1.d, lhs.x);
+  _mm_storeu_pd (bits2.d, rhs.x);
+
+  for (i = 0; i < 2; i++)
+    want.ll[i] = bits1.ll[i] & bits2.ll[i];
+
+  result.x = test (lhs.x, rhs.x);
+
+  if (check_union128d (result, want.d))
+    abort ();
+}
--- /dev/null
+#include <stdlib.h>
+
+/* Define this to enable the combination of VSX vector double and
+ SSE2 data types. */
+#define __VSX_SSE2__ 1
+
+#include "m128-check.h"
+
+/* Define DEBUG to replace abort with printf on error.  */
+//#define DEBUG 1
+
+#if 1
+
+#define TEST sse2_test
+
+static void sse2_test (void);
+
+/* Non-inlined trampoline that runs the test body sse2_test.  */
+static void __attribute__ ((noinline))
+do_test (void)
+{
+  sse2_test ();
+}
+
+/* Entry point.  When the compiler defines __BUILTIN_CPU_SUPPORTS__ and
+   the CPU reports "arch_2_07", run the test; otherwise do nothing.
+   With DEBUG defined, print PASSED or SKIPPED.  Always returns 0.  */
+int
+main (void)
+{
+#ifdef __BUILTIN_CPU_SUPPORTS__
+  /* Most SSE2 (vector double) intrinsic operations require VSX
+     instructions, but some operations may need only VMX
+     instructions.  This is also true for SSE2 scalar doubles as they
+     imply that the "other half" of the vector remains unchanged or is
+     set to zeros.  The VSX scalar operations leave the "other half"
+     undefined, and require additional merge operations.
+     Some conversions (to/from integer) need the direct register
+     transfer instructions from POWER8 for best performance.
+     So we test for arch_2_07.  */
+  if ( __builtin_cpu_supports ("arch_2_07") )
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+    }
+#ifdef DEBUG
+  else
+    printf ("SKIPPED\n");
+#endif
+#endif /* __BUILTIN_CPU_SUPPORTS__ */
+  return 0;
+}
+#endif
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cmp_pd_1
+#endif
+
+#include <emmintrin.h>
+#include <math.h>
+
+/* Operands for the packed compares.  */
+double ps1[] = {2134.3343, 6678.346};
+double ps2[] = {41124.234, 6678.346};
+/* PDD is loaded into the destination before each compare; PD receives
+   the stored compare result.  */
+long long pdd[] = {1, 2};
+long long pd[2];
+/* Expected result, accessible as 64-bit integers or as doubles.  */
+union{long long l[2]; double d[2];} pe;
+
+void pd_check(char *id, __m128d dst)
+{
+ __v2di dest = (__v2di)dst;
+
+ if(checkVl(pd, pe.l, 2))
+ {
+ printf("mm_cmp%s_pd FAILED\n", id);
+ printf("dst [%lld, %lld], e.l[%lld, %lld]\n",
+ dest[0], dest[1], pe.l[0], pe.l[1]);
+ }
+}
+
+/* Run _mm_cmp<CMP>_pd on PS1/PS2 and check it.  REL0 and REL1 are the
+   scalar predicates expected for elements 0 and 1; a true predicate
+   maps to an all-ones mask (-1), a false one to 0.  Wrapped in
+   do/while (0) so that "CMP (...);" is a single statement.  */
+#define CMP(cmp, rel0, rel1)					\
+  do								\
+    {								\
+      pe.l[0] = (rel0) ? -1 : 0;				\
+      pe.l[1] = (rel1) ? -1 : 0;				\
+      dest = _mm_loadu_pd((double*)pdd);			\
+      source1 = _mm_loadu_pd(ps1);				\
+      source2 = _mm_loadu_pd(ps2);				\
+      dest = _mm_cmp##cmp##_pd(source1, source2);		\
+      _mm_storeu_pd((double*) pd, dest);			\
+      pd_check("" #cmp "", dest);				\
+    }								\
+  while (0)
+
+/* Exercise each packed-double compare intrinsic on PS1/PS2.  Every CMP
+   line pairs an intrinsic suffix with the scalar predicate expected for
+   elements 0 and 1.  The ordered compares (eq, lt, le, ge, gt, ord) are
+   modelled as false for unordered operands; the negated compares (neq,
+   nlt, nle, nge, ngt) and unord are modelled as true for them.  */
+static void
+TEST (void)
+{
+  __m128d source1, source2, dest;
+
+  CMP(eq, !isunordered(ps1[0], ps2[0]) && ps1[0] == ps2[0],
+      !isunordered(ps1[1], ps2[1]) && ps1[1] == ps2[1]);
+  CMP(lt, !isunordered(ps1[0], ps2[0]) && ps1[0] < ps2[0],
+      !isunordered(ps1[1], ps2[1]) && ps1[1] < ps2[1]);
+  CMP(le, !isunordered(ps1[0], ps2[0]) && ps1[0] <= ps2[0],
+      !isunordered(ps1[1], ps2[1]) && ps1[1] <= ps2[1]);
+  CMP(unord, isunordered(ps1[0], ps2[0]),
+      isunordered(ps1[1], ps2[1]));
+  CMP(neq, isunordered(ps1[0], ps2[0]) || ps1[0] != ps2[0],
+      isunordered(ps1[1], ps2[1]) || ps1[1] != ps2[1]);
+  CMP(nlt, isunordered(ps1[0], ps2[0]) || ps1[0] >= ps2[0],
+      isunordered(ps1[1], ps2[1]) || ps1[1] >= ps2[1]);
+  CMP(nle, isunordered(ps1[0], ps2[0]) || ps1[0] > ps2[0],
+      isunordered(ps1[1], ps2[1]) || ps1[1] > ps2[1]);
+  CMP(ord, !isunordered(ps1[0], ps2[0]),
+      !isunordered(ps1[1], ps2[1]));
+
+  /* ge/gt are ordered compares; nge/ngt are their negations.  */
+  CMP(ge, !isunordered(ps1[0], ps2[0]) && ps1[0] >= ps2[0],
+      !isunordered(ps1[1], ps2[1]) && ps1[1] >= ps2[1]);
+  CMP(gt, !isunordered(ps1[0], ps2[0]) && ps1[0] > ps2[0],
+      !isunordered(ps1[1], ps2[1]) && ps1[1] > ps2[1]);
+  CMP(nge, isunordered(ps1[0], ps2[0]) || ps1[0] < ps2[0],
+      isunordered(ps1[1], ps2[1]) || ps1[1] < ps2[1]);
+  CMP(ngt, isunordered(ps1[0], ps2[0]) || ps1[0] <= ps2[0],
+      isunordered(ps1[1], ps2[1]) || ps1[1] <= ps2[1]);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cmp_sd_1
+#endif
+
+#include <emmintrin.h>
+#include <math.h>
+
+/* Operands for the scalar compares.  */
+double s1[] = {2134.3343, 6678.346};
+double s2[] = {41124.234, 6678.346};
+/* DD is loaded into the destination before each compare; D receives
+   the stored compare result.  */
+long long dd[] = {1, 2};
+long long d[2];
+/* Expected result, accessible as 64-bit integers or as doubles.  */
+union{long long l[2]; double d[2];} e;
+
+void check(char *id, __m128d dst)
+{
+ __v2di dest = (__v2di)dst;
+
+ if(checkVl(d, e.l, 2))
+ {
+ printf("mm_cmp%s_sd FAILED\n", id);
+ printf("dst [%lld, %lld], e.l[%lld]\n",
+ dest[0], dest[1], e.l[0]);
+ }
+}
+
+/* Run _mm_cmp<CMP>_sd on S1/S2 and check it.  REL is the scalar
+   predicate expected for element 0; true maps to an all-ones mask
+   (-1), false to 0.  Wrapped in do/while (0) so that "CMP (...);" is
+   a single statement.  */
+#define CMP(cmp, rel)						\
+  do								\
+    {								\
+      e.l[0] = (rel) ? -1 : 0;					\
+      dest = _mm_loadu_pd((double*)dd);				\
+      source1 = _mm_loadu_pd(s1);				\
+      source2 = _mm_loadu_pd(s2);				\
+      dest = _mm_cmp##cmp##_sd(source1, source2);		\
+      _mm_storeu_pd((double*) d, dest);				\
+      check("" #cmp "", dest);					\
+    }								\
+  while (0)
+
+/* Exercise each scalar-double compare intrinsic on S1/S2.  Element 1
+   of the expected value is set once to S1[1]; each CMP line supplies
+   the predicate expected for element 0.  The ordered compares (eq, lt,
+   le, ge, gt, ord) are modelled as false for unordered operands; the
+   negated compares (neq, nlt, nle, nge, ngt) and unord as true.  */
+static void
+TEST (void)
+{
+  __m128d source1, source2, dest;
+
+  e.d[1] = s1[1];
+
+  CMP(eq, !isunordered(s1[0], s2[0]) && s1[0] == s2[0]);
+  CMP(lt, !isunordered(s1[0], s2[0]) && s1[0] < s2[0]);
+  CMP(le, !isunordered(s1[0], s2[0]) && s1[0] <= s2[0]);
+  CMP(unord, isunordered(s1[0], s2[0]));
+  CMP(neq, isunordered(s1[0], s2[0]) || s1[0] != s2[0]);
+  CMP(nlt, isunordered(s1[0], s2[0]) || s1[0] >= s2[0]);
+  CMP(nle, isunordered(s1[0], s2[0]) || s1[0] > s2[0]);
+  CMP(ord, !isunordered(s1[0], s2[0]));
+
+  /* ge/gt are ordered compares; nge/ngt are their negations.  */
+  CMP(ge, !isunordered(s1[0], s2[0]) && s1[0] >= s2[0]);
+  CMP(gt, !isunordered(s1[0], s2[0]) && s1[0] > s2[0]);
+  CMP(nge, isunordered(s1[0], s2[0]) || s1[0] < s2[0]);
+  CMP(ngt, isunordered(s1[0], s2[0]) || s1[0] <= s2[0]);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_comi_sd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_comieq_sd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  int flag;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  flag = _mm_comieq_sd (s1, s2);
+  return flag;
+}
+
+/* Check _mm_comieq_sd against the C "==" of the element-0 values.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,2344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  want[0] = (lhs.a[0] == rhs.a[0]);
+  got[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (got, want, 1))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_comi_sd_2
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_comilt_sd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  int flag;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  flag = _mm_comilt_sd (s1, s2);
+  return flag;
+}
+
+/* Check _mm_comilt_sd against the C "<" of the element-0 values.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,2344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  want[0] = (lhs.a[0] < rhs.a[0]);
+  got[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (got, want, 1))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_comi_sd_3
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_comile_sd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  int flag;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  flag = _mm_comile_sd (s1, s2);
+  return flag;
+}
+
+/* Check _mm_comile_sd against the C "<=" of the element-0 values.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,2344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  want[0] = (lhs.a[0] <= rhs.a[0]);
+  got[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (got, want, 1))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_comi_sd_4
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_comigt_sd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  int flag;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  flag = _mm_comigt_sd (s1, s2);
+  return flag;
+}
+
+/* Check _mm_comigt_sd against the C ">" of the element-0 values.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,12344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  want[0] = (lhs.a[0] > rhs.a[0]);
+  got[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (got, want, 1))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_comi_sd_5
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_comige_sd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  int flag;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  flag = _mm_comige_sd (s1, s2);
+  return flag;
+}
+
+/* Check _mm_comige_sd against the C ">=" of the element-0 values.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,2344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  want[0] = (lhs.a[0] >= rhs.a[0]);
+  got[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (got, want, 1))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_comi_sd_6
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_comineq_sd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  int flag;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  flag = _mm_comineq_sd (s1, s2);
+  return flag;
+}
+
+/* Check _mm_comineq_sd against the C "!=" of the element-0 values.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,2344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  want[0] = (lhs.a[0] != rhs.a[0]);
+  got[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (got, want, 1))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtepi32_pd
+#endif
+
+#include <emmintrin.h>
+#ifdef _ARCH_PWR8
+/* Out-of-line wrapper around _mm_cvtepi32_pd; only built when
+   _ARCH_PWR8 is defined.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128i p)
+{
+  __m128d widened = _mm_cvtepi32_pd (p);
+
+  return widened;
+}
+#endif
+
+/* Check _mm_cvtepi32_pd: the two result doubles are expected to equal
+   ints 0 and 1 of the source converted with a C cast.  The body is
+   empty unless _ARCH_PWR8 is defined.  On mismatch abort, or print the
+   values when DEBUG is set.  */
+static void
+TEST (void)
+{
+#ifdef _ARCH_PWR8
+  union128i_d src;
+  union128d result;
+  double expected[2];
+  int lane;
+
+  src.x = _mm_set_epi32 (123, 321, 456, 987);
+
+  for (lane = 0; lane < 2; lane++)
+    expected[lane] = (double)src.a[lane];
+
+  result.x = test (src.x);
+
+  if (!check_union128d (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtepi32_pd; check_union128d failed\n");
+  printf ("\t [%d,%d, %d, %d] -> [%f,%f]\n",
+	  src.a[0], src.a[1], src.a[2], src.a[3],
+	  result.a[0], result.a[1]);
+  printf ("\t expect [%f,%f]\n",
+	  expected[0], expected[1]);
+#else
+  abort ();
+#endif
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtepi32_ps
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtepi32_ps.  */
+static __m128
+__attribute__((noinline, unused))
+test (__m128i p)
+{
+  __m128 converted = _mm_cvtepi32_ps (p);
+
+  return converted;
+}
+
+/* Check _mm_cvtepi32_ps: each result float is expected to equal the
+   matching source int converted with a C cast.  */
+static void
+TEST (void)
+{
+  union128i_d src;
+  union128 result;
+  float expected[4];
+  int lane;
+
+  src.x = _mm_set_epi32 (123, 321, 456, 987);
+
+  for (lane = 0; lane < 4; lane++)
+    expected[lane] = (float)src.a[lane];
+
+  result.x = test (src.x);
+
+  if (check_union128 (result, expected))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtpd_epi32
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtpd_epi32.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+  __m128i converted = _mm_cvtpd_epi32 (p);
+
+  return converted;
+}
+
+/* Check _mm_cvtpd_epi32: ints 0 and 1 are modelled as the source
+   doubles plus 0.5 truncated to int; ints 2 and 3 are expected to be
+   0.  On mismatch abort, or print the values when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128d src;
+  union128i_d result;
+  int expected[4] = {0};
+  int lane;
+
+  src.x = _mm_set_pd (2.78, 7777768.82);
+
+  for (lane = 0; lane < 2; lane++)
+    expected[lane] = (int)(src.a[lane] + 0.5);
+
+  result.x = test (src.x);
+
+  if (!check_union128i_d (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtpd_epi32; check_union128i_d failed\n");
+  printf ("\t [%f,%f] -> [%d,%d,%d,%d]\n", src.a[0], src.a[1],
+	  result.a[0], result.a[1], result.a[2], result.a[3]);
+  printf ("\t expect [%d,%d,%d,%d]\n", expected[0], expected[1],
+	  expected[2], expected[3]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtpd_ps
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtpd_ps.  */
+static __m128
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+  __m128 narrowed = _mm_cvtpd_ps (p);
+
+  return narrowed;
+}
+
+/* Check _mm_cvtpd_ps: floats 0 and 1 are expected to equal the source
+   doubles cast to float; floats 2 and 3 are expected to be 0.  On
+   mismatch abort, or print the values when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128d src;
+  union128 result;
+  float expected[4] = { 0.0 };
+  int lane;
+
+  src.x = _mm_set_pd (123.321, 456.987);
+
+  for (lane = 0; lane < 2; lane++)
+    expected[lane] = (float)src.a[lane];
+
+  result.x = test (src.x);
+
+  if (!check_union128 (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtpd_ps; check_union128 failed\n");
+  printf ("\t [%f,%f] -> [%f,%f,%f,%f]\n", src.a[0], src.a[1],
+	  result.a[0], result.a[1], result.a[2], result.a[3]);
+  printf ("\t expect [%f,%f,%f,%f]\n", expected[0], expected[1],
+	  expected[2], expected[3]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtps2dq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtps_epi32.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128 p)
+{
+  __m128i converted = _mm_cvtps_epi32 (p);
+
+  return converted;
+}
+
+/* Check _mm_cvtps_epi32: each result int is modelled as the source
+   float plus 0.5 truncated to int.  On mismatch abort, or print the
+   values when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128 src;
+  union128i_d result;
+  int expected[4] = {0};
+  int lane;
+
+  src.x = _mm_set_ps (2.78, 7777768.82, 2.331, 3.456);
+
+  for (lane = 0; lane < 4; lane++)
+    expected[lane] = (int)(src.a[lane] + 0.5);
+
+  result.x = test (src.x);
+
+  if (!check_union128i_d (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtps2dq_1; check_union128i_d failed\n");
+  printf ("\t [%f,%f,%f,%f] -> [%d,%d,%d,%d]\n", src.a[0], src.a[1],
+	  src.a[2], src.a[3], result.a[0], result.a[1], result.a[2],
+	  result.a[3]);
+  printf ("\t expect [%d,%d,%d,%d]\n", expected[0], expected[1],
+	  expected[2], expected[3]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtps2pd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtps_pd.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128 p)
+{
+  __m128d widened = _mm_cvtps_pd (p);
+
+  return widened;
+}
+
+/* Check _mm_cvtps_pd: the two result doubles are expected to equal
+   floats 0 and 1 of the source cast to double.  On mismatch abort, or
+   print the values when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128 src;
+  union128d result;
+  double expected[2];
+  int lane;
+
+  src.x = _mm_set_ps (2.78, 7777768.82, 2.331, 3.456);
+
+  for (lane = 0; lane < 2; lane++)
+    expected[lane] = (double)src.a[lane];
+
+  result.x = test (src.x);
+
+  if (!check_union128d (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtps2pd_1; check_union128d failed\n");
+  printf ("\t cvt\t [%f,%f,%f,%f] -> [%f,%f]\n", src.a[0], src.a[1],
+	  src.a[2], src.a[3], result.a[0], result.a[1]);
+  printf ("\t expect\t [%f,%f]\n", expected[0], expected[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtsd2si_1
+#endif
+
+#include <emmintrin.h>
+
+
+/* Out-of-line wrapper around _mm_cvtsd_si32.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+  int converted = _mm_cvtsd_si32 (p);
+
+  return converted;
+}
+
+/* Check _mm_cvtsd_si32: the result is modelled as element 0 plus 0.5
+   truncated to int.  On mismatch abort, or print the values when DEBUG
+   is set.  */
+static void
+TEST (void)
+{
+  union128d src;
+  int want;
+  int got;
+
+  src.x = _mm_set_pd (123.321, 456.987);
+
+  want = (int)(src.a[0] + 0.5);
+  got = test (src.x);
+
+  if (got == want)
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtsd2si_1; failed\n");
+  printf ("\t [%f,%f] -> [%d]\n", src.a[0], src.a[1], got);
+  printf ("\t expect [%d]\n", want);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtsd2si_2
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtsd_si64.  */
+static long long
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+  long long converted = _mm_cvtsd_si64 (p);
+
+  return converted;
+}
+
+/* Check _mm_cvtsd_si64: the result is modelled as element 0 plus 0.5
+   truncated to long long.  On mismatch abort, or print the values when
+   DEBUG is set.  The diagnostics use %lld because D and E are
+   long long.  */
+static void
+TEST (void)
+{
+  union128d s;
+  long long e;
+  long long d;
+
+  s.x = _mm_set_pd (829496729501.4, 429496729501.4);
+
+  d = test (s.x);
+
+  e = (long long)(s.a[0] + 0.5);
+
+  if (d != e)
+#if DEBUG
+    {
+      printf ("sse2_test_cvtsd2si_2; failed\n");
+      printf ("\t [%f,%f] -> [%lld]\n", s.a[0], s.a[1], d);
+      printf ("\t expect [%lld]\n", e);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtsd2ss_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtsd_ss.  */
+static __m128
+__attribute__((noinline, unused))
+test (__m128 p1, __m128d p2)
+{
+  __m128 merged = _mm_cvtsd_ss (p1, p2);
+
+  return merged;
+}
+
+/* Check _mm_cvtsd_ss: float 0 is expected to be the first source
+   double cast to float; floats 1-3 are expected to keep the values
+   loaded into the float operand.  On mismatch abort, or print the
+   values when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128d dsrc;
+  union128 result, fsrc;
+  double dvals[2] = {123.345, 67.3321};
+  float expected[4] = {5633.098, 93.21, 3.34, 4555.2};
+
+  dsrc.x = _mm_loadu_pd (dvals);
+  /* EXPECTED doubles as the initial contents of the float operand.  */
+  fsrc.x = _mm_loadu_ps (expected);
+
+  __asm("" : "+v"(dsrc.x), "+v"(fsrc.x));
+  result.x = test(fsrc.x, dsrc.x);
+
+  expected[0] = (float)dvals[0];
+
+  if (!check_union128(result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtsd2ss_1; check_union128 failed\n");
+  printf ("\t [%f,%f,%f,%f],[%f,%f]\n", fsrc.a[0], fsrc.a[1], fsrc.a[2],
+	  fsrc.a[3], dsrc.a[0], dsrc.a[1]);
+  printf ("\t -> \t[%f,%f,%f,%f]\n", result.a[0], result.a[1],
+	  result.a[2], result.a[3]);
+  printf ("\texpect\t[%f,%f,%f,%f]\n", expected[0], expected[1],
+	  expected[2], expected[3]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtsi2sd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtsi32_sd.  The empty asm lists P as
+   a read-write vector register and B as a read-write general register,
+   presumably so the constant inputs are not folded at compile time.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d p, int b)
+{
+  __m128d merged;
+
+  __asm("" : "+v"(p), "+r"(b));
+  merged = _mm_cvtsi32_sd (p, b);
+  return merged;
+}
+
+/* Check _mm_cvtsi32_sd: element 0 is expected to be the int cast to
+   double, element 1 a copy of the vector operand's element 1.  */
+static void
+TEST (void)
+{
+  union128d result, src;
+  int value = 128;
+  double expected[2];
+
+  src.x = _mm_set_pd (123.321, 456.987);
+
+  expected[1] = src.a[1];
+  expected[0] = (double)value;
+
+  result.x = test (src.x, value);
+
+  if (check_union128d (result, expected))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtsi2sd_2
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtsi64_sd.  The empty asm lists P as
+   a read-write vector register and B as a read-write general register,
+   presumably so the constant inputs are not folded at compile time.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d p, long long b)
+{
+  __m128d merged;
+
+  __asm("" : "+v"(p), "+r"(b));
+  merged = _mm_cvtsi64_sd (p, b);
+  return merged;
+}
+
+/* Check _mm_cvtsi64_sd: element 0 is expected to be the long long cast
+   to double, element 1 a copy of the vector operand's element 1.  */
+static void
+TEST (void)
+{
+  union128d result, src;
+  long long value = 42949672951333LL;
+  double expected[2];
+
+  src.x = _mm_set_pd (123.321, 456.987);
+
+  expected[1] = src.a[1];
+  expected[0] = (double)value;
+
+  result.x = test (src.x, value);
+
+  if (check_union128d (result, expected))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvtss2sd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvtss_sd.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d a, __m128 b)
+{
+  __m128d merged = _mm_cvtss_sd (a, b);
+
+  return merged;
+}
+
+/* Check _mm_cvtss_sd: element 0 is expected to be float 0 of the
+   second operand cast to double, element 1 a copy of the first
+   operand's element 1.  On mismatch abort, or print the values when
+   DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128d result, dsrc;
+  union128 fsrc;
+  double expected[2];
+
+  dsrc.x = _mm_set_pd (123.321, 456.987);
+  fsrc.x = _mm_set_ps (123.321, 456.987, 666.45, 231.987);
+
+  expected[1] = dsrc.a[1];
+  expected[0] = (double)fsrc.a[0];
+
+  result.x = test (dsrc.x, fsrc.x);
+
+  if (!check_union128d (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvtss2sd_1; check_union128d failed\n");
+  printf ("\t [%f,%f], [%f,%f,%f,%f]\n", dsrc.a[0], dsrc.a[1], fsrc.a[0],
+	  fsrc.a[1], fsrc.a[2], fsrc.a[3]);
+  printf ("\t -> \t[%f,%f]\n", result.a[0], result.a[1]);
+  printf ("\texpect\t[%f,%f]\n", expected[0], expected[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvttpd_epi32
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvttpd_epi32.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+  __m128i truncated = _mm_cvttpd_epi32 (p);
+
+  return truncated;
+}
+
+/* Check _mm_cvttpd_epi32: ints 0 and 1 are expected to equal the
+   source doubles cast to int; ints 2 and 3 are expected to be 0.  On
+   mismatch abort, or print the values when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128d src;
+  union128i_d result;
+  int expected[4] = {0};
+  int lane;
+
+  src.x = _mm_set_pd (123.321, 456.987);
+
+  for (lane = 0; lane < 2; lane++)
+    expected[lane] = (int)src.a[lane];
+
+  result.x = test (src.x);
+
+  if (!check_union128i_d (result, expected))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvttpd_epi32; check_union128i_d failed\n");
+  printf ("\t [%f,%f] -> [%d,%d,%d,%d]\n", src.a[0], src.a[1],
+	  result.a[0], result.a[1], result.a[2], result.a[3]);
+  printf ("\t expect [%d,%d,%d,%d]\n", expected[0], expected[1],
+	  expected[2], expected[3]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvttps2dq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvttps_epi32.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128 p)
+{
+  __m128i truncated = _mm_cvttps_epi32 (p);
+
+  return truncated;
+}
+
+/* Check _mm_cvttps_epi32: each result int is expected to equal the
+   matching source float cast to int.  */
+static void
+TEST (void)
+{
+  union128 src;
+  union128i_d result;
+  int expected[4] = {0};
+  int lane;
+
+  src.x = _mm_set_ps (123.321, 456.987, 33.56, 7765.321);
+
+  for (lane = 0; lane < 4; lane++)
+    expected[lane] = (int)src.a[lane];
+
+  result.x = test (src.x);
+
+  if (check_union128i_d (result, expected))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvttsd2si_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvttsd_si32.  The empty asm lists P
+   as a read-write vector register, presumably so the constant input is
+   not folded at compile time.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+  int truncated;
+
+  __asm("" : "+v"(p));
+  truncated = _mm_cvttsd_si32 (p);
+  return truncated;
+}
+
+/* Check _mm_cvttsd_si32 against element 0 cast to int.  On mismatch
+   abort, or print the values when DEBUG is set.  */
+static void
+TEST (void)
+{
+  union128d src;
+  int want;
+  int got;
+
+  src.x = _mm_set_pd (123.321, 456.987);
+
+  want = (int)(src.a[0]);
+  got = test (src.x);
+
+  if (got == want)
+    return;
+
+#if DEBUG
+  printf ("sse2_test_cvttsd2si_1; failed\n");
+  printf ("\t [%f,%f] -> [%d]\n", src.a[0], src.a[1], got);
+  printf ("\t expect [%d]\n", want);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_cvttsd2si_2
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_cvttsd_si64.  The empty asm lists P
+   as a read-write vector register, presumably so the constant input is
+   not folded at compile time.  */
+static long long
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+  long long truncated;
+
+  __asm("" : "+v"(p));
+  truncated = _mm_cvttsd_si64 (p);
+  return truncated;
+}
+
+/* Check _mm_cvttsd_si64 against element 0 cast to long long.  */
+static void
+TEST (void)
+{
+  union128d src;
+  long long want;
+  long long got;
+
+  src.x = _mm_set_pd (123.321, 42949672339501.4);
+
+  want = (long long)(src.a[0]);
+  got = test (src.x);
+
+  if (got != want)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_divpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line wrapper around _mm_div_pd.  The empty asm lists both
+   operands as read-write vector registers, presumably so the constant
+   inputs are not folded at compile time.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  __m128d quotient;
+
+  __asm("" : "+v"(s1), "+v"(s2));
+  quotient = _mm_div_pd (s1, s2);
+  return quotient;
+}
+
+/* Check _mm_div_pd: both result elements are expected to equal the
+   scalar quotients of the matching inputs.  On mismatch abort, or
+   print the operands when DEBUG is set; the diagnostic shows the
+   operation as "/" to match what is being tested.  */
+static void
+TEST (void)
+{
+  union128d u, s1, s2;
+  double e[2];
+
+  s1.x = _mm_set_pd (2134.3343,1234.635654);
+  s2.x = _mm_set_pd (41124.234,2344.2354);
+  u.x = test (s1.x, s2.x);
+
+  e[0] = s1.a[0] / s2.a[0];
+  e[1] = s1.a[1] / s2.a[1];
+
+  if (check_union128d (u, e))
+#if DEBUG
+    {
+      printf ("sse2_test_divpd_1; check_union128d failed\n");
+      printf ("\t [%f,%f] / [%f,%f] -> [%f,%f]\n", s1.a[0], s1.a[1], s2.a[0],
+	      s2.a[1], u.a[0], u.a[1]);
+      printf ("\t expect [%f,%f]\n", e[0], e[1]);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_divsd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_div_sd (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_div_sd (s1, s2);
+}
+
+/* Check _mm_div_sd (through test): element 0 must be the quotient of the
+   element-0 inputs and element 1 must be element 1 of the first operand.
+   With DEBUG set a mismatch is reported with printf; otherwise it
+   aborts.  */
+static void
+TEST (void)
+{
+  union128d res, lhs, rhs;
+  double expect[2];
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+  res.x = test (lhs.x, rhs.x);
+
+  expect[1] = lhs.a[1];
+  expect[0] = lhs.a[0] / rhs.a[0];
+
+  if (!check_union128d (res, expect))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_divsd_1; check_union128d failed\n");
+  printf ("\t [%f,%f] / [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1], rhs.a[0],
+          rhs.a[1], res.a[0], res.a[1]);
+  printf ("\t expect [%f,%f]\n", expect[0], expect[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_maxpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_max_pd (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_max_pd (s1, s2);
+}
+
+/* Check _mm_max_pd (through test) against an element-wise C maximum of
+   the same operands; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128d res, lhs, rhs;
+  double expect[2];
+  int k;
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+  res.x = test (lhs.x, rhs.x);
+
+  for (k = 0; k < 2; k++)
+    {
+      if (lhs.a[k] > rhs.a[k])
+        expect[k] = lhs.a[k];
+      else
+        expect[k] = rhs.a[k];
+    }
+
+  if (check_union128d (res, expect))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_maxsd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_max_sd (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_max_sd (s1, s2);
+}
+
+/* Check _mm_max_sd (through test): element 0 must be the larger of the
+   element-0 inputs and element 1 must be element 1 of the first operand.
+   With DEBUG set a mismatch is reported with printf; otherwise it
+   aborts.  */
+static void
+TEST (void)
+{
+  union128d u, s1, s2;
+  double e[2];
+
+  s1.x = _mm_set_pd (2134.3343,1234.635654);
+  s2.x = _mm_set_pd (41124.234,2344.2354);
+  u.x = test (s1.x, s2.x);
+
+  e[0] = s1.a[0] > s2.a[0] ? s1.a[0]:s2.a[0];
+  e[1] = s1.a[1];
+
+  if (check_union128d (u, e))
+#if DEBUG
+    {
+      /* Report this test's own name (TEST defaults to sse2_test_maxsd_1)
+         and the operation actually performed.  */
+      printf ("sse2_test_maxsd_1; check_union128d failed\n");
+      printf ("\t [%f,%f] max [%f,%f] -> [%f,%f]\n", s1.a[0], s1.a[1],
+              s2.a[0], s2.a[1], u.a[0], u.a[1]);
+      printf ("\t expect [%f,%f]\n", e[0], e[1]);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_minpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_min_pd (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_min_pd (s1, s2);
+}
+
+/* Check _mm_min_pd (through test) against an element-wise C minimum of
+   the same operands; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128d res, lhs, rhs;
+  double expect[2];
+  int k;
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+  res.x = test (lhs.x, rhs.x);
+
+  for (k = 0; k < 2; k++)
+    {
+      if (lhs.a[k] < rhs.a[k])
+        expect[k] = lhs.a[k];
+      else
+        expect[k] = rhs.a[k];
+    }
+
+  if (check_union128d (res, expect))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_minsd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_min_sd (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_min_sd (s1, s2);
+}
+
+/* Check _mm_min_sd (through test): element 0 must be the smaller of the
+   element-0 inputs and element 1 must be element 1 of the first operand.
+   With DEBUG set a mismatch is reported with printf; otherwise it
+   aborts.  */
+static void
+TEST (void)
+{
+  union128d u, s1, s2;
+  double e[2];
+
+  s1.x = _mm_set_pd (2134.3343,1234.635654);
+  s2.x = _mm_set_pd (41124.234,2344.2354);
+  u.x = test (s1.x, s2.x);
+
+  e[0] = s1.a[0] < s2.a[0] ? s1.a[0]:s2.a[0];
+  e[1] = s1.a[1];
+
+  if (check_union128d (u, e))
+#if DEBUG
+    {
+      /* Report this test's own name (TEST defaults to sse2_test_minsd_1)
+         and the operation actually performed.  */
+      printf ("sse2_test_minsd_1; check_union128d failed\n");
+      printf ("\t [%f,%f] min [%f,%f] -> [%f,%f]\n", s1.a[0], s1.a[1],
+              s2.a[0], s2.a[1], u.a[0], u.a[1]);
+      printf ("\t expect [%f,%f]\n", e[0], e[1]);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#include "sse2-check.h"
+
+#ifndef TEST
+#define TEST sse2_test_mmx_1
+#endif
+
+#include <mmintrin.h>
+
+#define N 4
+
+/* Operand arrays and output array handed to unsigned_add3 by TEST, N
+   64-bit words each.  */
+unsigned long long a[N], b[N], result[N];
+
+/* Values TEST requires in result[] after the addition.  */
+unsigned long long check_data[N] =
+ { 0x101010101010100full,
+ 0x1010101010101010ull,
+ 0x1010101010101010ull,
+ 0x1010101010101010ull };
+
+/* Add the COUNT-element arrays A and B as one multi-word unsigned number,
+   starting at element 0 and feeding the carry of each 64-bit addition
+   into the next element.  Each sum word is stored in RESULT.  Returns the
+   carry left over after the last element.  */
+__m64
+unsigned_add3 (const __m64 * a, const __m64 * b,
+ __m64 * result, unsigned int count)
+{
+ __m64 _a, _b, one, sum, carry, onesCarry;
+
+ unsigned int i;
+
+ carry = _mm_setzero_si64 ();
+
+ /* Build the constant 1: comparing a value with itself yields all ones
+    (-1), and 0 - (-1) is 1.  */
+ one = _mm_cmpeq_pi8 (carry, carry);
+ one = _mm_sub_si64 (carry, one);
+
+ for (i = 0; i < count; i++)
+ {
+ _a = a[i];
+ _b = b[i];
+
+ sum = _mm_add_si64 (_a, _b);
+ sum = _mm_add_si64 (sum, carry);
+
+ result[i] = sum;
+
+ /* Carry out of bit 0 of _a + _b + carry: the majority of the three
+    low bits, masked down to a single bit.  */
+ onesCarry = _mm_and_si64 (_mm_xor_si64 (_a, _b), carry);
+ onesCarry = _mm_or_si64 (_mm_and_si64 (_a, _b), onesCarry);
+ onesCarry = _mm_and_si64 (onesCarry, one);
+
+ _a = _mm_srli_si64 (_a, 1);
+ _b = _mm_srli_si64 (_b, 1);
+
+ /* Adding the halved operands plus the bit-0 carry leaves the carry out
+    of the full 64-bit addition in bit 63; shift it down to bit 0.  */
+ carry = _mm_add_si64 (_mm_add_si64 (_a, _b), onesCarry);
+ carry = _mm_srli_si64 (carry, 63);
+ }
+
+ return carry;
+}
+
+/* Fill a[] and b[] with fixed patterns, add them with unsigned_add3 and
+   abort unless the returned carry is 1 and result[] equals check_data[].  */
+void __attribute__((noinline))
+TEST (void)
+{
+  unsigned long long carry_out;
+  int idx;
+
+  /* Really long numbers.  */
+  a[0] = 0xd3d3d3d3d3d3d3d3ull;
+  a[1] = a[0];
+  a[2] = a[0];
+  a[3] = a[0];
+  b[0] = 0x3c3c3c3c3c3c3c3cull;
+  b[1] = b[0];
+  b[2] = b[0];
+  b[3] = b[0];
+
+  carry_out = (unsigned long long) unsigned_add3
+    ((__m64 *)a, (__m64 *)b, (__m64 *)result, N);
+
+  _mm_empty ();
+
+  if (carry_out != 1)
+    abort ();
+
+  idx = 0;
+  while (idx < N)
+    {
+      if (result [idx] != check_data[idx])
+        abort ();
+      idx++;
+    }
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movhpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_loadh_pd (S1, P).  The empty asm names S1 ("+v") and P
+   ("+b") as read-write operands, so the optimizer has to treat their
+   values as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, double *p)
+{
+ __asm("" : "+v"(s1), "+b"(p));
+ return _mm_loadh_pd (s1, p);
+}
+
+/* Check _mm_loadh_pd (through test): element 0 of the result must equal
+   element 0 of the vector operand and element 1 must equal the first
+   double of the memory operand; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128d res, vec;
+  double mem[2] = {41124.234,2344.2354};
+  double expect[2];
+
+  vec.x = _mm_set_pd (2134.3343,1234.635654);
+  res.x = test (vec.x, mem);
+
+  expect[1] = mem[0];
+  expect[0] = vec.a[0];
+
+  if (!check_union128d (res, expect))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movhpd_2
+#endif
+
+#include <emmintrin.h>
+
+/* Store with _mm_storeh_pd (P, A).  The empty asm names A ("+v") and P
+   ("+b") as read-write operands, so the optimizer has to treat their
+   values as unknown.  */
+static void
+__attribute__((noinline, unused))
+test (double *p, __m128d a)
+{
+  __asm("" : "+v"(a), "+b"(p));
+  /* ISO C does not allow "return <expression>;" in a function returning
+     void, so call the intrinsic as a plain statement.  */
+  _mm_storeh_pd (p, a);
+}
+
+/* Check _mm_storeh_pd (through test): the double written to memory must
+   equal element 1 of the source vector; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128d src;
+  double stored[1];
+  double expect[1];
+
+  src.x = _mm_set_pd (2134.3343,1234.635654);
+  test (stored, src.x);
+
+  expect[0] = src.a[1];
+
+  if (expect[0] == stored[0])
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movlpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_loadl_pd (A, E).  The empty asm names A ("+v") and E ("+b")
+   as read-write operands, so the optimizer has to treat their values as
+   unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d a, double *e)
+{
+ __asm("" : "+v"(a), "+b"(e));
+ return _mm_loadl_pd (a, e);
+}
+
+/* Check _mm_loadl_pd (through test): element 0 of the result must equal
+   d[0] and element 1 must equal element 1 of s1; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128d u, s1;
+ double d[2] = {2134.3343,1234.635654};
+ double e[2];
+
+ s1.x = _mm_set_pd (41124.234,2344.2354);
+ /* NOTE(review): this value of u.x is overwritten by the next assignment
+    before it is ever read.  */
+ u.x = _mm_loadu_pd (d);
+
+ u.x = test (s1.x, d);
+
+ e[0] = d[0];
+ e[1] = s1.a[1];
+
+ if (check_union128d (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movlpd_2
+#endif
+
+#include <emmintrin.h>
+
+/* Store with _mm_storel_pd (E, A).  The empty asm names A ("+v") and E
+   ("+b") as read-write operands, so the optimizer has to treat their
+   values as unknown.  */
+static void
+__attribute__((noinline, unused))
+test (double *e, __m128d a)
+{
+  __asm("" : "+v"(a), "+b"(e));
+  /* ISO C does not allow "return <expression>;" in a function returning
+     void, so call the intrinsic as a plain statement.  */
+  _mm_storel_pd (e, a);
+}
+
+/* Check _mm_storel_pd (through test): the store fills expect[0], then
+   expect[1] is copied from element 1 of the source, and the pair must
+   match the source vector; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128d src;
+  double expect[2];
+
+  src.x = _mm_set_pd (41124.234,2344.2354);
+
+  test (expect, src.x);
+
+  /* Only element 0 comes from the store under test.  */
+  expect[1] = src.a[1];
+
+  if (!check_union128d (src, expect))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movmskpd_1
+#endif
+
+#include <emmintrin.h>
+
+#ifdef _ARCH_PWR8
+/* Return _mm_movemask_pd (P).  The empty asm names P as a read-write
+   ("+v") operand, so the optimizer has to treat its value as unknown.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d p)
+{
+ __asm("" : "+v"(p));
+ return _mm_movemask_pd (p);
+}
+#endif
+
+/* Check _mm_movemask_pd (through test): bit I of the result must be set
+   exactly when source[I] is negative.  The body is compiled only when
+   _ARCH_PWR8 is defined.  With DEBUG set a mismatch is reported with
+   printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+#ifdef _ARCH_PWR8
+  double source[2] = {1.234, -2234.23};
+  union128d s1;
+  int d;
+  int e;
+
+  s1.x = _mm_loadu_pd (source);
+
+  d = test (s1.x);
+
+  e = 0;
+  if (source[0] < 0)
+    e |= 1;
+
+  if (source[1] < 0)
+    e |= 1 << 1;
+
+  if (checkVi (&d, &e, 1))
+#if DEBUG
+    {
+      /* Name the check that actually failed (checkVi).  */
+      printf ("sse2_test_movmskpd_1; checkVi failed\n");
+      printf ("\t [%f,%f] -> [%d]\n",
+              s1.a[0], s1.a[1], d);
+      printf ("\t expect [%d]\n",
+              e);
+    }
+#else
+    abort ();
+#endif
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_move_epi64 (B).  The empty asm names B as a read-write
+   ("+v") operand, so the optimizer has to treat its value as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i b)
+{
+ __asm("" : "+v"(b));
+ return _mm_move_epi64 (b);
+}
+
+/* Check _mm_move_epi64 (through test): element 0 of the result must equal
+   element 0 of the source and element 1 must be zero.  With DEBUG set a
+   mismatch is reported with printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128i_q res, src;
+  long long expect[2];
+
+  src.x = _mm_set_epi64x(12876, 3376590);
+  res.x = test (src.x);
+
+  expect[0] = src.a[0];
+  expect[1] = 0;
+
+  if (!check_union128i_q (res, expect))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_movq_1; check_union128i_q failed\n");
+  printf ("\t move_epi64 ([%llx, %llx]) -> [%llx, %llx]\n", src.a[0],
+          src.a[1], res.a[0], res.a[1]);
+  printf ("\t expect [%llx, %llx]\n", expect[0], expect[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movq_2
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_cvtsi64_si128 (B).  The empty asm names B as a read-write
+   ("+r") operand, so the optimizer has to treat its value as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (long long b)
+{
+ __asm("" : "+r" (b));
+ return _mm_cvtsi64_si128 (b);
+}
+
+/* Check _mm_cvtsi64_si128 (through test): element 0 of the result must
+   equal the scalar input and element 1 must be zero; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128i_q res;
+  long long scalar = 4294967295133LL;
+  long long expect[2];
+
+  res.x = test (scalar);
+
+  expect[0] = scalar;
+  expect[1] = 0;
+
+  if (!check_union128i_q (res, expect))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movq_3
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_cvtsi128_si64 (B).  The empty asm names B as a read-write
+   ("+v") operand, so the optimizer has to treat its value as unknown.  */
+static long long
+__attribute__((noinline, unused))
+test (__m128i b)
+{
+ __asm("" : "+v"(b));
+ return _mm_cvtsi128_si64 (b);
+}
+
+/* Check _mm_cvtsi128_si64 (through test): the returned scalar must equal
+   element 0 of the source vector; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128i_q src;
+  long long got;
+
+  src.x = _mm_set_epi64x (4294967295133LL, 3844294967295133LL);
+  got = test (src.x);
+
+  if (got == src.a[0])
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movsd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_load_sd (P).  Marked noinline so the intrinsic is expanded
+   in a function of its own.  */
+static __m128d
+__attribute__((noinline, unused))
+test (double *p)
+{
+ return _mm_load_sd (p);
+}
+
+/* Check _mm_load_sd (through test): element 0 of the result must equal
+   d[0] and element 1 must be 0.0; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128d u;
+  double d[2] = {128.023, 3345.1234};
+  double e[2];
+
+  /* The earlier version first loaded u.x from e[] while e[] was still
+     uninitialized; that value was overwritten here before being read,
+     so the load has been dropped.  */
+  u.x = test (d);
+
+  e[0] = d[0];
+  e[1] = 0.0;
+
+  if (check_union128d (u, e))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movsd_2
+#endif
+
+#include <emmintrin.h>
+
+/* Store with _mm_store_sd (P, A).  Marked noinline so the intrinsic is
+   expanded in a function of its own.  */
+static void
+__attribute__((noinline, unused))
+test (double *p, __m128d a)
+{
+ _mm_store_sd (p, a);
+}
+
+/* Check _mm_store_sd (through test): the double written to memory must
+   match element 0 of the source vector according to checkVd; abort on
+   mismatch.  */
+static void
+TEST (void)
+{
+  union128d src;
+  double stored[1];
+  double expect[1];
+
+  src.x = _mm_set_pd (128.023, 3345.1234);
+  test (stored, src.x);
+
+  expect[0] = src.a[0];
+
+  if (!checkVd (stored, expect, 1))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_movsd_3
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_move_sd (A, B).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d a, __m128d b)
+{
+ __asm("" : "+v"(a), "+v"(b));
+ return _mm_move_sd (a, b);
+}
+
+/* Check _mm_move_sd (through test) against fixed expected values: element
+   0 from the second operand and element 1 from the first.  With DEBUG set
+   a mismatch is reported with printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128d res, first, second;
+  double expect[2];
+
+  expect[0] = 256.046;
+  expect[1] = 3345.1234;
+
+  first.x = _mm_setr_pd (128.023, 3345.1234);
+  second.x = _mm_setr_pd (256.046, 4533.1234);
+  /* Both inputs are read-write asm operands, so their values are unknown
+     to the optimizer from here on.  */
+  __asm("" : "+v"(first.x), "+v"(second.x));
+  res.x = test (first.x, second.x);
+
+  if (!check_union128d (res, expect))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_movsd_3; check_union128d failed\n");
+  printf ("\t [%f,%f], [%f,%f] -> [%f,%f]\n", first.a[0], first.a[1],
+          second.a[0], second.a[1], res.a[0], res.a[1]);
+  printf ("\t expect [%f,%f]\n", expect[0], expect[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_mulpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_mul_pd (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_mul_pd (s1, s2);
+}
+
+/* Check _mm_mul_pd (through test) against element-wise C multiplication
+   of the same operands.  With DEBUG set a mismatch is reported with
+   printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128d res, lhs, rhs;
+  double expect[2];
+  int k;
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+  res.x = test (lhs.x, rhs.x);
+
+  for (k = 0; k < 2; k++)
+    expect[k] = lhs.a[k] * rhs.a[k];
+
+  if (!check_union128d (res, expect))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_mul_pd_1; check_union128d failed\n");
+  printf ("\t [%f,%f] * [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1], rhs.a[0],
+          rhs.a[1], res.a[0], res.a[1]);
+  printf ("\t expect [%f,%f]\n", expect[0], expect[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_mulsd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_mul_sd (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_mul_sd (s1, s2);
+}
+
+/* Check _mm_mul_sd (through test): element 0 must be the product of the
+   element-0 inputs and element 1 must be element 1 of the first operand.
+   With DEBUG set a mismatch is reported with printf; otherwise it
+   aborts.  */
+static void
+TEST (void)
+{
+  union128d res, lhs, rhs;
+  double expect[2];
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+  res.x = test (lhs.x, rhs.x);
+
+  expect[1] = lhs.a[1];
+  expect[0] = lhs.a[0] * rhs.a[0];
+
+  if (!check_union128d (res, expect))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_mul_sd_1; check_union128d failed\n");
+  printf ("\t [%f,%f] * [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1], rhs.a[0],
+          rhs.a[1], res.a[0], res.a[1]);
+  printf ("\t expect [%f,%f]\n", expect[0], expect[1]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_orpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_or_pd (S1, S2).  Marked noinline so the intrinsic is
+   expanded in a function of its own.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ return _mm_or_pd (s1, s2);
+}
+
+/* Check _mm_or_pd (through test): both inputs are stored to memory, their
+   64-bit images are ORed as long long values, and the result vector must
+   match those bit patterns read back as doubles; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128d res, lhs, rhs;
+
+  /* Lets the same 16 bytes be read as two doubles or two long longs.  */
+  union
+    {
+      double d[2];
+      long long ll[2];
+    } lhs_bits, rhs_bits, expect;
+  int k;
+
+  lhs.x = _mm_set_pd (1234, 44386);
+  rhs.x = _mm_set_pd (5198, 23098);
+
+  _mm_storeu_pd (lhs_bits.d, lhs.x);
+  _mm_storeu_pd (rhs_bits.d, rhs.x);
+
+  res.x = test (lhs.x, rhs.x);
+
+  for (k = 0; k < 2; k++)
+    expect.ll[k] = lhs_bits.ll[k] | rhs_bits.ll[k];
+
+  if (!check_union128d (res, expect.d))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_packssdw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_packs_epi32 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_packs_epi32 (s1, s2);
+}
+
+/* Check _mm_packs_epi32 (through test): the expected eight shorts are the
+   four elements of s1 followed by the four elements of s2, each clamped
+   to [-32768, 32767].  With DEBUG set a mismatch is reported with printf;
+   otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128i_d s1, s2;
+  union128i_w u;
+  short e[8];
+  int i;
+
+  s1.x = _mm_set_epi32 (2134, -128, 655366, 9999);
+  s2.x = _mm_set_epi32 (41124, 234, 2, -800900);
+  u.x = test (s1.x, s2.x);
+
+  /* First half of e[] comes from s1, second half from s2.  */
+  for (i = 0; i < 4; i++)
+    {
+      e[i] = s1.a[i] > 32767 ? 32767
+             : (s1.a[i] < -32768 ? -32768 : s1.a[i]);
+      e[i+4] = s2.a[i] > 32767 ? 32767
+               : (s2.a[i] < -32768 ? -32768 : s2.a[i]);
+    }
+
+  if (!check_union128i_w (u, e))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_packssdw_1; check_union128i_w failed\n");
+  printf (
+      "\t ([%x,%x,%x,%x], [%x,%x,%x,%x]) -> [%x,%x,%x,%x, %x,%x,%x,%x]\n",
+      s1.a[0], s1.a[1], s1.a[2], s1.a[3], s2.a[0], s2.a[1], s2.a[2],
+      s2.a[3], u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6],
+      u.a[7]);
+  printf ("\t expect [%x,%x,%x,%x, %x,%x,%x,%x]\n", e[0], e[1], e[2], e[3],
+          e[4], e[5], e[6], e[7]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_packsswb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_packs_epi16 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_packs_epi16 (s1, s2);
+}
+
+/* Check _mm_packs_epi16 (through test): the expected sixteen bytes are
+   the eight elements of s1 followed by the eight elements of s2, each
+   clamped to [-128, 127].  With DEBUG set a mismatch is reported with
+   printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128i_w s1, s2;
+  union128i_b u;
+  char e[16];
+  int i;
+
+  s1.x = _mm_set_epi16 (2134, -128, 1234, 6354, 1002, 3004, 4050, 9999);
+  s2.x = _mm_set_epi16 (41124, 234, 2344, 2354, 607, 1, 2, -8009);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 8; i++)
+    {
+      if (s1.a[i] > 127)
+        e[i] = 127;
+      else if (s1.a[i] < -128)
+        e[i] = -128;
+      else
+        e[i] = s1.a[i];
+    }
+
+  for (i = 0; i < 8; i++)
+    {
+      if (s2.a[i] > 127)
+        e[i+8] = 127;
+      else if (s2.a[i] < -128)
+        e[i+8] = -128;
+      else
+        e[i+8] = s2.a[i];
+    }
+
+  if (check_union128i_b (u, e))
+#if DEBUG
+    {
+      /* Name the check that actually failed (check_union128i_b).  */
+      printf ("sse2_test_packsswb_1; check_union128i_b failed\n");
+      printf ("\t ([%x,%x,%x,%x, %x,%x,%x,%x], [%x,%x,%x,%x, %x,%x,%x,%x])\n",
+              s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
+              s1.a[7], s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5],
+              s2.a[6], s2.a[7]);
+      printf ("\t\t -> [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+              u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
+              u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
+              u.a[15]);
+      printf (
+          "\t expect [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+          e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
+          e[11], e[12], e[13], e[14], e[15]);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_packuswb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_packus_epi16 (S1, S2).  The empty asm names both arguments
+   as read-write ("+v") operands, so the optimizer has to treat their
+   values as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_packus_epi16 (s1, s2);
+}
+
+/* Check _mm_packus_epi16 (through test): the expected sixteen bytes are
+   the eight elements of s1 followed by the eight elements of s2, each
+   clamped to [0, 255].  With DEBUG set a mismatch is reported with
+   printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128i_w s1, s2;
+  union128i_ub u;
+  unsigned char e[16];
+  int i, tmp;
+
+  s1.x = _mm_set_epi16 (1, 2, 3, 4, -5, -6, -7, -8);
+  s2.x = _mm_set_epi16 (-9, -10, -11, -12, 13, 14, 15, 16);
+  u.x = test (s1.x, s2.x);
+
+  for (i=0; i<8; i++)
+    {
+      tmp = s1.a[i]<0 ? 0 : s1.a[i];
+      tmp = tmp>255 ? 255 : tmp;
+      e[i] = tmp;
+
+      tmp = s2.a[i]<0 ? 0 : s2.a[i];
+      tmp = tmp>255 ? 255 : tmp;
+      e[i+8] = tmp;
+    }
+
+  if (check_union128i_ub (u, e))
+#if DEBUG
+    {
+      /* Name the check that actually failed (check_union128i_ub).  */
+      printf ("sse2_test_packuswb_1; check_union128i_ub failed\n");
+      printf ("\t ([%x,%x,%x,%x, %x,%x,%x,%x], [%x,%x,%x,%x, %x,%x,%x,%x])\n",
+              s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
+              s1.a[7], s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5],
+              s2.a[6], s2.a[7]);
+      printf ("\t\t -> [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+              u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
+              u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
+              u.a[15]);
+      printf (
+          "\t expect [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+          e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
+          e[11], e[12], e[13], e[14], e[15]);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_add_epi8 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_add_epi8 (s1, s2);
+}
+
+/* Check _mm_add_epi8 (through test) against element-wise C addition of
+   the sixteen byte elements; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128i_b res, lhs, rhs;
+  char expect[16];
+  int k;
+
+  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  res.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 16)
+    {
+      expect[k] = lhs.a[k] + rhs.a[k];
+      k++;
+    }
+
+  if (!check_union128i_b (res, expect))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_add_epi32 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_add_epi32 (s1, s2);
+}
+
+/* Check _mm_add_epi32 (through test) against element-wise C addition of
+   the four int elements; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128i_d res, lhs, rhs;
+  int expect[4];
+  int k;
+
+  lhs.x = _mm_set_epi32 (30,90,-80,-40);
+  rhs.x = _mm_set_epi32 (76, -100, -34, -78);
+  res.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 4)
+    {
+      expect[k] = lhs.a[k] + rhs.a[k];
+      k++;
+    }
+
+  if (!check_union128i_d (res, expect))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_add_epi64 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_add_epi64 (s1, s2);
+}
+
+/* Check _mm_add_epi64 (through test) against element-wise C addition of
+   the two 64-bit elements; abort on mismatch.  */
+static void
+TEST (void)
+{
+  union128i_q res, lhs, rhs;
+  long long expect[2];
+
+  lhs.x = _mm_set_epi64x (90,-80);
+  rhs.x = _mm_set_epi64x (76, -100);
+  res.x = test (lhs.x, rhs.x);
+
+  expect[0] = lhs.a[0] + rhs.a[0];
+  expect[1] = lhs.a[1] + rhs.a[1];
+
+  if (!check_union128i_q (res, expect))
+    return;
+
+  abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddsb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_adds_epi8 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_adds_epi8 (s1, s2);
+}
+
+/* Check _mm_adds_epi8 (through test): each expected byte is the sum of
+   the inputs taken as signed char, clamped to [-128, 127].  With DEBUG
+   set a mismatch is reported with printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128i_b u, s1, s2;
+  char e[16];
+  int i, tmp;
+
+  s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 16; i++)
+    {
+      /* The explicit signed char casts make the sum signed whatever the
+         signedness of plain char.  */
+      tmp = (signed char)s1.a[i] + (signed char)s2.a[i];
+      e[i] = tmp > 127 ? 127 : (tmp < -128 ? -128 : tmp);
+    }
+
+  if (!check_union128i_b (u, e))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_paddsb_1; check_union128i_b failed\n");
+  printf (
+      "\tadds\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
+      s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
+      s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
+      s1.a[14], s1.a[15]);
+  printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
+          s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5], s2.a[6],
+          s2.a[7], s2.a[8], s2.a[9], s2.a[10], s2.a[11], s2.a[12], s2.a[13],
+          s2.a[14], s2.a[15]);
+  printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+          u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
+          u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
+          u.a[15]);
+  printf (
+      "\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+      e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
+      e[11], e[12], e[13], e[14], e[15]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddsw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_adds_epi16 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_adds_epi16 (s1, s2);
+}
+
+/* Check _mm_adds_epi16 (through test): each expected short is the sum of
+   the inputs clamped to [-32768, 32767].  With DEBUG set a mismatch is
+   reported with printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128i_w u, s1, s2;
+  short e[8];
+  int i, tmp;
+
+  s1.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
+  s2.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 8; i++)
+    {
+      tmp = s1.a[i] + s2.a[i];
+      e[i] = tmp > 32767 ? 32767 : (tmp < -32768 ? -32768 : tmp);
+    }
+
+  if (!check_union128i_w (u, e))
+    return;
+
+#if DEBUG
+  printf ("sse2_test_paddsw_1; check_union128i_w failed\n");
+  printf ("\tadds\t([%x,%x,%x,%x, %x,%x,%x,%x],\n", s1.a[0], s1.a[1],
+          s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6], s1.a[7]);
+  printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x])\n", s2.a[0], s2.a[1], s2.a[2],
+          s2.a[3], s2.a[4], s2.a[5], s2.a[6], s2.a[7]);
+  printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2],
+          u.a[3], u.a[4], u.a[5], u.a[6], u.a[7]);
+  printf ("\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", e[0], e[1], e[2], e[3],
+          e[4], e[5], e[6], e[7]);
+#else
+  abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddusb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_adds_epu8 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_adds_epu8 (s1, s2);
+}
+
+/* Check _mm_adds_epu8 (through test): each expected byte is the sum of
+   the inputs taken as unsigned char, clamped to 255.  With DEBUG set a
+   mismatch is reported with printf; otherwise it aborts.  */
+static void
+TEST (void)
+{
+  union128i_b u, s1, s2;
+  char e[16] = {0};
+  int i, tmp;
+
+  s1.x = _mm_set_epi8 (30, 2, 3, 4, 10, 20, 30, 90, 80, 40, 100, 15, 98, 25, 98, 7);
+  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 16; i++)
+    {
+      tmp = (unsigned char)s1.a[i] + (unsigned char)s2.a[i];
+
+      /* Saturate at the unsigned 8-bit maximum.  The earlier code set an
+         overflowing sum to -1 and then clamped negative values to 0, so
+         an overflow would have expected 0 instead of 255.  A sum of two
+         unsigned char values is never negative, so no lower clamp is
+         needed.  */
+      if (tmp > 255)
+        tmp = 255;
+
+      e[i] = tmp;
+    }
+
+  if (check_union128i_b (u, e))
+#if DEBUG
+    {
+      printf ("sse2_test_paddusb_1; check_union128i_b failed\n");
+      printf (
+          "\tadds\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
+          s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
+          s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
+          s1.a[14], s1.a[15]);
+      printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
+              s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5], s2.a[6],
+              s2.a[7], s2.a[8], s2.a[9], s2.a[10], s2.a[11], s2.a[12], s2.a[13],
+              s2.a[14], s2.a[15]);
+      printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+              u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
+              u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
+              u.a[15]);
+      printf (
+          "\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+          e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
+          e[11], e[12], e[13], e[14], e[15]);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddusw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Return _mm_adds_epu16 (S1, S2).  The empty asm names both arguments as
+   read-write ("+v") operands, so the optimizer has to treat their values
+   as unknown.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_adds_epu16 (s1, s2);
+}
+
+/* Check _mm_adds_epu16 (through test): each expected element is the sum
+   of the inputs taken as unsigned short, clamped to 65535; abort on
+   mismatch.  */
+static void
+TEST (void)
+{
+  union128i_w u, s1, s2;
+  short e[8];
+  int i, tmp;
+
+  s1.x = _mm_set_epi16 (10,20,30,90,80,40,100,15);
+  s2.x = _mm_set_epi16 (11, 98, 76, 100, 34, 78, 39, 14);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 8; i++)
+    {
+      /* The intrinsic adds unsigned 16-bit values, so form the reference
+         sum from unsigned operands as well.  */
+      tmp = (unsigned short)s1.a[i] + (unsigned short)s2.a[i];
+
+      /* Saturate at the unsigned 16-bit maximum.  The earlier code set an
+         overflowing sum to -1 and then clamped negative values to 0, so
+         an overflow would have expected 0 instead of 65535.  */
+      if (tmp > 65535)
+        tmp = 65535;
+
+      e[i] = tmp;
+    }
+
+  if (check_union128i_w (u, e))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_paddw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_add_epi16 (s1, s2).  */
+ __asm("" : "+v"(s1), "+v"(s2)); /* Empty asm, inputs as "+v" operands.  */
+ return _mm_add_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w u, s1, s2;
+ short e[8];
+ int i;
+ /* Fixed operands mixing positive and negative lanes.  */
+ s1.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
+ s2.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: lane-by-lane sum stored back into a short.  */
+ for (i = 0; i < 8; i++)
+ e[i] = s1.a[i] + s2.a[i];
+
+ if (check_union128i_w (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pavgb_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_avg_epu8 (s1, s2).  */
+ return _mm_avg_epu8 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_ub u, s1, s2;
+ unsigned char e[16];
+ int i;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7);
+ s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: average rounded up, (a + b + 1) >> 1 per lane.  */
+ for (i = 0; i < 16; i++)
+ e[i] = (s1.a[i] + s2.a[i]+1)>>1;
+
+ if (check_union128i_ub (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pavgw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_avg_epu16 (s1, s2).  */
+ return _mm_avg_epu16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_uw u, s1, s2;
+ unsigned short e[8];
+ int i;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi16 (10,20,30,90,80,40,100,15);
+ s2.x = _mm_set_epi16 (11, 98, 76, 100, 34, 78, 39, 14);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: average rounded up, (a + b + 1) >> 1 per lane.  */
+ for (i = 0; i < 8; i++)
+ e[i] = (s1.a[i] + s2.a[i]+1)>>1;
+
+ if (check_union128i_uw (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pcmpeqb_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_cmpeq_epi8 (s1, s2).  */
+ return _mm_cmpeq_epi8 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_b u, s1, s2;
+ char e[16];
+ int i;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7);
+ s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: -1 where the lanes are equal, 0 otherwise.  */
+ for (i = 0; i < 16; i++)
+ e[i] = (s1.a[i] == s2.a[i]) ? -1:0;
+
+ if (check_union128i_b (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pcmpeqd_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_cmpeq_epi32 (s1, s2).  */
+ return _mm_cmpeq_epi32 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_d u, s1, s2;
+ int e[4];
+ int i;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi32 (98, 25, 98,7);
+ s2.x = _mm_set_epi32 (88, 44, 33, 229);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: -1 where the lanes are equal, 0 otherwise.  */
+ for (i = 0; i < 4; i++)
+ e[i] = (s1.a[i] == s2.a[i]) ? -1:0;
+
+ if (check_union128i_d (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pcmpeqw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_cmpeq_epi16 (s1, s2).  */
+ return _mm_cmpeq_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w u, s1, s2;
+ short e[8];
+ int i;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi16 (20,30,90,80,40,100,15,98);
+ s2.x = _mm_set_epi16 (34, 78, 39, 6, 3, 4, 5, 119);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: -1 where the lanes are equal, 0 otherwise.  */
+ for (i = 0; i < 8; i++)
+ e[i] = (s1.a[i] == s2.a[i]) ? -1:0;
+
+ if (check_union128i_w (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pcmpgtb_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_cmpgt_epi8 (s1, s2).  */
+ return _mm_cmpgt_epi8 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_b u, s1, s2;
+ char e[16];
+ int i;
+ /* Fixed operands; every byte is a small positive value.  */
+ s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7);
+ s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: -1 where the s1 lane exceeds the s2 lane, else 0.  */
+ for (i = 0; i < 16; i++)
+ e[i] = (s1.a[i] > s2.a[i]) ? -1:0;
+
+ if (check_union128i_b (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pcmpgtd_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_cmpgt_epi32 (s1, s2).  */
+ return _mm_cmpgt_epi32 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_d u, s1, s2;
+ int e[4];
+ int i;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi32 (98, 25, 98,7);
+ s2.x = _mm_set_epi32 (88, 44, 33, 229);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: -1 where the s1 lane exceeds the s2 lane, else 0.  */
+ for (i = 0; i < 4; i++)
+ e[i] = (s1.a[i] > s2.a[i]) ? -1:0;
+
+ if (check_union128i_d (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pcmpgtw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_cmpgt_epi16 (s1, s2).  */
+ return _mm_cmpgt_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w u, s1, s2;
+ short e[8];
+ int i;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi16 (20,30,90,80,40,100,15,98);
+ s2.x = _mm_set_epi16 (34, 78, 39, 6, 3, 4, 5, 119);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: -1 where the s1 lane exceeds the s2 lane, else 0.  */
+ for (i = 0; i < 8; i++)
+ e[i] = (s1.a[i] > s2.a[i]) ? -1:0;
+
+ if (check_union128i_w (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pextrw_1
+#endif
+
+#include <emmintrin.h>
+
+#define msk0 0 /* msk0..msk7: the eight constant lane selectors 0..7.  */
+#define msk1 1
+#define msk2 2
+#define msk3 3
+#define msk4 4
+#define msk5 5
+#define msk6 6
+#define msk7 7
+/* Extract every 16-bit lane of a known vector; abort on any mismatch.  */
+static void
+TEST (void)
+{
+ union
+ {
+ __m128i x;
+ int i[4];
+ short s[8];
+ } val1;
+ int res[8], masks[8];
+ int i;
+ /* Fill the vector through its int view.  */
+ val1.i[0] = 0x04030201;
+ val1.i[1] = 0x08070605;
+ val1.i[2] = 0x0C0B0A09;
+ val1.i[3] = 0x100F0E0D;
+ /* One extract per selector; each selector is a literal constant.  */
+ res[0] = _mm_extract_epi16 (val1.x, msk0);
+ res[1] = _mm_extract_epi16 (val1.x, msk1);
+ res[2] = _mm_extract_epi16 (val1.x, msk2);
+ res[3] = _mm_extract_epi16 (val1.x, msk3);
+ res[4] = _mm_extract_epi16 (val1.x, msk4);
+ res[5] = _mm_extract_epi16 (val1.x, msk5);
+ res[6] = _mm_extract_epi16 (val1.x, msk6);
+ res[7] = _mm_extract_epi16 (val1.x, msk7);
+ /* Record the selectors so the check loop can index val1.s with them.  */
+ masks[0] = msk0;
+ masks[1] = msk1;
+ masks[2] = msk2;
+ masks[3] = msk3;
+ masks[4] = msk4;
+ masks[5] = msk5;
+ masks[6] = msk6;
+ masks[7] = msk7;
+ /* Each extracted value must equal the short lane its selector names.  */
+ for (i = 0; i < 8; i++)
+ if (res[i] != val1.s [masks[i]])
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pinsrw_1
+#endif
+
+#include <emmintrin.h>
+#include <string.h>
+
+#define msk0 0x00 /* msk0..msk7: the eight constant lane selectors 0..7.  */
+#define msk1 0x01
+#define msk2 0x02
+#define msk3 0x03
+#define msk4 0x04
+#define msk5 0x05
+#define msk6 0x06
+#define msk7 0x07
+/* Insert a 16-bit value into each lane in turn; abort on any mismatch.  */
+static void
+TEST (void)
+{
+ union
+ {
+ __m128i x;
+ unsigned int i[4];
+ unsigned short s[8];
+ } res [8], val, tmp;
+ int masks[8];
+ unsigned short ins[4] = { 3, 4, 5, 6 };
+ int i;
+ /* Source vector, filled through its unsigned int view.  */
+ val.i[0] = 0x35251505;
+ val.i[1] = 0x75655545;
+ val.i[2] = 0xB5A59585;
+ val.i[3] = 0xF5E5D5C5;
+
+ /* Check pinsrw imm8, r32, xmm. */
+ res[0].x = _mm_insert_epi16 (val.x, ins[0], msk0);
+ res[1].x = _mm_insert_epi16 (val.x, ins[0], msk1);
+ res[2].x = _mm_insert_epi16 (val.x, ins[0], msk2);
+ res[3].x = _mm_insert_epi16 (val.x, ins[0], msk3);
+ res[4].x = _mm_insert_epi16 (val.x, ins[0], msk4);
+ res[5].x = _mm_insert_epi16 (val.x, ins[0], msk5);
+ res[6].x = _mm_insert_epi16 (val.x, ins[0], msk6);
+ res[7].x = _mm_insert_epi16 (val.x, ins[0], msk7);
+ /* Remember which lane each result modified.  */
+ masks[0] = msk0;
+ masks[1] = msk1;
+ masks[2] = msk2;
+ masks[3] = msk3;
+ masks[4] = msk4;
+ masks[5] = msk5;
+ masks[6] = msk6;
+ masks[7] = msk7;
+ /* Model: copy val, overwrite the selected lane, compare the whole union.  */
+ for (i = 0; i < 8; i++)
+ {
+ tmp.x = val.x;
+ tmp.s[masks[i]] = ins[0];
+ if (memcmp (&tmp, &res[i], sizeof (tmp)))
+ abort ();
+ }
+
+ /* Check pinsrw imm8, m16, xmm. */
+ for (i = 0; i < 8; i++)
+ {
+ res[i].x = _mm_insert_epi16 (val.x, ins[i % 2], msk0);
+ masks[i] = msk0;
+ }
+ /* Same model as above, now always lane 0 with the value ins[i % 2].  */
+ for (i = 0; i < 8; i++)
+ {
+ tmp.x = val.x;
+ tmp.s[masks[i]] = ins[i % 2];
+ if (memcmp (&tmp, &res[i], sizeof (tmp)))
+ abort ();
+ }
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmaddwd_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_madd_epi16 (s1, s2).  */
+ __asm("" : "+v"(s1), "+v"(s2)); /* Empty asm, inputs as "+v" operands.  */
+ return _mm_madd_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w s1, s2;
+ union128i_d u;
+ int e[4];
+ int i;
+ /* Fixed 16-bit operands; the result is read back as four ints.  */
+ s1.x = _mm_set_epi16 (2134,3343,1234,6354, 1, 3, 4, 5);
+ s2.x = _mm_set_epi16 (41124,234,2344,2354,9, -1, -8, -10);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: e[i] sums the products of lanes 2*i and 2*i + 1.  */
+ for (i = 0; i < 4; i++)
+ e[i] = (s1.a[i*2] * s2.a[i*2])+(s1.a[(i*2) + 1] * s2.a[(i*2) + 1]);
+
+ if (check_union128i_d (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmaxsw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_max_epi16 (s1, s2).  */
+ return _mm_max_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w u, s1, s2;
+ short e[8];
+ int i;
+ /* Fixed operands: an ascending and a descending sequence.  */
+ s1.x = _mm_set_epi16 (1,2,3,4,5,6,7,8);
+ s2.x = _mm_set_epi16 (8,7,6,5,4,3,2,1);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: the larger of the two lanes.  */
+ for (i=0; i<8; i++)
+ e[i] = s1.a[i]>s2.a[i]?s1.a[i]:s2.a[i];
+
+ if (check_union128i_w (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmaxub_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_max_epu8 (s1, s2).  */
+ return _mm_max_epu8 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_ub u, s1, s2;
+ unsigned char e[16];
+ int i;
+ /* Fixed operands: an ascending and a descending sequence.  */
+ s1.x = _mm_set_epi8 (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
+ s2.x = _mm_set_epi8 (16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: the larger of the two lanes.  */
+ for (i=0; i<16; i++)
+ e[i] = s1.a[i]>s2.a[i]?s1.a[i]:s2.a[i];
+
+ if (check_union128i_ub (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pminsw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_min_epi16 (s1, s2).  */
+ return _mm_min_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w u, s1, s2;
+ short e[8];
+ int i;
+ /* Fixed operands: an ascending and a descending sequence.  */
+ s1.x = _mm_set_epi16 (1,2,3,4,5,6,7,8);
+ s2.x = _mm_set_epi16 (8,7,6,5,4,3,2,1);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: the smaller of the two lanes.  */
+ for (i=0; i<8; i++)
+ e[i] = s1.a[i]<s2.a[i]?s1.a[i]:s2.a[i];
+
+ if (check_union128i_w (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pminub_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_min_epu8 (s1, s2).  */
+ return _mm_min_epu8 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_ub u, s1, s2;
+ unsigned char e[16];
+ int i;
+ /* Fixed operands: an ascending and a descending sequence.  */
+ s1.x = _mm_set_epi8 (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
+ s2.x = _mm_set_epi8 (16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: the smaller of the two lanes.  */
+ for (i=0; i<16; i++)
+ e[i] = s1.a[i]<s2.a[i]?s1.a[i]:s2.a[i];
+
+ if (check_union128i_ub (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmovmskb_1
+#endif
+
+#include <emmintrin.h>
+
+#ifdef _ARCH_PWR8
+static int
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_movemask_epi8 (s1).  */
+ return _mm_movemask_epi8 (s1);
+}
+#endif
+/* Check the byte mask of a fixed vector; empty unless _ARCH_PWR8 is defined.  */
+static void
+TEST (void)
+{
+#ifdef _ARCH_PWR8
+ union128i_b s1;
+ int i, u, e=0;
+ /* Fixed operand: four negative bytes among twelve positive ones.  */
+ s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+
+ __asm("" : "+v"(s1.x)); /* Empty asm, input as a "+v" operand.  */
+ u = test (s1.x);
+ /* Scalar model: bit i of e is set when bit 7 of byte i is set.  */
+ for (i = 0; i < 16; i++)
+ if (s1.a[i] & (1<<7))
+ e = e | (1<<i);
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (checkVi (&u, &e, 1))
+#if DEBUG
+ {
+ printf ("sse2_test_pmovmskb_1; checkVi failed\n");
+ printf ("\t ([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x], -> %x)\n",
+ s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
+ s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
+ s1.a[14], s1.a[15], u);
+ printf ("\t expect %x\n", e);
+ }
+#else
+ abort ();
+#endif
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmulhuw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_mulhi_epu16 (s1, s2).  */
+ return _mm_mulhi_epu16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_uw u, s1, s2;
+ unsigned short e[8];
+ int i, tmp;
+ /* Fixed operands.  */
+ s1.x = _mm_set_epi16 (10,2067,3033,90,80,40,1000,15);
+ s2.x = _mm_set_epi16 (11, 9834, 7444, 10222, 34, 7833, 39, 14);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: high 16 bits of the unsigned 32-bit product.  */
+ for (i = 0; i < 8; i++)
+ {
+ tmp = (unsigned int) s1.a[i] * s2.a[i];
+ /* The cast keeps the multiply unsigned; as plain int it could overflow.  */
+ e[i] = (tmp & 0xffff0000)>>16;
+ }
+
+ if (check_union128i_uw (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmulhw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_mulhi_epi16 (s1, s2).  */
+ __asm("" : "+v"(s1), "+v"(s2)); /* Empty asm, inputs as "+v" operands.  */
+ return _mm_mulhi_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_w u, s1, s2;
+ short e[8];
+ int i, tmp;
+ /* Fixed operands mixing positive and negative lanes.  */
+ s1.x = _mm_set_epi16 (10,2067,-3033,90,80,40,-1000,15);
+ s2.x = _mm_set_epi16 (11, 9834, 7444, -10222, 34, -7833, 39, 14);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: bits 31..16 of the int product of the two lanes.  */
+ for (i = 0; i < 8; i++)
+ {
+ tmp = s1.a[i] * s2.a[i];
+
+ e[i] = (tmp & 0xffff0000)>>16;
+ }
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_w (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_pmulhw_1; check_union128i_w failed\n");
+ printf ("\tmulhi\t([%x,%x,%x,%x, %x,%x,%x,%x],\n", s1.a[0], s1.a[1],
+ s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6], s1.a[7]);
+ printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x])\n", s2.a[0], s2.a[1], s2.a[2],
+ s2.a[3], s2.a[4], s2.a[5], s2.a[6], s2.a[7]);
+ printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2],
+ u.a[3], u.a[4], u.a[5], u.a[6], u.a[7]);
+ printf ("\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", e[0], e[1], e[2], e[3],
+ e[4], e[5], e[6], e[7]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmullw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_mullo_epi16 (s1, s2).  */
+ __asm("" : "+v"(s1), "+v"(s2)); /* Empty asm, inputs as "+v" operands.  */
+ return _mm_mullo_epi16 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w u, s1, s2;
+ short e[8];
+ int i, tmp;
+ /* Fixed operands mixing positive and negative lanes.  */
+ s1.x = _mm_set_epi16 (10,2067,-3033,90,80,40,-1000,15);
+ s2.x = _mm_set_epi16 (11, 9834, 7444, -10222, 34, -7833, 39, 14);
+ u.x = test (s1.x, s2.x);
+ /* Scalar model: the int product, narrowed by assignment to a short.  */
+ for (i = 0; i < 8; i++)
+ {
+ tmp = s1.a[i] * s2.a[i];
+
+ e[i] = tmp;
+ }
+
+ if (check_union128i_w (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pmuludq_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_mul_epu32 (s1, s2).  */
+ __asm("" : "+v"(s1), "+v"(s2)); /* Empty asm, inputs as "+v" operands.  */
+ return _mm_mul_epu32 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_d s1, s2;
+ union128i_q u;
+ long long e[2];
+ /* Fixed 32-bit operands; the result is read back as two 64-bit lanes.  */
+ s1.x = _mm_set_epi32 (10,2067,3033,905);
+ s2.x = _mm_set_epi32 (11, 9834, 7444, 10222);
+ __asm("" : "+v"(s1.x), "+v"(s2.x));
+ u.x = test (s1.x, s2.x);
+ /* Model lanes 0 and 2 as unsigned 32x32->64 products, widened first.  */
+ e[0] = (unsigned long long) (unsigned int) s1.a[0] * (unsigned int) s2.a[0];
+ e[1] = (unsigned long long) (unsigned int) s1.a[2] * (unsigned int) s2.a[2];
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_q (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_pmuludq_1; check_union128i_q failed\n");
+ printf ("\t ([%x,%x,%x,%x], [%x,%x,%x,%x], -> [%llx, %llx])\n", s1.a[0],
+ s1.a[1], s1.a[2], s1.a[3], s2.a[0], s2.a[1], s2.a[2], s2.a[3],
+ u.a[0], u.a[1]);
+ printf ("\t expect [%llx, %llx]\n", e[0], e[1]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psadbw_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{ /* Non-inlined wrapper: returns _mm_sad_epu8 (s1, s2).  */
+ return _mm_sad_epu8 (s1, s2);
+}
+/* Compare test () on fixed inputs with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_ub s1, s2;
+ union128i_w u;
+ short e[8] = { 0 };
+ unsigned char tmp[16];
+ int i;
+ /* Fixed operands: an ascending and a descending byte sequence.  */
+ s1.x = _mm_set_epi8 (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
+ s2.x = _mm_set_epi8 (16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+ u.x = test (s1.x, s2.x);
+ /* Absolute difference of every byte pair.  */
+ for (i = 0; i < 16; i++)
+ tmp [i] = __builtin_abs (s1.a[i] - s2.a[i]);
+ /* Bytes 0..7 sum into 16-bit lane 0.  */
+ for (i = 0; i < 8; i++)
+ e[0] += tmp[i];
+ /* Bytes 8..15 sum into 16-bit lane 4; the other lanes stay zero.  */
+ for (i = 8; i < 16; i++)
+ e[4] += tmp[i];
+
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_w (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_psadbw_1; check_union128i_w failed\n");
+ printf (
+ "\tsad\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
+ s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
+ s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
+ s1.a[14], s1.a[15]);
+ printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
+ s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5], s2.a[6],
+ s2.a[7], s2.a[8], s2.a[9], s2.a[10], s2.a[11], s2.a[12], s2.a[13],
+ s2.a[14], s2.a[15]);
+ printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2],
+ u.a[3], u.a[4], u.a[5], u.a[6], u.a[7]);
+ printf ("\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", e[0], e[1], e[2], e[3],
+ e[4], e[5], e[6], e[7]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pshufd_1
+#endif
+
+#define N 0xec
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_shuffle_epi32 (s1, N).  */
+ return _mm_shuffle_epi32 (s1, N);
+}
+/* Compare test () on a fixed input with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_d u, s1;
+ int e[4] = { 0 };
+ int i;
+ /* Fixed operand.  */
+ s1.x = _mm_set_epi32 (16,15,14,13);
+ u.x = test (s1.x);
+ /* Scalar model: lane i is the source lane named by 2-bit field i of N.  */
+ for (i = 0; i < 4; i++)
+ e[i] = s1.a[((N & (0x3<<(2*i)))>>(2*i))];
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_d(u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_pshufd_1; check_union128i_d failed\n");
+ printf ("\t ([%x,%x,%x,%x]) -> [%x,%x,%x,%x]\n", s1.a[0], s1.a[1],
+ s1.a[2], s1.a[3], u.a[0], u.a[1], u.a[2], u.a[3]);
+ printf ("\t expect [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pshufhw_1
+#endif
+
+#define N 0xec
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_shufflehi_epi16 (s1, N).  */
+ return _mm_shufflehi_epi16 (s1, N);
+}
+/* Compare test () on a fixed input with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_q s1;
+ union128i_w u;
+ short e[8] = { 0 };
+ int i;
+ int m1[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 };
+ int m2[4];
+ /* Fixed operand, set as two 64-bit halves.  */
+ s1.x = _mm_set_epi64x (0xabcde,0xef58a234);
+ u.x = test (s1.x);
+ /* Lanes 0..3: the 16-bit pieces of s1.a[0], in their original order.  */
+ for (i = 0; i < 4; i++)
+ e[i] = (s1.a[0]>>(16 * i)) & 0xffff;
+ /* m2[i] is 2-bit field i of N, isolated with the mask m1[i].  */
+ for (i = 0; i < 4; i++)
+ m2[i] = (N & m1[i])>>(2*i);
+ /* Lanes 4..7: 16-bit piece m2[i] of s1.a[1].  */
+ for (i = 0; i < 4; i++)
+ e[i+4] = (s1.a[1] >> (16 * m2[i])) & 0xffff;
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_w(u, e))
+#if DEBUG
+ {
+ union128i_w s;
+ s.x = s1.x;
+ printf ("sse2_test_pshufhw_1; check_union128i_w failed\n");
+ printf ("\t ([%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx])\n", s.a[0], s.a[1],
+ s.a[2], s.a[3], s.a[4], s.a[5], s.a[6], s.a[7]);
+ printf ("\t\t -> [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n", u.a[0], u.a[1],
+ u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7]);
+ printf ("\t expect [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n", e[0], e[1],
+ e[2], e[3], e[4], e[5], e[6], e[7]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pshuflw_1
+#endif
+
+#define N 0xec
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_shufflelo_epi16 (s1, N).  */
+ return _mm_shufflelo_epi16 (s1, N);
+}
+/* Compare test () on a fixed input with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_q s1;
+ union128i_w u;
+ short e[8] = { 0 };
+ int i;
+ int m1[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 };
+ int m2[4];
+ /* Fixed operand, set as two 64-bit halves.  */
+ s1.x = _mm_set_epi64x (0xabcde,0xef58a234);
+ u.x = test (s1.x);
+ /* Lanes 4..7: the 16-bit pieces of s1.a[1], in their original order.  */
+ for (i = 0; i < 4; i++)
+ e[i+4] = (s1.a[1]>>(16 * i)) & 0xffff;
+ /* m2[i] is 2-bit field i of N, isolated with the mask m1[i].  */
+ for (i = 0; i < 4; i++)
+ m2[i] = (N & m1[i])>>(2*i);
+ /* Lanes 0..3: 16-bit piece m2[i] of s1.a[0].  */
+ for (i = 0; i < 4; i++)
+ e[i] = (s1.a[0] >> (16 * m2[i])) & 0xffff;
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_w(u, e))
+#if DEBUG
+ {
+ union128i_w s;
+ s.x = s1.x;
+ printf ("sse2_test_pshuflw_1; check_union128i_w failed\n");
+ printf ("\t ([%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx])\n", s.a[0], s.a[1],
+ s.a[2], s.a[3], s.a[4], s.a[5], s.a[6], s.a[7]);
+ printf ("\t\t -> [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n", u.a[0], u.a[1],
+ u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7]);
+ printf ("\t expect [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n", e[0], e[1],
+ e[2], e[3], e[4], e[5], e[6], e[7]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pslld_1
+#endif
+
+#define N 0xf
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_slli_epi32 (s1, N).  */
+ return _mm_slli_epi32 (s1, N);
+}
+/* Compare test () on a fixed input with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_d u, s;
+ int e[4] = {0};
+ int i;
+ /* Fixed operand with one negative lane.  */
+ s.x = _mm_set_epi32 (1, -2, 3, 4);
+
+ u.x = test (s.x);
+ /* Scalar model: lane << N when N < 32; otherwise e stays all zero.  */
+ if (N < 32)
+ for (i = 0; i < 4; i++)
+ e[i] = s.a[i] << N;
+
+ if (check_union128i_d (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pslld_2
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i c)
+{ /* Non-inlined wrapper: returns _mm_sll_epi32 (s1, c).  */
+ return _mm_sll_epi32 (s1, c);
+}
+/* Compare test () on fixed inputs with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_d u, s;
+ union128i_q c;
+ int e[4] = { 0 };
+ int i;
+ /* Fixed operand and a shift-count vector set as two 64-bit values.  */
+ s.x = _mm_set_epi32 (2, -3, 0x7000, 0x9000);
+ c.x = _mm_set_epi64x (12, 23);
+
+ __asm("" : "+v"(s.x), "+v"(c.x)); /* Empty asm, inputs as "+v" operands.  */
+ u.x = test (s.x, c.x);
+ /* Scalar model: lane << c.a[0] when that count is below 32, else zero.  */
+ if (c.a[0] < 32)
+ for (i = 0; i < 4; i++)
+ e[i] = s.a[i] << c.a[0];
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_d (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_pslld_2; check_union128i_d failed\n");
+ printf ("\tsll\t([%x,%x,%x,%x], [%llx,%llx]\n", s.a[0], s.a[1], s.a[2],
+ s.a[3], c.a[0], c.a[1]);
+ printf ("\t ->\t [%x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2], u.a[3]);
+ printf ("\texpect\t [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pslldq_1
+#endif
+
+#define N 0x5
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_slli_si128 (s1, N).  */
+ return _mm_slli_si128 (s1, N);
+}
+/* Compare test () on a fixed input with a scalar model.  */
+static void
+TEST (void)
+{
+ union128i_b u, s;
+ char src[16] =
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+ char e[16] =
+ { 0 };
+ int i;
+ /* Load the 16 source bytes with _mm_loadu_si128.  */
+ s.x = _mm_loadu_si128 ((__m128i *) src);
+
+ u.x = test (s.x);
+ /* Scalar model: source byte i lands at byte i + N; bytes 0..N-1 stay 0.  */
+ for (i = 0; i < 16 - N; i++)
+ e[i + N] = src[i];
+ /* With DEBUG nonzero a mismatch is printed instead of aborting.  */
+ if (check_union128i_b (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_pslldq_1; check_union128i_b failed\n");
+
+ printf ("\t s ([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
+ s.a[0], s.a[1], s.a[2], s.a[3], s.a[4], s.a[5], s.a[6], s.a[7],
+ s.a[8], s.a[9], s.a[10], s.a[11], s.a[12], s.a[13], s.a[14],
+ s.a[15]);
+ printf (
+ "\t u ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+ u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
+ u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14], u.a[15]);
+ printf (
+ "\t expect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+ e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
+ e[11], e[12], e[13], e[14], e[15]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psllq_1
+#endif
+
+#define N 60
+
+#include <emmintrin.h>
+
+#ifdef _ARCH_PWR8
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_slli_epi64 (s1, N).  */
+ return _mm_slli_epi64 (s1, N);
+}
+#endif
+/* Check a 64-bit left shift by N; empty unless _ARCH_PWR8 is defined.  */
+static void
+TEST (void)
+{
+#ifdef _ARCH_PWR8
+ union128i_q u, s;
+ long long e[2] = {0};
+ int i;
+ /* Fixed operand.  */
+ s.x = _mm_set_epi64x (-1, 0xf);
+
+ u.x = test (s.x);
+ /* Scalar model: lane << N when N < 64; otherwise e stays all zero.  */
+ if (N < 64)
+ for (i = 0; i < 2; i++)
+ e[i] = s.a[i] << N;
+
+ if (check_union128i_q (u, e))
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psllq_2
+#endif
+
+#include <emmintrin.h>
+
+#ifdef _ARCH_PWR8
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i c)
+{ /* Non-inlined wrapper: returns _mm_sll_epi64 (s1, c).  */
+ return _mm_sll_epi64 (s1, c);
+}
+#endif
+/* Check a 64-bit left shift by a vector count; empty without _ARCH_PWR8.  */
+static void
+TEST (void)
+{
+#ifdef _ARCH_PWR8
+ union128i_q u, s, c;
+ long long e[2] = {0};
+ int i;
+ /* Fixed operand and shift-count vector.  */
+ s.x = _mm_set_epi64x (-1, 0xf);
+ c.x = _mm_set_epi64x (60,50);
+
+ __asm("" : "+v"(s.x), "+v"(c.x)); /* Empty asm, inputs as "+v" operands.  */
+ u.x = test (s.x, c.x);
+ /* Scalar model: lane << c.a[0] when that count is below 64, else zero.  */
+ if (c.a[0] < 64)
+ for (i = 0; i < 2; i++)
+ e[i] = s.a[i] << c.a[0];
+
+ if (check_union128i_q (u, e))
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psllw_1
+#endif
+
+#define N 0xb
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{ /* Non-inlined wrapper: returns _mm_slli_epi16 (s1, N).  */
+ return _mm_slli_epi16 (s1, N);
+}
+/* Compare test () on a fixed input with a scalar model; abort on mismatch.  */
+static void
+TEST (void)
+{
+ union128i_w u, s;
+ short e[8] = {0};
+ int i;
+ /* Fixed operand.  */
+ s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000);
+
+ u.x = test (s.x);
+ /* Scalar model: lane << N when N < 16; otherwise e stays all zero.  */
+ if (N < 16)
+ for (i = 0; i < 8; i++)
+ e[i] = s.a[i] << N;
+
+ if (check_union128i_w (u, e))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psllw_2
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sll_epi16 (s1, c); the shift count is
+   supplied in the vector C.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i c)
+{
+ return _mm_sll_epi16 (s1, c);
+}
+
+/* Run-time check of _mm_sll_epi16 (through test) against a scalar
+   reference.  Each 16-bit element of S is shifted left by the count in
+   c.a[0] and truncated to short; the expected values stay zero when the
+   count is 16 or more.  The reference shift goes through unsigned short:
+   0x9000 does not fit a signed short (assuming the .a elements are
+   signed like E), and left-shifting a negative value is undefined in
+   ISO C.  */
+static void
+TEST (void)
+{
+  union128i_w u, s;
+  union128i_q c;
+  short e[8] = {0};
+  unsigned short tmp;
+  int i;
+
+  s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000);
+  c.x = _mm_set_epi64x (12, 13);
+
+  /* Empty asm naming both vectors as read-write "v" operands; presumably
+     this hides their values from the optimizer (TODO confirm).  */
+  __asm("" : "+v"(s.x), "+v"(c.x));
+  u.x = test (s.x, c.x);
+
+  if (c.a[0] < 16)
+    for (i = 0; i < 8; i++)
+      {
+        /* Shift the unsigned bit pattern; the store truncates to 16 bits.  */
+        tmp = s.a[i];
+        e[i] = tmp << c.a[0];
+      }
+
+  if (check_union128i_w (u, e))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrad_1
+#endif
+
+#define N 0xf
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_srai_epi32 (s1, N), with N being the
+   shift count #defined earlier in this test.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{
+ return _mm_srai_epi32 (s1, N);
+}
+
+/* Verify _mm_srai_epi32 with the compile-time count N: every int element
+   of the result must equal the matching input element shifted with the
+   scalar >> operator.  The expected array is left all-zero if N >= 32.  */
+static void
+TEST (void)
+{
+  union128i_d input, result;
+  int expected[4] = {0};
+  int lane;
+
+  input.x = _mm_set_epi32 (1, -2, 3, 4);
+  result.x = test (input.x);
+
+  if (N < 32)
+    {
+      lane = 0;
+      while (lane < 4)
+        {
+          expected[lane] = input.a[lane] >> N;
+          lane++;
+        }
+    }
+
+  if (check_union128i_d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrad_2
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sra_epi32 (s1, count); the shift count
+   is supplied in the vector COUNT.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i count)
+{
+ return _mm_sra_epi32 (s1, count);
+}
+
+/* Verify _mm_sra_epi32: every int element of the result must equal the
+   matching input element shifted right (scalar >>) by the count stored
+   in element 0 of the count vector.  The expected array is left
+   all-zero if that count is 32 or more.  */
+static void
+TEST (void)
+{
+  union128i_d input, result;
+  union128i_q count;
+  int expected[4] = {0};
+  int lane;
+
+  input.x = _mm_set_epi32 (1, -2, 3, 4);
+  count.x = _mm_set_epi64x (16, 29);
+
+  __asm("" : "+v"(input.x), "+v"(count.x));
+  result.x = test (input.x, count.x);
+
+  if (count.a[0] < 32)
+    {
+      for (lane = 3; lane >= 0; lane--)
+        expected[lane] = input.a[lane] >> count.a[0];
+    }
+
+  if (check_union128i_d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psraw_1
+#endif
+
+#define N 0xb
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_srai_epi16 (s1, N), with N being the
+   shift count #defined earlier in this test.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{
+ return _mm_srai_epi16 (s1, N);
+}
+
+/* Verify _mm_srai_epi16 with the compile-time count N: every short
+   element of the result must equal the matching input element shifted
+   with the scalar >> operator.  The expected array is left all-zero if
+   N >= 16.  */
+static void
+TEST (void)
+{
+  union128i_w input, result;
+  short expected[8] = {0};
+  int lane;
+
+  input.x = _mm_set_epi16 (1, -2, 3, 4, -5, 6, 0x7000, 0x9000);
+  result.x = test (input.x);
+
+  if (N < 16)
+    {
+      lane = 0;
+      while (lane < 8)
+        {
+          expected[lane] = input.a[lane] >> N;
+          lane++;
+        }
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psraw_2
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sra_epi16 (s1, c); the shift count is
+   supplied in the vector C.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i c)
+{
+ return _mm_sra_epi16 (s1, c);
+}
+
+/* Verify _mm_sra_epi16: every short element of the result must equal the
+   matching input element shifted right (scalar >>) by the count stored
+   in element 0 of the count vector.  The expected array is left
+   all-zero if that count is 16 or more.  */
+static void
+TEST (void)
+{
+  union128i_w input, result;
+  union128i_q count;
+  short expected[8] = {0};
+  int lane;
+
+  input.x = _mm_set_epi16 (1, -2, 3, 4, 5, 6, -0x7000, 0x9000);
+  count.x = _mm_set_epi64x (12, 13);
+
+  __asm("" : "+v"(input.x), "+v"(count.x));
+  result.x = test (input.x, count.x);
+
+  if (count.a[0] < 16)
+    {
+      for (lane = 7; lane >= 0; lane--)
+        expected[lane] = input.a[lane] >> count.a[0];
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrld_1
+#endif
+
+#define N 0xf
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_srli_epi32 (s1, N), with N being the
+   shift count #defined earlier in this test.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{
+ return _mm_srli_epi32 (s1, N);
+}
+
+/* Verify _mm_srli_epi32 with the compile-time count N against a scalar
+   reference.  The expected array stays all-zero if N >= 32.  On a
+   mismatch the test aborts, unless DEBUG is nonzero, in which case the
+   inputs, result and expected values are printed and no abort occurs.  */
+static void
+TEST (void)
+{
+ union128i_d u, s;
+ int e[4] = { 0 };
+ unsigned int tmp;
+ int i;
+
+ s.x = _mm_set_epi32 (1, -2, 3, 4);
+
+ u.x = test (s.x);
+
+ if (N < 32)
+ for (i = 0; i < 4; i++)
+ {
+ /* Go through an unsigned temporary so that >> shifts in zero bits.  */
+ tmp = s.a[i];
+ e[i] = tmp >> N;
+ }
+
+ /* The body of this if is chosen by the preprocessor below.  */
+ if (check_union128i_d (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_psrld_1; check_union128i_d failed\n");
+ printf ("\tsrl\t([%x,%x,%x,%x],%d\n", s.a[0], s.a[1], s.a[2], s.a[3], N);
+ printf ("\t ->\t [%x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2], u.a[3]);
+ printf ("\texpect\t [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrld_2
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_srl_epi32 (s1, c); the shift count is
+   supplied in the vector C.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i c)
+{
+ return _mm_srl_epi32 (s1, c);
+}
+
+/* Verify _mm_srl_epi32 against a scalar reference that shifts each int
+   element right by the count in c.a[0].  The expected array stays
+   all-zero if that count is 32 or more.  On a mismatch the test aborts,
+   unless DEBUG is nonzero, in which case the inputs, result and expected
+   values are printed and no abort occurs.  */
+static void
+TEST (void)
+{
+ union128i_d u, s;
+ union128i_q c;
+ int e[4] = { 0 };
+ unsigned int tmp;
+ int i;
+
+ s.x = _mm_set_epi32 (2, -3, 0x7000, 0x9000);
+ c.x = _mm_set_epi64x (12, 23);
+
+ /* Empty asm naming both vectors as read-write "v" operands; presumably
+    this hides their values from the optimizer (TODO confirm).  */
+ __asm("" : "+v"(s.x), "+v"(c.x));
+ u.x = test (s.x, c.x);
+
+ if (c.a[0] < 32)
+ for (i = 0; i < 4; i++)
+ {
+ /* Go through an unsigned temporary so that >> shifts in zero bits.  */
+ tmp = s.a[i];
+ e[i] = tmp >> c.a[0];
+ }
+
+ /* The body of this if is chosen by the preprocessor below.  */
+ if (check_union128i_d (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_psrld_2; check_union128i_d failed\n");
+ printf ("\tsrld\t([%x,%x,%x,%x], [%llx,%llx]\n", s.a[0], s.a[1], s.a[2],
+ s.a[3], c.a[0], c.a[1]);
+ printf ("\t ->\t [%x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2], u.a[3]);
+ printf ("\texpect\t [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrldq_1
+#endif
+
+#define N 0x5
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_srli_si128 (s1, N), with N being the
+   count #defined earlier in this test.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{
+ return _mm_srli_si128 (s1, N);
+}
+
+/* Verify _mm_srli_si128 with the compile-time count N.  The reference
+   copies src[N..15] to e[0..15-N] and leaves the remaining elements of E
+   zero.  On a mismatch the test aborts, unless DEBUG is nonzero, in
+   which case the input, result and expected bytes are printed and no
+   abort occurs.  */
+static void
+TEST (void)
+{
+ union128i_b u, s;
+ char src[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 };
+ char e[16] = { 0 };
+ int i;
+
+ s.x = _mm_loadu_si128 ((__m128i *)src);
+
+ u.x = test (s.x);
+
+ /* Element i of the expected result is source element i+N.  */
+ for (i = 0; i < 16-N; i++)
+ e[i] = src[i+N];
+
+ /* The body of this if is chosen by the preprocessor below.  */
+ if (check_union128i_b (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_psrldq_1; check_union128i_b failed\n");
+ printf ("\tsrl\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
+ s.a[0], s.a[1], s.a[2], s.a[3], s.a[4], s.a[5], s.a[6], s.a[7],
+ s.a[8], s.a[9], s.a[10], s.a[11], s.a[12], s.a[13], s.a[14],
+ s.a[15]);
+ printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+ u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
+ u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
+ u.a[15]);
+ printf (
+ "\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+ e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
+ e[11], e[12], e[13], e[14], e[15]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrlq_1
+#endif
+
+#define N 60
+
+#include <emmintrin.h>
+
+#ifdef _ARCH_PWR8
+/* Non-inlined helper: returns _mm_srli_epi64 (s1, N), with N being the
+   shift count #defined earlier in this test.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{
+ return _mm_srli_epi64 (s1, N);
+}
+#endif
+
+/* Verify _mm_srli_epi64 with the compile-time count N: each 64-bit
+   element of the result must equal the input element, viewed as
+   unsigned long long, shifted right by N.  The expected array is left
+   all-zero if N >= 64.  The body is compiled only when _ARCH_PWR8 is
+   defined.  */
+static void
+TEST (void)
+{
+#ifdef _ARCH_PWR8
+  union128i_q input, result;
+  long long expected[2] = {0};
+  int lane;
+
+  input.x = _mm_set_epi64x (-1, 0xf);
+  result.x = test (input.x);
+
+  if (N < 64)
+    {
+      lane = 0;
+      while (lane < 2)
+        {
+          /* Unsigned view so that >> shifts in zero bits.  */
+          expected[lane] = (unsigned long long) input.a[lane] >> N;
+          lane++;
+        }
+    }
+
+  if (check_union128i_q (result, expected) != 0)
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrlq_2
+#endif
+
+#include <emmintrin.h>
+
+#ifdef _ARCH_PWR8
+/* Non-inlined helper: returns _mm_srl_epi64 (s1, c); the shift count is
+   supplied in the vector C.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i c)
+{
+ return _mm_srl_epi64 (s1, c);
+}
+#endif
+
+/* Verify _mm_srl_epi64: each 64-bit element of the result must equal the
+   input element, viewed as unsigned long long, shifted right by the
+   count stored in element 0 of the count vector.  The expected array is
+   left all-zero if that count is 64 or more.  The body is compiled only
+   when _ARCH_PWR8 is defined.  */
+static void
+TEST (void)
+{
+#ifdef _ARCH_PWR8
+  union128i_q input, result, count;
+  long long expected[2] = {0};
+  int lane;
+
+  input.x = _mm_set_epi64x (-1, 0xf);
+  count.x = _mm_set_epi64x (60,50);
+
+  __asm("" : "+v"(input.x), "+v"(count.x));
+  result.x = test (input.x, count.x);
+
+  if (count.a[0] < 64)
+    {
+      for (lane = 1; lane >= 0; lane--)
+        /* Unsigned view so that >> shifts in zero bits.  */
+        expected[lane] = (unsigned long long) input.a[lane] >> count.a[0];
+    }
+
+  if (check_union128i_q (result, expected) != 0)
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrlw_1
+#endif
+
+#define N 0xb
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_srli_epi16 (s1, N), with N being the
+   shift count #defined earlier in this test.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1)
+{
+ return _mm_srli_epi16 (s1, N);
+}
+
+/* Verify _mm_srli_epi16 with the compile-time count N: each 16-bit
+   element of the result must equal the input element, converted to
+   unsigned short, shifted right by N.  The expected array is left
+   all-zero if N >= 16.  */
+static void
+TEST (void)
+{
+  union128i_w input, result;
+  short expected[8] = {0};
+  int lane;
+
+  input.x = _mm_set_epi16 (1, -2, 3, -4, 5, 6, 0x7000, 0x9000);
+  result.x = test (input.x);
+
+  if (N < 16)
+    {
+      lane = 0;
+      while (lane < 8)
+        {
+          /* Unsigned view so that >> shifts in zero bits.  */
+          expected[lane] = (unsigned short) input.a[lane] >> N;
+          lane++;
+        }
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psrlw_2
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_srl_epi16 (s1, c); the shift count is
+   supplied in the vector C.  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i c)
+{
+ return _mm_srl_epi16 (s1, c);
+}
+
+/* Verify _mm_srl_epi16: each 16-bit element of the result must equal the
+   input element, converted to unsigned short, shifted right by the count
+   stored in element 0 of the count vector.  The expected array is left
+   all-zero if that count is 16 or more.  */
+static void
+TEST (void)
+{
+  union128i_w input, result;
+  union128i_q count;
+  short expected[8] = {0};
+  int lane;
+
+  input.x = _mm_set_epi16 (1, -2, 3, 4, 5, 6, -0x7000, 0x9000);
+  count.x = _mm_set_epi64x (12, 13);
+
+  __asm("" : "+v"(input.x), "+v"(count.x));
+  result.x = test (input.x, count.x);
+
+  if (count.a[0] < 16)
+    {
+      for (lane = 7; lane >= 0; lane--)
+        /* Unsigned view so that >> shifts in zero bits.  */
+        expected[lane] = (unsigned short) input.a[lane] >> count.a[0];
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sub_epi8 (s1, s2).  The empty asm first
+   names both arguments as read-write "v" operands; presumably this keeps
+   the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_sub_epi8 (s1, s2);
+}
+
+/* Verify _mm_sub_epi8: each of the 16 char elements of the result must
+   equal the scalar difference of the matching input elements, as stored
+   back into a char.  */
+static void
+TEST (void)
+{
+  union128i_b lhs, rhs, result;
+  char expected[16];
+  int k;
+
+  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  result.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 16)
+    {
+      expected[k] = lhs.a[k] - rhs.a[k];
+      k++;
+    }
+
+  if (check_union128i_b (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sub_epi32 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_sub_epi32 (s1, s2);
+}
+
+/* Verify _mm_sub_epi32: each of the 4 int elements of the result must
+   equal the scalar difference of the matching input elements.  */
+static void
+TEST (void)
+{
+  union128i_d lhs, rhs, result;
+  int expected[4];
+  int k;
+
+  lhs.x = _mm_set_epi32 (30,90,-80,-40);
+  rhs.x = _mm_set_epi32 (76, -100, -34, -78);
+  result.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 4)
+    {
+      expected[k] = lhs.a[k] - rhs.a[k];
+      k++;
+    }
+
+  if (check_union128i_d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sub_epi64 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_sub_epi64 (s1, s2);
+}
+
+/* Verify _mm_sub_epi64: both long long elements of the result must equal
+   the scalar difference of the matching input elements.  */
+static void
+TEST (void)
+{
+  union128i_q lhs, rhs, result;
+  long long expected[2];
+  int k;
+
+  lhs.x = _mm_set_epi64x (90,-80);
+  rhs.x = _mm_set_epi64x (76, -100);
+  result.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 2)
+    {
+      expected[k] = lhs.a[k] - rhs.a[k];
+      k++;
+    }
+
+  if (check_union128i_q (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubsb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_subs_epi8 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_subs_epi8 (s1, s2);
+}
+
+/* Verify _mm_subs_epi8: each of the 16 elements of the result must equal
+   the difference of the matching input elements, taken as signed char
+   and clamped to the range [-128, 127].  */
+static void
+TEST (void)
+{
+  union128i_b lhs, rhs, result;
+  char expected[16];
+  int k, diff;
+
+  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  result.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 16)
+    {
+      diff = (signed char) lhs.a[k] - (signed char) rhs.a[k];
+      /* Saturate to the signed 8-bit range.  */
+      expected[k] = diff > 127 ? 127 : (diff < -128 ? -128 : diff);
+      k++;
+    }
+
+  if (check_union128i_b (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubsw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_subs_epi16 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_subs_epi16 (s1, s2);
+}
+
+/* Verify _mm_subs_epi16: each of the 8 short elements of the result must
+   equal the difference of the matching input elements clamped to the
+   range [-32768, 32767].  */
+static void
+TEST (void)
+{
+  union128i_w lhs, rhs, result;
+  short expected[8];
+  int k, diff;
+
+  lhs.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
+  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
+  result.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 8)
+    {
+      diff = lhs.a[k] - rhs.a[k];
+      /* Saturate to the signed 16-bit range.  */
+      expected[k] = diff > 32767 ? 32767 : (diff < -32768 ? -32768 : diff);
+      k++;
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubusb_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_subs_epu8 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_subs_epu8 (s1, s2);
+}
+
+/* Verify _mm_subs_epu8: each of the 16 elements of the result must equal
+   the difference of the matching input elements, taken as unsigned char,
+   with negative differences clamped to 0.  A difference of two unsigned
+   chars can never exceed 255, so no upper clamp is needed.  On a
+   mismatch the test aborts, unless DEBUG is nonzero, in which case the
+   operands, result and expected values are printed and no abort
+   occurs.  */
+static void
+TEST (void)
+{
+  union128i_b u, s1, s2;
+  char e[16] = { 0 };
+  int i, tmp;
+
+  s1.x = _mm_set_epi8 (30, 2, 3, 4, 10, 20, 30, 90, 80, 40, 100, 15, 98, 25, 98, 7);
+  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 16; i++)
+    {
+      tmp = (unsigned char)s1.a[i] - (unsigned char)s2.a[i];
+
+      /* Unsigned saturation: a subtraction can only underflow.  */
+      if (tmp < 0)
+        tmp = 0;
+
+      e[i] = tmp;
+    }
+
+  /* The body of this if is chosen by the preprocessor below.  */
+  if (check_union128i_b (u, e))
+#if DEBUG
+    {
+      printf ("sse2_test_psubusb_1; check_union128i_b failed\n");
+      printf (
+        "\tsubs\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
+        s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
+        s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
+        s1.a[14], s1.a[15]);
+      printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
+        s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5], s2.a[6],
+        s2.a[7], s2.a[8], s2.a[9], s2.a[10], s2.a[11], s2.a[12], s2.a[13],
+        s2.a[14], s2.a[15]);
+      printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+        u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
+        u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
+        u.a[15]);
+      printf (
+        "\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
+        e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
+        e[11], e[12], e[13], e[14], e[15]);
+    }
+#else
+    abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubusw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_subs_epu16 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_subs_epu16 (s1, s2);
+}
+
+/* Verify _mm_subs_epu16: each of the 8 elements of the result must equal
+   the difference of the matching input elements, taken as unsigned
+   short, with negative differences clamped to 0.  A difference of two
+   unsigned shorts can never exceed 65535, so no upper clamp is needed.  */
+static void
+TEST (void)
+{
+  union128i_w u, s1, s2;
+  short e[8];
+  int i, tmp;
+
+  s1.x = _mm_set_epi16 (10,20,30,90,80,40,100,15);
+  s2.x = _mm_set_epi16 (11, 98, 76, 100, 34, 78, 39, 14);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 8; i++)
+    {
+      tmp = (unsigned short)s1.a[i] - (unsigned short)s2.a[i];
+
+      /* Unsigned saturation: a subtraction can only underflow.  */
+      if (tmp < 0)
+        tmp = 0;
+
+      e[i] = tmp;
+    }
+
+  if (check_union128i_w (u, e))
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_psubw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sub_epi16 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_sub_epi16 (s1, s2);
+}
+
+/* Verify _mm_sub_epi16: each of the 8 short elements of the result must
+   equal the scalar difference of the matching input elements, as stored
+   back into a short.  */
+static void
+TEST (void)
+{
+  union128i_w lhs, rhs, result;
+  short expected[8];
+  int k;
+
+  lhs.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
+  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
+  result.x = test (lhs.x, rhs.x);
+
+  k = 0;
+  while (k < 8)
+    {
+      expected[k] = lhs.a[k] - rhs.a[k];
+      k++;
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpckhbw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpackhi_epi8 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpackhi_epi8 (s1, s2);
+}
+
+/* Verify _mm_unpackhi_epi8: the result must interleave elements 8..15 of
+   the two inputs, first input in the even positions and second input in
+   the odd positions.  */
+static void
+TEST (void)
+{
+  union128i_b lhs, rhs, result;
+  char expected[16];
+  int out;
+
+  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  result.x = test (lhs.x, rhs.x);
+
+  /* Walk the output two slots at a time; out / 2 is the pair number.  */
+  for (out = 0; out < 16; out += 2)
+    {
+      expected[out] = lhs.a[8 + out / 2];
+      expected[out + 1] = rhs.a[8 + out / 2];
+    }
+
+  if (check_union128i_b (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpckhdq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpackhi_epi32 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpackhi_epi32 (s1, s2);
+}
+
+/* Verify _mm_unpackhi_epi32: the result must interleave elements 2..3 of
+   the two inputs, first input in the even positions and second input in
+   the odd positions.  */
+static void
+TEST (void)
+{
+  union128i_d lhs, rhs, result;
+  int expected[4];
+  int out;
+
+  lhs.x = _mm_set_epi32 (10,20,-80,-40);
+  rhs.x = _mm_set_epi32 (11, -34, -78, -39);
+  result.x = test (lhs.x, rhs.x);
+
+  /* Walk the output two slots at a time; out / 2 is the pair number.  */
+  for (out = 0; out < 4; out += 2)
+    {
+      expected[out] = lhs.a[2 + out / 2];
+      expected[out + 1] = rhs.a[2 + out / 2];
+    }
+
+  if (check_union128i_d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpckhqdq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpackhi_epi64 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpackhi_epi64 (s1, s2);
+}
+
+/* Verify _mm_unpackhi_epi64: the result must hold element 1 of the first
+   input followed by element 1 of the second input.  */
+static void
+TEST (void)
+{
+  union128i_q lhs, rhs, result;
+  long long expected[2];
+
+  lhs.x = _mm_set_epi64x (10,-40);
+  rhs.x = _mm_set_epi64x (1134, -7839);
+  result.x = test (lhs.x, rhs.x);
+
+  expected[1] = rhs.a[1];
+  expected[0] = lhs.a[1];
+
+  if (check_union128i_q (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpckhwd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpackhi_epi16 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpackhi_epi16 (s1, s2);
+}
+
+/* Verify _mm_unpackhi_epi16: the result must interleave elements 4..7 of
+   the two inputs, first input in the even positions and second input in
+   the odd positions.  */
+static void
+TEST (void)
+{
+  union128i_w lhs, rhs, result;
+  short expected[8];
+  int out;
+
+  lhs.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
+  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
+  result.x = test (lhs.x, rhs.x);
+
+  /* Walk the output two slots at a time; out / 2 is the pair number.  */
+  for (out = 0; out < 8; out += 2)
+    {
+      expected[out] = lhs.a[4 + out / 2];
+      expected[out + 1] = rhs.a[4 + out / 2];
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpcklbw_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpacklo_epi8 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpacklo_epi8 (s1, s2);
+}
+
+/* Verify _mm_unpacklo_epi8: the result must interleave elements 0..7 of
+   the two inputs, first input in the even positions and second input in
+   the odd positions.  */
+static void
+TEST (void)
+{
+  union128i_b lhs, rhs, result;
+  char expected[16];
+  int out;
+
+  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  result.x = test (lhs.x, rhs.x);
+
+  /* Walk the output two slots at a time; out / 2 is the pair number.  */
+  for (out = 0; out < 16; out += 2)
+    {
+      expected[out] = lhs.a[out / 2];
+      expected[out + 1] = rhs.a[out / 2];
+    }
+
+  if (check_union128i_b (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpckldq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpacklo_epi32 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpacklo_epi32 (s1, s2);
+}
+
+/* Verify _mm_unpacklo_epi32: the result must interleave elements 0..1 of
+   the two inputs, first input in the even positions and second input in
+   the odd positions.  */
+static void
+TEST (void)
+{
+  union128i_d lhs, rhs, result;
+  int expected[4];
+  int out;
+
+  lhs.x = _mm_set_epi32 (10,20,-80,-40);
+  rhs.x = _mm_set_epi32 (11, -34, -78, -39);
+  result.x = test (lhs.x, rhs.x);
+
+  /* Walk the output two slots at a time; out / 2 is the pair number.  */
+  for (out = 0; out < 4; out += 2)
+    {
+      expected[out] = lhs.a[out / 2];
+      expected[out + 1] = rhs.a[out / 2];
+    }
+
+  if (check_union128i_d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpcklqdq_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpacklo_epi64 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpacklo_epi64 (s1, s2);
+}
+
+/* Verify _mm_unpacklo_epi64: the result must hold element 0 of the first
+   input followed by element 0 of the second input.  */
+static void
+TEST (void)
+{
+  union128i_q lhs, rhs, result;
+  long long expected[2];
+
+  lhs.x = _mm_set_epi64x (10,-40);
+  rhs.x = _mm_set_epi64x (1134, -7839);
+  result.x = test (lhs.x, rhs.x);
+
+  expected[1] = rhs.a[0];
+  expected[0] = lhs.a[0];
+
+  if (check_union128i_q (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_punpcklwd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_unpacklo_epi16 (s1, s2).  The empty asm
+   first names both arguments as read-write "v" operands; presumably this
+   keeps the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128i
+__attribute__((noinline, unused))
+test (__m128i s1, __m128i s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_unpacklo_epi16 (s1, s2);
+}
+
+/* Verify _mm_unpacklo_epi16: the result must interleave elements 0..3 of
+   the two inputs, first input in the even positions and second input in
+   the odd positions.  */
+static void
+TEST (void)
+{
+  union128i_w lhs, rhs, result;
+  short expected[8];
+  int out;
+
+  lhs.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
+  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
+  result.x = test (lhs.x, rhs.x);
+
+  /* Walk the output two slots at a time; out / 2 is the pair number.  */
+  for (out = 0; out < 8; out += 2)
+    {
+      expected[out] = lhs.a[out / 2];
+      expected[out + 1] = rhs.a[out / 2];
+    }
+
+  if (check_union128i_w (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_shufpd_1
+#endif
+
+#define N 0xab
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_shuffle_pd (s1, s2, N), with N being
+   the selector #defined earlier in this test.  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ return _mm_shuffle_pd (s1, s2, N);
+}
+
+/* Verify _mm_shuffle_pd with the compile-time selector N: result element
+   0 must be the element of the first input picked by bit 0 of N, and
+   result element 1 the element of the second input picked by bit 1.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs, result;
+  double expected[2] = {0.0};
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (453.345635,54646.464356);
+  result.x = test (lhs.x, rhs.x);
+
+  /* A set selector bit picks element 1, a clear bit picks element 0.  */
+  expected[0] = lhs.a[(N >> 0) & 1];
+  expected[1] = rhs.a[(N >> 1) & 1];
+
+  if (check_union128d(result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_sqrt_pd_1
+#endif
+
+#include <emmintrin.h>
+#include <math.h>
+
+/* Non-inlined helper: returns _mm_sqrt_pd (s1).  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1)
+{
+ return _mm_sqrt_pd (s1);
+}
+
+/* Verify _mm_sqrt_pd against reference values produced one element at a
+   time with _mm_load_sd, _mm_sqrt_sd and _mm_store_sd.  On a mismatch
+   the test aborts, unless DEBUG is nonzero, in which case the input,
+   result and expected values are printed and no abort occurs.  */
+static void
+TEST (void)
+{
+ union128d u, s1;
+ /* Only used as the first operand of _mm_sqrt_sd below.  */
+ __m128d bogus = { 123.0, 456.0 };
+ double e[2];
+ int i;
+
+ s1.x = _mm_set_pd (2134.3343,1234.635654);
+ u.x = test (s1.x);
+
+ for (i = 0; i < 2; i++)
+ {
+ /* Load element i, apply the scalar square root, store into e[i].  */
+ __m128d tmp = _mm_load_sd (&s1.a[i]);
+ tmp = _mm_sqrt_sd (bogus, tmp);
+ _mm_store_sd (&e[i], tmp);
+ }
+
+ /* The body of this if is chosen by the preprocessor below.  */
+ if (check_union128d (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_sqrt_pd_1; check_union128d failed\n");
+ printf ("\t [%f,%f] -> [%f,%f]\n", s1.a[0], s1.a[1], u.a[0], u.a[1]);
+ printf ("\t expect [%f,%f]\n", e[0], e[1]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_subpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sub_pd (s1, s2).  The empty asm first
+   names both arguments as read-write "v" operands; presumably this keeps
+   the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_sub_pd (s1, s2);
+}
+
+/* Verify _mm_sub_pd: both double elements of the result must equal the
+   scalar difference of the matching input elements.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs, result;
+  double expected[2];
+  int k;
+
+  lhs.x = _mm_set_pd (2134.3343,1234.635654);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+  result.x = test (lhs.x, rhs.x);
+
+  for (k = 0; k < 2; k++)
+    expected[k] = lhs.a[k] - rhs.a[k];
+
+  if (check_union128d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_subsd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns _mm_sub_sd (s1, s2).  The empty asm first
+   names both arguments as read-write "v" operands; presumably this keeps
+   the compiler from evaluating the operation at compile time (TODO
+   confirm).  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ __asm("" : "+v"(s1), "+v"(s2));
+ return _mm_sub_sd (s1, s2);
+}
+
+/* Verify _mm_sub_sd: element 0 of the result must equal the scalar
+   difference of the inputs' element 0, and element 1 must equal element
+   1 of the first input.  On a mismatch the test aborts, unless DEBUG is
+   nonzero, in which case the operands, result and expected values are
+   printed and no abort occurs.  */
+static void
+TEST (void)
+{
+ union128d u, s1, s2;
+ double e[2];
+
+ s1.x = _mm_set_pd (2134.3343,1234.635654);
+ s2.x = _mm_set_pd (41124.234,2344.2354);
+ u.x = test (s1.x, s2.x);
+
+ /* Only element 0 is subtracted; element 1 is copied from s1.  */
+ e[0] = s1.a[0] - s2.a[0];
+ e[1] = s1.a[1];
+
+ /* The body of this if is chosen by the preprocessor below.  */
+ if (check_union128d (u, e))
+#if DEBUG
+ {
+ printf ("sse2_test_subsd_1; check_union128d failed\n");
+ printf ("\t [%f,%f] - [%f,%f] -> [%f,%f]\n", s1.a[0], s1.a[1], s2.a[0],
+ s2.a[1], u.a[0], u.a[1]);
+ printf ("\t expect [%f,%f]\n", e[0], e[1]);
+ }
+#else
+ abort ();
+#endif
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_ucomisd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns the int result of _mm_ucomieq_sd (s1, s2).  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ return _mm_ucomieq_sd (s1, s2);
+}
+
+/* Verify _mm_ucomieq_sd: the int returned by test must match the scalar
+   comparison lhs.a[0] == rhs.a[0] of the two inputs.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,2344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  got[0] = test (lhs.x, rhs.x);
+  want[0] = lhs.a[0] == rhs.a[0];
+
+  if (checkVi (got, want, 1) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_ucomisd_2
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns the int result of _mm_ucomilt_sd (s1, s2).  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ return _mm_ucomilt_sd (s1, s2);
+}
+
+/* Verify _mm_ucomilt_sd: the int returned by test must match the scalar
+   comparison lhs.a[0] < rhs.a[0] of the two inputs.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1];
+  int want[1];
+
+  lhs.x = _mm_set_pd (2134.3343,12344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  got[0] = test (lhs.x, rhs.x);
+  want[0] = lhs.a[0] < rhs.a[0];
+
+  if (checkVi (got, want, 1) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_ucomisd_3
+#endif
+
+#include <emmintrin.h>
+
+/* Non-inlined helper: returns the int result of _mm_ucomile_sd (s1, s2).  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+ return _mm_ucomile_sd (s1, s2);
+}
+
+/* Verify _mm_ucomile_sd: the int returned by test must match the scalar
+   comparison lhs.a[0] <= rhs.a[0] of the two inputs.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int got[1] = {0};
+  int want[1] = {0};
+
+  lhs.x = _mm_set_pd (2134.3343,12344.2354);
+  rhs.x = _mm_set_pd (41124.234,2344.2354);
+
+  got[0] = test (lhs.x, rhs.x);
+  want[0] = lhs.a[0] <= rhs.a[0];
+
+  if (checkVi (got, want, 1) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_ucomisd_4
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line (noinline) wrapper that returns the result of
+   _mm_ucomigt_sd applied to S1 and S2.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  const int is_gt = _mm_ucomigt_sd (s1, s2);
+
+  return is_gt;
+}
+
+/* Run test () on two fixed vectors and abort when checkVi returns
+   nonzero for the returned flag versus the scalar `>' comparison of
+   element 0 of each input.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int actual[1];
+  int expected[1];
+
+  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
+  rhs.x = _mm_set_pd (41124.234, 2344.2354);
+
+  /* Reference value computed in plain C from the same operands.  */
+  expected[0] = lhs.a[0] > rhs.a[0];
+  actual[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (actual, expected, 1) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_ucomisd_5
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line (noinline) wrapper that returns the result of
+   _mm_ucomige_sd applied to S1 and S2.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  const int is_ge = _mm_ucomige_sd (s1, s2);
+
+  return is_ge;
+}
+
+/* Run test () on two fixed vectors and abort when checkVi returns
+   nonzero for the returned flag versus the scalar `>=' comparison of
+   element 0 of each input.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int actual[1];
+  int expected[1];
+
+  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
+  rhs.x = _mm_set_pd (41124.234, 2344.2354);
+
+  /* Reference value computed in plain C from the same operands.  */
+  expected[0] = lhs.a[0] >= rhs.a[0];
+  actual[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (actual, expected, 1) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_ucomisd_6
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line (noinline) wrapper that returns the result of
+   _mm_ucomineq_sd applied to S1 and S2.  */
+static int
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  const int is_neq = _mm_ucomineq_sd (s1, s2);
+
+  return is_neq;
+}
+
+/* Run test () on two fixed vectors and abort when checkVi returns
+   nonzero for the returned flag versus the scalar `!=' comparison of
+   element 0 of each input.  */
+static void
+TEST (void)
+{
+  union128d lhs, rhs;
+  int actual[1];
+  int expected[1];
+
+  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
+  rhs.x = _mm_set_pd (41124.234, 2344.2354);
+
+  /* Reference value computed in plain C from the same operands.  */
+  expected[0] = lhs.a[0] != rhs.a[0];
+  actual[0] = test (lhs.x, rhs.x);
+
+  if (checkVi (actual, expected, 1) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_unpckhpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line (noinline) wrapper that returns _mm_unpackhi_pd (S1, S2).  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  __m128d unpacked;
+
+  /* The empty asm names S1 and S2 as read-write ("+v") operands, so the
+     compiler has to treat both as possibly changed before the intrinsic
+     runs -- presumably to keep the inputs from being folded away.  */
+  __asm ("" : "+v" (s1), "+v" (s2));
+  unpacked = _mm_unpackhi_pd (s1, s2);
+
+  return unpacked;
+}
+
+/* Run test () on two fixed vectors and abort when check_union128d
+   returns nonzero for the output versus the pair made of element 1 of
+   the first input followed by element 1 of the second input.  */
+static void
+TEST (void)
+{
+  union128d result, lhs, rhs;
+  double expected[2];
+
+  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
+  rhs.x = _mm_set_pd (41124.234, 2344.2354);
+
+  expected[0] = lhs.a[1];
+  expected[1] = rhs.a[1];
+
+  result.x = test (lhs.x, rhs.x);
+
+  if (check_union128d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_unpcklpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line (noinline) wrapper that returns _mm_unpacklo_pd (S1, S2).  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  __m128d unpacked;
+
+  /* The empty asm names S1 and S2 as read-write ("+v") operands, so the
+     compiler has to treat both as possibly changed before the intrinsic
+     runs -- presumably to keep the inputs from being folded away.  */
+  __asm ("" : "+v" (s1), "+v" (s2));
+  unpacked = _mm_unpacklo_pd (s1, s2);
+
+  return unpacked;
+}
+
+/* Run test () on two fixed vectors and abort when check_union128d
+   returns nonzero for the output versus the pair made of element 0 of
+   the first input followed by element 0 of the second input.  */
+static void
+TEST (void)
+{
+  union128d result, lhs, rhs;
+  double expected[2];
+
+  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
+  rhs.x = _mm_set_pd (41124.234, 2344.2354);
+
+  expected[0] = lhs.a[0];
+  expected[1] = rhs.a[0];
+
+  result.x = test (lhs.x, rhs.x);
+
+  if (check_union128d (result, expected) != 0)
+    abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_xorpd_1
+#endif
+
+#include <emmintrin.h>
+
+/* Out-of-line (noinline) wrapper that returns _mm_xor_pd (S1, S2).  */
+static __m128d
+__attribute__((noinline, unused))
+test (__m128d s1, __m128d s2)
+{
+  const __m128d xored = _mm_xor_pd (s1, s2);
+
+  return xored;
+}
+
+/* Run test () on two fixed vectors and abort when check_union128d
+   returns nonzero for the output versus a reference built by XOR-ing
+   the stored inputs element by element as long long values.  */
+static void
+TEST (void)
+{
+  /* Lets the same 16 bytes be viewed as two doubles or two long longs.  */
+  union
+    {
+      double d[2];
+      long long l[2];
+    } bits1, bits2, expected;
+
+  union128d result, lhs, rhs;
+  int lane;
+
+  lhs.x = _mm_set_pd (11.1321456, 2.287332);
+  rhs.x = _mm_set_pd (3.37768, 4.43222234);
+
+  _mm_storeu_pd (bits1.d, lhs.x);
+  _mm_storeu_pd (bits2.d, rhs.x);
+
+  /* Integer XOR of the stored inputs is the reference result.  */
+  for (lane = 0; lane < 2; ++lane)
+    expected.l[lane] = bits1.l[lane] ^ bits2.l[lane];
+
+  result.x = test (lhs.x, rhs.x);
+
+  if (check_union128d (result, expected.d) != 0)
+    abort ();
+}