+2015-05-19 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+
+ * gcc.target/s390/s390.exp
+ (check_effective_target_vector): New check.
+ * gcc.target/s390/vector/vec-abi-1.c: New test.
+ * gcc.target/s390/vector/vec-abi-2.c: New test.
+ * gcc.target/s390/vector/vec-abi-3.c: New test.
+ * gcc.target/s390/vector/vec-abi-4.c: New test.
+ * gcc.target/s390/vector/vec-abi-align-1.c: New test.
+ * gcc.target/s390/vector/vec-abi-single-1.c: New test.
+ * gcc.target/s390/vector/vec-abi-single-2.c: New test.
+ * gcc.target/s390/vector/vec-abi-struct-1.c: New test.
+ * gcc.target/s390/vector/vec-abi-vararg-1.c: New test.
+ * gcc.target/s390/vector/vec-abi-vararg-2.c: New test.
+ * gcc.target/s390/vector/vec-clobber-1.c: New test.
+ * gcc.target/s390/vector/vec-cmp-1.c: New test.
+ * gcc.target/s390/vector/vec-cmp-2.c: New test.
+ * gcc.target/s390/vector/vec-dbl-math-compile-1.c: New test.
+ * gcc.target/s390/vector/vec-genbytemask-1.c: New test.
+ * gcc.target/s390/vector/vec-genbytemask-2.c: New test.
+ * gcc.target/s390/vector/vec-genmask-1.c: New test.
+ * gcc.target/s390/vector/vec-genmask-2.c: New test.
+ * gcc.target/s390/vector/vec-init-1.c: New test.
+ * gcc.target/s390/vector/vec-int-math-compile-1.c: New test.
+ * gcc.target/s390/vector/vec-shift-1.c: New test.
+ * gcc.target/s390/vector/vec-sub-1.c: New test.
+
2015-05-19 Christophe Lyon <christophe.lyon@linaro.org>
* gcc.target/aarch64/advsimd-intrinsics/vqshrun_n.c: New file.
}] "-march=zEC12 -mzarch" ] } { return 0 } else { return 1 }
}
+# Return 1 if vector instructions (probed with va - vector add) are
+# understood by the assembler and can be executed when building with
+# -march=z13 -mzarch. This also covers checking for the VX kernel
+# feature. A kernel without that feature does not enable the vector
+# facility and the following check will die with a signal.
+proc check_effective_target_vector { } {
+ if { ![check_runtime s390_check_vector [subst {
+ int main (void)
+ {
+ asm ("va %%v24, %%v26, %%v28, 3" : : : "v24", "v26", "v28");
+ return 0;
+ }
+ }] "-march=z13 -mzarch" ] } { return 0 } else { return 1 }
+}
+
# If a testcase doesn't have special options, use these.
global DEFAULT_CFLAGS
if ![info exists DEFAULT_CFLAGS] then {
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
"" $DEFAULT_CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.\[cS\]]] \
+ "" $DEFAULT_CFLAGS
+
# All done.
dg-finish
--- /dev/null
+/* Check calling convention in the vector ABI. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* Make sure the last argument is fetched from the argument overflow area. */
+/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,160\\(%r15\\)" { target lp64 } } } */
+/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,96\\(%r15\\)" { target ilp32 } } } */
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df
+add (v2df a, v2df b, v2df c, v2df d,
+ v2df e, v2df f, v2df g, v2df h, v2df i)
+{
+ return a + b + c + d + e + f + g + h + i;
+}
--- /dev/null
+/* Check calling convention in the vector ABI. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* This needs to be v24 = v24 * v26 + v28 */
+/* { dg-final { scan-assembler "vfmadb\t%v24,%v24,%v26,%v28" } } */
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df
+madd (v2df a, v2df b, v2df c)
+{
+ return a * b + c;
+}
--- /dev/null
+/* Check calling convention in the vector ABI regarding vector like structs. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* addA */
+/* { dg-final { scan-assembler-times "vfadb\t%v24,%v24,%v26" 1 } } */
+
+/* addB and addE */
+/* { dg-final { scan-assembler-times "vah\t%v24,%v\[0-9\]*,%v\[0-9\]*" 2 } } */
+
+/* addC */
+/* { dg-final { scan-assembler-times "vag\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */
+
+/* addB and addC are expected to read the arguments via pointers in r2 and r3 */
+/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r2\\)" 2 } } */
+/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r3\\)" 2 } } */
+
+/* addD */
+/* { dg-final { scan-assembler-times "vaf\t%v24,%v24,%v26" 1 } } */
+
+/* addE */
+/* { dg-final { scan-assembler-times "vah\t%v24,%v24,%v26" 1 } } */
+
+/* addF */
+/* { dg-final { scan-assembler-times "vab\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */
+/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r2,32" 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r3,32" 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "llgfr\t%.*,%r2" 1 { target { ! lp64 } } } } */
+/* { dg-final { scan-assembler-times "llgfr\t%.*,%r4" 1 { target { ! lp64 } } } } */
+
+
+typedef double v2df __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+
+typedef short v2hi __attribute__((vector_size(4)));
+typedef char v4qi __attribute__((vector_size(4)));
+
+/* Vector like structs are passed in VRs. addA is expected to add
+ its two arguments directly out of v24 and v26. */
+struct A { v2df a; };
+
+v2df
+addA (struct A a, struct A b)
+{
+ return a.a + b.a;
+}
+
+/* Only structs consisting of a single vector element qualify as
+ vector type parms. This one has a second member and is passed as
+ a struct. Since it is bigger than 8 bytes it is passed on the
+ stack with the reference being put into r2/r3. */
+struct B { v8hi a; char b;};
+
+v8hi
+addB (struct B a, struct B b)
+{
+ return a.a + b.a;
+}
+
+/* Due to the aligned(32) attribute the resulting struct is bigger
+ than 16 bytes and therefore passed on the stack with the references
+ residing in r2/r3. */
+struct C { v2di __attribute__((aligned(32))) a; };
+
+v2di
+addC (struct C a, struct C b)
+{
+ return a.a + b.a;
+}
+
+/* The attribute here does not have any effect on the argument
+ passing. So this struct stays vector like and hence is passed in
+ a VR. */
+struct D { v4si __attribute__((aligned(16))) a; };
+
+v4si
+addD (struct D a, struct D b)
+{
+ return a.a + b.a;
+}
+
+
+/* Smaller vectors are passed in vector registers. This also applies
+ to vector like structs. */
+struct E { v2hi a; };
+
+v2hi
+addE (struct E a, struct E b)
+{
+ return a.a + b.a;
+}
+
+/* This struct is not passed in VRs because of the padding introduced
+ by aligned(8). But since it fits in a GPR and has a power of two
+ size, it is passed in GPRs. */
+struct F { v4qi __attribute__((aligned(8))) a; };
+
+v4qi
+addF (struct F a, struct F b)
+{
+ return a.a + b.a;
+}
--- /dev/null
+/* Check calling convention in the vector ABI. Smaller vectors need to
+ be placed left-justified in the stack slot. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "lde\t%.*,160\\\(%r15\\\)" 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "lde\t%.*,168\\\(%r15\\\)" 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "lde\t%.*,96\\\(%r15\\\)" 1 { target { ! lp64 } } } } */
+/* { dg-final { scan-assembler-times "lde\t%.*,100\\\(%r15\\\)" 1 { target { ! lp64 } } } } */
+
+typedef char __attribute__((vector_size(4))) v4qi;
+
+v4qi
+foo (v4qi a, v4qi b, v4qi c, v4qi d, v4qi e,
+ v4qi f, v4qi g, v4qi h, v4qi i, v4qi j)
+{
+ return (a + b + c + d + e + f + g + h + i + j);
+}
--- /dev/null
+/* Check alignment convention in the vector ABI. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+#include <stddef.h>
+
+/* Vector types get an 8 byte alignment. The array below gets a
+ negative size, and hence triggers a compile error, if the member
+ offset differs from the expected value. */
+typedef double v2df __attribute__((vector_size(16)));
+typedef struct
+{
+ char a;
+ v2df b;
+} A;
+char c1[offsetof (A, b) == 8 ? 0 : -1];
+
+/* Smaller vectors allow for smaller alignments. */
+typedef char v4qi __attribute__((vector_size(4)));
+typedef struct
+{
+ char a;
+ v4qi b;
+} B;
+char c2[offsetof (B, b) == 4 ? 0 : -1];
+
+
+typedef double v4df __attribute__((vector_size(32)));
+typedef struct
+{
+ char a;
+ v4df b;
+} C;
+char c3[offsetof (C, b) == 8 ? 0 : -1]; /* A 32 byte vector is expected at offset 8 as well. */
+
+/* However, we allow the programmer to choose a bigger alignment. */
+typedef struct
+{
+ char a;
+ v2df b __attribute__((aligned(16)));
+} D;
+char c4[offsetof (D, b) == 16 ? 0 : -1];
+
+typedef struct
+{
+ char a;
+ v2df b;
+} __attribute__((packed)) E;
+char c5[offsetof (E, b) == 1 ? 0 : -1]; /* In a packed struct the vector is expected right after the char. */
--- /dev/null
+/* Check calling convention in the vector ABI for single element vectors. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 7 } } */
+
+typedef int __attribute__((vector_size(16))) v4si;
+
+typedef char __attribute__((vector_size(1))) v1qi;
+typedef short int __attribute__((vector_size(2))) v1hi;
+typedef int __attribute__((vector_size(4))) v1si;
+typedef long long __attribute__((vector_size(8))) v1di;
+typedef float __attribute__((vector_size(4))) v1sf;
+typedef double __attribute__((vector_size(8))) v1df;
+typedef long double __attribute__((vector_size(16))) v1tf;
+
+v1qi foo1 (v4si a, v1qi b) { return b; }
+v1hi foo2 (v4si a, v1hi b) { return b; }
+v1si foo3 (v4si a, v1si b) { return b; }
+v1di foo4 (v4si a, v1di b) { return b; }
+v1sf foo5 (v4si a, v1sf b) { return b; }
+v1df foo6 (v4si a, v1df b) { return b; }
+v1tf foo7 (v4si a, v1tf b) { return b; }
--- /dev/null
+/* Check calling convention in the vector ABI for single element vectors. */
+
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 1 } } */
+
+typedef int __attribute__((vector_size(16))) v4si;
+
+typedef __int128_t __attribute__((vector_size(16))) v1ti;
+
+v1ti foo (v4si a, v1ti b) { return b; }
--- /dev/null
+/* Check calling convention in the vector ABI. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* c.i and c.j are passed by reference since a struct with two
+ elements is no vector type argument. */
+/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,0\\(%r3\\)" } } */
+/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,8\\(%r3\\)" } } */
+
+/* just_v2si is passed in a vector reg if it is an incoming arg.
+ However, as return value it is passed via hidden first pointer
+ argument. */
+/* { dg-final { scan-assembler ".*st.*\t%v\[0-9\]*,0\\(%r2\\)" } } */
+
+/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
+
+typedef int __attribute__ ((vector_size(8))) v2si;
+
+struct just_v2si
+{
+ v2si i;
+};
+
+struct two_v2si
+{
+ v2si i, j;
+};
+
+struct just_v2si
+add_structvecs (v2si a, struct just_v2si b, struct two_v2si c)
+{
+ struct just_v2si res;
+
+ res.i = a + b.i + c.i + c.j;
+ return res;
+}
--- /dev/null
+/* Check calling convention with variable argument lists in the vector
+ ABI. */
+
+/* { dg-do run { target { s390*-*-* } } } */
+/* { dg-require-effective-target vector } */
+/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
+
+/* Make sure arguments are fetched from the argument overflow area. */
+/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,352\\(%r15\\)" { target lp64 } } } */
+/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,368\\(%r15\\)" { target lp64 } } } */
+/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,376\\(%r15\\)" { target lp64 } } } */
+/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,392\\(%r15\\)" { target lp64 } } } */
+
+/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,208\\(%r15\\)" { target ilp32 } } } */
+/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,224\\(%r15\\)" { target ilp32 } } } */
+/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,232\\(%r15\\)" { target ilp32 } } } */
+/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,248\\(%r15\\)" { target ilp32 } } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
+#include <stdarg.h>
+
+extern void abort (void);
+
+typedef long long v2di __attribute__((vector_size(16)));
+typedef int v2si __attribute__((vector_size(8)));
+
+v2di __attribute__((noinline))
+add (int a, ...)
+{
+ int i; /* NOTE(review): unused in this function. */
+ va_list va;
+ v2di di_result = { 0, 0 };
+ v2si si_result = (v2si){ 0, 0 };
+
+ va_start (va, a);
+
+ di_result += va_arg (va, v2di); /* The variable arguments alternate between v2di and v2si. */
+ si_result += va_arg (va, v2si);
+ di_result += va_arg (va, v2di);
+ si_result += va_arg (va, v2si);
+
+ va_end (va);
+
+ di_result[0] += si_result[0]; /* Fold the v2si sums into the v2di result. */
+ di_result[1] += si_result[1];
+
+ return di_result;
+}
+
+int
+main ()
+{
+ v2di r = add (4, (v2di){ 11, 21 }, (v2si){ 12, 22 }, (v2di){ 13, 23 }, (v2si){ 14, 24 });
+
+ if (r[0] != 50 || r[1] != 90) /* 11 + 12 + 13 + 14 and 21 + 22 + 23 + 24 */
+ abort ();
+
+ return 0;
+}
--- /dev/null
+/* Check calling convention in the vector ABI. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13 -Wno-implicit-function-declaration" } */
+
+
+typedef long v2di __attribute__((vector_size(16)));
+extern v2di foo1 (int, v2di);
+extern v2di foo2 (int, int);
+extern v2di foo3 (int, ...);
+
+v2di bar1 (int a) { return foo2 (1, a); }
+v2di bar2 (int a) { return foo3 (1, a); }
+v2di bar3 (v2di a) { return foo1 (1, a); }
+v2di bar4 (v2di a) { return foo3 (1, a); }
+
+int bar5 (int a) { return foo4 (1, a); }
+int bar6 (v2di a) { return foo4 (1, a); } /* { dg-error "Vector argument passed to unprototyped function" } */
--- /dev/null
+/* { dg-do run { target { s390*-*-* } } } */
+/* { dg-require-effective-target vector } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* For FP zero checks we use the ltdbr instruction. Since this is a
+ load and test it actually writes the FPR. Whenever an FPR gets
+ written the rest of the overlapping VR is clobbered. */
+typedef double __attribute__((vector_size(16))) v2df;
+
+v2df a = { 1.0, 2.0 };
+
+extern void abort (void);
+
+void __attribute__((noinline))
+foo (v2df a)
+{
+ v2df b = { 1.0, 3.0 };
+
+ b -= a; /* b is now 0.0, 1.0 for the argument passed by main. */
+
+ /* Take away all the VRs not overlapping with FPRs. */
+ asm volatile ("" : : :
+ "v16","v17","v18","v19",
+ "v20","v21","v22","v23",
+ "v24","v25","v26","v27",
+ "v28","v29","v30","v31");
+ if (b[0] != 0.0) /* ltdbr */
+ abort ();
+ if (b[1] != 1.0)
+ abort ();
+}
+
+int
+main ()
+{
+ foo (a);
+ return 0;
+}
--- /dev/null
+/* Check that the proper unsigned compare instructions are being generated. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "vchlb" 1 } } */
+/* { dg-final { scan-assembler-times "vchlh" 1 } } */
+/* { dg-final { scan-assembler-times "vchlf" 1 } } */
+/* { dg-final { scan-assembler-times "vchlg" 1 } } */
+
+typedef __attribute__((vector_size(16))) signed char v16qi;
+typedef __attribute__((vector_size(16))) unsigned char uv16qi;
+
+typedef __attribute__((vector_size(16))) signed short v8hi;
+typedef __attribute__((vector_size(16))) unsigned short uv8hi;
+
+typedef __attribute__((vector_size(16))) signed int v4si;
+typedef __attribute__((vector_size(16))) unsigned int uv4si;
+
+typedef __attribute__((vector_size(16))) signed long long v2di;
+typedef __attribute__((vector_size(16))) unsigned long long uv2di;
+
+v16qi
+f (uv16qi a, uv16qi b)
+{
+ return a > b;
+}
+
+v8hi
+g (uv8hi a, uv8hi b)
+{
+ return a > b;
+}
+
+v4si
+h (uv4si a, uv4si b)
+{
+ return a > b;
+}
+
+v2di
+i (uv2di a, uv2di b)
+{
+ return a > b;
+}
--- /dev/null
+/* Check that the proper signed compare instructions are being generated. */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "vchb" 1 } } */
+/* { dg-final { scan-assembler-times "vchh" 1 } } */
+/* { dg-final { scan-assembler-times "vchf" 1 } } */
+/* { dg-final { scan-assembler-times "vchg" 1 } } */
+
+typedef __attribute__((vector_size(16))) signed char v16qi;
+typedef __attribute__((vector_size(16))) signed short v8hi;
+typedef __attribute__((vector_size(16))) signed int v4si;
+typedef __attribute__((vector_size(16))) signed long long v2di;
+
+v16qi
+f (v16qi a, v16qi b)
+{
+ return a > b;
+}
+
+v8hi
+g (v8hi a, v8hi b)
+{
+ return a > b;
+}
+
+v4si
+h (v4si a, v4si b)
+{
+ return a > b;
+}
+
+v2di
+i (v2di a, v2di b)
+{
+ return a > b;
+}
--- /dev/null
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
+
+typedef __attribute__((vector_size(16))) double v2df;
+
+v2df
+adddbl (v2df a, v2df b)
+{
+ return a + b;
+}
+/* { dg-final { scan-assembler-times "vfadb" 1 } } */
+
+v2df
+subdbl (v2df a, v2df b)
+{
+ return a - b;
+}
+/* { dg-final { scan-assembler-times "vfsdb" 1 } } */
+
+v2df
+muldbl (v2df a, v2df b)
+{
+ return a * b;
+}
+/* { dg-final { scan-assembler-times "vfmdb" 1 } } */
+
+v2df
+divdbl (v2df a, v2df b)
+{
+ return a / b;
+}
+/* { dg-final { scan-assembler-times "vfd" 1 } } */
+
+v2df
+fmadbl (v2df a, v2df b, v2df c)
+{
+ return a * b + c;
+}
+/* { dg-final { scan-assembler-times "vfma" 1 } } */
+
+v2df
+fmsdbl (v2df a, v2df b, v2df c)
+{
+ return a * b - c;
+}
+/* { dg-final { scan-assembler-times "vfms" 1 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
+/* { dg-require-effective-target vector } */
+
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
+typedef unsigned short uv8hi __attribute__((vector_size(16)));
+typedef unsigned int uv4si __attribute__((vector_size(16)));
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+
+uv2di __attribute__((noinline))
+foo1 ()
+{
+ return (uv2di){ 0xff00ff00ff00ff00, 0x00ff00ff00ff00ff };
+}
+/* { dg-final { scan-assembler-times "vgbm\t%v24,43605" 1 } } */
+
+uv4si __attribute__((noinline))
+foo2 ()
+{
+ return (uv4si){ 0xff0000ff, 0x0000ffff, 0xffff0000, 0x00ffff00 };
+}
+/* { dg-final { scan-assembler-times "vgbm\t%v24,37830" 1 } } */
+
+uv8hi __attribute__((noinline))
+foo3a ()
+{
+ return (uv8hi){ 0xff00, 0xff00, 0xff00, 0xff00,
+ 0xff00, 0xff00, 0xff00, 0xff00 };
+}
+/* { dg-final { scan-assembler-times "vgbm\t%v24,43690" 1 } } */
+
+uv8hi __attribute__((noinline))
+foo3b ()
+{
+ return (uv8hi){ 0x00ff, 0x00ff, 0x00ff, 0x00ff,
+ 0x00ff, 0x00ff, 0x00ff, 0x00ff };
+}
+/* { dg-final { scan-assembler-times "vgbm\t%v24,21845" 1 } } */
+
+uv16qi __attribute__((noinline))
+foo4 ()
+{
+ return (uv16qi){ 0xff, 0xff, 0xff, 0xff,
+ 0, 0, 0, 0,
+ 0xff, 0, 0xff, 0,
+ 0, 0xff, 0, 0xff };
+}
+/* { dg-final { scan-assembler-times "vgbm\t%v24,61605" 1 } } */
+
+int
+main ()
+{
+ if (foo1()[1] != 0x00ff00ff00ff00ffULL)
+ __builtin_abort ();
+
+ if (foo2()[1] != 0x0000ffff)
+ __builtin_abort ();
+
+ if (foo3a()[1] != 0xff00)
+ __builtin_abort ();
+
+ if (foo3b()[1] != 0x00ff)
+ __builtin_abort ();
+
+ if (foo4()[1] != 0xff)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
+typedef unsigned short uv8hi __attribute__((vector_size(16)));
+typedef unsigned int uv4si __attribute__((vector_size(16)));
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+
+/* The elements differ and the first one contains a byte (0x1f) which
+ is neither 0x00 nor 0xff. */
+uv2di __attribute__((noinline))
+foo1 ()
+{
+ return (uv2di){ 0x001fffffffffff00, 0x0000ffffffffff00 };
+}
+
+/* Non-contiguous bitmasks with bytes being neither 0x00 nor 0xff. */
+
+uv4si __attribute__((noinline))
+foo2 ()
+{
+ return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f };
+}
+
+uv8hi __attribute__((noinline))
+foo3a ()
+{
+ return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700,
+ 0xf700, 0xf700, 0xf700, 0xf700 };
+}
+
+uv8hi __attribute__((noinline))
+foo3b ()
+{
+ return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff,
+ 0x10ff, 0x10ff, 0x10ff, 0x10ff };
+}
+
+uv16qi __attribute__((noinline))
+foo4 ()
+{
+ return (uv16qi){ 0x82, 0x82, 0x82, 0x82,
+ 0x82, 0x82, 0x82, 0x82,
+ 0x82, 0x82, 0x82, 0x82,
+ 0x82, 0x82, 0x82, 0x82 };
+}
+/* { dg-final { scan-assembler-not "vgbm" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
+/* { dg-require-effective-target vector } */
+
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
+typedef unsigned short uv8hi __attribute__((vector_size(16)));
+typedef unsigned int uv4si __attribute__((vector_size(16)));
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+
+uv2di __attribute__((noinline))
+foo1 ()
+{
+ return (uv2di){ 0x000fffffffffff00, 0x000fffffffffff00 };
+}
+/* { dg-final { scan-assembler-times "vgmg\t%v24,12,55" 1 } } */
+
+uv4si __attribute__((noinline))
+foo2 ()
+{
+ return (uv4si){ 0xff00000f, 0xff00000f, 0xff00000f, 0xff00000f };
+}
+/* { dg-final { scan-assembler-times "vgmf\t%v24,28,7" 1 } } */
+
+uv8hi __attribute__((noinline))
+foo3a ()
+{
+ return (uv8hi){ 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+ 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+}
+/* { dg-final { scan-assembler-times "vgmh\t%v24,0,11" 1 } } */
+
+uv8hi __attribute__((noinline))
+foo3b ()
+{
+ return (uv8hi){ 0x0fff, 0x0fff, 0x0fff, 0x0fff,
+ 0x0fff, 0x0fff, 0x0fff, 0x0fff };
+}
+/* { dg-final { scan-assembler-times "vgmh\t%v24,4,15" 1 } } */
+
+uv16qi __attribute__((noinline))
+foo4 ()
+{
+ return (uv16qi){ 0x8, 0x8, 0x8, 0x8,
+ 0x8, 0x8, 0x8, 0x8,
+ 0x8, 0x8, 0x8, 0x8,
+ 0x8, 0x8, 0x8, 0x8 };
+}
+/* { dg-final { scan-assembler-times "vgmb\t%v24,4,4" 1 } } */
+
+int
+main ()
+{
+ if (foo1()[1] != 0x000fffffffffff00ULL)
+ __builtin_abort ();
+
+ if (foo2()[1] != 0xff00000f)
+ __builtin_abort ();
+
+ if (foo3a()[1] != 0xfff0)
+ __builtin_abort ();
+
+ if (foo3b()[1] != 0x0fff)
+ __builtin_abort ();
+
+ if (foo4()[1] != 0x8)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
+typedef unsigned short uv8hi __attribute__((vector_size(16)));
+typedef unsigned int uv4si __attribute__((vector_size(16)));
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+
+/* The elements differ. */
+uv2di __attribute__((noinline))
+foo1 ()
+{
+ return (uv2di){ 0x000fffffffffff00, 0x0000ffffffffff00 };
+}
+
+/* Non-contiguous bitmasks */
+
+uv4si __attribute__((noinline))
+foo2 ()
+{
+ return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f };
+}
+
+uv8hi __attribute__((noinline))
+foo3a ()
+{
+ return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700,
+ 0xf700, 0xf700, 0xf700, 0xf700 };
+}
+
+uv8hi __attribute__((noinline))
+foo3b ()
+{
+ return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff,
+ 0x10ff, 0x10ff, 0x10ff, 0x10ff };
+}
+
+uv16qi __attribute__((noinline))
+foo4 ()
+{
+ return (uv16qi){ 0x82, 0x82, 0x82, 0x82,
+ 0x82, 0x82, 0x82, 0x82,
+ 0x82, 0x82, 0x82, 0x82,
+ 0x82, 0x82, 0x82, 0x82 };
+}
+/* { dg-final { scan-assembler-not "vgm" } } */
--- /dev/null
+/* Check that the vec_init expander does its job. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+
+
+
+
+typedef __attribute__((vector_size(16))) signed int v4si;
+
+extern v4si G;
+
+v4si
+f (signed int a)
+{
+ return G == a;
+}
+/* { dg-final { scan-assembler-times "vrepf" 1 } } */
+
+v4si
+g (signed int *a)
+{
+ return G == *a;
+}
+/* { dg-final { scan-assembler-times "vlrepf" 1 } } */
+
+v4si
+h ()
+{
+ return G == 1;
+}
+/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,31" 1 } } */
+
+v4si
+i ()
+{
+ return G == -1;
+}
+/* { dg-final { scan-assembler-times "vone" 1 } } */
+
+v4si
+j ()
+{
+ return G == 0;
+}
+/* { dg-final { scan-assembler-times "vzero" 1 } } */
+
+v4si
+k ()
+{
+ return G == (v4si){ 0xff80, 0xff80, 0xff80, 0xff80 };
+}
+/* { dg-final { scan-assembler-times "vgmf\t%v.*,16,24" 1 } } */
+
+v4si
+l ()
+{
+ return G == (v4si){ 0xf000000f, 0xf000000f, 0xf000000f, 0xf000000f };
+}
+/* { dg-final { scan-assembler-times "vgmf\t%v.*,28,3" 1 } } */
+
+v4si
+m ()
+{
+ return G == (v4si){ 0x00ff00ff, 0x0000ffff, 0xffff0000, 0xff00ff00 };
+}
+/* { dg-final { scan-assembler-times "vgbm\t%v.*,21450" 1 } } */
--- /dev/null
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+typedef __attribute__((vector_size(16))) signed int v4si;
+
+v4si
+adddbl (v4si a, v4si b)
+{
+ return a + b;
+}
+
+v4si
+subdbl (v4si a, v4si b)
+{
+ return a - b;
+}
+
+v4si
+muldbl (v4si a, v4si b)
+{
+ return a * b;
+}
+
+v4si
+divdbl (v4si a, v4si b)
+{
+ return a / b;
+}
+
+v4si
+fmadbl (v4si a, v4si b, v4si c)
+{
+ return a * b + c;
+}
+
+v4si
+fmsdbl (v4si a, v4si b, v4si c)
+{
+ return a * b - c;
+}
--- /dev/null
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "veslb" 2 } } */
+/* { dg-final { scan-assembler-times "veslh" 2 } } */
+/* { dg-final { scan-assembler-times "veslf" 2 } } */
+/* { dg-final { scan-assembler-times "veslg" 2 } } */
+
+/* { dg-final { scan-assembler-times "vesrab" 1 } } */
+/* { dg-final { scan-assembler-times "vesrah" 1 } } */
+/* { dg-final { scan-assembler-times "vesraf" 1 } } */
+/* { dg-final { scan-assembler-times "vesrag" 1 } } */
+
+/* { dg-final { scan-assembler-times "vesrlb" 1 } } */
+/* { dg-final { scan-assembler-times "vesrlh" 1 } } */
+/* { dg-final { scan-assembler-times "vesrlf" 1 } } */
+/* { dg-final { scan-assembler-times "vesrlg" 1 } } */
+
+/* { dg-final { scan-assembler-times "veslvb" 2 } } */
+/* { dg-final { scan-assembler-times "veslvh" 2 } } */
+/* { dg-final { scan-assembler-times "veslvf" 2 } } */
+/* { dg-final { scan-assembler-times "veslvg" 2 } } */
+
+/* { dg-final { scan-assembler-times "vesravb" 1 } } */
+/* { dg-final { scan-assembler-times "vesravh" 1 } } */
+/* { dg-final { scan-assembler-times "vesravf" 1 } } */
+/* { dg-final { scan-assembler-times "vesravg" 1 } } */
+
+/* { dg-final { scan-assembler-times "vesrlvb" 1 } } */
+/* { dg-final { scan-assembler-times "vesrlvh" 1 } } */
+/* { dg-final { scan-assembler-times "vesrlvf" 1 } } */
+/* { dg-final { scan-assembler-times "vesrlvg" 1 } } */
+
+typedef __attribute__((vector_size(16))) signed char v16qi;
+typedef __attribute__((vector_size(16))) unsigned char uv16qi;
+
+typedef __attribute__((vector_size(16))) signed short v8hi;
+typedef __attribute__((vector_size(16))) unsigned short uv8hi;
+
+typedef __attribute__((vector_size(16))) signed int v4si;
+typedef __attribute__((vector_size(16))) unsigned int uv4si;
+
+typedef __attribute__((vector_size(16))) signed long long v2di;
+typedef __attribute__((vector_size(16))) unsigned long long uv2di;
+
+uv16qi g_uvqi0, g_uvqi1, g_uvqi2;
+v16qi g_vqi0, g_vqi1, g_vqi2;
+
+uv8hi g_uvhi0, g_uvhi1, g_uvhi2;
+v8hi g_vhi0, g_vhi1, g_vhi2;
+
+uv4si g_uvsi0, g_uvsi1, g_uvsi2;
+v4si g_vsi0, g_vsi1, g_vsi2;
+
+uv2di g_uvdi0, g_uvdi1, g_uvdi2;
+v2di g_vdi0, g_vdi1, g_vdi2;
+
+void
+shift_left_by_scalar (int s)
+{
+ g_uvqi0 = g_uvqi1 << s;
+ g_vqi0 = g_vqi1 << s;
+ g_uvhi0 = g_uvhi1 << s;
+ g_vhi0 = g_vhi1 << s;
+ g_uvsi0 = g_uvsi1 << s;
+ g_vsi0 = g_vsi1 << s;
+ g_uvdi0 = g_uvdi1 << s;
+ g_vdi0 = g_vdi1 << s;
+}
+
+void
+shift_right_by_scalar (int s)
+{
+ g_uvqi0 = g_uvqi1 >> s;
+ g_vqi0 = g_vqi1 >> s;
+ g_uvhi0 = g_uvhi1 >> s;
+ g_vhi0 = g_vhi1 >> s;
+ g_uvsi0 = g_uvsi1 >> s;
+ g_vsi0 = g_vsi1 >> s;
+ g_uvdi0 = g_uvdi1 >> s;
+ g_vdi0 = g_vdi1 >> s;
+}
+
+void
+shift_left_by_vector ()
+{
+ g_uvqi0 = g_uvqi1 << g_uvqi2;
+ g_vqi0 = g_vqi1 << g_vqi2;
+ g_uvhi0 = g_uvhi1 << g_uvhi2;
+ g_vhi0 = g_vhi1 << g_vhi2;
+ g_uvsi0 = g_uvsi1 << g_uvsi2;
+ g_vsi0 = g_vsi1 << g_vsi2;
+ g_uvdi0 = g_uvdi1 << g_uvdi2;
+ g_vdi0 = g_vdi1 << g_vdi2;
+}
+
+void
+shift_right_by_vector ()
+{
+ g_uvqi0 = g_uvqi1 >> g_uvqi2;
+ g_vqi0 = g_vqi1 >> g_vqi2;
+ g_uvhi0 = g_uvhi1 >> g_uvhi2;
+ g_vhi0 = g_vhi1 >> g_vhi2;
+ g_uvsi0 = g_uvsi1 >> g_uvsi2;
+ g_vsi0 = g_vsi1 >> g_vsi2;
+ g_uvdi0 = g_uvdi1 >> g_uvdi2;
+ g_vdi0 = g_vdi1 >> g_vdi2;
+}
--- /dev/null
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "vsb" 2 } } */
+/* { dg-final { scan-assembler-times "vsh" 2 } } */
+/* { dg-final { scan-assembler-times "vsf" 2 } } */
+/* { dg-final { scan-assembler-times "vsg" 2 } } */
+/* { dg-final { scan-assembler-times "vfs" 1 } } */
+
+
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
+typedef signed char v16qi __attribute__((vector_size(16)));
+typedef unsigned short uv8hi __attribute__((vector_size(16)));
+typedef signed short v8hi __attribute__((vector_size(16)));
+typedef unsigned int uv4si __attribute__((vector_size(16)));
+typedef signed int v4si __attribute__((vector_size(16)));
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
+typedef signed long long v2di __attribute__((vector_size(16)));
+typedef double v2df __attribute__((vector_size(16)));
+
+uv16qi g_uvqi0, g_uvqi1, g_uvqi2;
+v16qi g_vqi0, g_vqi1, g_vqi2;
+
+uv8hi g_uvhi0, g_uvhi1, g_uvhi2;
+v8hi g_vhi0, g_vhi1, g_vhi2;
+
+uv4si g_uvsi0, g_uvsi1, g_uvsi2;
+v4si g_vsi0, g_vsi1, g_vsi2;
+
+uv2di g_uvdi0, g_uvdi1, g_uvdi2;
+v2di g_vdi0, g_vdi1, g_vdi2;
+
+v2df g_vdf0, g_vdf1, g_vdf2;
+
+void
+sub1 ()
+{
+ g_vqi0 = g_vqi1 - g_vqi2;
+ g_uvqi0 = g_uvqi1 - g_uvqi2;
+
+ g_vhi0 = g_vhi1 - g_vhi2;
+ g_uvhi0 = g_uvhi1 - g_uvhi2;
+
+ g_vsi0 = g_vsi1 - g_vsi2;
+ g_uvsi0 = g_uvsi1 - g_uvsi2;
+
+ g_vdi0 = g_vdi1 - g_vdi2;
+ g_uvdi0 = g_uvdi1 - g_uvdi2;
+
+ g_vdf0 = g_vdf1 - g_vdf2;
+}