From 0e4d63c5eef5c0565ac1d42ae1ac43a12c9fd071 Mon Sep 17 00:00:00 2001 From: Alex Velenko Date: Wed, 23 Apr 2014 17:02:49 +0000 Subject: [PATCH] [AArch64] VDUP Testcases This patch adds vdup intrinsic testcases for AArch64. Those testcases are nice to have, as they allow us to reason about vdup consistency for both LE and BE compiler flavors. From-SVN: r209713 --- gcc/testsuite/ChangeLog | 6 + .../gcc.target/aarch64/vdup_lane_1.c | 430 ++++++++++++ .../gcc.target/aarch64/vdup_lane_2.c | 343 ++++++++++ gcc/testsuite/gcc.target/aarch64/vdup_n_1.c | 619 ++++++++++++++++++ 4 files changed, 1398 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vdup_n_1.c diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f25a2b7f520..126ad08300b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2014-04-23 Alex Velenko + + * gcc.target/aarch64/vdup_lane_1.c: New testcase. + * gcc.target/aarch64/vdup_lane_2.c: New testcase. + * gcc.target/aarch64/vdup_n_1.c: New testcase. + 2014-04-23 Kyrylo Tkachov * gcc.target/arm/rev16.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c new file mode 100644 index 00000000000..4582471c8aa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c @@ -0,0 +1,430 @@ +/* Test vdup_lane intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "--save-temps -O1" } */ + +#include + +extern void abort (void); + +float32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_f32_0 (float32x2_t a) +{ + return vdup_lane_f32 (a, 0); +} + +float32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_f32_1 (float32x2_t a) +{ + return vdup_lane_f32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_f32 () +{ + float32x2_t a; + float32x2_t b; + int i; + float32_t c[2] = { 0.0 , 3.14 }; + float32_t d[2]; + + a = vld1_f32 (c); + b = wrap_vdup_lane_f32_0 (a); + vst1_f32 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_f32_1 (a); + vst1_f32 (d, b); + for (i = 0; i < 2; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +float32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_f32_0 (float32x2_t a) +{ + return vdupq_lane_f32 (a, 0); +} + +float32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_f32_1 (float32x2_t a) +{ + return vdupq_lane_f32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_f32 () +{ + float32x2_t a; + float32x4_t b; + int i; + float32_t c[2] = { 0.0 , 3.14 }; + float32_t d[4]; + + a = vld1_f32 (c); + b = wrap_vdupq_lane_f32_0 (a); + vst1q_f32 (d, b); + for (i = 0; i < 4; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_f32_1 (a); + vst1q_f32 (d, b); + for (i = 0; i < 4; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int8x8_t __attribute__ ((noinline)) +wrap_vdup_lane_s8_0 (int8x8_t a) +{ + return vdup_lane_s8 (a, 0); +} + +int8x8_t __attribute__ ((noinline)) +wrap_vdup_lane_s8_1 (int8x8_t a) +{ + return vdup_lane_s8 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s8 () +{ + int8x8_t a; + int8x8_t b; + int i; + /* Only two first cases are interesting. 
*/ + int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + int8_t d[8]; + + a = vld1_s8 (c); + b = wrap_vdup_lane_s8_0 (a); + vst1_s8 (d, b); + for (i = 0; i < 8; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_s8_1 (a); + vst1_s8 (d, b); + for (i = 0; i < 8; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int8x16_t __attribute__ ((noinline)) +wrap_vdupq_lane_s8_0 (int8x8_t a) +{ + return vdupq_lane_s8 (a, 0); +} + +int8x16_t __attribute__ ((noinline)) +wrap_vdupq_lane_s8_1 (int8x8_t a) +{ + return vdupq_lane_s8 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s8 () +{ + int8x8_t a; + int8x16_t b; + int i; + /* Only two first cases are interesting. */ + int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + int8_t d[16]; + + a = vld1_s8 (c); + b = wrap_vdupq_lane_s8_0 (a); + vst1q_s8 (d, b); + for (i = 0; i < 16; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_s8_1 (a); + vst1q_s8 (d, b); + for (i = 0; i < 16; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int16x4_t __attribute__ ((noinline)) +wrap_vdup_lane_s16_0 (int16x4_t a) +{ + return vdup_lane_s16 (a, 0); +} + +int16x4_t __attribute__ ((noinline)) +wrap_vdup_lane_s16_1 (int16x4_t a) +{ + return vdup_lane_s16 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s16 () +{ + int16x4_t a; + int16x4_t b; + int i; + /* Only two first cases are interesting. 
*/ + int16_t c[4] = { 0, 1, 2, 3 }; + int16_t d[4]; + + a = vld1_s16 (c); + b = wrap_vdup_lane_s16_0 (a); + vst1_s16 (d, b); + for (i = 0; i < 4; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_s16_1 (a); + vst1_s16 (d, b); + for (i = 0; i < 4; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int16x8_t __attribute__ ((noinline)) +wrap_vdupq_lane_s16_0 (int16x4_t a) +{ + return vdupq_lane_s16 (a, 0); +} + +int16x8_t __attribute__ ((noinline)) +wrap_vdupq_lane_s16_1 (int16x4_t a) +{ + return vdupq_lane_s16 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s16 () +{ + int16x4_t a; + int16x8_t b; + int i; + /* Only two first cases are interesting. */ + int16_t c[4] = { 0, 1, 2, 3 }; + int16_t d[8]; + + a = vld1_s16 (c); + b = wrap_vdupq_lane_s16_0 (a); + vst1q_s16 (d, b); + for (i = 0; i < 8; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_s16_1 (a); + vst1q_s16 (d, b); + for (i = 0; i < 8; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_s32_0 (int32x2_t a) +{ + return vdup_lane_s32 (a, 0); +} + +int32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_s32_1 (int32x2_t a) +{ + return vdup_lane_s32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s32 () +{ + int32x2_t a; + int32x2_t b; + int i; + int32_t c[2] = { 0, 1 }; + int32_t d[2]; + + a = vld1_s32 (c); + b = wrap_vdup_lane_s32_0 (a); + vst1_s32 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_s32_1 (a); + vst1_s32 (d, b); + for (i = 0; i < 2; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_s32_0 (int32x2_t a) +{ + return vdupq_lane_s32 (a, 0); +} + +int32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_s32_1 (int32x2_t a) +{ + return vdupq_lane_s32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s32 () +{ + int32x2_t a; + int32x4_t b; + int i; + int32_t c[2] = { 0, 1 }; + int32_t d[4]; + + a 
= vld1_s32 (c); + b = wrap_vdupq_lane_s32_0 (a); + vst1q_s32 (d, b); + for (i = 0; i < 4; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_s32_1 (a); + vst1q_s32 (d, b); + for (i = 0; i < 4; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int64x1_t __attribute__ ((noinline)) +wrap_vdup_lane_s64_0 (int64x1_t a) +{ + return vdup_lane_s64 (a, 0); +} + +int64x1_t __attribute__ ((noinline)) +wrap_vdup_lane_s64_1 (int64x1_t a) +{ + return vdup_lane_s64 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s64 () +{ + int64x1_t a; + int64x1_t b; + int64_t c[1]; + int64_t d[1]; + + c[0] = 0; + a = vld1_s64 (c); + b = wrap_vdup_lane_s64_0 (a); + vst1_s64 (d, b); + if (c[0] != d[0]) + return 1; + + c[0] = 1; + a = vld1_s64 (c); + b = wrap_vdup_lane_s64_1 (a); + vst1_s64 (d, b); + if (c[0] != d[0]) + return 1; + return 0; +} + +int64x2_t __attribute__ ((noinline)) +wrap_vdupq_lane_s64_0 (int64x1_t a) +{ + return vdupq_lane_s64 (a, 0); +} + +int64x2_t __attribute__ ((noinline)) +wrap_vdupq_lane_s64_1 (int64x1_t a) +{ + return vdupq_lane_s64 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s64 () +{ + int64x1_t a; + int64x2_t b; + int i; + int64_t c[1]; + int64_t d[2]; + + c[0] = 0; + a = vld1_s64 (c); + b = wrap_vdupq_lane_s64_0 (a); + vst1q_s64 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + + c[0] = 1; + a = vld1_s64 (c); + b = wrap_vdupq_lane_s64_1 (a); + vst1q_s64 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + return 0; +} + +int +main () +{ + + if (test_vdup_lane_f32 ()) + abort (); + if (test_vdup_lane_s8 ()) + abort (); + if (test_vdup_lane_s16 ()) + abort (); + if (test_vdup_lane_s32 ()) + abort (); + if (test_vdup_lane_s64 ()) + abort (); + if (test_vdupq_lane_f32 ()) + abort (); + if (test_vdupq_lane_s8 ()) + abort (); + if (test_vdupq_lane_s16 ()) + abort (); + if (test_vdupq_lane_s32 ()) + abort (); + if (test_vdupq_lane_s64 ()) + abort (); + + return 0; +} + +/* Asm check for 
test_vdup_lane_s8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdupq_lane_s8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdup_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ +/* Asm check for test_vdup_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdupq_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ +/* Asm check for test_vdupq_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ + +/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c new file mode 100644 index 00000000000..7c04e759a52 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c @@ -0,0 +1,343 @@ +/* Test vdup_lane intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "-O1 --save-temps" } */ + +#include + +#define force_simd(V1) asm volatile ("" \ + : "=w"(V1) \ + : "w"(V1) \ + : /* No clobbers */) + +extern void abort (void); + +float32_t __attribute__ ((noinline)) +wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a) +{ + return vdups_lane_f32 (a, 0); +} + +float32_t __attribute__ ((noinline)) +wrap_vdups_lane_f32_1 (float32x2_t a) +{ + return vdups_lane_f32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdups_lane_f32 () +{ + float32x2_t a; + float32_t b; + float32_t c[2] = { 0.0, 1.0 }; + + a = vld1_f32 (c); + b = wrap_vdups_lane_f32_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdups_lane_f32_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +float64_t __attribute__ ((noinline)) +wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a) +{ + return vdupd_lane_f64 (a, 0); +} + +int __attribute__ ((noinline)) +test_vdupd_lane_f64 () +{ + float64x1_t a; + float64_t b; + float64_t c[1] = { 0.0 }; + a = vld1_f64 (c); + b = wrap_vdupd_lane_f64_0 (a, a); + if (c[0] != b) + return 1; + return 0; +} + +int8_t __attribute__ ((noinline)) +wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a) +{ + int8_t result = vdupb_lane_s8 (a, 0); + force_simd (result); + return result; +} + +int8_t __attribute__ ((noinline)) +wrap_vdupb_lane_s8_1 (int8x8_t a) +{ + int8_t result = vdupb_lane_s8 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdupb_lane_s8 () +{ + int8x8_t a; + int8_t b; + int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + + a = vld1_s8 (c); + b = wrap_vdupb_lane_s8_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdupb_lane_s8_1 (a); + if (c[1] != b) + return 1; + + return 0; +} + +uint8_t __attribute__ ((noinline)) +wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a) +{ + uint8_t result = vdupb_lane_u8 (a, 0); + force_simd (result); + return result; +} + +uint8_t __attribute__ ((noinline)) +wrap_vdupb_lane_u8_1 (uint8x8_t a) +{ + uint8_t result 
= vdupb_lane_u8 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdupb_lane_u8 () +{ + uint8x8_t a; + uint8_t b; + uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + + a = vld1_u8 (c); + b = wrap_vdupb_lane_u8_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdupb_lane_u8_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +int16_t __attribute__ ((noinline)) +wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a) +{ + int16_t result = vduph_lane_s16 (a, 0); + force_simd (result); + return result; +} + +int16_t __attribute__ ((noinline)) +wrap_vduph_lane_s16_1 (int16x4_t a) +{ + int16_t result = vduph_lane_s16 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vduph_lane_s16 () +{ + int16x4_t a; + int16_t b; + int16_t c[4] = { 0, 1, 2, 3 }; + + a = vld1_s16 (c); + b = wrap_vduph_lane_s16_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vduph_lane_s16_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +uint16_t __attribute__ ((noinline)) +wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a) +{ + uint16_t result = vduph_lane_u16 (a, 0); + force_simd (result); + return result; +} + +uint16_t __attribute__ ((noinline)) +wrap_vduph_lane_u16_1 (uint16x4_t a) +{ + uint16_t result = vduph_lane_u16 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vduph_lane_u16 () +{ + uint16x4_t a; + uint16_t b; + uint16_t c[4] = { 0, 1, 2, 3 }; + + a = vld1_u16 (c); + b = wrap_vduph_lane_u16_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vduph_lane_u16_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +int32_t __attribute__ ((noinline)) +wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a) +{ + int32_t result = vdups_lane_s32 (a, 0); + force_simd (result); + return result; +} + +int32_t __attribute__ ((noinline)) +wrap_vdups_lane_s32_1 (int32x2_t a) +{ + int32_t result = vdups_lane_s32 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ 
((noinline)) +test_vdups_lane_s32 () +{ + int32x2_t a; + int32_t b; + int32_t c[2] = { 0, 1 }; + + a = vld1_s32 (c); + b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a); + if (c[0] != b) + return 1; + b = wrap_vdups_lane_s32_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +uint32_t __attribute__ ((noinline)) +wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a) +{ + uint32_t result = vdups_lane_u32 (a, 0); + force_simd (result); + return result; +} + +uint32_t __attribute__ ((noinline)) +wrap_vdups_lane_u32_1 (uint32x2_t a) +{ + uint32_t result = vdups_lane_u32 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdups_lane_u32 () +{ + uint32x2_t a; + uint32_t b; + uint32_t c[2] = { 0, 1 }; + a = vld1_u32 (c); + b = wrap_vdups_lane_u32_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdups_lane_u32_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +uint64_t __attribute__ ((noinline)) +wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a) +{ + return vdupd_lane_u64 (a, 0); +} + +int __attribute__ ((noinline)) +test_vdupd_lane_u64 () +{ + uint64x1_t a; + uint64_t b; + uint64_t c[1] = { 0 }; + + a = vld1_u64 (c); + b = wrap_vdupd_lane_u64_0 (a, a); + if (c[0] != b) + return 1; + return 0; +} + +int64_t __attribute__ ((noinline)) +wrap_vdupd_lane_s64_0 (int64x1_t dummy, int64x1_t a) +{ + return vdupd_lane_s64 (a, 0); +} + +int __attribute__ ((noinline)) +test_vdupd_lane_s64 () +{ + int64x1_t a; + int64_t b; + int64_t c[1] = { 0 }; + + a = vld1_s64 (c); + b = wrap_vdupd_lane_s64_0 (a, a); + if (c[0] != b) + return 1; + return 0; +} + +int +main () +{ + if (test_vdups_lane_f32 ()) + abort (); + if (test_vdupd_lane_f64 ()) + abort (); + if (test_vdupb_lane_s8 ()) + abort (); + if (test_vdupb_lane_u8 ()) + abort (); + if (test_vduph_lane_s16 ()) + abort (); + if (test_vduph_lane_u16 ()) + abort (); + if (test_vdups_lane_s32 ()) + abort (); + if (test_vdups_lane_u32 ()) + abort (); + if (test_vdupd_lane_s64 ()) + abort (); + 
if (test_vdupd_lane_u64 ()) + abort (); + return 0; +} + +/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */ +/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */ +/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ + +/* Asm check for vduph_lane_s16, vduph_lane_u16. */ +/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */ +/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */ + +/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */ +/* Can't generate "dup s, v[0]" for vdups_lane_s32 and vdups_lane_u32. */ +/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */ + +/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */ +/* Attempts to make the compiler generate vdupd are not practical. */ +/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c new file mode 100644 index 00000000000..a79910d68d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c @@ -0,0 +1,619 @@ +/* Test vdup_n intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "-O1 --save-temps" } */ + +#include + +extern void abort (void); + +float32x2_t __attribute__ ((noinline)) +wrap_vdup_n_f32 (float32_t a) +{ + return vdup_n_f32 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_f32 () +{ + float32_t a = 1.0; + float32x2_t b; + float32_t c[2]; + int i; + + b = wrap_vdup_n_f32 (a); + vst1_f32 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +float32x4_t __attribute__ ((noinline)) +wrap_vdupq_n_f32 (float32_t a) +{ + return vdupq_n_f32 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_f32 () +{ + float32_t a = 1.0; + float32x4_t b; + float32_t c[4]; + int i; + + b = wrap_vdupq_n_f32 (a); + vst1q_f32 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +float64x1_t __attribute__ ((noinline)) +wrap_vdup_n_f64 (float64_t a) +{ + return vdup_n_f64 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_f64 () +{ + float64_t a = 1.0; + float64x1_t b; + float64_t c[1]; + int i; + + b = wrap_vdup_n_f64 (a); + vst1_f64 (c, b); + for (i = 0; i < 1; i++) + if (a != c[i]) + return 1; + return 0; +} + +float64x2_t __attribute__ ((noinline)) +wrap_vdupq_n_f64 (float64_t a) +{ + return vdupq_n_f64 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_f64 () +{ + float64_t a = 1.0; + float64x2_t b; + float64_t c[2]; + int i; + + b = wrap_vdupq_n_f64 (a); + vst1q_f64 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly8x8_t __attribute__ ((noinline)) +wrap_vdup_n_p8 (poly8_t a) +{ + return vdup_n_p8 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_p8 () +{ + poly8_t a = 1; + poly8x8_t b; + poly8_t c[8]; + int i; + + b = wrap_vdup_n_p8 (a); + vst1_p8 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly8x16_t __attribute__ ((noinline)) +wrap_vdupq_n_p8 (poly8_t a) +{ + return vdupq_n_p8 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_p8 () +{ + poly8_t a = 1; + 
poly8x16_t b; + poly8_t c[16]; + int i; + + b = wrap_vdupq_n_p8 (a); + vst1q_p8 (c, b); + for (i = 0; i < 16; i++) + if (a != c[i]) + return 1; + return 0; +} + +int8x8_t __attribute__ ((noinline)) +wrap_vdup_n_s8 (int8_t a) +{ + return vdup_n_s8 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s8 () +{ + int8_t a = 1; + int8x8_t b; + int8_t c[8]; + int i; + + b = wrap_vdup_n_s8 (a); + vst1_s8 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +int8x16_t __attribute__ ((noinline)) +wrap_vdupq_n_s8 (int8_t a) +{ + return vdupq_n_s8 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s8 () +{ + int8_t a = 1; + int8x16_t b; + int8_t c[16]; + int i; + + b = wrap_vdupq_n_s8 (a); + vst1q_s8 (c, b); + for (i = 0; i < 16; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint8x8_t __attribute__ ((noinline)) +wrap_vdup_n_u8 (uint8_t a) +{ + return vdup_n_u8 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u8 () +{ + uint8_t a = 1; + uint8x8_t b; + uint8_t c[8]; + int i; + + b = wrap_vdup_n_u8 (a); + vst1_u8 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint8x16_t __attribute__ ((noinline)) +wrap_vdupq_n_u8 (uint8_t a) +{ + return vdupq_n_u8 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_u8 () +{ + uint8_t a = 1; + uint8x16_t b; + uint8_t c[16]; + int i; + + b = wrap_vdupq_n_u8 (a); + vst1q_u8 (c, b); + for (i = 0; i < 16; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly16x4_t __attribute__ ((noinline)) +wrap_vdup_n_p16 (poly16_t a) +{ + return vdup_n_p16 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_p16 () +{ + poly16_t a = 1; + poly16x4_t b; + poly16_t c[4]; + int i; + + b = wrap_vdup_n_p16 (a); + vst1_p16 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly16x8_t __attribute__ ((noinline)) +wrap_vdupq_n_p16 (poly16_t a) +{ + return vdupq_n_p16 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_p16 () +{ + poly16_t a = 1; + 
poly16x8_t b; + poly16_t c[8]; + int i; + + b = wrap_vdupq_n_p16 (a); + vst1q_p16 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +int16x4_t __attribute__ ((noinline)) +wrap_vdup_n_s16 (int16_t a) +{ + return vdup_n_s16 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s16 () +{ + int16_t a = 1; + int16x4_t b; + int16_t c[4]; + int i; + + b = wrap_vdup_n_s16 (a); + vst1_s16 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +int16x8_t __attribute__ ((noinline)) +wrap_vdupq_n_s16 (int16_t a) +{ + return vdupq_n_s16 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s16 () +{ + int16_t a = 1; + int16x8_t b; + int16_t c[8]; + int i; + + b = wrap_vdupq_n_s16 (a); + vst1q_s16 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint16x4_t __attribute__ ((noinline)) +wrap_vdup_n_u16 (uint16_t a) +{ + return vdup_n_u16 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u16 () +{ + uint16_t a = 1; + uint16x4_t b; + uint16_t c[4]; + int i; + + b = wrap_vdup_n_u16 (a); + vst1_u16 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint16x8_t __attribute__ ((noinline)) +wrap_vdupq_n_u16 (uint16_t a) +{ + return vdupq_n_u16 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_u16 () +{ + uint16_t a = 1; + uint16x8_t b; + uint16_t c[8]; + int i; + + b = wrap_vdupq_n_u16 (a); + vst1q_u16 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +int32x2_t __attribute__ ((noinline)) +wrap_vdup_n_s32 (int32_t a) +{ + return vdup_n_s32 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s32 () +{ + int32_t a = 1; + int32x2_t b; + int32_t c[2]; + int i; + + b = wrap_vdup_n_s32 (a); + vst1_s32 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +int32x4_t __attribute__ ((noinline)) +wrap_vdupq_n_s32 (int32_t a) +{ + return vdupq_n_s32 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s32 () +{ 
+ int32_t a = 1; + int32x4_t b; + int32_t c[4]; + int i; + + b = wrap_vdupq_n_s32 (a); + vst1q_s32 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint32x2_t __attribute__ ((noinline)) +wrap_vdup_n_u32 (uint32_t a) +{ + return vdup_n_u32 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u32 () +{ + uint32_t a = 1; + uint32x2_t b; + uint32_t c[2]; + int i; + + b = wrap_vdup_n_u32 (a); + vst1_u32 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint32x4_t __attribute__ ((noinline)) +wrap_vdupq_n_u32 (uint32_t a) +{ + return vdupq_n_u32 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_u32 () +{ + uint32_t a = 1; + uint32x4_t b; + uint32_t c[4]; + int i; + + b = wrap_vdupq_n_u32 (a); + vst1q_u32 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +int64x1_t __attribute__ ((noinline)) +wrap_vdup_n_s64 (int64_t a) +{ + return vdup_n_s64 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s64 () +{ + int64_t a = 1; + int64x1_t b; + int64_t c[1]; + int i; + + b = wrap_vdup_n_s64 (a); + vst1_s64 (c, b); + for (i = 0; i < 1; i++) + if (a != c[i]) + return 1; + return 0; +} + +int64x2_t __attribute__ ((noinline)) +wrap_vdupq_n_s64 (int64_t a) +{ + return vdupq_n_s64 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s64 () +{ + int64_t a = 1; + int64x2_t b; + int64_t c[2]; + int i; + + b = wrap_vdupq_n_s64 (a); + vst1q_s64 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint64x1_t __attribute__ ((noinline)) +wrap_vdup_n_u64 (uint64_t a) +{ + return vdup_n_u64 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u64 () +{ + uint64_t a = 1; + uint64x1_t b; + uint64_t c[1]; + int i; + + b = wrap_vdup_n_u64 (a); + vst1_u64 (c, b); + for (i = 0; i < 1; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint64x2_t __attribute__ ((noinline)) +wrap_vdupq_n_u64 (uint64_t a) +{ + return vdupq_n_u64 (a); +} + +int __attribute__ ((noinline)) 
+test_vdupq_n_u64 () +{ + uint64_t a = 1; + uint64x2_t b; + uint64_t c[2]; + int i; + + b = wrap_vdupq_n_u64 (a); + vst1q_u64 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +int +main () +{ + if (test_vdup_n_f32 ()) + abort (); + if (test_vdup_n_f64 ()) + abort (); + if (test_vdup_n_p8 ()) + abort (); + if (test_vdup_n_u8 ()) + abort (); + if (test_vdup_n_s8 ()) + abort (); + if (test_vdup_n_p16 ()) + abort (); + if (test_vdup_n_s16 ()) + abort (); + if (test_vdup_n_u16 ()) + abort (); + if (test_vdup_n_s32 ()) + abort (); + if (test_vdup_n_u32 ()) + abort (); + if (test_vdup_n_s64 ()) + abort (); + if (test_vdup_n_u64 ()) + abort (); + if (test_vdupq_n_f32 ()) + abort (); + if (test_vdupq_n_f64 ()) + abort (); + if (test_vdupq_n_p8 ()) + abort (); + if (test_vdupq_n_u8 ()) + abort (); + if (test_vdupq_n_s8 ()) + abort (); + if (test_vdupq_n_p16 ()) + abort (); + if (test_vdupq_n_s16 ()) + abort (); + if (test_vdupq_n_u16 ()) + abort (); + if (test_vdupq_n_s32 ()) + abort (); + if (test_vdupq_n_u32 ()) + abort (); + if (test_vdupq_n_s64 ()) + abort (); + if (test_vdupq_n_u64 ()) + abort (); + return 0; +} + +/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64. + Cannot force floating point value in general purpose register. */ + +/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdup_n_s32, test_vdup_n_u32. 
*/ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */ + +/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */ + +/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out. + Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+" + are not practical. */ + +/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ -- 2.30.2