From 36054fabf5b7b47ffa8c7c6f93c436dd8e8c807c Mon Sep 17 00:00:00 2001
From: James Greenhalgh <james.greenhalgh@arm.com>
Date: Wed, 1 May 2013 15:37:52 +0000
Subject: [PATCH] [AArch64] Refactor reduc_plus patterns.

gcc/
	* config/aarch64/aarch64-builtins.c
	(aarch64_gimple_fold_builtin): Fold more modes for reduc_splus_.
	* config/aarch64/aarch64-simd-builtins.def
	(reduc_splus_<mode>): Add new modes.
	(reduc_uplus_<mode>): New.
	* config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove.
	(reduc_uplus_v4sf): Likewise.
	(reduc_splus_v4sf): Likewise.
	(aarch64_addv<mode>): Likewise.
	(reduc_uplus_<mode>): Likewise.
	(reduc_splus_<mode>): Likewise.
	(aarch64_addvv2di): Likewise.
	(reduc_uplus_v2di): Likewise.
	(reduc_splus_v2di): Likewise.
	(aarch64_addvv2si): Likewise.
	(reduc_uplus_v2si): Likewise.
	(reduc_splus_v2si): Likewise.
	(reduc_<sur>plus_<mode>): New.
	(reduc_<sur>plus_v2di): Likewise.
	(reduc_<sur>plus_v2si): Likewise.
	(reduc_<sur>plus_v4sf): Likewise.
	(aarch64_addpv4sf): Likewise.
	* config/aarch64/arm_neon.h
	(vaddv<q>_<s,u,f><8, 16, 32, 64>): Rewrite using builtins.
	* config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV,
	add UNSPEC_SADDV, UNSPEC_UADDV.
	(SUADDV): New.
	(sur): Add UNSPEC_SADDV, UNSPEC_UADDV.

gcc/testsuite/
	* gcc.target/aarch64/vect-vaddv.c: New.

From-SVN: r198500
---
 gcc/ChangeLog                                 |  31 +++
 gcc/config/aarch64/aarch64-builtins.c         |   2 +-
 gcc/config/aarch64/aarch64-simd-builtins.def  |   5 +-
 gcc/config/aarch64/aarch64-simd.md            | 156 +++----
 gcc/config/aarch64/arm_neon.h                 | 225 ++++++++----
 gcc/config/aarch64/iterators.md               |   6 +-
 gcc/testsuite/ChangeLog                       |   4 +
 gcc/testsuite/gcc.target/aarch64/vect-vaddv.c | 128 ++++++++++
 8 files changed, 303 insertions(+), 254 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-vaddv.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ff6ceb6d2f8..f21e90bb1bb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,34 @@
+2013-05-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_gimple_fold_builtin): Fold more modes for reduc_splus_.
+	* config/aarch64/aarch64-simd-builtins.def
+	(reduc_splus_<mode>): Add new modes.
+	(reduc_uplus_<mode>): New.
+	* config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove.
+	(reduc_uplus_v4sf): Likewise.
+	(reduc_splus_v4sf): Likewise.
+	(aarch64_addv<mode>): Likewise.
+	(reduc_uplus_<mode>): Likewise.
+	(reduc_splus_<mode>): Likewise.
+	(aarch64_addvv2di): Likewise.
+	(reduc_uplus_v2di): Likewise.
+	(reduc_splus_v2di): Likewise.
+	(aarch64_addvv2si): Likewise.
+	(reduc_uplus_v2si): Likewise.
+	(reduc_splus_v2si): Likewise.
+	(reduc_<sur>plus_<mode>): New.
+	(reduc_<sur>plus_v2di): Likewise.
+	(reduc_<sur>plus_v2si): Likewise.
+	(reduc_<sur>plus_v4sf): Likewise.
+	(aarch64_addpv4sf): Likewise.
+	* config/aarch64/arm_neon.h
+	(vaddv<q>_<s,u,f><8, 16, 32, 64>): Rewrite using builtins.
+	* config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV,
+	add UNSPEC_SADDV, UNSPEC_UADDV.
+	(SUADDV): New.
+	(sur): Add UNSPEC_SADDV, UNSPEC_UADDV.
+
 2013-05-01  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	* config/aarch64/arm_neon.h
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 8eb32c65d35..4fdfe247a21 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1365,7 +1365,7 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       switch (fcode)
 	{
-	  BUILTIN_VDQF (UNOP, addv, 0)
+	  BUILTIN_VALL (UNOP, reduc_splus_, 10)
 	    new_stmt = gimple_build_assign_with_ops (
 					REDUC_PLUS_EXPR,
 					gimple_call_lhs (stmt),
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 97a597e2e2c..e4201732bcd 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -234,8 +234,9 @@
   BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
   BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
 
-  /* Implemented by aarch64_addv<mode>.  */
-  BUILTIN_VDQF (UNOP, addv, 0)
+  /* Implemented by reduc_<sur>plus_<mode>.  */
+  BUILTIN_VALL (UNOP, reduc_splus_, 10)
+  BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
 
   /* Implemented by reduc_<maxmin_uns>_<mode>.  */
   BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8a487397bf2..13384aa85af 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1438,156 +1438,70 @@
    (set_attr "simd_mode" "<MODE>")]
 )
 
-;; FP 'across lanes' add.
+;; 'across lanes' add.
 
-(define_insn "aarch64_addpv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- "faddp\\t%0.4s, %1.4s, %1.4s"
-  [(set_attr "simd_type" "simd_fadd")
-   (set_attr "simd_mode" "V4SF")]
-)
-
-(define_expand "reduc_uplus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (match_operand:V4SF 1 "register_operand" "w"))]
- "TARGET_SIMD"
-{
-  rtx tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
-  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
-  DONE;
-})
-
-(define_expand "reduc_splus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (match_operand:V4SF 1 "register_operand" "w"))]
- "TARGET_SIMD"
-{
-  rtx tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
-  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
-  DONE;
-})
-
-(define_expand "aarch64_addvv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
-{
-  emit_insn (gen_reduc_splus_v4sf (operands[0], operands[1]));
-  DONE;
-})
-
-(define_insn "aarch64_addv<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
-       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- "faddp\\t%<Vetype>0, %1.<Vtype>"
-  [(set_attr "simd_type" "simd_fadd")
-   (set_attr "simd_mode" "<MODE>")]
-)
-
-(define_expand "reduc_uplus_<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
-       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- ""
-)
-
-(define_expand "reduc_splus_<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
-       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
-		    UNSPEC_FADDV))]
- "TARGET_SIMD"
- ""
-)
-
-;; Reduction across lanes.
-
-(define_insn "aarch64_addv<mode>"
+(define_insn "reduc_<sur>plus_<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
-		    UNSPEC_ADDV))]
+		    SUADDV))]
  "TARGET_SIMD"
  "addv\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "simd_type" "simd_addv") (set_attr "simd_mode" "")] ) -(define_expand "reduc_splus_" - [(set (match_operand:VDQV 0 "register_operand" "=w") - (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_expand "reduc_uplus_" - [(set (match_operand:VDQV 0 "register_operand" "=w") - (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_insn "aarch64_addvv2di" +(define_insn "reduc_plus_v2di" [(set (match_operand:V2DI 0 "register_operand" "=w") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")] - UNSPEC_ADDV))] + SUADDV))] "TARGET_SIMD" "addp\\t%d0, %1.2d" - [(set_attr "simd_type" "simd_add") + [(set_attr "simd_type" "simd_addv") (set_attr "simd_mode" "V2DI")] ) -(define_expand "reduc_uplus_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=w") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_expand "reduc_splus_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=w") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_insn "aarch64_addvv2si" +(define_insn "reduc_plus_v2si" [(set (match_operand:V2SI 0 "register_operand" "=w") (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - UNSPEC_ADDV))] + SUADDV))] "TARGET_SIMD" "addp\\t%0.2s, %1.2s, %1.2s" - [(set_attr "simd_type" "simd_add") + [(set_attr "simd_type" "simd_addv") (set_attr "simd_mode" "V2SI")] ) -(define_expand "reduc_uplus_v2si" - [(set (match_operand:V2SI 0 "register_operand" "=w") - (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - UNSPEC_ADDV))] +(define_insn "reduc_plus_" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + SUADDV))] "TARGET_SIMD" - "" + "faddp\\t%0, %1." + [(set_attr "simd_type" "simd_fadd") + (set_attr "simd_mode" "")] ) -(define_expand "reduc_splus_v2si" - [(set (match_operand:V2SI 0 "register_operand" "=w") - (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - UNSPEC_ADDV))] +(define_insn "aarch64_addpv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_FADDV))] "TARGET_SIMD" - "" + "faddp\\t%0.4s, %1.4s, %1.4s" + [(set_attr "simd_type" "simd_fadd") + (set_attr "simd_mode" "V4SF")] ) +(define_expand "reduc_plus_v4sf" + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] + SUADDV))] + "TARGET_SIMD" +{ + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_aarch64_addpv4sf (tmp, operands[1])); + emit_insn (gen_aarch64_addpv4sf (operands[0], tmp)); + DONE; +}) + ;; 'across lanes' max and min ops. 
(define_insn "reduc__" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index cdefa86f57c..608db35b3dd 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -4655,116 +4655,6 @@ vaddlvq_u32 (uint32x4_t a) return result; } -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vaddv_s8 (int8x8_t a) -{ - int8_t result; - __asm__ ("addv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vaddv_s16 (int16x4_t a) -{ - int16_t result; - __asm__ ("addv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vaddv_u8 (uint8x8_t a) -{ - uint8_t result; - __asm__ ("addv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vaddv_u16 (uint16x4_t a) -{ - uint16_t result; - __asm__ ("addv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vaddvq_s8 (int8x16_t a) -{ - int8_t result; - __asm__ ("addv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vaddvq_s16 (int16x8_t a) -{ - int16_t result; - __asm__ ("addv %h0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vaddvq_s32 (int32x4_t a) -{ - int32_t result; - __asm__ ("addv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vaddvq_u8 (uint8x16_t a) -{ - uint8_t result; - __asm__ ("addv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vaddvq_u16 (uint16x8_t a) -{ - uint16_t result; - __asm__ ("addv %h0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vaddvq_u32 (uint32x4_t a) -{ - uint32_t result; - __asm__ ("addv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c) { @@ -16995,22 +16885,6 @@ vaddlv_u32 (uint32x2_t a) return result; } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vaddv_s32 (int32x2_t a) -{ - int32_t result; - __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vaddv_u32 (uint32x2_t a) -{ - uint32_t result; - __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vpaddd_s64 (int64x2_t __a) { @@ -18026,24 +17900,117 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b) return __a + __b; } +/* vaddv */ + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0); +} + +__extension__ static 
+vaddv_s16 (int16x4_t __a)
+{
+  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vaddv_s32 (int32x2_t __a)
+{
+  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
+}
+
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vaddv_u8 (uint8x8_t __a)
+{
+  return vget_lane_u8 ((uint8x8_t)
+		__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vaddv_u16 (uint16x4_t __a)
+{
+  return vget_lane_u16 ((uint16x4_t)
+		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vaddv_u32 (uint32x2_t __a)
+{
+  return vget_lane_u32 ((uint32x2_t)
+		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
+}
+
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vaddvq_s8 (int8x16_t __a)
+{
+  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vaddvq_s16 (int16x8_t __a)
+{
+  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vaddvq_s32 (int32x4_t __a)
+{
+  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vaddvq_s64 (int64x2_t __a)
+{
+  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
+}
+
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vaddvq_u8 (uint8x16_t __a)
+{
+  return vgetq_lane_u8 ((uint8x16_t)
+		__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vaddvq_u16 (uint16x8_t __a)
+{
+  return vgetq_lane_u16 ((uint16x8_t)
+		__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vaddvq_u32 (uint32x4_t __a)
+{
+  return vgetq_lane_u32 ((uint32x4_t)
+		__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vaddvq_u64 (uint64x2_t __a)
+{
+  return vgetq_lane_u64 ((uint64x2_t)
+		__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
+}
+
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddv_f32 (float32x2_t __a)
 {
-  float32x2_t t = __builtin_aarch64_addvv2sf (__a);
+  float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
   return vget_lane_f32 (t, 0);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddvq_f32 (float32x4_t __a)
 {
-  float32x4_t t = __builtin_aarch64_addvv4sf (__a);
+  float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
   return vgetq_lane_f32 (t, 0);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vaddvq_f64 (float64x2_t __a)
 {
-  float64x2_t t = __builtin_aarch64_addvv2df (__a);
+  float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
   return vgetq_lane_f64 (t, 0);
 }
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3f9a58419b4..5945d23436d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -170,7 +170,8 @@
     UNSPEC_FMINNMV	; Used in aarch64-simd.md.
     UNSPEC_FMINV	; Used in aarch64-simd.md.
     UNSPEC_FADDV	; Used in aarch64-simd.md.
-    UNSPEC_ADDV		; Used in aarch64-simd.md.
+    UNSPEC_SADDV	; Used in aarch64-simd.md.
+    UNSPEC_UADDV	; Used in aarch64-simd.md.
     UNSPEC_SMAXV	; Used in aarch64-simd.md.
     UNSPEC_SMINV	; Used in aarch64-simd.md.
     UNSPEC_UMAXV	; Used in aarch64-simd.md.
@@ -686,6 +687,8 @@
 (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
			       UNSPEC_FMAXNMV UNSPEC_FMINNMV])
 
+(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
+
 (define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
			      UNSPEC_SRHADD UNSPEC_URHADD
			      UNSPEC_SHSUB UNSPEC_UHSUB
@@ -777,6 +780,7 @@
		       (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
		       (UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
		       (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
+		       (UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
		       (UNSPEC_SSLI  "s") (UNSPEC_USLI  "u")
		       (UNSPEC_SSRI  "s") (UNSPEC_USRI  "u")
		       (UNSPEC_USRA  "u") (UNSPEC_SSRA  "s")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0574267d979..b02d20a54ef 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-05-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/vect-vaddv.c: New.
+
 2013-05-01  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	* gcc.target/aarch64/vect-vmaxv.c: New.
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
new file mode 100644
index 00000000000..7db12047e29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
@@ -0,0 +1,128 @@
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps -ffast-math" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+extern float fabsf (float);
+extern double fabs (double);
+
+#define NUM_TESTS 16
+#define DELTA 0.000001
+
+int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
+		       -4, 34, 110, -110, 6, 4, 75, -34};
+int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
+			 -4, 34, 110, -110, 6, 4, 75, -34};
+int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
+			 -4, 34, 110, -110, 6, 4, 75, -34};
+int64_t input_int64[] = {1, 56, 2, -9, -90, 23, 54, 76,
+			 -4, 34, 110, -110, 6, 4, 75, -34};
+
+uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			 4, 34, 110, 110, 6, 4, 75, 34};
+uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			   4, 34, 110, 110, 6, 4, 75, 34};
+uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			   4, 34, 110, 110, 6, 4, 75, 34};
+
+uint64_t input_uint64[] = {1, 56, 2, 9, 90, 23, 54, 76,
+			   4, 34, 110, 110, 6, 4, 75, 34};
+
+float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
+			 200.0f, -800.0f, -13.0f, -0.5f,
+			 7.9f, -870.0f, 10.4f, 310.11f,
+			 0.0f, -865.0f, -2213.0f, -1.5f};
+
+double input_float64[] = {0.1, -0.1, 0.4, 10.3,
+			  200.0, -800.0, -13.0, -0.5,
+			  7.9, -870.0, 10.4, 310.11,
+			  0.0, -865.0, -2213.0, -1.5};
+
+#define EQUALF(a, b) (fabsf (a - b) < DELTA)
+#define EQUALD(a, b) (fabs (a - b) < DELTA)
+#define EQUALL(a, b) (a == b)
+
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT)				\
+int									\
+test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void)			\
+{									\
+  int i, j;								\
+  int moves = (NUM_TESTS - LANES) + 1;					\
+  TYPE##_t out_l[NUM_TESTS];						\
+  TYPE##_t out_v[NUM_TESTS];						\
+									\
+  /* Calculate linearly.  */						\
+  for (i = 0; i < moves; i++)						\
+    {									\
+      out_l[i] = input_##TYPE[i];					\
+      for (j = 1; j < LANES; j++)					\
+	out_l[i] += input_##TYPE[i + j];				\
+    }									\
+									\
+  /* Calculate using vector reduction intrinsics.  */			\
+  for (i = 0; i < moves; i++)						\
+    {									\
+      TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i);	\
+      out_v[i] = vaddv##Q##_##SUFFIX (t1);				\
+    }									\
+									\
+  /* Compare.  */							\
+  for (i = 0; i < moves; i++)						\
+    {									\
+      if (!EQUAL##FLOAT (out_v[i], out_l[i]))				\
+	return 0;							\
+    }									\
+  return 1;								\
+}
+
+#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F)			\
+TEST (STYPE, , TYPE, W32, F)						\
+TEST (STYPE, q, TYPE, W64, F)						\
+
+BUILD_VARIANTS (int8, s8, 8, 16, L)
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
+/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
+BUILD_VARIANTS (int16, s16, 4, 8, L)
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
+/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
+BUILD_VARIANTS (int32, s32, 2, 4, L)
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
+/* { dg-final { scan-assembler "addp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
+/* { dg-final { scan-assembler "addv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
+TEST (s64, q, int64, 2, D)
+TEST (u64, q, uint64, 2, D)
+/* { dg-final { scan-assembler "addp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
+
+BUILD_VARIANTS (float32, f32, 2, 4, F)
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
+/* { dg-final { scan-assembler "faddp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
+TEST (f64, q, float64, 2, D)
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
+
+#undef TEST
+#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT)		\
+{							\
+  if (!test_vaddv##SUFFIX##_##TYPE##x##LANES##_t ())	\
+    abort ();						\
+}
+
+int
+main (int argc, char **argv)
+{
+BUILD_VARIANTS (int8, s8, 8, 16, L)
+BUILD_VARIANTS (uint8, u8, 8, 16, L)
+BUILD_VARIANTS (int16, s16, 4, 8, L)
+BUILD_VARIANTS (uint16, u16, 4, 8, L)
+BUILD_VARIANTS (int32, s32, 2, 4, L)
+BUILD_VARIANTS (uint32, u32, 2, 4, L)
+
+BUILD_VARIANTS (float32, f32, 2, 4, F)
+TEST (f64, q, float64, 2, D)
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
-- 
2.30.2
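
Editor's note: as a standalone usage sketch (not part of the committed patch), the
program below exercises a few of the rewritten intrinsics. The intrinsic-to-builtin
mapping in the comments is taken from the arm_neon.h hunks above; the input values
and expected sums are illustrative only. Compiled at -O2 or higher by an AArch64 GCC
that includes this change, each call should reduce to the single ADDV/FADDP sequence
matched by the test's scan-assembler directives.

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int8x8_t bytes = vdup_n_s8 (3);           /* {3, 3, 3, 3, 3, 3, 3, 3} */
  uint32x4_t words = vdupq_n_u32 (7);       /* {7, 7, 7, 7} */
  float32x4_t floats = vdupq_n_f32 (1.5f);  /* {1.5, 1.5, 1.5, 1.5} */

  /* vaddv_s8 -> __builtin_aarch64_reduc_splus_v8qi -> addv b0, v0.8b.  */
  printf ("%d\n", vaddv_s8 (bytes));        /* Prints 24.  */

  /* vaddvq_u32 -> __builtin_aarch64_reduc_uplus_v4si -> addv s0, v0.4s.  */
  printf ("%u\n", vaddvq_u32 (words));      /* Prints 28.  */

  /* vaddvq_f32 -> __builtin_aarch64_reduc_splus_v4sf, which expands to two
     faddp v0.4s steps via the aarch64_addpv4sf pattern.  */
  printf ("%f\n", vaddvq_f32 (floats));     /* Prints 6.000000.  */

  return 0;
}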