From: Alan Lawrence <alan.lawrence@arm.com>
Date: Mon, 27 Oct 2014 15:20:18 +0000 (+0000)
Subject: [AArch64] Use new reduc_plus_scal optabs, inc. for __builtins
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f5156c3ead861a319698af615ef739ef96532e02;p=gcc.git

[AArch64] Use new reduc_plus_scal optabs, inc. for __builtins

	* config/aarch64/aarch64-simd-builtins.def
	(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
	Remove.
	(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.

	* config/aarch64/aarch64-simd.md (reduc_<sur>plus_<mode>): Remove.
	(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.

	(reduc_<sur>plus_<mode>): Change SUADDV -> UNSPEC_ADDV, rename to...
	(aarch64_reduc_plus_internal<mode>): ...this.

	(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
	(aarch64_reduc_plus_internalv2si): ...this.

	(reduc_splus_<mode>/V2F): Rename to...
	(aarch64_reduc_plus_internal<mode>): ...this.

	* config/aarch64/iterators.md
	(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
	(UNSPEC_ADDV): New.
	(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.

	* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
	vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
	vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
	vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
	__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.

From-SVN: r216738
---
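Illustration of the user-visible effect (example code, not part of the
original submission): the old reduction builtins returned a whole vector
with the sum in lane 0, so every arm_neon.h intrinsic had to wrap them in a
vget_lane call; the new reduc_plus_scal builtins return the scalar sum
directly, and the new define_expands do the single lane-0 extraction
(endian-corrected via ENDIAN_LANE_N) in the backend instead.  A minimal
before/after sketch for vaddv_s8, using a hypothetical sum_lanes() caller
and assuming a compiler that contains this patch:

    #include <arm_neon.h>

    /* Body of vaddv_s8 before this patch:
         return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
       and after this patch:
         return __builtin_aarch64_reduc_plus_scal_v8qi (__a);  */

    int8_t
    sum_lanes (int8x8_t __v)
    {
      return vaddv_s8 (__v);	/* expands to a single "addv b0, v0.8b".  */
    }
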
-(define_insn "reduc_plus_" +(define_expand "reduc_plus_scal_" + [(match_operand: 0 "register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")] + UNSPEC_ADDV)] + "TARGET_SIMD" + { + rtx elt = GEN_INT (ENDIAN_LANE_N (mode, 0)); + rtx scratch = gen_reg_rtx (mode); + emit_insn (gen_aarch64_reduc_plus_internal (scratch, operands[1])); + emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt)); + DONE; + } +) + +(define_expand "reduc_plus_scal_" + [(match_operand: 0 "register_operand" "=w") + (match_operand:V2F 1 "register_operand" "w")] + "TARGET_SIMD" + { + rtx elt = GEN_INT (ENDIAN_LANE_N (mode, 0)); + rtx scratch = gen_reg_rtx (mode); + emit_insn (gen_aarch64_reduc_plus_internal (scratch, operands[1])); + emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt)); + DONE; + } +) + +(define_insn "aarch64_reduc_plus_internal" [(set (match_operand:VDQV 0 "register_operand" "=w") (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] - SUADDV))] + UNSPEC_ADDV))] "TARGET_SIMD" "add\\t%0, %1." [(set_attr "type" "neon_reduc_add")] ) -(define_insn "reduc_plus_v2si" +(define_insn "aarch64_reduc_plus_internalv2si" [(set (match_operand:V2SI 0 "register_operand" "=w") (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - SUADDV))] + UNSPEC_ADDV))] "TARGET_SIMD" "addp\\t%0.2s, %1.2s, %1.2s" [(set_attr "type" "neon_reduc_add")] ) -(define_insn "reduc_splus_" +(define_insn "aarch64_reduc_plus_internal" [(set (match_operand:V2F 0 "register_operand" "=w") (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] UNSPEC_FADDV))] @@ -1796,14 +1823,17 @@ [(set_attr "type" "neon_fp_reduc_add_s_q")] ) -(define_expand "reduc_splus_v4sf" - [(set (match_operand:V4SF 0 "register_operand") +(define_expand "reduc_plus_scal_v4sf" + [(set (match_operand:SF 0 "register_operand") (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] UNSPEC_FADDV))] "TARGET_SIMD" { - emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1])); - emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0])); + rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0)); + rtx scratch = gen_reg_rtx (V4SFmode); + emit_insn (gen_aarch64_addpv4sf (scratch, operands[1])); + emit_insn (gen_aarch64_addpv4sf (scratch, scratch)); + emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); DONE; }) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 9eb04c4881a..3d3772fb26b 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -12964,121 +12964,103 @@ vaddd_u64 (uint64_t __a, uint64_t __b) __extension__ static __inline int8_t __attribute__ ((__always_inline__)) vaddv_s8 (int8x8_t __a) { - return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0); + return __builtin_aarch64_reduc_plus_scal_v8qi (__a); } __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vaddv_s16 (int16x4_t __a) { - return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0); + return __builtin_aarch64_reduc_plus_scal_v4hi (__a); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) vaddv_s32 (int32x2_t __a) { - return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0); + return __builtin_aarch64_reduc_plus_scal_v2si (__a); } __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) vaddv_u8 (uint8x8_t __a) { - return vget_lane_u8 ((uint8x8_t) - __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), - 0); + return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a); } __extension__ static 
 vaddv_u16 (uint16x4_t __a)
 {
-  return vget_lane_u16 ((uint16x4_t)
-		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
-		0);
+  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vaddv_u32 (uint32x2_t __a)
 {
-  return vget_lane_u32 ((uint32x2_t)
-		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
-		0);
+  return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vaddvq_s8 (int8x16_t __a)
 {
-  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
-			0);
+  return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vaddvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v4si (__a);
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vaddvq_s64 (int64x2_t __a)
 {
-  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v2di (__a);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vaddvq_u8 (uint8x16_t __a)
 {
-  return vgetq_lane_u8 ((uint8x16_t)
-		__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
-		0);
+  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vaddvq_u16 (uint16x8_t __a)
 {
-  return vgetq_lane_u16 ((uint16x8_t)
-		__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
-		0);
+  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vaddvq_u32 (uint32x4_t __a)
 {
-  return vgetq_lane_u32 ((uint32x4_t)
-		__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
-		0);
+  return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
 }
 
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vaddvq_u64 (uint64x2_t __a)
 {
-  return vgetq_lane_u64 ((uint64x2_t)
-		__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
-		0);
+  return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddv_f32 (float32x2_t __a)
 {
-  float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
-  return vget_lane_f32 (__t, 0);
+  return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddvq_f32 (float32x4_t __a)
 {
-  float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
-  return vgetq_lane_f32 (__t, 0);
+  return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vaddvq_f64 (float64x2_t __a)
 {
-  float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
-  return vgetq_lane_f64 (__t, 0);
+  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
 }
 
 /* vbsl */
@@ -19777,7 +19759,7 @@ vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vpaddd_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index efd006f8361..74c71fcc804 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -213,8 +213,7 @@
     UNSPEC_FMINNMV	; Used in aarch64-simd.md.
     UNSPEC_FMINV	; Used in aarch64-simd.md.
     UNSPEC_FADDV	; Used in aarch64-simd.md.
-    UNSPEC_SADDV	; Used in aarch64-simd.md.
-    UNSPEC_UADDV	; Used in aarch64-simd.md.
+    UNSPEC_ADDV		; Used in aarch64-simd.md.
     UNSPEC_SMAXV	; Used in aarch64-simd.md.
     UNSPEC_SMINV	; Used in aarch64-simd.md.
     UNSPEC_UMAXV	; Used in aarch64-simd.md.
@@ -859,8 +858,6 @@
 (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
			        UNSPEC_FMAXNMV UNSPEC_FMINNMV])
 
-(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
-
 (define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
			       UNSPEC_SRHADD UNSPEC_URHADD
			       UNSPEC_SHSUB UNSPEC_UHSUB
@@ -965,7 +962,6 @@
		       (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
		       (UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
		       (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
-		      (UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
		       (UNSPEC_SSLI  "s") (UNSPEC_USLI  "u")
		       (UNSPEC_SSRI  "s") (UNSPEC_USRI  "u")
		       (UNSPEC_USRA  "u") (UNSPEC_SSRA  "s")