From 3553c658533e430b232997bdfd97faf6606fb102 Mon Sep 17 00:00:00 2001 From: Dennis Zhang Date: Tue, 3 Nov 2020 16:56:02 +0000 Subject: [PATCH] aarch64: intrinsics extract half of bf16 vector This patch implements ACLE intrinsics vget_low_bf16 and vget_high_bf16 to extract lower or higher half from a bfloat16x8 vector. The vget_high_bf16 is done by 'dup' instruction. The vget_low_bf16 is just to return the lower half of a vector register. Tests include both big- and little-endian cases. gcc/ChangeLog: 2020-11-03 Dennis Zhang * config/aarch64/aarch64-simd-builtins.def (vget_lo_half): New entry. (vget_hi_half): Likewise. * config/aarch64/aarch64-simd.md (aarch64_vget_lo_halfv8bf): New entry. (aarch64_vget_hi_halfv8bf): Likewise. * config/aarch64/arm_neon.h (vget_low_bf16): New intrinsic. (vget_high_bf16): Likewise. gcc/testsuite/ChangeLog * gcc.target/aarch64/advsimd-intrinsics/bf16_get.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c: New test. --- gcc/ChangeLog | 9 +++++++ gcc/config/aarch64/aarch64-simd-builtins.def | 4 +++ gcc/config/aarch64/aarch64-simd.md | 21 +++++++++++++++ gcc/config/aarch64/arm_neon.h | 14 ++++++++++ gcc/testsuite/ChangeLog | 5 ++++ .../aarch64/advsimd-intrinsics/bf16_get-be.c | 27 +++++++++++++++++++ .../aarch64/advsimd-intrinsics/bf16_get.c | 27 +++++++++++++++++++ 7 files changed, 107 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5de466df347..748614418b2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2020-11-03 Dennis Zhang + + * config/aarch64/aarch64-simd-builtins.def (vget_lo_half): New entry. + (vget_hi_half): Likewise. + * config/aarch64/aarch64-simd.md (aarch64_vget_lo_halfv8bf): New entry. + (aarch64_vget_hi_halfv8bf): Likewise. + * config/aarch64/arm_neon.h (vget_low_bf16): New intrinsic. + (vget_high_bf16): Likewise. + 2020-11-03 Bernd Edlinger PR target/97205 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 5b78bc536e0..6d898d4503a 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -722,6 +722,10 @@ VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, ALL, v4sf) VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, ALL, v4sf) + /* Implemented by aarch64_vget_lo/hi_halfv8bf. */ + VAR1 (UNOP, vget_lo_half, 0, AUTO_FP, v8bf) + VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf) + /* Implemented by aarch64_simd_mmlav16qi. */ VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi) VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 030a086d31c..a667efa15ed 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7159,6 +7159,27 @@ [(set_attr "type" "neon_dot")] ) +;; vget_low/high_bf16 +(define_expand "aarch64_vget_lo_halfv8bf" + [(match_operand:V4BF 0 "register_operand") + (match_operand:V8BF 1 "register_operand")] + "TARGET_BF16_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false); + emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); + DONE; +}) + +(define_expand "aarch64_vget_hi_halfv8bf" + [(match_operand:V4BF 0 "register_operand") + (match_operand:V8BF 1 "register_operand")] + "TARGET_BF16_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true); + emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); + DONE; +}) + ;; bfmmla (define_insn "aarch64_bfmmlaqv4sf" [(set (match_operand:V4SF 0 "register_operand" "=w") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 69cccd32786..0009e50f3fe 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -35680,6 +35680,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); } +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vget_low_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vget_lo_halfv8bf (__a); +} + +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vget_high_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vget_hi_halfv8bf (__a); +} + __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvt_f32_bf16 (bfloat16x4_t __a) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d4814947cab..7068f80a4be 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-11-03 Dennis Zhang + + * gcc.target/aarch64/advsimd-intrinsics/bf16_get.c: New test. + * gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c: New test. + 2020-11-03 Bernd Edlinger PR target/97205 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c new file mode 100644 index 00000000000..bd9bb110974 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get-be.c @@ -0,0 +1,27 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-mbig-endian -save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +/* +**test_vget_low_bf16: +** ret +*/ +bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a) +{ + return vget_low_bf16 (a); +} + +/* +**test_vget_high_bf16: +** dup d0, v0.d\[1\] +** ret +*/ +bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a) +{ + return vget_high_bf16 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c new file mode 100644 index 00000000000..2193753ffbb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c @@ -0,0 +1,27 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +/* +**test_vget_low_bf16: +** ret +*/ +bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a) +{ + return vget_low_bf16 (a); +} + +/* +**test_vget_high_bf16: +** dup d0, v0.d\[1\] +** ret +*/ +bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a) +{ + return vget_high_bf16 (a); +} -- 2.30.2