From 1f520d3412962e22b0338461d82f41abba8a4f12 Mon Sep 17 00:00:00 2001 From: Delia Burduv Date: Wed, 4 Mar 2020 19:25:09 +0000 Subject: [PATCH] aarch64: ACLE intrinsics for BFCVTN, BFCVTN2 and BFCVT This patch adds the Armv8.6-a ACLE intrinsics for bfcvtn, bfcvtn2 and bfcvt as part of the BFloat16 extension. (https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics) The intrinsics are declared in arm_bf16.h and arm_neon.h and the RTL patterns are defined in aarch64-simd.md. 2020-03-06 Delia Burduv gcc/ * config/aarch64/aarch64-simd-builtins.def (bfcvtn): New built-in function. (bfcvtn_q): New built-in function. (bfcvtn2): New built-in function. (bfcvt): New built-in function. * config/aarch64/aarch64-simd.md (aarch64_bfcvtn): New pattern. (aarch64_bfcvtn2v8bf): New pattern. (aarch64_bfcvtbf): New pattern. * config/aarch64/arm_bf16.h (float32_t): New typedef. (vcvth_bf16_f32): New intrinsic. * config/aarch64/arm_bf16.h (vcvt_bf16_f32): New intrinsic. (vcvtq_low_bf16_f32): New intrinsic. (vcvtq_high_bf16_f32): New intrinsic. * config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator. (UNSPEC_BFCVTN): New UNSPEC. (UNSPEC_BFCVTN2): New UNSPEC. (UNSPEC_BFCVT): New UNSPEC. * config/arm/types.md (bf_cvt): New type. gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New test. --- gcc/ChangeLog | 22 +++++++++ gcc/config/aarch64/aarch64-simd-builtins.def | 6 +++ gcc/config/aarch64/aarch64-simd.md | 29 +++++++++++ gcc/config/aarch64/arm_bf16.h | 13 +++++ gcc/config/aarch64/arm_neon.h | 21 ++++++++ gcc/config/aarch64/iterators.md | 6 +++ gcc/testsuite/ChangeLog | 7 +++ .../advsimd-intrinsics/bfcvt-compile.c | 48 +++++++++++++++++++ .../aarch64/advsimd-intrinsics/bfcvt-nobf16.c | 10 ++++ .../aarch64/advsimd-intrinsics/bfcvt-nosimd.c | 16 +++++++ .../advsimd-intrinsics/bfcvtn-nobf16.c | 10 ++++ .../advsimd-intrinsics/bfcvtnq2-untied.c | 20 ++++++++ 12 files changed, 208 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 78a118903a7..a5b04abc5dd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2020-03-06 Delia Burduv + + * config/aarch64/aarch64-simd-builtins.def + (bfcvtn): New built-in function. + (bfcvtn_q): New built-in function. + (bfcvtn2): New built-in function. + (bfcvt): New built-in function. + * config/aarch64/aarch64-simd.md + (aarch64_bfcvtn): New pattern. + (aarch64_bfcvtn2v8bf): New pattern. + (aarch64_bfcvtbf): New pattern. + * config/aarch64/arm_bf16.h (float32_t): New typedef. + (vcvth_bf16_f32): New intrinsic. + * config/aarch64/arm_bf16.h (vcvt_bf16_f32): New intrinsic. + (vcvtq_low_bf16_f32): New intrinsic. + (vcvtq_high_bf16_f32): New intrinsic. + * config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator. + (UNSPEC_BFCVTN): New UNSPEC. + (UNSPEC_BFCVTN2): New UNSPEC. + (UNSPEC_BFCVT): New UNSPEC. + * config/arm/types.md (bf_cvt): New type. + 2020-03-05 H.J. Lu PR target/89229 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index d8bb96f8ed6..cc0bd0e6b59 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -714,3 +714,9 @@ VAR1 (TERNOP, simd_smmla, 0, v16qi) VAR1 (TERNOPU, simd_ummla, 0, v16qi) VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi) + + /* Implemented by aarch64_bfcvtn{q}{2} */ + VAR1 (UNOP, bfcvtn, 0, v4bf) + VAR1 (UNOP, bfcvtn_q, 0, v8bf) + VAR1 (BINOP, bfcvtn2, 0, v8bf) + VAR1 (UNOP, bfcvt, 0, bf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 89aaf8c018e..035f3163223 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7207,3 +7207,32 @@ "mmla\\t%0.4s, %2.16b, %3.16b" [(set_attr "type" "neon_mla_s_q")] ) + +;; bfcvtn +(define_insn "aarch64_bfcvtn" + [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") + (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_BFCVTN))] + "TARGET_BF16_SIMD" + "bfcvtn\\t%0.4h, %1.4s" + [(set_attr "type" "neon_fp_cvt_narrow_s_q")] +) + +(define_insn "aarch64_bfcvtn2v8bf" + [(set (match_operand:V8BF 0 "register_operand" "=w") + (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "w")] + UNSPEC_BFCVTN2))] + "TARGET_BF16_SIMD" + "bfcvtn2\\t%0.8h, %2.4s" + [(set_attr "type" "neon_fp_cvt_narrow_s_q")] +) + +(define_insn "aarch64_bfcvtbf" + [(set (match_operand:BF 0 "register_operand" "=w") + (unspec:BF [(match_operand:SF 1 "register_operand" "w")] + UNSPEC_BFCVT))] + "TARGET_BF16_FP" + "bfcvt\\t%h0, %s1" + [(set_attr "type" "f_cvt")] +) diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h index 3759c0d1cb4..984875dcc01 100644 --- a/gcc/config/aarch64/arm_bf16.h +++ b/gcc/config/aarch64/arm_bf16.h @@ -28,5 +28,18 @@ #define _AARCH64_BF16_H_ typedef __bf16 bfloat16_t; +typedef float float32_t; + +#pragma GCC push_options +#pragma GCC target ("+nothing+bf16+nosimd") + +__extension__ extern __inline bfloat16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvth_bf16_f32 (float32_t __a) +{ + return __builtin_aarch64_bfcvtbf (__a); +} + +#pragma GCC pop_options #endif diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index b6f42ac6302..cc4ce76d16e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -35634,6 +35634,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); } +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvt_bf16_f32 (float32x4_t __a) +{ + return __builtin_aarch64_bfcvtnv4bf (__a); +} + +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_low_bf16_f32 (float32x4_t __a) +{ + return __builtin_aarch64_bfcvtn_qv8bf (__a); +} + +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) +{ + return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); +} + #pragma GCC pop_options /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index ec1b92c5379..b56a050ac09 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -462,6 +462,9 @@ ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) +;; Bfloat16 modes to which V4SF can be converted +(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) + ;; ------------------------------------------------------------------ ;; Unspec enumerations for Advance SIMD. These could well go into ;; aarch64.md but for their use in int_iterators here. @@ -828,6 +831,9 @@ UNSPEC_BFMLALB ; Used in aarch64-sve.md. UNSPEC_BFMLALT ; Used in aarch64-sve.md. UNSPEC_BFMMLA ; Used in aarch64-sve.md. + UNSPEC_BFCVTN ; Used in aarch64-simd.md. + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. + UNSPEC_BFCVT ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6b8b5c03c4d..09d59730730 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2020-03-06 Delia Burduv + + * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test. + * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New test. + * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New test. + * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New test. + 2020-03-06 Kito Cheng PR tree-optimization/90883 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c new file mode 100644 index 00000000000..bbea630b182 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c @@ -0,0 +1,48 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +/* +**test_bfcvtn: +** bfcvtn v0.4h, v0.4s +** ret +*/ +bfloat16x4_t test_bfcvtn (float32x4_t a) +{ + return vcvt_bf16_f32 (a); +} + +/* +**test_bfcvtnq: +** bfcvtn v0.4h, v0.4s +** ret +*/ +bfloat16x8_t test_bfcvtnq (float32x4_t a) +{ + return vcvtq_low_bf16_f32 (a); +} + +/* +**test_bfcvtnq2: +** bfcvtn2 v0.8h, v1.4s +** ret +*/ +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) +{ + return vcvtq_high_bf16_f32 (inactive, a); +} + +/* +**test_bfcvt: +** bfcvt h0, s0 +** ret +*/ +bfloat16_t test_bfcvt (float32_t a) +{ + return vcvth_bf16_f32 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c new file mode 100644 index 00000000000..9904d65f914 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c @@ -0,0 +1,10 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+nobf16" } */ + +#include + +bfloat16_t test_bfcvt (float32_t a) +{ + /* { dg-error "inlining failed .* 'vcvth_bf16_f32" "" { target *-*-* } 0 } */ + return vcvth_bf16_f32 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c new file mode 100644 index 00000000000..c2631a541ea --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c @@ -0,0 +1,16 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */ +/* { dg-final { check-function-bodies "**" "" "-O[^0]" } } */ + +#include + +/* +**test_bfcvt: +** bfcvt h0, s0 +** ret +*/ +bfloat16_t test_bfcvt (float32_t a) +{ + return vcvth_bf16_f32 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c new file mode 100644 index 00000000000..b3b6db12358 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c @@ -0,0 +1,10 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+nobf16" } */ + +#include + +bfloat16x4_t test_bfcvtn (float32x4_t a) +{ + /* { dg-error "inlining failed .* 'vcvt_bf16_f32" "" { target *-*-* } 0 } */ + return vcvt_bf16_f32 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c new file mode 100644 index 00000000000..4b730e39d4e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c @@ -0,0 +1,20 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include + +/* +**test_bfcvtnq2_untied: +** mov v0.16b, v1.16b +** bfcvtn2 v0.8h, v2.4s +** ret +*/ +bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive, + float32x4_t a) +{ + return vcvtq_high_bf16_f32 (inactive, a); +} -- 2.30.2