From 84747acf8da36425f7e36cf99b251ee047b2e3a5 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 16 Dec 2020 20:43:47 +0000 Subject: [PATCH] AArch64: Add NEON, SVE and SVE2 RTL patterns for Complex Addition. This adds implementation for the optabs for add complex operations. With this the following C code: void f90 (float complex a[restrict N], float complex b[restrict N], float complex c[restrict N]) { for (int i=0; i < N; i++) c[i] = a[i] + (b[i] * I); } generates f90: mov x3, 0 .p2align 3,,7 .L2: ldr q0, [x0, x3] ldr q1, [x1, x3] fcadd v0.4s, v0.4s, v1.4s, #90 str q0, [x2, x3] add x3, x3, 16 cmp x3, 1600 bne .L2 ret instead of f90: add x3, x1, 1600 .p2align 3,,7 .L2: ld2 {v4.4s - v5.4s}, [x0], 32 ld2 {v2.4s - v3.4s}, [x1], 32 fsub v0.4s, v4.4s, v3.4s fadd v1.4s, v5.4s, v2.4s st2 {v0.4s - v1.4s}, [x2], 32 cmp x3, x1 bne .L2 ret gcc/ChangeLog: * config/aarch64/aarch64-simd.md (cadd<rot><mode>3): New. * config/aarch64/iterators.md (SVE2_INT_CADD_OP): New. * config/aarch64/aarch64-sve.md (cadd<rot><mode>3): New. * config/aarch64/aarch64-sve2.md (cadd<rot><mode>3): New. 
--- gcc/config/aarch64/aarch64-simd.md | 8 ++++++++ gcc/config/aarch64/aarch64-sve.md | 14 ++++++++++++++ gcc/config/aarch64/aarch64-sve2.md | 10 ++++++++++ gcc/config/aarch64/iterators.md | 4 ++++ 4 files changed, 36 insertions(+) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 68baf416045..05d18f8bd3a 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -449,6 +449,14 @@ [(set_attr "type" "neon_fcadd")] ) +(define_expand "cadd<rot><mode>3" + [(set (match_operand:VHSDF 0 "register_operand") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCADD))] + "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" +) + (define_insn "aarch64_fcmla<rot><mode>" [(set (match_operand:VHSDF 0 "register_operand" "=w") (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0") diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 6359c40bdec..6a5194f54f9 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -5480,6 +5480,20 @@ "TARGET_SVE" ) +;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer +(define_expand "@cadd<rot><mode>3" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + SVE_COND_FCADD))] + "TARGET_SVE" +{ + operands[3] = aarch64_ptrue_reg (<VPRED>mode); +}) + ;; Predicated FCADD, merging with the first input. 
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 772c35079c9..1897ddf69c3 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1799,6 +1799,16 @@ [(set_attr "movprfx" "*,yes")] ) +;; unpredicated optab pattern for auto-vectorizer +(define_expand "cadd<rot><mode>3" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")] + SVE2_INT_CADD_OP))] + "TARGET_SVE2" +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Complex ternary operations ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index fb1426b7752..b8ee4220603 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2598,6 +2598,10 @@ UNSPEC_SQRDCMLAH180 UNSPEC_SQRDCMLAH270]) +;; Same as SVE2_INT_CADD but exclude the saturating instructions +(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90 + UNSPEC_CADD270]) + (define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT UNSPEC_CDOT90 UNSPEC_CDOT180 -- 2.30.2