AArch64: Add NEON, SVE and SVE2 RTL patterns for Complex Addition.
authorTamar Christina <tamar.christina@arm.com>
Wed, 16 Dec 2020 20:43:47 +0000 (20:43 +0000)
committerTamar Christina <tamar.christina@arm.com>
Wed, 16 Dec 2020 20:43:54 +0000 (20:43 +0000)
This adds implementation for the optabs for add complex operations.  With this
the following C code:

  void f90 (float complex a[restrict N], float complex b[restrict N],
    float complex c[restrict N])
  {
    for (int i=0; i < N; i++)
      c[i] = a[i] + (b[i] * I);
  }

generates

  f90:
  mov     x3, 0
  .p2align 3,,7
  .L2:
  ldr     q0, [x0, x3]
  ldr     q1, [x1, x3]
  fcadd   v0.4s, v0.4s, v1.4s, #90
  str     q0, [x2, x3]
  add     x3, x3, 16
  cmp     x3, 1600
  bne     .L2
  ret

instead of

  f90:
  add     x3, x1, 1600
  .p2align 3,,7
  .L2:
  ld2     {v4.4s - v5.4s}, [x0], 32
  ld2     {v2.4s - v3.4s}, [x1], 32
  fsub    v0.4s, v4.4s, v3.4s
  fadd    v1.4s, v5.4s, v2.4s
  st2     {v0.4s - v1.4s}, [x2], 32
  cmp     x3, x1
  bne     .L2
  ret

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (cadd<rot><mode>3): New.
* config/aarch64/iterators.md (SVE2_INT_CADD_OP): New.
* config/aarch64/aarch64-sve.md (cadd<rot><mode>3): New.
* config/aarch64/aarch64-sve2.md (cadd<rot><mode>3): New.

gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64-sve2.md
gcc/config/aarch64/iterators.md

index 68baf416045178b0ebcfeb8de2d201f625f1c317..05d18f8bd3ac09c56c82dc73cff855315eb302b7 100644 (file)
   [(set_attr "type" "neon_fcadd")]
 )
 
+(define_expand "cadd<rot><mode>3"
+  [(set (match_operand:VHSDF 0 "register_operand")
+       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
+                      (match_operand:VHSDF 2 "register_operand")]
+                      FCADD))]
+  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+)
+
 (define_insn "aarch64_fcmla<rot><mode>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
index 6359c40bdecda6c126bd70bef66561dd1da44dc9..6a5194f54f9c3d7b985890df523ceb4221b0cc63 100644 (file)
   "TARGET_SVE"
 )
 
+;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer
+(define_expand "@cadd<rot><mode>3"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+       (unspec:SVE_FULL_F
+         [(match_dup 3)
+          (const_int SVE_RELAXED_GP)
+          (match_operand:SVE_FULL_F 1 "register_operand")
+          (match_operand:SVE_FULL_F 2 "register_operand")]
+         SVE_COND_FCADD))]
+  "TARGET_SVE"
+{
+  operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+})
+
 ;; Predicated FCADD, merging with the first input.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
index 772c35079c9441448534471fba4dba622322b8fc..1897ddf69c34496247e40ba36d97b93a9a9d7670 100644 (file)
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; unpredicated optab pattern for auto-vectorizer
+(define_expand "cadd<rot><mode>3"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand")
+       (unspec:SVE_FULL_I
+         [(match_operand:SVE_FULL_I 1 "register_operand")
+          (match_operand:SVE_FULL_I 2 "register_operand")]
+         SVE2_INT_CADD_OP))]
+  "TARGET_SVE2"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Complex ternary operations
 ;; -------------------------------------------------------------------------
index fb1426b7752890848cb49722ef7442d96cb1408b..b8ee4220603ce6da4e8044f61042d98a7e60b17d 100644 (file)
                                    UNSPEC_SQRDCMLAH180
                                    UNSPEC_SQRDCMLAH270])
 
+;; Same as SVE2_INT_CADD but exclude the saturating instructions
+(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90
+                                      UNSPEC_CADD270])
+
 (define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT
                                    UNSPEC_CDOT90
                                    UNSPEC_CDOT180