Arm: Add NEON and MVE complex mul, mla and mls patterns.

author Tamar Christina <tamar.christina@arm.com>

Mon, 25 Jan 2021 08:56:37 +0000 (08:56 +0000)

committer Tamar Christina <tamar.christina@arm.com>

Mon, 25 Jan 2021 08:56:37 +0000 (08:56 +0000)
author Tamar Christina <tamar.christina@arm.com>
Mon, 25 Jan 2021 08:56:37 +0000 (08:56 +0000)
committer Tamar Christina <tamar.christina@arm.com>
Mon, 25 Jan 2021 08:56:37 +0000 (08:56 +0000)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md

index 2e0aacbd3f742538073e441b53fcffc45e37c790..b9027905307fe19d60d164cef23dac6ab119cd9b 100644 (file)
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1186,6 +1186,33 @@
                       (UNSPEC_VCMLA180 "180")
                       (UNSPEC_VCMLA270 "270")])
  
+;; The complex operations, when performed on a real complex number, require two
+;; instructions to perform the operation, e.g. complex multiplication requires
+;; two VCMUL with a particular rotation value.
+;;
+;; These values can be looked up in rotsplit1 and rotsplit2.  As an example,
+;; VCMUL needs the first instruction to use #0 and the second #90.
+(define_int_attr rotsplit1 [(UNSPEC_VCMLA "0")
+                           (UNSPEC_VCMLA_CONJ "0")
+                           (UNSPEC_VCMUL "0")
+                           (UNSPEC_VCMUL_CONJ "0")
+                           (UNSPEC_VCMLA180 "180")
+                           (UNSPEC_VCMLA180_CONJ "180")])
+
+(define_int_attr rotsplit2 [(UNSPEC_VCMLA "90")
+                           (UNSPEC_VCMLA_CONJ "270")
+                           (UNSPEC_VCMUL "90")
+                           (UNSPEC_VCMUL_CONJ "270")
+                           (UNSPEC_VCMLA180 "270")
+                           (UNSPEC_VCMLA180_CONJ "90")])
+
+(define_int_attr conj_op [(UNSPEC_VCMLA180 "")
+                         (UNSPEC_VCMLA180_CONJ "_conj")
+                         (UNSPEC_VCMLA "")
+                         (UNSPEC_VCMLA_CONJ "_conj")
+                         (UNSPEC_VCMUL "")
+                         (UNSPEC_VCMUL_CONJ "_conj")])
+
  (define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90")
                           (UNSPEC_VCADD270 "_rot270")
                           (UNSPEC_VCMLA "")
@@ -1200,6 +1227,9 @@
  (define_int_iterator VCMUL [UNSPEC_VCMUL UNSPEC_VCMUL90
                             UNSPEC_VCMUL180 UNSPEC_VCMUL270])
  
+(define_int_attr fcmac1 [(UNSPEC_VCMLA "a") (UNSPEC_VCMLA_CONJ "a")
+                        (UNSPEC_VCMLA180 "s") (UNSPEC_VCMLA180_CONJ "s")])
+
  (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
                             (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
                             (UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8")
@@ -1723,3 +1753,13 @@
  (define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
  (define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
  (define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
+
+;; Define iterators for VCMLA operations
+(define_int_iterator VCMLA_OP [UNSPEC_VCMLA
+                              UNSPEC_VCMLA_CONJ
+                              UNSPEC_VCMLA180
+                              UNSPEC_VCMLA180_CONJ])
+
+;; Define iterators for VCMLA operations as MUL
+(define_int_iterator VCMUL_OP [UNSPEC_VCMUL
+                              UNSPEC_VCMUL_CONJ])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md

index 62ff12365ab3f92f177704927d230fefc415f1cb..465f71c4eee5f77e4d5904e8508c4134d1c9573f 100644 (file)
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -4101,15 +4101,16 @@
  (define_insn "mve_vcmlaq<mve_rot><mode>"
    [
     (set (match_operand:MVE_0 0 "s_register_operand" "=w,w")
-       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0,Dz")
-                      (match_operand:MVE_0 2 "s_register_operand" "w,w")
-                      (match_operand:MVE_0 3 "s_register_operand" "w,w")]
-        VCMLA))
+       (plus:MVE_0 (match_operand:MVE_0 1 "reg_or_zero_operand" "Dz,0")
+                   (unspec:MVE_0
+                       [(match_operand:MVE_0 2 "s_register_operand" "w,w")
+                        (match_operand:MVE_0 3 "s_register_operand" "w,w")]
+                    VCMLA)))
    ]
    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
    "@
-   vcmla.f%#<V_sz_elem>        %q0, %q2, %q3, #<rot>
-   vcmul.f%#<V_sz_elem>        %q0, %q2, %q3, #<rot>"
+   vcmul.f%#<V_sz_elem>        %q0, %q2, %q3, #<rot>
+   vcmla.f%#<V_sz_elem>        %q0, %q2, %q3, #<rot>"
    [(set_attr "type" "mve_move")
  ])
  
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md

index e904db97ea7bd4cb0f32199038ace3d334ffb8f9..fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468 100644 (file)
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2952,6 +2952,25 @@
    [(set_attr "type" "neon_fcmla")]
  )
  
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder.  Because of this, expand early.
+(define_expand "cmul<conj_op><mode>3"
+  [(set (match_operand:VDF 0 "register_operand")
+       (unspec:VDF [(match_operand:VDF 1 "register_operand")
+                    (match_operand:VDF 2 "register_operand")]
+                   VCMUL_OP))]
+  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+{
+  rtx res1 = gen_reg_rtx (<MODE>mode);
+  rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
+                                             operands[2], operands[1]));
+  emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
+                                             operands[2], operands[1]));
+  DONE;
+})
+
  
  ;; These instructions map to the __builtins for the Dot Product operations.
  (define_insn "neon_<sup>dot<vsi2qi>"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md

index 97a803e8da50c0119d15bcd4af47c298d3758c47..c6ebb6fc2b6a8d9e46f126dd857222a892c84093 100644 (file)
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -510,10 +510,13 @@
    UNSPEC_VCMLA90
    UNSPEC_VCMLA180
    UNSPEC_VCMLA270
+  UNSPEC_VCMLA_CONJ
+  UNSPEC_VCMLA180_CONJ
    UNSPEC_VCMUL
    UNSPEC_VCMUL90
    UNSPEC_VCMUL180
    UNSPEC_VCMUL270
+  UNSPEC_VCMUL_CONJ
    UNSPEC_MATMUL_S
    UNSPEC_MATMUL_U
    UNSPEC_MATMUL_US
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md

index ff448da126b2250605d772ad423c70c16b753338..692b28ea8ccb18abac016a0c1b45ac7d0bf073d4 100644 (file)
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -215,6 +215,63 @@
                       && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
  )
  
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder.  Because of this, expand early.
+(define_expand "cmul<conj_op><mode>3"
+  [(set (match_operand:VQ_HSF 0 "register_operand")
+        (unspec:VQ_HSF [(match_operand:VQ_HSF 1 "register_operand")
+                       (match_operand:VQ_HSF 2 "register_operand")]
+                      VCMUL_OP))]
+  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT))
+   && !BYTES_BIG_ENDIAN"
+{
+  rtx res1 = gen_reg_rtx (<MODE>mode);
+  if (TARGET_COMPLEX)
+    {
+      rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+      emit_insn (gen_arm_vcmla<rotsplit1><mode> (res1, tmp,
+                                                operands[2], operands[1]));
+    }
+  else
+    emit_insn (gen_arm_vcmla<rotsplit1><mode> (res1, CONST0_RTX (<MODE>mode),
+                                              operands[2], operands[1]));
+
+  emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], res1,
+                                            operands[2], operands[1]));
+  DONE;
+})
+
+(define_expand "arm_vcmla<rot><mode>"
+  [(set (match_operand:VF 0 "register_operand")
+       (plus:VF (match_operand:VF 1 "register_operand")
+                (unspec:VF [(match_operand:VF 2 "register_operand")
+                            (match_operand:VF 3 "register_operand")]
+                            VCMLA)))]
+  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+                     && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+)
+
+;; The complex mla/mls operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder.  Because of this, expand early.
+(define_expand "cml<fcmac1><conj_op><mode>4"
+  [(set (match_operand:VF 0 "register_operand")
+       (plus:VF (match_operand:VF 1 "register_operand")
+                (unspec:VF [(match_operand:VF 2 "register_operand")
+                            (match_operand:VF 3 "register_operand")]
+                           VCMLA_OP)))]
+  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+                     && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+  emit_insn (gen_arm_vcmla<rotsplit1><mode> (tmp, operands[1],
+                                            operands[3], operands[2]));
+  emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], tmp,
+                                            operands[3], operands[2]));
+  DONE;
+})
+
  (define_expand "movmisalign<mode>"
   [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
         (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
author	Tamar Christina <tamar.christina@arm.com>
	Mon, 25 Jan 2021 08:56:37 +0000 (08:56 +0000)
committer	Tamar Christina <tamar.christina@arm.com>
	Mon, 25 Jan 2021 08:56:37 +0000 (08:56 +0000)
gcc/config/arm/iterators.md		patch \| blob \| history
gcc/config/arm/mve.md		patch \| blob \| history
gcc/config/arm/neon.md		patch \| blob \| history
gcc/config/arm/unspecs.md		patch \| blob \| history
gcc/config/arm/vec-common.md		patch \| blob \| history