(UNSPEC_VCMLA180 "180")
(UNSPEC_VCMLA270 "270")])
+;; The complex operations when performed on a real complex number require two
+;; instructions to perform the operation. e.g. complex multiplication requires
+;; two VCMUL with a particular rotation value.
+;;
+;; These values can be looked up in rotsplit1 and rotsplit2. as an example
+;; VCMUL needs the first instruction to use #0 and the second #90.
+(define_int_attr rotsplit1 [(UNSPEC_VCMLA "0")
+ (UNSPEC_VCMLA_CONJ "0")
+ (UNSPEC_VCMUL "0")
+ (UNSPEC_VCMUL_CONJ "0")
+ (UNSPEC_VCMLA180 "180")
+ (UNSPEC_VCMLA180_CONJ "180")])
+
+(define_int_attr rotsplit2 [(UNSPEC_VCMLA "90")
+ (UNSPEC_VCMLA_CONJ "270")
+ (UNSPEC_VCMUL "90")
+ (UNSPEC_VCMUL_CONJ "270")
+ (UNSPEC_VCMLA180 "270")
+ (UNSPEC_VCMLA180_CONJ "90")])
+
+(define_int_attr conj_op [(UNSPEC_VCMLA180 "")
+ (UNSPEC_VCMLA180_CONJ "_conj")
+ (UNSPEC_VCMLA "")
+ (UNSPEC_VCMLA_CONJ "_conj")
+ (UNSPEC_VCMUL "")
+ (UNSPEC_VCMUL_CONJ "_conj")])
+
(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90")
(UNSPEC_VCADD270 "_rot270")
(UNSPEC_VCMLA "")
(define_int_iterator VCMUL [UNSPEC_VCMUL UNSPEC_VCMUL90
UNSPEC_VCMUL180 UNSPEC_VCMUL270])
+(define_int_attr fcmac1 [(UNSPEC_VCMLA "a") (UNSPEC_VCMLA_CONJ "a")
+ (UNSPEC_VCMLA180 "s") (UNSPEC_VCMLA180_CONJ "s")])
+
(define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
(UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
(UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8")
(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
+
+;; Define iterators for VCMLA operations
+(define_int_iterator VCMLA_OP [UNSPEC_VCMLA
+ UNSPEC_VCMLA_CONJ
+ UNSPEC_VCMLA180
+ UNSPEC_VCMLA180_CONJ])
+
+;; Define iterators for VCMLA operations as MUL
+(define_int_iterator VCMUL_OP [UNSPEC_VCMUL
+ UNSPEC_VCMUL_CONJ])
(define_insn "mve_vcmlaq<mve_rot><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w,w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0,Dz")
- (match_operand:MVE_0 2 "s_register_operand" "w,w")
- (match_operand:MVE_0 3 "s_register_operand" "w,w")]
- VCMLA))
+ (plus:MVE_0 (match_operand:MVE_0 1 "reg_or_zero_operand" "Dz,0")
+ (unspec:MVE_0
+ [(match_operand:MVE_0 2 "s_register_operand" "w,w")
+ (match_operand:MVE_0 3 "s_register_operand" "w,w")]
+ VCMLA)))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"@
- vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>
- vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>"
+ vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>
+ vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>"
[(set_attr "type" "mve_move")
])
[(set_attr "type" "neon_fcmla")]
)
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cmul<conj_op><mode>3"
+ [(set (match_operand:VDF 0 "register_operand")
+ (unspec:VDF [(match_operand:VDF 1 "register_operand")
+ (match_operand:VDF 2 "register_operand")]
+ VCMUL_OP))]
+ "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+{
+ rtx res1 = gen_reg_rtx (<MODE>mode);
+ rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+ emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
+ operands[2], operands[1]));
+ emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
+ operands[2], operands[1]));
+ DONE;
+})
+
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "neon_<sup>dot<vsi2qi>"
UNSPEC_VCMLA90
UNSPEC_VCMLA180
UNSPEC_VCMLA270
+ UNSPEC_VCMLA_CONJ
+ UNSPEC_VCMLA180_CONJ
UNSPEC_VCMUL
UNSPEC_VCMUL90
UNSPEC_VCMUL180
UNSPEC_VCMUL270
+ UNSPEC_VCMUL_CONJ
UNSPEC_MATMUL_S
UNSPEC_MATMUL_U
UNSPEC_MATMUL_US
&& ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
)
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cmul<conj_op><mode>3"
+ [(set (match_operand:VQ_HSF 0 "register_operand")
+ (unspec:VQ_HSF [(match_operand:VQ_HSF 1 "register_operand")
+ (match_operand:VQ_HSF 2 "register_operand")]
+ VCMUL_OP))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT))
+ && !BYTES_BIG_ENDIAN"
+{
+ rtx res1 = gen_reg_rtx (<MODE>mode);
+ if (TARGET_COMPLEX)
+ {
+ rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+ emit_insn (gen_arm_vcmla<rotsplit1><mode> (res1, tmp,
+ operands[2], operands[1]));
+ }
+ else
+ emit_insn (gen_arm_vcmla<rotsplit1><mode> (res1, CONST0_RTX (<MODE>mode),
+ operands[2], operands[1]));
+
+ emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], res1,
+ operands[2], operands[1]));
+ DONE;
+})
+
+(define_expand "arm_vcmla<rot><mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (plus:VF (match_operand:VF 1 "register_operand")
+ (unspec:VF [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "register_operand")]
+ VCMLA)))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+)
+
+;; The complex mla/mls operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cml<fcmac1><conj_op><mode>4"
+ [(set (match_operand:VF 0 "register_operand")
+ (plus:VF (match_operand:VF 1 "register_operand")
+ (unspec:VF [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "register_operand")]
+ VCMLA_OP)))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_arm_vcmla<rotsplit1><mode> (tmp, operands[1],
+ operands[3], operands[2]));
+ emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], tmp,
+ operands[3], operands[2]));
+ DONE;
+})
+
(define_expand "movmisalign<mode>"
[(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
(unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]