This adds implementation for the optabs for add complex operations. With this
the following C code:
void f90 (float complex a[restrict N], float complex b[restrict N],
float complex c[restrict N])
{
for (int i=0; i < N; i++)
c[i] = a[i] + (b[i] * I);
}
generates
f90:
mov x3, 0
.p2align 3,,7
.L2:
ldr q0, [x0, x3]
ldr q1, [x1, x3]
fcadd v0.4s, v0.4s, v1.4s, #90
str q0, [x2, x3]
add x3, x3, 16
cmp x3, 1600
bne .L2
ret
instead of
f90:
add x3, x1, 1600
.p2align 3,,7
.L2:
ld2 {v4.4s - v5.4s}, [x0], 32
ld2 {v2.4s - v3.4s}, [x1], 32
fsub v0.4s, v4.4s, v3.4s
fadd v1.4s, v5.4s, v2.4s
st2 {v0.4s - v1.4s}, [x2], 32
cmp x3, x1
bne .L2
ret
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (cadd<rot><mode>3): New.
* config/aarch64/iterators.md (SVE2_INT_CADD_OP): New.
* config/aarch64/aarch64-sve.md (cadd<rot><mode>3): New.
* config/aarch64/aarch64-sve2.md (cadd<rot><mode>3): New.
[(set_attr "type" "neon_fcadd")]
)
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:VHSDF 0 "register_operand")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
+ (match_operand:VHSDF 2 "register_operand")]
+ FCADD))]
+ "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+)
+
(define_insn "aarch64_fcmla<rot><mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
"TARGET_SVE"
)
+;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer
+(define_expand "@cadd<rot><mode>3"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand")
+ (unspec:SVE_FULL_F
+ [(match_dup 3)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_FULL_F 1 "register_operand")
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+ SVE_COND_FCADD))]
+ "TARGET_SVE"
+{
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+})
+
;; Predicated FCADD, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
[(set_attr "movprfx" "*,yes")]
)
+;; unpredicated optab pattern for auto-vectorizer
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand")]
+ SVE2_INT_CADD_OP))]
+ "TARGET_SVE2"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
UNSPEC_SQRDCMLAH180
UNSPEC_SQRDCMLAH270])
+;; Same as SVE2_INT_CADD but exclude the saturating instructions
+(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90
+ UNSPEC_CADD270])
+
(define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT
UNSPEC_CDOT90
UNSPEC_CDOT180