[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
+(define_insn "*aarch64_mla_elt_merge<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (plus:VDQHS
+ (mult:VDQHS (vec_duplicate:VDQHS
+ (match_operand:<VEL> 1 "register_operand" "w"))
+ (match_operand:VDQHS 2 "register_operand" "w"))
+ (match_operand:VDQHS 3 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
(define_insn "aarch64_mls<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
+(define_insn "*aarch64_mls_elt_merge<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (minus:VDQHS
+ (match_operand:VDQHS 1 "register_operand" "0")
+ (mult:VDQHS (vec_duplicate:VDQHS
+ (match_operand:<VEL> 2 "register_operand" "w"))
+ (match_operand:VDQHS 3 "register_operand" "w"))))]
+ "TARGET_SIMD"
+ "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+typedef short int __attribute__ ((vector_size (16))) v8hi;
+
+v8hi
+mla8hi (v8hi v0, v8hi v1, short int v2)
+{
+ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+ return v0 + v1 * v2;
+}
+
+
+v8hi
+mls8hi (v8hi v0, v8hi v1, short int v2)
+{
+ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+ return v0 - v1 * v2;
+}
+
+typedef short int __attribute__ ((vector_size (8))) v4hi;
+
+v4hi
+mla4hi (v4hi v0, v4hi v1, short int v2)
+{
+ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+ return v0 + v1 * v2;
+}
+
+v4hi
+mls4hi (v4hi v0, v4hi v1, short int v2)
+{
+ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
+ return v0 - v1 * v2;
+}
+
+typedef int __attribute__ ((vector_size (16))) v4si;
+
+v4si
+mla4si (v4si v0, v4si v1, int v2)
+{
+ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+ return v0 + v1 * v2;
+}
+
+v4si
+mls4si (v4si v0, v4si v1, int v2)
+{
+ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+ return v0 - v1 * v2;
+}
+
+typedef int __attribute__((vector_size (8))) v2si;
+
+v2si
+mla2si (v2si v0, v2si v1, int v2)
+{
+ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+ return v0 + v1 * v2;
+}
+
+v2si
+mls2si (v2si v0, v2si v1, int v2)
+{
+ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
+ return v0 - v1 * v2;
+}