(plus:<VWIDE>
           (mult:<VWIDE>
             (ANY_EXTEND:<VWIDE>
-              (vec_duplicate:VD_HSI
-                     (match_operand:<VEL> 3 "register_operand" "<h_con>")))
+              (match_operand:VD_HSI 2 "register_operand" "w"))
             (ANY_EXTEND:<VWIDE>
-              (match_operand:VD_HSI 2 "register_operand" "w")))
+              (vec_duplicate:VD_HSI
+                     (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
           (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
           (match_operand:<VWIDE> 1 "register_operand" "0")
           (mult:<VWIDE>
             (ANY_EXTEND:<VWIDE>
-              (vec_duplicate:VD_HSI
-                     (match_operand:<VEL> 3 "register_operand" "<h_con>")))
+              (match_operand:VD_HSI 2 "register_operand" "w"))
             (ANY_EXTEND:<VWIDE>
-              (match_operand:VD_HSI 2 "register_operand" "w")))))]
+              (vec_duplicate:VD_HSI
+                     (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
   "TARGET_SIMD"
   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
         (mult:<VWIDE>
           (ANY_EXTEND:<VWIDE>
-            (vec_duplicate:<VCOND>
-             (match_operand:<VEL> 2 "register_operand" "<h_con>")))
+            (match_operand:VD_HSI 1 "register_operand" "w"))
           (ANY_EXTEND:<VWIDE>
-            (match_operand:VD_HSI 1 "register_operand" "w"))))]
+            (vec_duplicate:<VCOND>
+             (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
   "TARGET_SIMD"
   "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
 
--- /dev/null
+/* { dg-do compile { target aarch64-*-* } } */
+
+#include <arm_neon.h>
+
+/*
+**add:
+**     smlal   v0.4s, v1.4h, v2.h[3]
+**     ret
+*/
+
+int32x4_t add(int32x4_t acc, int16x4_t b, int16x4_t c) {
+    return vmlal_n_s16(acc, b, c[3]);
+}
+
+/*
+**sub:
+**     smlsl   v0.4s, v1.4h, v2.h[3]
+**     ret
+*/
+
+int32x4_t sub(int32x4_t acc, int16x4_t b, int16x4_t c) {
+    return vmlsl_n_s16(acc, b, c[3]);
+}
+
+/*
+**smull:
+**     smull   v0.4s, v1.4h, v2.h[3]
+**     ret
+*/
+
+int32x4_t smull(int16x4_t b, int16x4_t c) {
+    return vmull_n_s16(b, c[3]);
+}
+
+/*
+**umull:
+**     umull   v0.4s, v1.4h, v2.h[3]
+**     ret
+*/
+
+uint32x4_t umull(uint16x4_t b, uint16x4_t c) {
+    return vmull_n_u16(b, c[3]);
+}
+
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" {-O[^0]} } } */