re PR target/35714 (x86 poor code with pmaddwd)
author: Uros Bizjak <ubizjak@gmail.com>
Wed, 7 May 2008 13:12:02 +0000 (15:12 +0200)
committer: Uros Bizjak <uros@gcc.gnu.org>
Wed, 7 May 2008 13:12:02 +0000 (15:12 +0200)
PR target/35714
* config/i386/mmx.md (mmx_subv2sf3): New expander.
(*mmx_subv2sf3): Rename from mmx_subv2sf3 insn pattern.
(*mmx_eqv2sf3): Rename from mmx_eqv2sf3 insn pattern.
(mmx_eqv2sf3): New expander.  Use ix86_fixup_binary_operands_no_copy
to handle nonimmediate operands.
(*mmx_pmaddwd): Rename from mmx_pmaddwd insn pattern.
(mmx_pmaddwd): New expander.  Use ix86_fixup_binary_operands_no_copy
to handle nonimmediate operands.
(*mmx_pmulhrwv4hi3): Rename from mmx_pmulhrwv4hi3 insn pattern.
(mmx_pmulhrwv4hi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse2_umulv1siv1di3): Rename from sse2_umulv1siv1di3 insn pattern.
(sse2_umulv1siv1di3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*mmx_eq<mode>3): Rename from mmx_eq<mode>3 insn pattern.
(mmx_eq<mode>3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*mmx_uavgv8qi3): Rename from mmx_uavgv8qi3 insn pattern.
(mmx_uavgv8qi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*mmx_uavgv4hi3): Rename from mmx_uavgv4hi3 insn pattern.
(mmx_uavgv4hi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.

* config/i386/sse.md
(*sse_movhlps): Rename from sse_movhlps insn pattern.
(sse_movhlps): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse_movlhps): Rename from sse_movlhps insn pattern.
(sse_movlhps): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse_loadhps): Rename from sse_loadhps insn pattern.
(sse_loadhps): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse_loadlps): Rename from sse_loadlps insn pattern.
(sse_loadlps): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse2_unpckhpd): Rename from sse2_unpckhpd insn pattern.
(sse2_unpckhpd): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse2_unpcklpd): Rename from sse2_unpcklpd insn pattern.
(sse2_unpcklpd): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse2_loadhpd): Rename from sse2_loadhpd insn pattern.
(sse2_loadhpd): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse2_loadlpd): Rename from sse2_loadlpd insn pattern.
(sse2_loadlpd): New expander.  Use ix86_fixup_binary_operands
to handle nonimmediate operands.
(*sse2_<plusminus_insn><mode>3): Rename from
sse2_<plusminus_insn><mode>3 insn pattern.
(sse2_<plusminus_insn><mode>3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse2_umulv2siv2di3): Rename from sse2_umulv2siv2di3 insn pattern.
(sse2_umulv2siv2di3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse4_1_mulv2siv2di3): Rename from sse4_1_mulv2siv2di3 insn pattern.
(sse4_1_mulv2siv2di3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse2_pmaddwd): Rename from sse2_pmaddwd insn pattern.
(sse2_pmaddwd): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse2_eq<mode>3): Rename from sse2_eq<mode>3 insn pattern.
(sse2_eq<mode>3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse4_1_eqv2di3): Rename from sse4_1_eqv2di3 insn pattern.
(sse4_1_eqv2di3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse2_uavgv16qi3): Rename from sse2_uavgv16qi3 insn pattern.
(sse2_uavgv16qi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse2_uavgv16qi3): Rename from sse2_uavgv16qi3 insn pattern.
(sse2_uavgv16qi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*sse2_uavgv8hi3): Rename from sse2_uavgv8hi3 insn pattern.
(sse2_uavgv8hi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*ssse3_pmulhrswv8hi3): Rename from ssse3_pmulhrswv8hi3 insn pattern.
(ssse3_pmulhrswv8hi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
(*ssse3_pmulhrswv4hi3): Rename from ssse3_pmulhrswv4hi3 insn pattern.
(ssse3_pmulhrswv4hi3): New expander.  Use
ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.

(<sse>_vm<plusminus_insn><mode>3): Do not use ix86_binary_operator_ok.
(<sse>_vmmul<mode>3): Ditto.
(divv4sf3): Do not use ix86_fixup_binary_operands_no_copy.
(divv2df3): Ditto.
(ssse3_pmaddubsw128): Use register_operand for operand 1.
(ssse3_pmaddubsw): Ditto.

* config/i386/i386.c (ix86_fixup_binary_operands): Assert that src1
and src2 must have the same mode when swapped.
(ix86_expand_binop_builtin): Do not use ix86_fixup_binary_operands
and ix86_binary_operator_ok.  Do not force operands in registers
when optimizing.

testsuite/ChangeLog:

PR target/35714
* gcc.target/i386/pr35714.c: New test.

From-SVN: r135041

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/mmx.md
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr35714.c [new file with mode: 0644]

index 0a0e5215cda7c47a0a428f44997ea5450f8d6793..441f09edd42be54b7b576bb9ebd2d966b96913bd 100644 (file)
@@ -1,3 +1,103 @@
+2008-05-08  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/35714
+       * config/i386/mmx.md (mmx_subv2sf3): New expander.
+       (*mmx_subv2sf3): Rename from mmx_subv2sf3 insn pattern.
+       (*mmx_eqv2sf3): Rename from mmx_eqv2sf3 insn pattern.
+       (mmx_eqv2sf3): New expander.  Use ix86_fixup_binary_operands_no_copy
+       to handle nonimmediate operands.
+       (*mmx_pmaddwd): Rename from mmx_pmaddwd insn pattern.
+       (mmx_pmaddwd): New expander.  Use ix86_fixup_binary_operands_no_copy
+       to handle nonimmediate operands.
+       (*mmx_pmulhrwv4hi3): Rename from mmx_pmulhrwv4hi3 insn pattern.
+       (mmx_pmulhrwv4hi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse2_umulv1siv1di3): Rename from sse2_umulv1siv1di3 insn pattern.
+       (sse2_umulv1siv1di3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*mmx_eq<mode>3): Rename from mmx_eq<mode>3 insn pattern.
+       (mmx_eq<mode>3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*mmx_uavgv8qi3): Rename from mmx_uavgv8qi3 insn pattern.
+       (mmx_uavgv8qi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*mmx_uavgv4hi3): Rename from mmx_uavgv4hi3 insn pattern.
+       (mmx_uavgv4hi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+
+       * config/i386/sse.md
+       (*sse_movhlps): Rename from sse_movhlps insn pattern.
+       (sse_movhlps): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse_movlhps): Rename from sse_movlhps insn pattern.
+       (sse_movlhps): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse_loadhps): Rename from sse_loadhps insn pattern.
+       (sse_loadhps): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse_loadlps): Rename from sse_loadlps insn pattern.
+       (sse_loadlps): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse2_unpckhpd): Rename from sse2_unpckhpd insn pattern.
+       (sse2_unpckhpd): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse2_unpcklpd): Rename from sse2_unpcklpd insn pattern.
+       (sse2_unpcklpd): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse2_loadhpd): Rename from sse2_loadhpd insn pattern.
+       (sse2_loadhpd): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse2_loadlpd): Rename from sse2_loadlpd insn pattern.
+       (sse2_loadlpd): New expander.  Use ix86_fixup_binary_operands
+       to handle nonimmediate operands.
+       (*sse2_<plusminus_insn><mode>3): Rename from
+       sse2_<plusminus_insn><mode>3 insn pattern.
+       (sse2_<plusminus_insn><mode>3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse2_umulv2siv2di3): Rename from sse2_umulv2siv2di3 insn pattern.
+       (sse2_umulv2siv2di3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse4_1_mulv2siv2di3): Rename from sse4_1_mulv2siv2di3 insn pattern.
+       (sse4_1_mulv2siv2di3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse2_pmaddwd): Rename from sse2_pmaddwd insn pattern.
+       (sse2_pmaddwd): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse2_eq<mode>3): Rename from sse2_eq<mode>3 insn pattern.
+       (sse2_eq<mode>3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse4_1_eqv2di3): Rename from sse4_1_eqv2di3 insn pattern.
+       (sse4_1_eqv2di3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse2_uavgv16qi3): Rename from sse2_uavgv16qi3 insn pattern.
+       (sse2_uavgv16qi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse2_uavgv16qi3): Rename from sse2_uavgv16qi3 insn pattern.
+       (sse2_uavgv16qi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*sse2_uavgv8hi3): Rename from sse2_uavgv8hi3 insn pattern.
+       (sse2_uavgv8hi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*ssse3_pmulhrswv8hi3): Rename from ssse3_pmulhrswv8hi3 insn pattern.
+       (ssse3_pmulhrswv8hi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+       (*ssse3_pmulhrswv4hi3): Rename from ssse3_pmulhrswv4hi3 insn pattern.
+       (ssse3_pmulhrswv4hi3): New expander.  Use
+       ix86_fixup_binary_operands_no_copy to handle nonimmediate operands.
+
+       (<sse>_vm<plusminus_insn><mode>3): Do not use ix86_binary_operator_ok.
+       (<sse>_vmmul<mode>3): Ditto.
+       (divv4sf3): Do not use ix86_fixup_binary_operands_no_copy.
+       (divv2df3): Ditto.
+       (ssse3_pmaddubsw128): Use register_operand for operand 1.
+       (ssse3_pmaddubsw): Ditto.
+
+       * config/i386/i386.c (ix86_fixup_binary_operands): Assert that src1
+       and src2 must have the same mode when swapped.
+       (ix86_expand_binop_builtin): Do not use ix86_fixup_binary_operands
+       and ix86_binary_operator_ok.  Do not force operands in registers
+       when optimizing.
+
 2008-05-07  Jan Hubicka  <jh@suse.cz>
 
        * cgraph.c (dump_cgraph_node): Update.
index b123fa09e532c4108ddba945a720bd0d54f5fb37..678e7a567c16596e010ab923be50940d2ead39cd 100644 (file)
@@ -10689,7 +10689,12 @@ ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
   /* Canonicalize operand order.  */
   if (ix86_swap_binary_operands_p (code, mode, operands))
     {
-      rtx temp = src1;
+      rtx temp;
+
+      /* It is invalid to swap operands of different modes.  */
+      gcc_assert (GET_MODE (src1) == GET_MODE (src2));
+
+      temp = src1;
       src1 = src2;
       src2 = temp;
     }
@@ -20128,7 +20133,7 @@ safe_vector_operand (rtx x, enum machine_mode mode)
 static rtx
 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
 {
-  rtx pat, xops[3];
+  rtx pat;
   tree arg0 = CALL_EXPR_ARG (exp, 0);
   tree arg1 = CALL_EXPR_ARG (exp, 1);
   rtx op0 = expand_normal (arg0);
@@ -20159,30 +20164,12 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
     op1 = copy_to_mode_reg (mode1, op1);
 
-  /* ??? Using ix86_fixup_binary_operands is problematic when
-     we've got mismatched modes.  Fake it.  */
-
-  xops[0] = target;
-  xops[1] = op0;
-  xops[2] = op1;
-
-  if (tmode == mode0 && tmode == mode1)
-    {
-      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
-      op0 = xops[1];
-      op1 = xops[2];
-    }
-  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
-    {
-      op0 = force_reg (mode0, op0);
-      op1 = force_reg (mode1, op1);
-      target = gen_reg_rtx (tmode);
-    }
-
   pat = GEN_FCN (icode) (target, op0, op1);
   if (! pat)
     return 0;
+
   emit_insn (pat);
+
   return target;
 }
 
index 2f2c02f75a6a3f446634f3984a4623aed783be5f..0a507e07a2f388e7fc4a817aed2674bb4ba9c369 100644 (file)
   [(set_attr "type" "mmxadd")
    (set_attr "mode" "V2SF")])
 
-(define_insn "mmx_subv2sf3"
+(define_expand "mmx_subv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "")
+        (minus:V2SF (match_operand:V2SF 1 "register_operand" "")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+  "TARGET_3DNOW"
+  "")
+
+(define_expand "mmx_subrv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "")
+        (minus:V2SF (match_operand:V2SF 2 "register_operand" "")
+                   (match_operand:V2SF 1 "nonimmediate_operand" "")))]
+  "TARGET_3DNOW"
+  "")
+
+(define_insn "*mmx_subv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y,y")
         (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
                    (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
   [(set_attr "type" "mmxadd")
    (set_attr "mode" "V2SF")])
 
-(define_expand "mmx_subrv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "")
-        (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "")
-                   (match_operand:V2SF 1 "nonimmediate_operand" "")))]
-  "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "")
-
 (define_expand "mmx_mulv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "")
        (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "")
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(define_expand "mmx_eqv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "")
+       (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "")
+                (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+  "TARGET_3DNOW"
+  "ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
+
+(define_insn "*mmx_eqv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
+  "pfcmpeq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxcmp")
+   (set_attr "mode" "V2SF")])
+
 (define_insn "mmx_gtv2sf3"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
        (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
   [(set_attr "type" "mmxcmp")
    (set_attr "mode" "V2SF")])
 
-(define_insn "mmx_eqv2sf3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
-                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
-  "pfcmpeq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "V2SF")])
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
   [(set_attr "type" "mmxmul")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_pmaddwd"
+(define_expand "mmx_pmaddwd"
+  [(set (match_operand:V2SI 0 "register_operand" "")
+        (plus:V2SI
+         (mult:V2SI
+           (sign_extend:V2SI
+             (vec_select:V2HI
+               (match_operand:V4HI 1 "nonimmediate_operand" "")
+               (parallel [(const_int 0) (const_int 2)])))
+           (sign_extend:V2SI
+             (vec_select:V2HI
+               (match_operand:V4HI 2 "nonimmediate_operand" "")
+               (parallel [(const_int 0) (const_int 2)]))))
+         (mult:V2SI
+           (sign_extend:V2SI
+             (vec_select:V2HI (match_dup 1)
+               (parallel [(const_int 1) (const_int 3)])))
+           (sign_extend:V2SI
+             (vec_select:V2HI (match_dup 2)
+               (parallel [(const_int 1) (const_int 3)]))))))]
+  "TARGET_MMX"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
         (plus:V2SI
          (mult:V2SI
   [(set_attr "type" "mmxmul")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_pmulhrwv4hi3"
+(define_expand "mmx_pmulhrwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "")
+       (truncate:V4HI
+         (lshiftrt:V4SI
+           (plus:V4SI
+             (mult:V4SI
+               (sign_extend:V4SI
+                 (match_operand:V4HI 1 "nonimmediate_operand" ""))
+               (sign_extend:V4SI
+                 (match_operand:V4HI 2 "nonimmediate_operand" "")))
+             (const_vector:V4SI [(const_int 32768) (const_int 32768)
+                                 (const_int 32768) (const_int 32768)]))
+           (const_int 16))))]
+  "TARGET_3DNOW"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_pmulhrwv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
   [(set_attr "type" "mmxmul")
    (set_attr "mode" "DI")])
 
-(define_insn "sse2_umulv1siv1di3"
+(define_expand "sse2_umulv1siv1di3"
+  [(set (match_operand:V1DI 0 "register_operand" "")
+        (mult:V1DI
+         (zero_extend:V1DI
+           (vec_select:V1SI
+             (match_operand:V2SI 1 "nonimmediate_operand" "")
+             (parallel [(const_int 0)])))
+         (zero_extend:V1DI
+           (vec_select:V1SI
+             (match_operand:V2SI 2 "nonimmediate_operand" "")
+             (parallel [(const_int 0)])))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
+
+(define_insn "*sse2_umulv1siv1di3"
   [(set (match_operand:V1DI 0 "register_operand" "=y")
         (mult:V1DI
          (zero_extend:V1DI
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "mmx_eq<mode>3"
+(define_expand "mmx_eq<mode>3"
+  [(set (match_operand:MMXMODEI 0 "register_operand" "")
+        (eq:MMXMODEI
+         (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
+         (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
+  "TARGET_MMX"
+  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "*mmx_eq<mode>3"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
         (eq:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "mmx_uavgv8qi3"
+(define_expand "mmx_uavgv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "")
+       (truncate:V8QI
+         (lshiftrt:V8HI
+           (plus:V8HI
+             (plus:V8HI
+               (zero_extend:V8HI
+                 (match_operand:V8QI 1 "nonimmediate_operand" ""))
+               (zero_extend:V8HI
+                 (match_operand:V8QI 2 "nonimmediate_operand" "")))
+             (const_vector:V8HI [(const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSE || TARGET_3DNOW"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
+
+(define_insn "*mmx_uavgv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
        (truncate:V8QI
          (lshiftrt:V8HI
   [(set_attr "type" "mmxshft")
    (set_attr "mode" "DI")])
 
-(define_insn "mmx_uavgv4hi3"
+(define_expand "mmx_uavgv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "")
+       (truncate:V4HI
+         (lshiftrt:V4SI
+           (plus:V4SI
+             (plus:V4SI
+               (zero_extend:V4SI
+                 (match_operand:V4HI 1 "nonimmediate_operand" ""))
+               (zero_extend:V4SI
+                 (match_operand:V4HI 2 "nonimmediate_operand" "")))
+             (const_vector:V4SI [(const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
+
+(define_insn "*mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
index 589da9b73b231257a6445687d041a810336685b0..69e498e2220dd982ad449deca909bd4d43c8bcac 100644 (file)
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
-  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
-   && ix86_binary_operator_ok (<CODE>, V4SFmode, operands)"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
   "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "mode" "<ssescalarmode>")])
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
-  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
-   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
   "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssemul")
    (set_attr "mode" "<ssescalarmode>")])
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE"
 {
-  ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
-
   if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
       && flag_finite_math_only && !flag_trapping_math
       && flag_unsafe_math_optimizations)
        (div:V2DF (match_operand:V2DF 1 "register_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
+  "")
 
 (define_insn "<sse>_div<mode>3"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse_movhlps"
+(define_expand "sse_movhlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "")
+           (match_operand:V4SF 2 "nonimmediate_operand" ""))
+         (parallel [(const_int 6)
+                    (const_int 7)
+                    (const_int 2)
+                    (const_int 3)])))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*sse_movhlps"
   [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V4SF,V2SF,V2SF")])
 
-(define_insn "sse_movlhps"
+(define_expand "sse_movlhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+       (vec_select:V4SF
+         (vec_concat:V8SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "")
+           (match_operand:V4SF 2 "nonimmediate_operand" ""))
+         (parallel [(const_int 0)
+                    (const_int 1)
+                    (const_int 4)
+                    (const_int 5)])))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*sse_movlhps"
   [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
-(define_insn "sse_loadhps"
+(define_expand "sse_loadhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+       (vec_concat:V4SF
+         (vec_select:V2SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "")
+           (parallel [(const_int 0) (const_int 1)]))
+         (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*sse_loadhps"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
-(define_insn "sse_loadlps"
+(define_expand "sse_loadlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+       (vec_concat:V4SF
+         (match_operand:V2SF 2 "nonimmediate_operand" "")
+         (vec_select:V2SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "")
+           (parallel [(const_int 2) (const_int 3)]))))]
+  "TARGET_SSE"
+  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*sse_loadlps"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse2_unpckhpd"
+(define_expand "sse2_unpckhpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+       (vec_select:V2DF
+         (vec_concat:V4DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "")
+           (match_operand:V2DF 2 "nonimmediate_operand" ""))
+         (parallel [(const_int 1)
+                    (const_int 3)])))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "*sse2_unpckhpd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
   DONE;
 })
 
-(define_insn "sse2_unpcklpd"
+(define_expand "sse2_unpcklpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+       (vec_select:V2DF
+         (vec_concat:V4DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "")
+           (match_operand:V2DF 2 "nonimmediate_operand" ""))
+         (parallel [(const_int 0)
+                    (const_int 2)])))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "*sse2_unpcklpd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
   DONE;
 })
 
-(define_insn "sse2_loadhpd"
+(define_expand "sse2_loadhpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+       (vec_concat:V2DF
+         (vec_select:DF
+           (match_operand:V2DF 1 "nonimmediate_operand" "")
+           (parallel [(const_int 0)]))
+         (match_operand:DF 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "*sse2_loadhpd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
   operands[0] = adjust_address (operands[0], DFmode, 8);
 })
 
-(define_insn "sse2_loadlpd"
+(define_expand "sse2_loadlpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+       (vec_concat:V2DF
+         (match_operand:DF 2 "nonimmediate_operand" "")
+         (vec_select:DF
+           (match_operand:V2DF 1 "vector_move_operand" "")
+           (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "*sse2_loadlpd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_<plusminus_insn><mode>3"
+(define_expand "sse2_<plusminus_insn><mode>3"
+  [(set (match_operand:SSEMODE12 0 "register_operand" "")
+       (sat_plusminus:SSEMODE12
+         (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
+         (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*sse2_<plusminus_insn><mode>3"
   [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (sat_plusminus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_umulv2siv2di3"
+(define_expand "sse2_umulv2siv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "")
+       (mult:V2DI
+         (zero_extend:V2DI
+           (vec_select:V2SI
+             (match_operand:V4SI 1 "nonimmediate_operand" "")
+             (parallel [(const_int 0) (const_int 2)])))
+         (zero_extend:V2DI
+           (vec_select:V2SI
+             (match_operand:V4SI 2 "nonimmediate_operand" "")
+             (parallel [(const_int 0) (const_int 2)])))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+
+(define_insn "*sse2_umulv2siv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse4_1_mulv2siv2di3"
+(define_expand "sse4_1_mulv2siv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "")
+       (mult:V2DI
+         (sign_extend:V2DI
+           (vec_select:V2SI
+             (match_operand:V4SI 1 "nonimmediate_operand" "")
+             (parallel [(const_int 0) (const_int 2)])))
+         (sign_extend:V2DI
+           (vec_select:V2SI
+             (match_operand:V4SI 2 "nonimmediate_operand" "")
+             (parallel [(const_int 0) (const_int 2)])))))]
+  "TARGET_SSE4_1"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+(define_insn "*sse4_1_mulv2siv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (sign_extend:V2DI
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_pmaddwd"
+(define_expand "sse2_pmaddwd"
+  [(set (match_operand:V4SI 0 "register_operand" "")
+       (plus:V4SI
+         (mult:V4SI
+           (sign_extend:V4SI
+             (vec_select:V4HI
+               (match_operand:V8HI 1 "nonimmediate_operand" "")
+               (parallel [(const_int 0)
+                          (const_int 2)
+                          (const_int 4)
+                          (const_int 6)])))
+           (sign_extend:V4SI
+             (vec_select:V4HI
+               (match_operand:V8HI 2 "nonimmediate_operand" "")
+               (parallel [(const_int 0)
+                          (const_int 2)
+                          (const_int 4)
+                          (const_int 6)]))))
+         (mult:V4SI
+           (sign_extend:V4SI
+             (vec_select:V4HI (match_dup 1)
+               (parallel [(const_int 1)
+                          (const_int 3)
+                          (const_int 5)
+                          (const_int 7)])))
+           (sign_extend:V4SI
+             (vec_select:V4HI (match_dup 2)
+               (parallel [(const_int 1)
+                          (const_int 3)
+                          (const_int 5)
+                          (const_int 7)]))))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*sse2_pmaddwd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse2_eq<mode>3"
+;; Expander for the EQ comparison in the SSEMODE124 modes (the mode
+;; iterator is defined outside this hunk).  Enabled only for
+;; TARGET_SSE2 && !TARGET_SSE5.  Both inputs may be any nonimmediate
+;; operand; ix86_fixup_binary_operands_no_copy is called on them
+;; before the pattern is generated.
+(define_expand "sse2_eq<mode>3"
+  [(set (match_operand:SSEMODE124 0 "register_operand" "")
+       (eq:SSEMODE124
+         (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
+         (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE2 && !TARGET_SSE5"
+  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "*sse2_eq<mode>3"
   [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse4_1_eqv2di3"
+;; Expander for the V2DI EQ comparison, enabled for TARGET_SSE4_1.
+;; Both inputs may be any nonimmediate operand; they are passed to
+;; ix86_fixup_binary_operands_no_copy before the pattern is generated.
+(define_expand "sse4_1_eqv2di3"
+  [(set (match_operand:V2DI 0 "register_operand" "")
+       (eq:V2DI
+         (match_operand:V2DI 1 "nonimmediate_operand" "")
+         (match_operand:V2DI 2 "nonimmediate_operand" "")))]
+  "TARGET_SSE4_1"
+  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
+
+(define_insn "*sse4_1_eqv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "%0")
 (define_insn "sse4_2_gtv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (gt:V2DI
-         (match_operand:V2DI 1 "nonimmediate_operand" "0")
+         (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE4_2"
   "pcmpgtq\t{%2, %0|%0, %2}"
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "sse2_uavgv16qi3"
+;; Expander for sse2_uavgv16qi3.  Operand 0 (V16QI) is set to
+;;   truncate ((zero_extend (op1) + zero_extend (op2) + 1) >> 1)
+;; with the sum and the logical right shift carried out in V16HI.
+;; Both inputs may be any nonimmediate operand; they are passed to
+;; ix86_fixup_binary_operands_no_copy before the pattern is generated.
+;; NOTE(review): the vector of ones is spelled const_vector:V16QI even
+;; though it is an operand of plus:V16HI.  Presumably this mirrors the
+;; *sse2_uavgv16qi3 insn pattern -- confirm before changing either one.
+(define_expand "sse2_uavgv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand" "")
+       (truncate:V16QI
+         (lshiftrt:V16HI
+           (plus:V16HI
+             (plus:V16HI
+               (zero_extend:V16HI
+                 (match_operand:V16QI 1 "nonimmediate_operand" ""))
+               (zero_extend:V16HI
+                 (match_operand:V16QI 2 "nonimmediate_operand" "")))
+             (const_vector:V16QI [(const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)
+                                  (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
+
+(define_insn "*sse2_uavgv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_uavgv8hi3"
+;; Expander for sse2_uavgv8hi3.  Operand 0 (V8HI) is set to
+;;   truncate ((zero_extend (op1) + zero_extend (op2) + 1) >> 1)
+;; with the sum and the logical right shift carried out in V8SI.
+;; Both inputs may be any nonimmediate operand; they are passed to
+;; ix86_fixup_binary_operands_no_copy before the pattern is generated.
+;; NOTE(review): the vector of ones is spelled const_vector:V8HI even
+;; though it is an operand of plus:V8SI.  Presumably this mirrors the
+;; *sse2_uavgv8hi3 insn pattern -- confirm before changing either one.
+(define_expand "sse2_uavgv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+       (truncate:V8HI
+         (lshiftrt:V8SI
+           (plus:V8SI
+             (plus:V8SI
+               (zero_extend:V8SI
+                 (match_operand:V8HI 1 "nonimmediate_operand" ""))
+               (zero_extend:V8SI
+                 (match_operand:V8HI 2 "nonimmediate_operand" "")))
+             (const_vector:V8HI [(const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
+
+(define_insn "*sse2_uavgv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V4QI
-               (match_operand:V16QI 1 "nonimmediate_operand" "0")
+               (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
-               (match_operand:V8QI 1 "nonimmediate_operand" "0")
+               (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
-(define_insn "ssse3_pmulhrswv8hi3"
+;; Expander for ssse3_pmulhrswv8hi3.  Operand 0 (V8HI) is set to
+;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
+;; with the product, both logical right shifts and the addition carried
+;; out in V8SI.  Both inputs may be any nonimmediate operand; they are
+;; passed to ix86_fixup_binary_operands_no_copy before the pattern is
+;; generated.
+;; NOTE(review): the vector of ones is spelled const_vector:V8HI even
+;; though it is an operand of plus:V8SI.  Presumably this mirrors the
+;; *ssse3_pmulhrswv8hi3 insn pattern -- confirm before changing either.
+(define_expand "ssse3_pmulhrswv8hi3"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+       (truncate:V8HI
+         (lshiftrt:V8SI
+           (plus:V8SI
+             (lshiftrt:V8SI
+               (mult:V8SI
+                 (sign_extend:V8SI
+                   (match_operand:V8HI 1 "nonimmediate_operand" ""))
+                 (sign_extend:V8SI
+                   (match_operand:V8HI 2 "nonimmediate_operand" "")))
+               (const_int 14))
+             (const_vector:V8HI [(const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSSE3"
+  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*ssse3_pmulhrswv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "ssse3_pmulhrswv4hi3"
+;; Expander for ssse3_pmulhrswv4hi3, the V4HI counterpart of
+;; ssse3_pmulhrswv8hi3.  Operand 0 (V4HI) is set to
+;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
+;; with the product, both logical right shifts and the addition carried
+;; out in V4SI.  Both inputs may be any nonimmediate operand; they are
+;; passed to ix86_fixup_binary_operands_no_copy before the pattern is
+;; generated.
+;; NOTE(review): the vector of ones is spelled const_vector:V4HI even
+;; though it is an operand of plus:V4SI.  Presumably this mirrors the
+;; *ssse3_pmulhrswv4hi3 insn pattern -- confirm before changing either.
+(define_expand "ssse3_pmulhrswv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "")
+       (truncate:V4HI
+         (lshiftrt:V4SI
+           (plus:V4SI
+             (lshiftrt:V4SI
+               (mult:V4SI
+                 (sign_extend:V4SI
+                   (match_operand:V4HI 1 "nonimmediate_operand" ""))
+                 (sign_extend:V4SI
+                   (match_operand:V4HI 2 "nonimmediate_operand" "")))
+               (const_int 14))
+             (const_vector:V4HI [(const_int 1) (const_int 1)
+                                 (const_int 1) (const_int 1)]))
+           (const_int 1))))]
+  "TARGET_SSSE3"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*ssse3_pmulhrswv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
index b872afd3e19e179469e87a9239f05701546c0844..35eec51b66441ab77a286edebf0be689d0595f1d 100644 (file)
@@ -1,3 +1,8 @@
+2008-05-08  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/35714
+       * gcc.target/i386/pr35714.c: New test.
+
 2008-05-07  Jakub Jelinek  <jakub@redhat.com>
 
        PR middle-end/36013
diff --git a/gcc/testsuite/gcc.target/i386/pr35714.c b/gcc/testsuite/gcc.target/i386/pr35714.c
new file mode 100644 (file)
index 0000000..13ca47c
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+#include <emmintrin.h>
+
+extern __m128i a;  /* Declared only; no definition in this file.  */
+/* Calls _mm_madd_epi16 with the external variable as first argument.  */
+__m128i madd (__m128i b)
+{
+  return _mm_madd_epi16(a, b);
+}
+/* Same call with the arguments swapped: the external variable is second.  */
+__m128i madd_swapped (__m128i b)
+{
+    return _mm_madd_epi16(b, a);
+}
+
+/* { dg-final { scan-assembler-not "movaps" } } */