sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm as flags setting insn.
author Uros Bizjak <uros@gcc.gnu.org>
Wed, 6 Jun 2007 06:53:29 +0000 (08:53 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Wed, 6 Jun 2007 06:53:29 +0000 (08:53 +0200)
* config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm
as flags setting insn.
(sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn.

* config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove.
(UNSPEC_ROUND): New.
("sse4_1_round<mode>2"): New insn pattern.
("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for
SSE4.1 targets.
("floor<mode>2"): Rename from floordf2 and floorsf2.  Macroize
expander using SSEMODEF mode macro.  Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
("ceil<mode>2"): Rename from ceildf2 and ceilsf2.  Macroize
expander using SSEMODEF mode macro.  Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2.  Macroize
expander using SSEMODEF mode macro.  Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
* config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use
UNSPEC_ROUND instead of UNSPEC_ROUNDP.
("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of
UNSPEC_ROUNDS.

From-SVN: r125356

gcc/ChangeLog
gcc/config/i386/i386.md
gcc/config/i386/sse.md

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 99faa9725440f97efd0430f00bee716a533acd00..937ac42ec568f32a169a91cfd91e9c59fa53980e 100644 (file)
@@ -1,3 +1,30 @@
+2007-06-06  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm
+       as flags setting insn.
+       (sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn.
+
+2007-06-06  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove.
+       (UNSPEC_ROUND): New.
+       ("sse4_1_round<mode>2"): New insn pattern.
+       ("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for
+       SSE4.1 targets.
+       ("floor<mode>2"): Rename from floordf2 and floorsf2.  Macroize
+       expander using SSEMODEF mode macro.  Expand using
+       "sse4_1_round<mode>2" pattern for SSE4.1 targets.
+       ("ceil<mode>2"): Rename from ceildf2 and ceilsf2.  Macroize
+       expander using SSEMODEF mode macro.  Expand using
+       "sse4_1_round<mode>2" pattern for SSE4.1 targets.
+       ("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2.  Macroize
+       expander using SSEMODEF mode macro.  Expand using
+       "sse4_1_round<mode>2" pattern for SSE4.1 targets.
+       * config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use
+       UNSPEC_ROUND instead of UNSPEC_ROUNDP.
+       ("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of
+       UNSPEC_ROUNDS.
+
 2007-06-06  Jan Sjodin  <jan.sjodin@amd.com>
            Sebastian Pop  <sebpop@gmail.com>
 
@@ -53,7 +80,8 @@
        * cfgexpand (label_rtx_for_bb): Likewise.
        (expand_gimple_basic_block): Likewise.
        * cfghooks.c (dump_bb): Likewise.
-       (lv_adjust_loop_header_phi): Avoid using C++ keywords as variable names.
+       (lv_adjust_loop_header_phi): Avoid using C++ keywords as
+       variable names.
        (lv_add_condition_to_bb): Likewise.
        * cfglayout (relink_block_chain): Cast according to the coding
        conventions.
@@ -64,7 +92,8 @@
        (dump_recorded_exit): Likewise.
        * cfgloop.h (enum loop_estimation): Move out of struct scope...
        (struct loop): ... from here.
-       * cfgloopmanip.c (rpe_enum_p): Cast according to the coding conventions.
+       * cfgloopmanip.c (rpe_enum_p): Cast according to the coding
+       conventions.
        * cfgrtl.c (rtl_create_basic_block): Likewise.
        (rtl_split_block): Likewise.
        (rtl_dump_bb): Likewise.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3e9a15fbf7c283472f742bae4602c3cb6fb32ef9..43e58ae8dc7eab44a359a6e5a28964c53ffdbb31 100644 (file)
    (UNSPEC_MPSADBW             138)
    (UNSPEC_PHMINPOSUW          139)
    (UNSPEC_PTEST               140)
-   (UNSPEC_ROUNDP              141)
-   (UNSPEC_ROUNDS              142)
+   (UNSPEC_ROUND               141)
 
    ; For SSE4.2 support
    (UNSPEC_CRC32               143)
 })
 \f
 
+(define_insn "sse4_1_round<mode>2"
+  [(set (match_operand:SSEMODEF 0 "register_operand" "=x")
+       (unspec:SSEMODEF [(match_operand:SSEMODEF 1 "register_operand" "x")
+                         (match_operand:SI 2 "const_0_to_15_operand" "n")]
+                        UNSPEC_ROUND))]
+  "TARGET_SSE4_1"
+  "rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "rintxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
     && flag_unsafe_math_optimizations)
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
        && !flag_trapping_math
-       && !optimize_size)"
+       && (TARGET_SSE4_1 || !optimize_size))"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !flag_trapping_math
-      && !optimize_size)
-    ix86_expand_rint (operand0, operand1);
+      && (TARGET_SSE4_1 || !optimize_size))
+    {
+      if (TARGET_SSE4_1)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x04)));
+      else
+       ix86_expand_rint (operand0, operand1);
+    }
   else
     {
       rtx op0 = gen_reg_rtx (XFmode);
    && !flag_trapping_math && !flag_rounding_math
    && !optimize_size"
 {
-  if ((<MODE>mode != DFmode) || TARGET_64BIT)
+  if (TARGET_64BIT || (<MODE>mode != DFmode))
     ix86_expand_round (operand0, operand1);
   else
     ix86_expand_rounddf_32 (operand0, operand1);
   DONE;
 })
 
-(define_expand "floordf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
+(define_expand "floor<mode>2"
+  [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+   (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations && !optimize_size)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && (TARGET_SSE4_1 || !optimize_size))"
 {
-  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_SSE4_1 || !optimize_size))
     {
-      if (TARGET_64BIT)
+      if (TARGET_SSE4_1)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x01)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_floorceil (operand0, operand1, true);
       else
        ix86_expand_floorceildf_32 (operand0, operand1, true);
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
-      emit_insn (gen_extenddfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2_floor (op0, op1));
-
-      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
-    }
-  DONE;
-})
-
-(define_expand "floorsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
-{
-  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
-    ix86_expand_floorceil (operand0, operand1, true);
-  else
-    {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
-
-      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_floor (op0, op1));
 
-      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
     }
   DONE;
 })
   DONE;
 })
 
-(define_expand "ceildf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
+(define_expand "ceil<mode>2"
+  [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+   (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations && !optimize_size)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && (TARGET_SSE4_1 || !optimize_size))"
 {
-  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_SSE4_1 || !optimize_size))
     {
-      if (TARGET_64BIT)
+      if (TARGET_SSE4_1)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x02)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_floorceil (operand0, operand1, false);
       else
        ix86_expand_floorceildf_32 (operand0, operand1, false);
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
-      emit_insn (gen_extenddfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2_ceil (op0, op1));
-
-      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
-    }
-  DONE;
-})
-
-(define_expand "ceilsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
-{
-  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
-    ix86_expand_floorceil (operand0, operand1, false);
-  else
-    {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
-
-      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_ceil (op0, op1));
 
-      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
     }
   DONE;
 })
   DONE;
 })
 
-(define_expand "btruncdf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
+(define_expand "btrunc<mode>2"
+  [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+   (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations && !optimize_size)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && (TARGET_SSE4_1 || !optimize_size))"
 {
-  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_SSE4_1 || !optimize_size))
     {
-      if (TARGET_64BIT)
+      if (TARGET_SSE4_1)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x03)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_trunc (operand0, operand1);
       else
        ix86_expand_truncdf_32 (operand0, operand1);
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
-      emit_insn (gen_extenddfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2_trunc (op0, op1));
-
-      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
-    }
-  DONE;
-})
-
-(define_expand "btruncsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
-{
-  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
-    ix86_expand_trunc (operand0, operand1);
-  else
-    {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
-
-      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_trunc (op0, op1));
 
-      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
     }
   DONE;
 })
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bdb653d7a37cfa12427f90178dabef56d8c2e173..042146ec6136bb048852c723eb623cefa0fd8a14 100644 (file)
   [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
                      (match_operand:SI 2 "const_0_to_15_operand" "n")]
-                    UNSPEC_ROUNDP))]
+                    UNSPEC_ROUND))]
   "TARGET_SSE4_1"
   "roundpd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ssecvt")
   [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
                      (match_operand:SI 2 "const_0_to_15_operand" "n")]
-                    UNSPEC_ROUNDP))]
+                    UNSPEC_ROUND))]
   "TARGET_SSE4_1"
   "roundps\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ssecvt")
        (vec_merge:V2DF
          (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
                        (match_operand:SI 3 "const_0_to_15_operand" "n")]
-                      UNSPEC_ROUNDS)
+                      UNSPEC_ROUND)
          (match_operand:V2DF 1 "register_operand" "0")
          (const_int 1)))]
   "TARGET_SSE4_1"
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
                        (match_operand:SI 3 "const_0_to_15_operand" "n")]
-                      UNSPEC_ROUNDS)
+                      UNSPEC_ROUND)
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
   "TARGET_SSE4_1"
           (match_operand:SI 3 "register_operand" "d,d,d,d")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPESTR))
-   (clobber (match_scratch:SI    5 "=c,c,X,X"))
-   (clobber (match_scratch:V16QI 6 "=X,X,Y0,Y0"))]
+   (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
+   (clobber (match_scratch:SI    6 "= X, X,c,c"))]
   "TARGET_SSE4_2"
   "@
-   pcmpestri\t{%4, %2, %0|%0, %2, %4}
-   pcmpestri\t{%4, %2, %0|%0, %2, %4}
    pcmpestrm\t{%4, %2, %0|%0, %2, %4}
-   pcmpestrm\t{%4, %2, %0|%0, %2, %4}"
+   pcmpestrm\t{%4, %2, %0|%0, %2, %4}
+   pcmpestri\t{%4, %2, %0|%0, %2, %4}
+   pcmpestri\t{%4, %2, %0|%0, %2, %4}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
           (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
-   (clobber (match_scratch:SI    3 "=c,c,X,X"))
-   (clobber (match_scratch:V16QI 4 "=X,X,Y0,Y0"))]
+   (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
+   (clobber (match_scratch:SI    4 "= X, X,c,c"))]
   "TARGET_SSE4_2"
   "@
-   pcmpistri\t{%2, %1, %0|%0, %1, %2}
-   pcmpistri\t{%2, %1, %0|%0, %1, %2}
    pcmpistrm\t{%2, %1, %0|%0, %1, %2}
-   pcmpistrm\t{%2, %1, %0|%0, %1, %2}"
+   pcmpistrm\t{%2, %1, %0|%0, %1, %2}
+   pcmpistri\t{%2, %1, %0|%0, %1, %2}
+   pcmpistri\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")