re PR target/72867 (SSE/AVX/AVX512: incorrect optimization of VMINPS/VMAXPS at compil...

author Uros Bizjak <ubizjak@gmail.com>

Mon, 15 Aug 2016 18:46:53 +0000 (20:46 +0200)

committer Uros Bizjak <uros@gcc.gnu.org>

Mon, 15 Aug 2016 18:46:53 +0000 (20:46 +0200)
author Uros Bizjak <ubizjak@gmail.com>
Mon, 15 Aug 2016 18:46:53 +0000 (20:46 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Mon, 15 Aug 2016 18:46:53 +0000 (20:46 +0200)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index fa52700c953f8533624605360ee9e112f3afbd27..c380045cafef2dc6c1b1df85540d9911dd438737 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+2016-08-15  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/72867
+       * config/i386/sse.md (<code><mode>3<mask_name><round_saeonly_name>):
+       Emit ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>
+       for !flag_finite_math_only or flag_signed_zeros.
+       (*<code><mode>3<mask_name><round_saeonly_name>): Rename from
+       *<code><mode>3_finite<mask_name><round_saeonly_name>.  Do not
+       depend on flag_finite_math_only.
+       (ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>):
+       New insn pattern.
+       (*<code><mode>3<mask_name><round_saeonly_name>): Remove.
+       (*ieee_smin<mode>3): Ditto.
+       (*ieee_smax<mode>3): Ditto.
+       * config/i386/mmx.md (mmx_<code>v2sf3): Emit
+       mmx_ieee_<ieee_maxmin>v2sf3 for !flag_finite_math_only or
+       flag_signed_zeros.
+       (*mmx_<code>v2sf3): Rename from *mmx_<code>v2sf3_finite.  Do not
+       depend on flag_finite_math_only.
+       (mmx_ieee_<ieee_maxmin>v2sf3): New insn pattern.
+       (*mmx_<code>v2sf3): Remove.
+       * config/i386/subst.md (round_saeonly_mask_arg3): New subst attribute.
+       * config/i386/i386.c (ix86_expand_sse_fp_mimnax): Check
+       flag_signed_zeros instead of !flag_unsafe_math_optimizations.
+
  2016-08-15  Segher Boessenkool  <segher@kernel.crashing.org>
  
         PR rtl-optimization/73650
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index 7170194a3a176a3ea2cd20c5334ad19a99895d24..02bfcbf49b23ddfe303715386aa8f99e8c7243f3 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23404,7 +23404,7 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
  
    /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
       but MODE may be a vector mode and thus not appropriate.  */
-  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+  if (!flag_finite_math_only || flag_signed_zeros)
      {
        int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
        rtvec v;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

index 25ba1ebce2ca55e607c6aadf5e31caa2778fdf73..e5d2fd01146da90263b81c2a3b8c5de8092da23c 100644 (file)
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -885,6 +885,14 @@
                               (umax "maxu") (umin "minu")])
  (define_code_attr maxmin_float [(smax "max") (smin "min")])
  
+(define_int_iterator IEEE_MAXMIN
+       [UNSPEC_IEEE_MAX
+        UNSPEC_IEEE_MIN])
+
+(define_int_attr ieee_maxmin
+       [(UNSPEC_IEEE_MAX "max")
+        (UNSPEC_IEEE_MIN "min")])
+
  ;; Mapping of logic operators
  (define_code_iterator any_logic [and ior xor])
  (define_code_iterator any_or [ior xor])
@@ -17401,14 +17409,6 @@
  ;; Their operands are not commutative, and thus they may be used in the
  ;; presence of -0.0 and NaN.
  
-(define_int_iterator IEEE_MAXMIN
-       [UNSPEC_IEEE_MAX
-        UNSPEC_IEEE_MIN])
-
-(define_int_attr ieee_maxmin
-       [(UNSPEC_IEEE_MAX "max")
-        (UNSPEC_IEEE_MIN "min")])
-
  (define_insn "*ieee_s<ieee_maxmin><mode>3"
    [(set (match_operand:MODEF 0 "register_operand" "=x,v")
         (unspec:MODEF
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md

index 65e8b460508d744d116193bf0b402dab345c1830..99922332e8730a626abc0d647520a77d769d4daf 100644 (file)
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -296,10 +296,6 @@
     (set_attr "prefix_extra" "1")
     (set_attr "mode" "V2SF")])
  
-;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
-;; isn't really correct, as those rtl operators aren't defined when
-;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
-
  (define_expand "mmx_<code>v2sf3"
    [(set (match_operand:V2SF 0 "register_operand")
          (smaxmin:V2SF
@@ -307,30 +303,47 @@
           (match_operand:V2SF 2 "nonimmediate_operand")))]
    "TARGET_3DNOW"
  {
-  if (!flag_finite_math_only)
-    operands[1] = force_reg (V2SFmode, operands[1]);
-  ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
+  if (!flag_finite_math_only || flag_signed_zeros)
+    {
+      operands[1] = force_reg (V2SFmode, operands[1]);
+      emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
+                (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else
+    ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
  })
  
-(define_insn "*mmx_<code>v2sf3_finite"
+;; These versions of the min/max patterns are intentionally ignorant of
+;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
+;; are undefined in this condition, we're certain this is correct.
+
+(define_insn "*mmx_<code>v2sf3"
    [(set (match_operand:V2SF 0 "register_operand" "=y")
          (smaxmin:V2SF
           (match_operand:V2SF 1 "nonimmediate_operand" "%0")
           (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && flag_finite_math_only
-   && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
+  "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
    "pf<maxmin_float>\t{%2, %0|%0, %2}"
    [(set_attr "type" "mmxadd")
     (set_attr "prefix_extra" "1")
     (set_attr "mode" "V2SF")])
  
-(define_insn "*mmx_<code>v2sf3"
+;; These versions of the min/max patterns implement exactly the operations
+;;   min = (op1 < op2 ? op1 : op2)
+;;   max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
    [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (smaxmin:V2SF
-         (match_operand:V2SF 1 "register_operand" "0")
-         (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+        (unspec:V2SF
+         [(match_operand:V2SF 1 "register_operand" "0")
+          (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+         IEEE_MAXMIN))]
    "TARGET_3DNOW"
-  "pf<maxmin_float>\t{%2, %0|%0, %2}"
+  "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
    [(set_attr "type" "mmxadd")
     (set_attr "prefix_extra" "1")
     (set_attr "mode" "V2SF")])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index 9f60ad006bd92239c418a01ece66d48b03f03263..9a39c74752d4005cc97158eaaacd11b275bee8de 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1633,10 +1633,6 @@
     (set_attr "prefix" "orig,vex")
     (set_attr "mode" "SF")])
  
-;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
-;; isn't really correct, as those rtl operators aren't defined when
-;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
-
  (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
    [(set (match_operand:VF 0 "register_operand")
         (smaxmin:VF
@@ -1644,18 +1640,30 @@
           (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
    "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
  {
-  if (!flag_finite_math_only)
-    operands[1] = force_reg (<MODE>mode, operands[1]);
-  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+  if (!flag_finite_math_only || flag_signed_zeros)
+    {
+      operands[1] = force_reg (<MODE>mode, operands[1]);
+      emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
+                (operands[0], operands[1], operands[2]
+                 <mask_operand_arg34>
+                 <round_saeonly_mask_arg3>));
+      DONE;
+    }
+  else
+    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  })
  
-(define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
+;; These versions of the min/max patterns are intentionally ignorant of
+;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
+;; are undefined in this condition, we're certain this is correct.
+
+(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
    [(set (match_operand:VF 0 "register_operand" "=x,v")
         (smaxmin:VF
           (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
           (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
-  "TARGET_SSE && flag_finite_math_only
-   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
     && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
    "@
     <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -1666,16 +1674,23 @@
     (set_attr "prefix" "<mask_prefix3>")
     (set_attr "mode" "<MODE>")])
  
-(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
+;; These versions of the min/max patterns implement exactly the operations
+;;   min = (op1 < op2 ? op1 : op2)
+;;   max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
    [(set (match_operand:VF 0 "register_operand" "=x,v")
-       (smaxmin:VF
-         (match_operand:VF 1 "register_operand" "0,v")
-         (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
-  "TARGET_SSE && !flag_finite_math_only
+       (unspec:VF
+         [(match_operand:VF 1 "register_operand" "0,v")
+          (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
+         IEEE_MAXMIN))]
+  "TARGET_SSE
     && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
    "@
-   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
-   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
+   <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
+   v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
    [(set_attr "isa" "noavx,avx")
     (set_attr "type" "sseadd")
     (set_attr "btver2_sse_attr" "maxmin")
@@ -1700,42 +1715,6 @@
     (set_attr "prefix" "<round_saeonly_prefix>")
     (set_attr "mode" "<ssescalarmode>")])
  
-;; These versions of the min/max patterns implement exactly the operations
-;;   min = (op1 < op2 ? op1 : op2)
-;;   max = (!(op1 < op2) ? op1 : op2)
-;; Their operands are not commutative, and thus they may be used in the
-;; presence of -0.0 and NaN.
-
-(define_insn "*ieee_smin<mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-       (unspec:VF
-         [(match_operand:VF 1 "register_operand" "0,v")
-          (match_operand:VF 2 "vector_operand" "xBm,vm")]
-        UNSPEC_IEEE_MIN))]
-  "TARGET_SSE"
-  "@
-   min<ssemodesuffix>\t{%2, %0|%0, %2}
-   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseadd")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*ieee_smax<mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-       (unspec:VF
-         [(match_operand:VF 1 "register_operand" "0,v")
-          (match_operand:VF 2 "vector_operand" "xBm,vm")]
-        UNSPEC_IEEE_MAX))]
-  "TARGET_SSE"
-  "@
-   max<ssemodesuffix>\t{%2, %0|%0, %2}
-   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseadd")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "<MODE>")])
-
  (define_insn "avx_addsubv4df3"
    [(set (match_operand:V4DF 0 "register_operand" "=x")
         (vec_merge:V4DF
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md

index e2f67c4a00c77a46f42ad000c561344bfd122944..f0b8cf411bcdc1b5e819881ca887cd269beac45f 100644 (file)
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -161,6 +161,7 @@
  (define_subst_attr "round_saeonly_mask_op4" "round_saeonly" "" "<round_saeonly_mask_operand4>")
  (define_subst_attr "round_saeonly_mask_scalar_merge_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_merge_operand4>")
  (define_subst_attr "round_saeonly_sd_mask_op5" "round_saeonly" "" "<round_saeonly_sd_mask_operand5>")
+(define_subst_attr "round_saeonly_mask_arg3" "round_saeonly" "" ", operands[<mask_expand_op3>]")
  (define_subst_attr "round_saeonly_constraint" "round_saeonly" "vm" "v")
  (define_subst_attr "round_saeonly_constraint2" "round_saeonly" "m" "v")
  (define_subst_attr "round_saeonly_nimm_predicate" "round_saeonly" "vector_operand" "register_operand")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index eeaa5d5bc2c74fb37b414cb14c55ffa1edfa2a3e..4b4c9ca2a1c0c8c31d1da7454b375670bb281878 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-08-15  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/72867
+       * gcc.target/i386/pr72867.c: New test.
+
  2016-08-16  Eric Botcazou  <ebotcazou@adacore.com>
  
         * c-c++-common/dump-ada-spec-5.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/pr72867.c b/gcc/testsuite/gcc.target/i386/pr72867.c

new file mode 100644 (file)

index 0000000..b8614be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr72867.c
@@ -0,0 +1,23 @@
+/* PR target/72867 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target sse } */
+
+#include "sse-check.h"
+#include <xmmintrin.h>
+
+static void
+sse_test (void)
+{
+  float nan = __builtin_nanf ("");
+  
+  __m128 x = _mm_min_ps(_mm_set1_ps(nan), _mm_set1_ps(1.0f));
+
+  if (x[0] != 1.0f)
+    abort ();
+
+  x = _mm_min_ps(_mm_set1_ps(1.f), _mm_set1_ps(nan));
+
+  if (!__builtin_isnan (x[0]))
+    abort ();
+}
author	Uros Bizjak <ubizjak@gmail.com>
	Mon, 15 Aug 2016 18:46:53 +0000 (20:46 +0200)
committer	Uros Bizjak <uros@gcc.gnu.org>
	Mon, 15 Aug 2016 18:46:53 +0000 (20:46 +0200)
gcc/ChangeLog		patch \| blob \| history
gcc/config/i386/i386.c		patch \| blob \| history
gcc/config/i386/i386.md		patch \| blob \| history
gcc/config/i386/mmx.md		patch \| blob \| history
gcc/config/i386/sse.md		patch \| blob \| history
gcc/config/i386/subst.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/i386/pr72867.c	[new file with mode: 0644]	patch \| blob