From a2106317cd6673e110b347c70f21e25fbb23379e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 11 Jan 2021 10:32:07 +0100 Subject: [PATCH] widening_mul: Pattern recognize unsigned multiplication with overflow check [PR95852] The following patch pattern recognizes some forms of multiplication followed by overflow check through division by one of the operands compared to the other one, with optional removal of guarding non-zero check for that operand if possible. The patterns are replaced with effectively __builtin_mul_overflow or __builtin_mul_overflow_p. The testcases cover 64 different forms of that. 2021-01-11 Jakub Jelinek PR tree-optimization/95852 * tree-ssa-math-opts.c (maybe_optimize_guarding_check): New function. (uaddsub_overflow_check_p): Renamed to ... (arith_overflow_check_p): ... this. Handle also multiplication with overflow check. (match_uaddsub_overflow): Renamed to ... (match_arith_overflow): ... this. Add cfg_changed argument. Handle also multiplication with overflow check. Adjust function comment. (math_opts_dom_walker::after_dom_children): Adjust callers. Call match_arith_overflow also for MULT_EXPR. * gcc.target/i386/pr95852-1.c: New test. * gcc.target/i386/pr95852-2.c: New test. 
--- gcc/testsuite/gcc.target/i386/pr95852-1.c | 266 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr95852-2.c | 266 +++++++++++++++++++ gcc/tree-ssa-math-opts.c | 309 ++++++++++++++++++++-- 3 files changed, 818 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr95852-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95852-2.c diff --git a/gcc/testsuite/gcc.target/i386/pr95852-1.c b/gcc/testsuite/gcc.target/i386/pr95852-1.c new file mode 100644 index 00000000000..a3f85ddabcb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95852-1.c @@ -0,0 +1,266 @@ +/* PR tree-optimization/95852 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized -masm=att" } */ +/* { dg-final { scan-tree-dump-times " = \.MUL_OVERFLOW " 32 "optimized" } } */ +/* { dg-final { scan-assembler-times "\tmull\t" 32 } } */ +/* { dg-final { scan-assembler-times "\tseto\t" 8 } } */ +/* { dg-final { scan-assembler-times "\tsetno\t" 8 } } */ +/* { dg-final { scan-assembler-times "\tjn\?o\t" 16 } } */ + +unsigned fn (void); + +int +f1 (unsigned x, unsigned y, unsigned *res) +{ + *res = x * y; + return x && (*res / x) != y; +} + +unsigned +f2 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (x && (r / x) != y) + return fn (); + return r; +} + +int +f3 (unsigned x, unsigned y, unsigned *res) +{ + *res = x * y; + return !x || (*res / x) == y; +} + +unsigned +f4 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (!x || (r / x) == y) + return fn (); + return r; +} + +int +f5 (int x, int y, int *res) +{ + *res = (unsigned) x * y; + return x && ((unsigned) *res / x) != (unsigned) y; +} + +int +f6 (int x, int y) +{ + int r = (unsigned) x * y; + if (x && ((unsigned) r / x) != (unsigned) y) + return fn (); + return r; +} + +int +f7 (int x, int y, int *res) +{ + *res = (unsigned) x * y; + return !x || ((unsigned) *res / x) == (unsigned) y; +} + +int +f8 (int x, int y) +{ + int r = (unsigned) x * y; + if (!x || ((unsigned) r / x) == 
(unsigned) y) + return fn (); + return r; +} + +int +f9 (unsigned x, unsigned y, unsigned *res) +{ + *res = x * y; + return y && (*res / y) != x; +} + +unsigned +f10 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (y && (r / y) != x) + return fn (); + return r; +} + +int +f11 (unsigned x, unsigned y, unsigned *res) +{ + *res = x * y; + return !y || (*res / y) == x; +} + +unsigned +f12 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (!y || (r / y) == x) + return fn (); + return r; +} + +int +f13 (int x, int y, int *res) +{ + *res = (unsigned) x * y; + return y && ((unsigned) *res / y) != (unsigned) x; +} + +int +f14 (int x, int y) +{ + int r = (unsigned) x * y; + if (y && ((unsigned) r / y) != (unsigned) x) + return fn (); + return r; +} + +int +f15 (int x, int y, int *res) +{ + *res = (unsigned) x * y; + return !y || ((unsigned) *res / y) == (unsigned) x; +} + +int +f16 (int x, int y) +{ + int r = (unsigned) x * y; + if (!y || ((unsigned) r / y) == (unsigned) x) + return fn (); + return r; +} + +int +f17 (unsigned x, unsigned *res) +{ + *res = x * 35U; + return x && (*res / x) != 35U; +} + +unsigned +f18 (unsigned x) +{ + unsigned int r = x * 35U; + if (x && (r / x) != 35U) + return fn (); + return r; +} + +int +f19 (unsigned x, unsigned *res) +{ + *res = x * 35U; + return !x || (*res / x) == 35U; +} + +unsigned +f20 (unsigned x) +{ + unsigned int r = x * 35U; + if (!x || (r / x) == 35U) + return fn (); + return r; +} + +int +f21 (int x, int *res) +{ + *res = (unsigned) x * 35; + return x && ((unsigned) *res / x) != 35U; +} + +int +f22 (int x) +{ + int r = (unsigned) x * 35; + if (x && ((unsigned) r / x) != 35U) + return fn (); + return r; +} + +int +f23 (int x, int *res) +{ + *res = (unsigned) x * 35; + return !x || ((unsigned) *res / x) == 35U; +} + +int +f24 (int x) +{ + int r = (unsigned) x * 35; + if (!x || ((unsigned) r / x) == 35U) + return fn (); + return r; +} + +int +f25 (unsigned x, unsigned *res) +{ + *res = x * 35U; + return (*res 
/ 35U) != x; +} + +unsigned +f26 (unsigned x) +{ + unsigned int r = x * 35U; + if ((r / 35U) != x) + return fn (); + return r; +} + +int +f27 (unsigned x, unsigned *res) +{ + *res = x * 35U; + return !35U || (*res / 35U) == x; +} + +unsigned +f28 (unsigned x) +{ + unsigned int r = x * 35U; + if ((r / 35U) == x) + return fn (); + return r; +} + +int +f29 (int x, int *res) +{ + *res = (unsigned) x * 35; + return 35 && ((unsigned) *res / 35) != (unsigned) x; +} + +int +f30 (int x) +{ + int r = (unsigned) x * 35; + if (((unsigned) r / 35) != (unsigned) x) + return fn (); + return r; +} + +int +f31 (int x, int *res) +{ + *res = (unsigned) x * 35; + return ((unsigned) *res / 35) == (unsigned) x; +} + +int +f32 (int x) +{ + int r = (unsigned) x * 35; + if (((unsigned) r / 35) == (unsigned) x) + return fn (); + return r; +} diff --git a/gcc/testsuite/gcc.target/i386/pr95852-2.c b/gcc/testsuite/gcc.target/i386/pr95852-2.c new file mode 100644 index 00000000000..de85cecfe15 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95852-2.c @@ -0,0 +1,266 @@ +/* PR tree-optimization/95852 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized -masm=att" } */ +/* { dg-final { scan-tree-dump-times " = \.MUL_OVERFLOW " 32 "optimized" } } */ +/* { dg-final { scan-assembler-times "\tmull\t" 32 } } */ +/* { dg-final { scan-assembler-times "\tseto\t" 8 } } */ +/* { dg-final { scan-assembler-times "\tsetno\t" 8 } } */ +/* { dg-final { scan-assembler-times "\tjn\?o\t" 16 } } */ + +unsigned fn (void); + +int +f1 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + return x && (r / x) != y; +} + +unsigned +f2 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (x && (r / x) != y) + return fn (); + return 0; +} + +int +f3 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + return !x || (r / x) == y; +} + +unsigned +f4 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (!x || (r / x) == y) + return fn (); + return 0; +} + +int +f5 (int x, int y) +{ + int 
r = (unsigned) x * y; + return x && ((unsigned) r / x) != (unsigned) y; +} + +int +f6 (int x, int y) +{ + int r = (unsigned) x * y; + if (x && ((unsigned) r / x) != (unsigned) y) + return fn (); + return 0; +} + +int +f7 (int x, int y) +{ + int r = (unsigned) x * y; + return !x || ((unsigned) r / x) == (unsigned) y; +} + +int +f8 (int x, int y) +{ + int r = (unsigned) x * y; + if (!x || ((unsigned) r / x) == (unsigned) y) + return fn (); + return 0; +} + +int +f9 (unsigned x, unsigned y) +{ + unsigned r = x * y; + return y && (r / y) != x; +} + +unsigned +f10 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (y && (r / y) != x) + return fn (); + return 0; +} + +int +f11 (unsigned x, unsigned y) +{ + unsigned r = x * y; + return !y || (r / y) == x; +} + +unsigned +f12 (unsigned x, unsigned y) +{ + unsigned int r = x * y; + if (!y || (r / y) == x) + return fn (); + return 0; +} + +int +f13 (int x, int y) +{ + int r = (unsigned) x * y; + return y && ((unsigned) r / y) != (unsigned) x; +} + +int +f14 (int x, int y) +{ + int r = (unsigned) x * y; + if (y && ((unsigned) r / y) != (unsigned) x) + return fn (); + return 0; +} + +int +f15 (int x, int y) +{ + int r = (unsigned) x * y; + return !y || ((unsigned) r / y) == (unsigned) x; +} + +int +f16 (int x, int y) +{ + int r = (unsigned) x * y; + if (!y || ((unsigned) r / y) == (unsigned) x) + return fn (); + return 0; +} + +int +f17 (unsigned x) +{ + unsigned r = x * 35U; + return x && (r / x) != 35U; +} + +unsigned +f18 (unsigned x) +{ + unsigned int r = x * 35U; + if (x && (r / x) != 35U) + return fn (); + return 0; +} + +int +f19 (unsigned x) +{ + unsigned r = x * 35U; + return !x || (r / x) == 35U; +} + +unsigned +f20 (unsigned x) +{ + unsigned int r = x * 35U; + if (!x || (r / x) == 35U) + return fn (); + return 0; +} + +int +f21 (int x) +{ + int r = (unsigned) x * 35; + return x && ((unsigned) r / x) != 35U; +} + +int +f22 (int x) +{ + int r = (unsigned) x * 35; + if (x && ((unsigned) r / x) != 35U) + return 
fn (); + return 0; +} + +int +f23 (int x) +{ + int r = (unsigned) x * 35; + return !x || ((unsigned) r / x) == 35U; +} + +int +f24 (int x) +{ + int r = (unsigned) x * 35; + if (!x || ((unsigned) r / x) == 35U) + return fn (); + return 0; +} + +int +f25 (unsigned x) +{ + unsigned r = x * 35U; + return (r / 35U) != x; +} + +unsigned +f26 (unsigned x) +{ + unsigned int r = x * 35U; + if ((r / 35U) != x) + return fn (); + return 0; +} + +int +f27 (unsigned x) +{ + unsigned r = x * 35U; + return !35U || (r / 35U) == x; +} + +unsigned +f28 (unsigned x) +{ + unsigned int r = x * 35U; + if ((r / 35U) == x) + return fn (); + return 0; +} + +int +f29 (int x) +{ + int r = (unsigned) x * 35; + return 35 && ((unsigned) r / 35) != (unsigned) x; +} + +int +f30 (int x) +{ + int r = (unsigned) x * 35; + if (((unsigned) r / 35) != (unsigned) x) + return fn (); + return 0; +} + +int +f31 (int x) +{ + int r = (unsigned) x * 35; + return ((unsigned) r / 35) == (unsigned) x; +} + +int +f32 (int x) +{ + int r = (unsigned) x * 35; + if (((unsigned) r / 35) == (unsigned) x) + return fn (); + return 0; +} diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 52849728ab2..1f28679ef2e 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -3451,17 +3451,228 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, } -/* Helper function of match_uaddsub_overflow. Return 1 +/* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have + a check for non-zero like: + _1 = x_4(D) * y_5(D); + *res_7(D) = _1; + if (x_4(D) != 0) + goto <bb 3>; [50.00%] + else + goto <bb 4>; [50.00%] + + <bb 3> [local count: 536870913]: + _2 = _1 / x_4(D); + _9 = _2 != y_5(D); + _10 = (int) _9; + + <bb 4> [local count: 1073741824]: + # iftmp.0_3 = PHI <_10(3), 0(2)> + then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also + optimize the x_4(D) != 0 condition to 1.
*/ + +static void +maybe_optimize_guarding_check (gimple **mul_stmts, gimple *cond_stmt, + gimple *div_stmt, bool *cfg_changed) +{ + basic_block bb = gimple_bb (cond_stmt); + if (gimple_bb (div_stmt) != bb || !single_pred_p (bb)) + return; + edge pred_edge = single_pred_edge (bb); + basic_block pred_bb = pred_edge->src; + if (EDGE_COUNT (pred_bb->succs) != 2) + return; + edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge); + edge other_succ_edge = NULL; + if (gimple_code (cond_stmt) == GIMPLE_COND) + { + if (EDGE_COUNT (bb->succs) != 2) + return; + other_succ_edge = EDGE_SUCC (bb, 0); + if (gimple_cond_code (cond_stmt) == NE_EXPR) + { + if (other_succ_edge->flags & EDGE_TRUE_VALUE) + other_succ_edge = EDGE_SUCC (bb, 1); + } + else if (other_succ_edge->flags & EDGE_FALSE_VALUE) + other_succ_edge = EDGE_SUCC (bb, 0); + if (other_edge->dest != other_succ_edge->dest) + return; + } + else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb)) + return; + gimple *zero_cond = last_stmt (pred_bb); + if (zero_cond == NULL + || gimple_code (zero_cond) != GIMPLE_COND + || (gimple_cond_code (zero_cond) + != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR)) + || !integer_zerop (gimple_cond_rhs (zero_cond))) + return; + tree zero_cond_lhs = gimple_cond_lhs (zero_cond); + if (TREE_CODE (zero_cond_lhs) != SSA_NAME) + return; + if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs) + { + /* Allow the divisor to be result of a same precision cast + from zero_cond_lhs. 
*/ + tree rhs2 = gimple_assign_rhs2 (div_stmt); + if (TREE_CODE (rhs2) != SSA_NAME) + return; + gimple *g = SSA_NAME_DEF_STMT (rhs2); + if (!gimple_assign_cast_p (g) + || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond) + || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs)) + || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs)) + != TYPE_PRECISION (TREE_TYPE (rhs2)))) + return; + } + gimple_stmt_iterator gsi = gsi_for_stmt (div_stmt); + gsi_prev_nondebug (&gsi); + if (!gsi_end_p (gsi)) + { + /* If original mul_stmt has a single use, allow it in the same bb, + we are looking then just at __builtin_mul_overflow_p. + Though, in that case the original mul_stmt will be replaced + by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */ + for (int i = 2; i >= 0; --i) + { + if (gsi_stmt (gsi) != mul_stmts[i]) + return; + gsi_prev_nondebug (&gsi); + } + /* Allow up to 2 extra casts. Given the way we check PHIs, + nothing from this bb should be consumed by any other bb + anyway. */ + for (int i = 0; i < 2 && !gsi_end_p (gsi); i++) + { + gimple *g = gsi_stmt (gsi); + if (!gimple_assign_cast_p (g)) + return; + gsi_prev_nondebug (&gsi); + } + if (!gsi_end_p (gsi)) + return; + } + gsi = gsi_for_stmt (div_stmt); + gsi_next_nondebug (&gsi); + if (gsi_stmt (gsi) != cond_stmt) + return; + if (gimple_code (cond_stmt) == GIMPLE_COND) + { + basic_block succ_bb = other_edge->dest; + for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi); + gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx); + tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx); + if (!operand_equal_p (v1, v2, 0)) + return; + } + } + else + { + tree lhs = gimple_assign_lhs (cond_stmt); + if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))) + return; + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi)) + { + if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR) + return; + gimple *cast_stmt = gsi_stmt (gsi); + if (!gimple_assign_cast_p (cast_stmt)) + 
return; + tree new_lhs = gimple_assign_lhs (cast_stmt); + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi) + || !new_lhs + || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs)) + || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1) + return; + lhs = new_lhs; + } + edge succ_edge = single_succ_edge (bb); + basic_block succ_bb = succ_edge->dest; + gsi = gsi_start_phis (succ_bb); + if (gsi_end_p (gsi)) + return; + gphi *phi = as_a <gphi *> (gsi_stmt (gsi)); + gsi_next (&gsi); + if (!gsi_end_p (gsi)) + return; + if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs) + return; + tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx); + if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR) + { + tree cond = gimple_assign_rhs1 (cond_stmt); + if (TREE_CODE (cond) == NE_EXPR) + { + if (!operand_equal_p (other_val, + gimple_assign_rhs3 (cond_stmt), 0)) + return; + } + else if (!operand_equal_p (other_val, + gimple_assign_rhs2 (cond_stmt), 0)) + return; + } + else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR) + { + if (!integer_zerop (other_val)) + return; + } + else if (!integer_onep (other_val)) + return; + } + gcond *zero_gcond = as_a <gcond *> (zero_cond); + if (pred_edge->flags & EDGE_TRUE_VALUE) + gimple_cond_make_true (zero_gcond); + else + gimple_cond_make_false (zero_gcond); + update_stmt (zero_cond); + *cfg_changed = true; +} + +/* Helper function of match_arith_overflow. Return 1 if USE_STMT is unsigned overflow check ovf != 0 for STMT, -1 if USE_STMT is unsigned overflow check ovf == 0 and 0 otherwise.
*/ static int -uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval, - tree *other) +arith_overflow_check_p (gimple *stmt, gimple *&use_stmt, tree maxval, + tree *other) { enum tree_code ccode = ERROR_MARK; tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; + enum tree_code code = gimple_assign_rhs_code (stmt); + tree lhs = gimple_assign_lhs (stmt); + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + tree multop = NULL_TREE, divlhs = NULL_TREE; + + if (code == MULT_EXPR) + { + if (!is_gimple_assign (use_stmt)) + return 0; + if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR) + return 0; + if (gimple_assign_rhs1 (use_stmt) != lhs) + return 0; + if (gimple_assign_rhs2 (use_stmt) == rhs1) + multop = rhs2; + else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0)) + multop = rhs1; + else + return 0; + if (stmt_ends_bb_p (use_stmt)) + return 0; + divlhs = gimple_assign_lhs (use_stmt); + if (!divlhs) + return 0; + use_operand_p use; + if (!single_imm_use (divlhs, &use, &use_stmt)) + return 0; + } if (gimple_code (use_stmt) == GIMPLE_COND) { ccode = gimple_cond_code (use_stmt); @@ -3497,11 +3708,6 @@ uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval, if (TREE_CODE_CLASS (ccode) != tcc_comparison) return 0; - enum tree_code code = gimple_assign_rhs_code (stmt); - tree lhs = gimple_assign_lhs (stmt); - tree rhs1 = gimple_assign_rhs1 (stmt); - tree rhs2 = gimple_assign_rhs2 (stmt); - switch (ccode) { case GT_EXPR: @@ -3547,6 +3753,17 @@ uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval, return ccode == LT_EXPR ? 1 : -1; } break; + case EQ_EXPR: + case NE_EXPR: + /* r = a * b; _1 = r / a; _1 == b + r = a * b; _1 = r / b; _1 == a + r = a * b; _1 = r / a; _1 != b + r = a * b; _1 = r / b; _1 != a. */ + if (code == MULT_EXPR + && ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0)) + || (crhs2 == divlhs && crhs1 == multop))) + return ccode == NE_EXPR ? 
1 : -1; + break; default: break; } @@ -3557,7 +3774,7 @@ uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval, x = y - z; if (x > y) where there are other uses of x and replace it with - _7 = SUB_OVERFLOW (y, z); + _7 = .SUB_OVERFLOW (y, z); x = REALPART_EXPR <_7>; _8 = IMAGPART_EXPR <_7>; if (_8) @@ -3571,9 +3788,9 @@ uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval, where y and z have unsigned types with maximum max and there are other uses of x and all of those cast x back to that unsigned type and again replace it with - _7 = ADD_OVERFLOW (y, z); + _7 = .ADD_OVERFLOW (y, z); _9 = REALPART_EXPR <_7>; - _8 = IMAGPART_EXPR <_8>; + _8 = IMAGPART_EXPR <_7>; if (_8) and replace (utype) x with _9. @@ -3581,13 +3798,34 @@ uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval, x = ~z; if (y > x) and replace it with - _7 = ADD_OVERFLOW (y, z); - _8 = IMAGPART_EXPR <_8>; - if (_8) */ + _7 = .ADD_OVERFLOW (y, z); + _8 = IMAGPART_EXPR <_7>; + if (_8) + + And also recognize: + z = x * y; + if (x != 0) + goto <bb 3>; [50.00%] + else + goto <bb 4>; [50.00%] + + <bb 3> [local count: 536870913]: + _2 = z / x; + _9 = _2 != y; + _10 = (int) _9; + + <bb 4> [local count: 1073741824]: + # iftmp.0_3 = PHI <_10(3), 0(2)> + and replace it with + _7 = .MUL_OVERFLOW (x, y); + z = REALPART_EXPR <_7>; + _8 = IMAGPART_EXPR <_7>; + _9 = _8 != 0; + iftmp.0_3 = (int) _9; */ static bool -match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, - enum tree_code code) +match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, + enum tree_code code, bool *cfg_changed) { tree lhs = gimple_assign_lhs (stmt); tree type = TREE_TYPE (lhs); @@ -3602,11 +3840,13 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR + || code == MULT_EXPR || code == BIT_NOT_EXPR); if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type) || has_zero_uses (lhs) || (code != PLUS_EXPR + && code != MULT_EXPR &&
optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab, TYPE_MODE (type)) == CODE_FOR_nothing)) return false; @@ -3620,7 +3860,7 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, continue; tree other = NULL_TREE; - if (uaddsub_overflow_check_p (stmt, use_stmt, NULL_TREE, &other)) + if (arith_overflow_check_p (stmt, use_stmt, NULL_TREE, &other)) { if (code == BIT_NOT_EXPR) { @@ -3643,9 +3883,12 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, tree maxval = NULL_TREE; if (!ovf_use_seen - || (code == BIT_NOT_EXPR ? use_seen : !use_seen) + || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen)) || (code == PLUS_EXPR && optab_handler (uaddv4_optab, + TYPE_MODE (type)) == CODE_FOR_nothing) + || (code == MULT_EXPR + && optab_handler (umulv4_optab, TYPE_MODE (type)) == CODE_FOR_nothing)) { if (code != PLUS_EXPR) @@ -3704,7 +3947,7 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, if (is_gimple_debug (use_stmt)) continue; - if (uaddsub_overflow_check_p (stmt, use_stmt, maxval, NULL)) + if (arith_overflow_check_p (stmt, use_stmt, maxval, NULL)) { ovf_use_seen = true; use_bb = gimple_bb (use_stmt); @@ -3824,13 +4067,16 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, *gsi = gsi_for_stmt (cond_stmt); tree ctype = build_complex_type (type); - gcall *g = gimple_build_call_internal (code != MINUS_EXPR + gcall *g = gimple_build_call_internal (code == MULT_EXPR + ? IFN_MUL_OVERFLOW + : code != MINUS_EXPR ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW, 2, rhs1, rhs2); tree ctmp = make_ssa_name (ctype); gimple_call_set_lhs (g, ctmp); gsi_insert_before (gsi, g, GSI_SAME_STMT); tree new_lhs = maxval ? 
make_ssa_name (type) : lhs; + gimple *mul_stmts[3] = { NULL, NULL, NULL }; gassign *g2; if (code != BIT_NOT_EXPR) { @@ -3844,6 +4090,11 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, } else gsi_replace (gsi, g2, true); + if (code == MULT_EXPR) + { + mul_stmts[0] = g; + mul_stmts[1] = g2; + } } tree ovf = make_ssa_name (type); g2 = gimple_build_assign (ovf, IMAGPART_EXPR, @@ -3852,13 +4103,16 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, gsi_insert_after (gsi, g2, GSI_NEW_STMT); else gsi_insert_before (gsi, g2, GSI_SAME_STMT); + if (code == MULT_EXPR) + mul_stmts[2] = g2; FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) { if (is_gimple_debug (use_stmt)) continue; - int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt, maxval, NULL); + gimple *orig_use_stmt = use_stmt; + int ovf_use = arith_overflow_check_p (stmt, use_stmt, maxval, NULL); if (ovf_use == 0) { gcc_assert (code != BIT_NOT_EXPR); @@ -3901,6 +4155,14 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, } } update_stmt (use_stmt); + if (code == MULT_EXPR && use_stmt != orig_use_stmt) + { + gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt); + maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt, + cfg_changed); + gsi_remove (&gsi2, true); + release_ssa_name (gimple_assign_lhs (orig_use_stmt)); + } } if (maxval) { @@ -4238,16 +4500,17 @@ math_opts_dom_walker::after_dom_children (basic_block bb) release_defs (stmt); continue; } + match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p); break; case PLUS_EXPR: case MINUS_EXPR: if (!convert_plusminus_to_widen (&gsi, stmt, code)) - match_uaddsub_overflow (&gsi, stmt, code); + match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p); break; case BIT_NOT_EXPR: - if (match_uaddsub_overflow (&gsi, stmt, code)) + if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p)) continue; break; -- 2.30.2