re PR tree-optimization/15255 ([tree-ssa] a * 2 + a * 2 is not converted to a * 4)
authorRichard Guenther <rguenther@suse.de>
Wed, 13 Aug 2008 08:57:20 +0000 (08:57 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Wed, 13 Aug 2008 08:57:20 +0000 (08:57 +0000)
2008-08-13  Richard Guenther  <rguenther@suse.de>

PR tree-optimization/15255
* tree-ssa-reassoc.c (linearize_expr_tree): Declare.
(struct oecount_s): New struct and VEC types.
(cvec): New global.
(oecount_hash): New function.
(oecount_eq): Likewise.
(oecount_cmp): Likewise.
(zero_one_operation): New function.
(build_and_add_sum): Likewise.
(undistribute_ops_list): Perform un-distribution of multiplication
and division on the chain of summands.
(should_break_up_subtract): Also break up subtracts for factors.
(reassociate_bb): Delete dead visited statements.
Call undistribute_ops_list.  Re-sort and optimize if it did something.
* passes.c (init_optimization_passes): Move DSE before
reassociation.
* tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle
PHI nodes.

* gcc.dg/tree-ssa/reassoc-14.c: New testcase.
* gcc.dg/tree-ssa/reassoc-15.c: Likewise.
* gcc.dg/tree-ssa/reassoc-16.c: Likewise.
* gcc.dg/torture/reassoc-1.c: Likewise.
* gcc.dg/tree-ssa/recip-2.c: Adjust.
* gcc.dg/tree-ssa/recip-6.c: Likewise.
* gcc.dg/tree-ssa/recip-7.c: Likewise.
* gfortran.dg/reassoc_4.f: Likewise.

From-SVN: r139048

15 files changed:
gcc/ChangeLog
gcc/passes.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/torture/reassoc-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/recip-2.c
gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
gcc/testsuite/gfortran.dg/reassoc_4.f [new file with mode: 0644]
gcc/tree-ssa-loop-niter.c
gcc/tree-ssa-reassoc.c

index b22b949a45cfe3dee8e91c3e3168014a586ee3b6..805d73dd3759948af542701d1bb389ce67058a3d 100644 (file)
@@ -1,3 +1,24 @@
+2008-08-13  Richard Guenther  <rguenther@suse.de>
+
+       PR tree-optimization/15255
+       * tree-ssa-reassoc.c (linearize_expr_tree): Declare.
+       (struct oecount_s): New struct and VEC types.
+       (cvec): New global.
+       (oecount_hash): New function.
+       (oecount_eq): Likewise.
+       (oecount_cmp): Likewise.
+       (zero_one_operation): New function.
+       (build_and_add_sum): Likewise.
+       (undistribute_ops_list): Perform un-distribution of multiplication
+       and division on the chain of summands.
+       (should_break_up_subtract): Also break up subtracts for factors.
+       (reassociate_bb): Delete dead visited statements.
+       Call undistribute_ops_list.  Re-sort and optimize if it did something.
+       * passes.c (init_optimization_passes): Move DSE before
+       reassociation.
+       * tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle
+       PHI nodes.
+
 2008-08-12  Janis Johnson  <janis187@us.ibm.com>
 
        * doc/invoke.texi (-fipa-pta): Say the option is experimental.
index 64470ba2b0bc1548c7215ffdfa7c34a4b49cad77..ee3826b39db36d582cf44e46b7a6491d865702e4 100644 (file)
@@ -633,9 +633,9 @@ init_optimization_passes (void)
         only examines PHIs to discover const/copy propagation
         opportunities.  */
       NEXT_PASS (pass_phi_only_cprop);
+      NEXT_PASS (pass_dse);
       NEXT_PASS (pass_reassoc);
       NEXT_PASS (pass_dce);
-      NEXT_PASS (pass_dse);
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt);
       NEXT_PASS (pass_object_sizes);
index 83d6afe1d1a13a921a3fb713bc3f29afba4d0e8b..ca8c8cfe5d12a1cd771779b862a6a211ece4c130 100644 (file)
@@ -1,3 +1,15 @@
+2008-08-13  Richard Guenther  <rguenther@suse.de>
+
+       PR tree-optimization/15255
+       * gcc.dg/tree-ssa/reassoc-14.c: New testcase.
+       * gcc.dg/tree-ssa/reassoc-15.c: Likewise.
+       * gcc.dg/tree-ssa/reassoc-16.c: Likewise.
+       * gcc.dg/torture/reassoc-1.c: Likewise.
+       * gcc.dg/tree-ssa/recip-2.c: Adjust.
+       * gcc.dg/tree-ssa/recip-6.c: Likewise.
+       * gcc.dg/tree-ssa/recip-7.c: Likewise.
+       * gfortran.dg/reassoc_4.f: Likewise.
+
 2008-08-12  Janis Johnson  <janis187@us.ibm.com>
 
        * gcc.target/i386/pr32000-2.c: Use dg-skip-if for target expression.
diff --git a/gcc/testsuite/gcc.dg/torture/reassoc-1.c b/gcc/testsuite/gcc.dg/torture/reassoc-1.c
new file mode 100644 (file)
index 0000000..f0c9014
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+
+int x;
+
+int __attribute__((noinline))
+foo(int a, int b, int w)
+{
+  int tmp1 = a * w;
+  int tmp2 = b * w;
+  x = tmp1;
+  return tmp1 + tmp2;
+}
+
+extern void abort (void);
+
+int main()
+{
+  if (foo(1, 2, 3) != 9)
+    abort ();
+  if (x != 3)
+    abort ();
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c
new file mode 100644 (file)
index 0000000..24141ef
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+int test1 (int x, int y, int z, int weight)
+{
+  int tmp1 = x * weight;
+  int tmp2 = y * weight;
+  int tmp3 = (x - y) * weight;
+  return tmp1 + (tmp2 + tmp3);
+}
+
+int test2 (int x, int y, int z, int weight)
+{
+  int tmp1 = x * weight;
+  int tmp2 = y * weight * weight;
+  int tmp3 = z * weight * weight * weight;
+  return tmp1 + tmp2 + tmp3;
+}
+
+/* There should be one multiplication left in test1 and three in test2.  */
+
+/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c
new file mode 100644 (file)
index 0000000..d9b74d2
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+int test3 (int x, int y, int z, int weight, int w1, int w2, int w3)
+{
+  int wtmp1 = w1 * weight;
+  int wtmp2 = w2 * weight;
+  int wtmp3 = w3 * weight;
+  int tmp1 = x * wtmp1;
+  int tmp2 = y * wtmp2;
+  int tmp3 = z * wtmp3;
+  return tmp1 + tmp2 + tmp3;
+}
+
+/* The multiplication with weight should be un-distributed.
+   ???  This pattern is not recognized currently.  */
+
+/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c
new file mode 100644 (file)
index 0000000..4dd54a8
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+
+double test1 (double x, double y, double z, double weight)
+{
+  double tmp1 = x / weight;
+  double tmp2 = y / weight;
+  double tmp3 = -x / weight;
+  return tmp1 + tmp2 + tmp3;
+}
+
+/* The division should be un-distributed and all references to x should
+   be gone.  */
+
+/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c
new file mode 100644 (file)
index 0000000..255c786
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+
+double test2 (double x, double y, double ddj, int b)
+{
+  double tmp1, tmp2, sum;
+  sum = 0.0;
+  if (b)
+    sum = 1.0;
+  tmp1 = sum/ddj;
+  tmp2 = x/ddj;
+  return tmp1 + y + tmp2;
+}
+
+/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c
new file mode 100644 (file)
index 0000000..ce52cd0
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-reassoc1" } */
+
+int
+ETree_nFactorEntriesInFront (int b, int m)
+{
+  int nent = b*b + 2*b*m;
+  return nent;
+}
+
+/* { dg-final { scan-tree-dump-times "\\\*" 2 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
index af628053ad59c18edd0db5c43f38bbd63703a5de..be754145e7dbfd1b2a9054bc5533ba25ab38e9f8 100644 (file)
@@ -1,7 +1,9 @@
 /* { dg-do compile } */
 /* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
 
-float e(float a, float b, float c, float d, float e, float f)
+float u, v, w, x, y, z;
+
+void e(float a, float b, float c, float d, float e, float f)
 {
   if (a < b)
     {
@@ -20,7 +22,12 @@ float e(float a, float b, float c, float d, float e, float f)
   /* This should not be left as a multiplication.  */
   c = 1 / c;
 
-  return a + b + c + d + e + f;
+  u = a;
+  v = b;
+  w = c;
+  x = d;
+  y = e;
+  z = f;
 }
 
 /* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
index 60fefd01da5ebe25da3b5526754fc6a5ef497b11..b3334fb862a7c469100c2df4688cb760d752248b 100644 (file)
@@ -5,7 +5,9 @@
 
 extern int f2();
 
-double f1(double y, double z, double w)
+double m, n, o;
+
+void f1(double y, double z, double w)
 {
   double b, c, d, e, f;
 
@@ -18,7 +20,9 @@ double f1(double y, double z, double w)
   e = c / y;
   f = 1 / y;
 
-  return d + e + f;
+  m = d;
+  n = e;
+  o = f;
 }
 
 /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
index af1bf3c008bba26513f01e8469fba650c728f93f..98bbdca6e2cb5d3e397b78560283ba20a9dd92d8 100644 (file)
@@ -5,7 +5,9 @@
 
 extern double h();
 
-double f(int x, double z, double w)
+double m, n, o;
+
+void f(int x, double z, double w)
 {
   double b, c, d, e, f;
   double y = h ();
@@ -19,7 +21,9 @@ double f(int x, double z, double w)
   e = c / y;
   f = 1 / y;
 
-  return d + e + f;
+  m = d;
+  n = e;
+  o = f;
 }
 
 /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
diff --git a/gcc/testsuite/gfortran.dg/reassoc_4.f b/gcc/testsuite/gfortran.dg/reassoc_4.f
new file mode 100644 (file)
index 0000000..7004365
--- /dev/null
@@ -0,0 +1,43 @@
+! { dg-do compile }
+! { dg-options "-O3 -ffast-math -fdump-tree-reassoc1" }
+      subroutine anisonl(w,vo,anisox,s,ii1,jj1,weight)
+      integer ii1,jj1,i1,iii1,j1,jjj1,k1,l1,m1,n1
+      real*8 w(3,3),vo(3,3),anisox(3,3,3,3),s(60,60),weight
+!
+!     This routine replaces the following lines in e_c3d.f for
+!     an anisotropic material
+!
+                      do i1=1,3
+                        iii1=ii1+i1-1
+                        do j1=1,3
+                          jjj1=jj1+j1-1
+                          do k1=1,3
+                            do l1=1,3
+                              s(iii1,jjj1)=s(iii1,jjj1)
+     &                          +anisox(i1,k1,j1,l1)*w(k1,l1)*weight
+                              do m1=1,3
+                                s(iii1,jjj1)=s(iii1,jjj1)
+     &                              +anisox(i1,k1,m1,l1)*w(k1,l1)
+     &                                 *vo(j1,m1)*weight
+     &                              +anisox(m1,k1,j1,l1)*w(k1,l1)
+     &                                 *vo(i1,m1)*weight
+                                do n1=1,3
+                                  s(iii1,jjj1)=s(iii1,jjj1)
+     &                              +anisox(m1,k1,n1,l1)
+     &                              *w(k1,l1)*vo(i1,m1)*vo(j1,n1)*weight
+                                enddo
+                              enddo
+                            enddo
+                          enddo
+                        enddo
+                      enddo
+
+      return
+      end
+
+! There should be 22 multiplications left after un-distributing
+! weigth, w(k1,l1), vo(i1,m1) and vo(j1,m1) on the innermost two
+! unrolled loops.
+
+! { dg-final { scan-tree-dump-times "\[0-9\] \\\* " 22 "reassoc1" } }
+! { dg-final { cleanup-tree-dump "reassoc1" } }
index b8247a0000c15b83b77a9db33f5f254a27f2cde6..1eedf7544af80fff7b2834e3db8b044c2234cc82 100644 (file)
@@ -2914,6 +2914,12 @@ stmt_dominates_stmt_p (gimple s1, gimple s2)
     {
       gimple_stmt_iterator bsi;
 
+      if (gimple_code (s2) == GIMPLE_PHI)
+       return false;
+
+      if (gimple_code (s1) == GIMPLE_PHI)
+       return true;
+
       for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi))
        if (gsi_stmt (bsi) == s1)
          return true;
index a3facd8baa1071e31f395136203a2d47e6f056de..be68331faf2e40ea5f9bfa2400e22fc2cb2026b0 100644 (file)
@@ -727,6 +727,415 @@ eliminate_using_constants (enum tree_code opcode,
     }
 }
 
+
+static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
+                                bool, bool);
+
+/* Structure for tracking and counting operands.  */
+typedef struct oecount_s {
+  int cnt;
+  enum tree_code oecode;
+  tree op;
+} oecount;
+
+DEF_VEC_O(oecount);
+DEF_VEC_ALLOC_O(oecount,heap);
+
+/* The heap for the oecount hashtable and the sorted list of operands.  */
+static VEC (oecount, heap) *cvec;
+
+/* Hash function for oecount.  */
+
+static hashval_t
+oecount_hash (const void *p)
+{
+  const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
+  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
+}
+
+/* Comparison function for oecount.  */
+
+static int
+oecount_eq (const void *p1, const void *p2)
+{
+  const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
+  const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
+  return (c1->oecode == c2->oecode
+         && c1->op == c2->op);
+}
+
+/* Comparison function for qsort sorting oecount elements by count.  */
+
+static int
+oecount_cmp (const void *p1, const void *p2)
+{
+  const oecount *c1 = (const oecount *)p1;
+  const oecount *c2 = (const oecount *)p2;
+  return c1->cnt - c2->cnt;
+}
+
+/* Walks the linear chain with result *DEF searching for an operation
+   with operand OP and code OPCODE removing that from the chain.  *DEF
+   is updated if there is only one operand but no operation left.  */
+
+static void
+zero_one_operation (tree *def, enum tree_code opcode, tree op)
+{
+  gimple stmt = SSA_NAME_DEF_STMT (*def);
+
+  do
+    {
+      tree name = gimple_assign_rhs1 (stmt);
+
+      /* If this is the operation we look for and one of the operands
+         is ours simply propagate the other operand into the stmts
+        single use.  */
+      if (gimple_assign_rhs_code (stmt) == opcode
+         && (name == op
+             || gimple_assign_rhs2 (stmt) == op))
+       {
+         gimple use_stmt;
+         use_operand_p use;
+         gimple_stmt_iterator gsi;
+         if (name == op)
+           name = gimple_assign_rhs2 (stmt);
+         gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
+         single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
+         if (gimple_assign_lhs (stmt) == *def)
+           *def = name;
+         SET_USE (use, name);
+         if (TREE_CODE (name) != SSA_NAME)
+           update_stmt (use_stmt);
+         gsi = gsi_for_stmt (stmt);
+         gsi_remove (&gsi, true);
+         release_defs (stmt);
+         return;
+       }
+
+      /* Continue walking the chain.  */
+      gcc_assert (name != op
+                 && TREE_CODE (name) == SSA_NAME);
+      stmt = SSA_NAME_DEF_STMT (name);
+    }
+  while (1);
+}
+
+/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
+   the result.  Places the statement after the definition of either
+   OP1 or OP2.  Returns the new statement.  */
+
+static gimple
+build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
+{
+  gimple op1def = NULL, op2def = NULL;
+  gimple_stmt_iterator gsi;
+  tree op;
+  gimple sum;
+
+  /* Create the addition statement.  */
+  sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
+  op = make_ssa_name (tmpvar, sum);
+  gimple_assign_set_lhs (sum, op);
+
+  /* Find an insertion place and insert.  */
+  if (TREE_CODE (op1) == SSA_NAME)
+    op1def = SSA_NAME_DEF_STMT (op1);
+  if (TREE_CODE (op2) == SSA_NAME)
+    op2def = SSA_NAME_DEF_STMT (op2);
+  if ((!op1def || gimple_nop_p (op1def))
+      && (!op2def || gimple_nop_p (op2def)))
+    {
+      gsi = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR));
+      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
+    }
+  else if ((!op1def || gimple_nop_p (op1def))
+          || (op2def && !gimple_nop_p (op2def)
+              && stmt_dominates_stmt_p (op1def, op2def)))
+    {
+      if (gimple_code (op2def) == GIMPLE_PHI)
+       {
+         gsi = gsi_start_bb (gimple_bb (op2def));
+         gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
+       }
+      else
+       {
+         gsi = gsi_for_stmt (op2def);
+         gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
+       }
+    }
+  else
+    {
+      if (gimple_code (op1def) == GIMPLE_PHI)
+       {
+         gsi = gsi_start_bb (gimple_bb (op1def));
+         gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
+       }
+      else
+       {
+         gsi = gsi_for_stmt (op1def);
+         gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
+       }
+    }
+  update_stmt (sum);
+
+  return sum;
+}
+
+/* Perform un-distribution of divisions and multiplications.
+   A * X + B * X is transformed into (A + B) * X and A / X + B / X
+   to (A + B) / X for real X.
+
+   The algorithm is organized as follows.
+
+    - First we walk the addition chain *OPS looking for summands that
+      are defined by a multiplication or a real division.  This results
+      in the candidates bitmap with relevant indices into *OPS.
+
+    - Second we build the chains of multiplications or divisions for
+      these candidates, counting the number of occurences of (operand, code)
+      pairs in all of the candidates chains.
+
+    - Third we sort the (operand, code) pairs by number of occurence and
+      process them starting with the pair with the most uses.
+
+      * For each such pair we walk the candidates again to build a
+        second candidate bitmap noting all multiplication/division chains
+       that have at least one occurence of (operand, code).
+
+      * We build an alternate addition chain only covering these
+        candidates with one (operand, code) operation removed from their
+       multiplication/division chain.
+
+      * The first candidate gets replaced by the alternate addition chain
+        multiplied/divided by the operand.
+
+      * All candidate chains get disabled for further processing and
+        processing of (operand, code) pairs continues.
+
+  The alternate addition chains built are re-processed by the main
+  reassociation algorithm which allows optimizing a * x * y + b * y * x
+  to (a + b ) * x * y in one invocation of the reassociation pass.  */
+
+static bool
+undistribute_ops_list (enum tree_code opcode,
+                      VEC (operand_entry_t, heap) **ops, struct loop *loop)
+{
+  unsigned int length = VEC_length (operand_entry_t, *ops);
+  operand_entry_t oe1;
+  unsigned i, j;
+  sbitmap candidates, candidates2;
+  unsigned nr_candidates, nr_candidates2;
+  sbitmap_iterator sbi0;
+  VEC (operand_entry_t, heap) **subops;
+  htab_t ctable;
+  bool changed = false;
+
+  if (length <= 1
+      || opcode != PLUS_EXPR)
+    return false;
+
+  /* Build a list of candidates to process.  */
+  candidates = sbitmap_alloc (length);
+  sbitmap_zero (candidates);
+  nr_candidates = 0;
+  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
+    {
+      enum tree_code dcode;
+      gimple oe1def;
+
+      if (TREE_CODE (oe1->op) != SSA_NAME)
+       continue;
+      oe1def = SSA_NAME_DEF_STMT (oe1->op);
+      if (!is_gimple_assign (oe1def))
+       continue;
+      dcode = gimple_assign_rhs_code (oe1def);
+      if ((dcode != MULT_EXPR
+          && dcode != RDIV_EXPR)
+         || !is_reassociable_op (oe1def, dcode, loop))
+       continue;
+
+      SET_BIT (candidates, i);
+      nr_candidates++;
+    }
+
+  if (nr_candidates < 2)
+    {
+      sbitmap_free (candidates);
+      return false;
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "searching for un-distribute opportunities ");
+      print_generic_expr (dump_file,
+       VEC_index (operand_entry_t, *ops,
+                  sbitmap_first_set_bit (candidates))->op, 0);
+      fprintf (dump_file, " %d\n", nr_candidates);
+    }
+
+  /* Build linearized sub-operand lists and the counting table.  */
+  cvec = NULL;
+  ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
+  subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
+                    VEC_length (operand_entry_t, *ops));
+  EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
+    {
+      gimple oedef;
+      enum tree_code oecode;
+      unsigned j;
+
+      oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
+      oecode = gimple_assign_rhs_code (oedef);
+      linearize_expr_tree (&subops[i], oedef,
+                          associative_tree_code (oecode), false);
+
+      for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
+       {
+         oecount c;
+         void **slot;
+         size_t idx;
+         c.oecode = oecode;
+         c.cnt = 1;
+         c.op = oe1->op;
+         VEC_safe_push (oecount, heap, cvec, &c);
+         idx = VEC_length (oecount, cvec) + 41;
+         slot = htab_find_slot (ctable, (void *)idx, INSERT);
+         if (!*slot)
+           {
+             *slot = (void *)idx;
+           }
+         else
+           {
+             VEC_pop (oecount, cvec);
+             VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
+           }
+       }
+    }
+  htab_delete (ctable);
+
+  /* Sort the counting table.  */
+  qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
+        sizeof (oecount), oecount_cmp);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      oecount *c;
+      fprintf (dump_file, "Candidates:\n");
+      for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
+       {
+         fprintf (dump_file, "  %u %s: ", c->cnt,
+                  c->oecode == MULT_EXPR
+                  ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
+         print_generic_expr (dump_file, c->op, 0);
+         fprintf (dump_file, "\n");
+       }
+    }
+
+  /* Process the (operand, code) pairs in order of most occurence.  */
+  candidates2 = sbitmap_alloc (length);
+  while (!VEC_empty (oecount, cvec))
+    {
+      oecount *c = VEC_last (oecount, cvec);
+      if (c->cnt < 2)
+       break;
+
+      /* Now collect the operands in the outer chain that contain
+         the common operand in their inner chain.  */
+      sbitmap_zero (candidates2);
+      nr_candidates2 = 0;
+      EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
+       {
+         gimple oedef;
+         enum tree_code oecode;
+         unsigned j;
+         tree op = VEC_index (operand_entry_t, *ops, i)->op;
+
+         /* If we undistributed in this chain already this may be
+            a constant.  */
+         if (TREE_CODE (op) != SSA_NAME)
+           continue;
+
+         oedef = SSA_NAME_DEF_STMT (op);
+         oecode = gimple_assign_rhs_code (oedef);
+         if (oecode != c->oecode)
+           continue;
+
+         for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
+           {
+             if (oe1->op == c->op)
+               {
+                 SET_BIT (candidates2, i);
+                 ++nr_candidates2;
+                 break;
+               }
+           }
+       }
+
+      if (nr_candidates2 >= 2)
+       {
+         operand_entry_t oe1, oe2;
+         tree tmpvar;
+         gimple prod;
+         int first = sbitmap_first_set_bit (candidates2);
+
+         /* Build the new addition chain.  */
+         oe1 = VEC_index (operand_entry_t, *ops, first);
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           {
+             fprintf (dump_file, "Building (");
+             print_generic_expr (dump_file, oe1->op, 0);
+           }
+         tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
+         add_referenced_var (tmpvar);
+         zero_one_operation (&oe1->op, c->oecode, c->op);
+         EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
+           {
+             gimple sum;
+             oe2 = VEC_index (operand_entry_t, *ops, i);
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               {
+                 fprintf (dump_file, " + ");
+                 print_generic_expr (dump_file, oe2->op, 0);
+               }
+             zero_one_operation (&oe2->op, c->oecode, c->op);
+             sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
+             oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
+             oe2->rank = 0;
+             oe1->op = gimple_get_lhs (sum);
+           }
+
+         /* Apply the multiplication/division.  */
+         prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           {
+             fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
+             print_generic_expr (dump_file, c->op, 0);
+             fprintf (dump_file, "\n");
+           }
+
+         /* Record it in the addition chain and disable further
+            undistribution with this op.  */
+         oe1->op = gimple_assign_lhs (prod);
+         oe1->rank = get_rank (oe1->op);
+         VEC_free (operand_entry_t, heap, subops[first]);
+
+         changed = true;
+       }
+
+      VEC_pop (oecount, cvec);
+    }
+
+  for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
+    VEC_free (operand_entry_t, heap, subops[i]);
+  free (subops);
+  VEC_free (oecount, heap, cvec);
+  sbitmap_free (candidates);
+  sbitmap_free (candidates2);
+
+  return changed;
+}
+
+
 /* Perform various identities and other optimizations on the list of
    operand entries, stored in OPS.  The tree code for the binary
    operation between all the operands is OPCODE.  */
@@ -1097,7 +1506,8 @@ should_break_up_subtract (gimple stmt)
   if (TREE_CODE (lhs) == SSA_NAME
       && (immusestmt = get_single_immediate_use (lhs))
       && is_gimple_assign (immusestmt)
-      && gimple_assign_rhs_code (immusestmt) == PLUS_EXPR)
+      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
+         ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
     return true;
   return false;
 }
@@ -1125,7 +1535,8 @@ break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
    Place the operands of the expression tree in the vector named OPS.  */
 
 static void
-linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
+linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
+                    bool is_associative, bool set_visited)
 {
   gimple_stmt_iterator gsinow, gsilhs;
   tree binlhs = gimple_assign_rhs1 (stmt);
@@ -1136,7 +1547,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
   struct loop *loop = loop_containing_stmt (stmt);
 
-  gimple_set_visited (stmt, true);
+  if (set_visited)
+    gimple_set_visited (stmt, true);
 
   if (TREE_CODE (binlhs) == SSA_NAME)
     {
@@ -1160,6 +1572,13 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
     {
       tree temp;
 
+      /* If this is not a associative operation like division, give up.  */
+      if (!is_associative)
+       {
+         add_to_ops_vec (ops, binrhs);
+         return;
+       }
+
       if (!binrhsisreassoc)
        {
          add_to_ops_vec (ops, binrhs);
@@ -1203,7 +1622,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
   gsinow = gsi_for_stmt (stmt);
   gsilhs = gsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
   gsi_move_before (&gsilhs, &gsinow);
-  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs));
+  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
+                      is_associative, set_visited);
   add_to_ops_vec (ops, binrhs);
 }
 
@@ -1344,7 +1764,16 @@ reassociate_bb (basic_block bb)
          /* If this was part of an already processed statement,
             we don't need to touch it again. */
          if (gimple_visited_p (stmt))
-           continue;
+           {
+             /* This statement might have become dead because of previous
+                reassociations.  */
+             if (has_zero_uses (gimple_get_lhs (stmt)))
+               {
+                 gsi_remove (&gsi, true);
+                 release_defs (stmt);
+               }
+             continue;
+           }
 
          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
@@ -1375,12 +1804,21 @@ reassociate_bb (basic_block bb)
                continue;
 
              gimple_set_visited (stmt, true);
-             linearize_expr_tree (&ops, stmt);
+             linearize_expr_tree (&ops, stmt, true, true);
              qsort (VEC_address (operand_entry_t, ops),
                     VEC_length (operand_entry_t, ops),
                     sizeof (operand_entry_t),
                     sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
+             if (undistribute_ops_list (rhs_code, &ops,
+                                        loop_containing_stmt (stmt)))
+               {
+                 qsort (VEC_address (operand_entry_t, ops),
+                        VEC_length (operand_entry_t, ops),
+                        sizeof (operand_entry_t),
+                        sort_by_operand_rank);
+                 optimize_ops_list (rhs_code, &ops);
+               }
 
              if (VEC_length (operand_entry_t, ops) == 1)
                {