re PR tree-optimization/64434 (Performance regression after operand canonicalization...
authorYuri Rumyantsev <ysrumyan@gmail.com>
Thu, 15 Jan 2015 11:39:20 +0000 (11:39 +0000)
committerIlya Enkovich <ienkovich@gcc.gnu.org>
Thu, 15 Jan 2015 11:39:20 +0000 (11:39 +0000)
gcc/

PR tree-optimization/64434
* cfgexpand.c (reorder_operands): New function.
(expand_gimple_basic_block): Insert call of reorder_operands if
optimized is true.

gcc/testsuite/

PR tree-optimization/64434
* gcc.dg/torture/pr64434.c: New test.

From-SVN: r219646

gcc/ChangeLog
gcc/cfgexpand.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/torture/pr64434.c [new file with mode: 0644]

index f363748aa889fc483f16f9b163bb1bb0f443d670..3fc2bceb1a5a545e06e45b1b1ad3e79239f4c0ae 100644 (file)
@@ -1,3 +1,10 @@
+2015-01-15  Yuri Rumyantsev  <ysrumyan@gmail.com>
+
+       PR tree-optimization/64434
+       * cfgexpand.c (reorder_operands): New function.
+       (expand_gimple_basic_block): Insert call of reorder_operands if
+       optimized is true.
+
 2015-01-15  Matthew Fortune  <matthew.fortune@imgtec.com>
 
        * config/mips/micromips.md (*swp): Remove explicit parallel.
index c92c786ea388576b31022ce6f9b74eb9cde66cf0..c8eae425af86e6cada4a106295abc64bf57b852d 100644 (file)
@@ -4983,6 +4983,84 @@ expand_debug_locations (void)
   flag_strict_aliasing = save_strict_alias;
 }
 
+/* Performs swapping operands of commutative operations to expand
+   the expensive one first.  */
+
+static void
+reorder_operands (basic_block bb)
+{
+  unsigned int *lattice;  /* Hold cost of each statement.  */
+  unsigned int i = 0, n = 0;
+  gimple_stmt_iterator gsi;
+  gimple_seq stmts;
+  gimple stmt;
+  bool swap;
+  tree op0, op1;
+  ssa_op_iter iter;
+  use_operand_p use_p;
+  gimple def0, def1;
+
+  /* Compute cost of each statement using estimate_num_insns.  */
+  stmts = bb_seq (bb);
+  /* First pass: number BB's statements consecutively so a statement's
+     uid can be used as an index into LATTICE below.  */
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      gimple_set_uid (stmt, n++);
+    }
+  lattice = XNEWVEC (unsigned int, n);
+  /* Second pass: compute each statement's cost, folding in the cost of
+     feeding definitions that expand substitutes into it, then swap the
+     operands of commutative assignments when the second operand turns
+     out to be the more expensive one.  */
+  for (gsi = gsi_start (stmts); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      unsigned cost;
+      stmt = gsi_stmt (gsi);
+      cost = estimate_num_insns (stmt, &eni_size_weights);
+      lattice[i] = cost;
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+       {
+         tree use = USE_FROM_PTR (use_p);
+         gimple def_stmt;
+         if (TREE_CODE (use) != SSA_NAME)
+           continue;
+         /* A non-NULL result means the def is scheduled to be
+            substituted (TER) into this statement, so its cost is
+            really paid when this statement is expanded; presumably
+            such defs are earlier in the same BB, keeping the uid
+            index within LATTICE bounds -- NOTE(review): confirm.  */
+         def_stmt = get_gimple_for_ssa_name (use);
+         if (!def_stmt)
+           continue;
+         lattice[i] += lattice[gimple_uid (def_stmt)];
+       }
+      i++;
+      /* Only commutative assignments are candidates for swapping.  */
+      if (!is_gimple_assign (stmt)
+         || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
+       continue;
+      op0 = gimple_op (stmt, 1);
+      op1 = gimple_op (stmt, 2);
+      if (TREE_CODE (op0) != SSA_NAME
+         || TREE_CODE (op1) != SSA_NAME)
+       continue;
+      /* Swap operands if the second one is more expensive.  */
+      def0 = get_gimple_for_ssa_name (op0);
+      if (!def0)
+       continue;
+      def1 = get_gimple_for_ssa_name (op1);
+      if (!def1)
+       continue;
+      swap = false;
+      if (lattice[gimple_uid (def1)] > lattice[gimple_uid (def0)])
+       swap = true;
+      if (swap)
+       {
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           {
+             fprintf (dump_file, "Swap operands in stmt:\n");
+             print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+             fprintf (dump_file, "Cost left opnd=%d, right opnd=%d\n",
+                      lattice[gimple_uid (def0)],
+                      lattice[gimple_uid (def1)]);
+           }
+         /* swap_ssa_operands keeps the SSA use lists consistent while
+            exchanging rhs1 and rhs2.  */
+         swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+                            gimple_assign_rhs2_ptr (stmt));
+       }
+    }
+  XDELETE (lattice);
+}
+
 /* Expand basic block BB from GIMPLE trees to RTL.  */
 
 static basic_block
@@ -5004,6 +5082,8 @@ expand_gimple_basic_block (basic_block bb, bool disable_tail_calls)
      cannot use the gsi_*_bb() routines because they expect the basic
      block to be in GIMPLE, instead of RTL.  Therefore, we need to
      access the BB sequence directly.  */
+  if (optimize)
+    reorder_operands (bb);
   stmts = bb_seq (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
index 842ebf445c05ffd02d975753348546922104d985..c5b27a4962f574e68ccc91515a9b5e0ded11fd85 100644 (file)
@@ -1,3 +1,8 @@
+2015-01-15  Yuri Rumyantsev  <ysrumyan@gmail.com>
+
+       PR tree-optimization/64434
+       * gcc.dg/torture/pr64434.c: New test.
+
 2015-01-15  Matthew Fortune  <matthew.fortune@imgtec.com>
 
        * gcc.target/mips/mips.exp (mips-dg-options): -mips3d requires
diff --git a/gcc/testsuite/gcc.dg/torture/pr64434.c b/gcc/testsuite/gcc.dg/torture/pr64434.c
new file mode 100644 (file)
index 0000000..60fc806
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-rtl-expand-details" } */
+
+#define N 256
+int a1[N], a2[N], a3[N], a4[N];
+
+/* The multiply feeding the add into C is the expensive operand here;
+   the dg-final scan below expects expand to report exactly one
+   "Swap operands" for it.  */
+void foo ()
+{
+  int i;
+  for (i=0; i<N; i++) {
+    int c;
+    c = a3[i] + (a1[i] * a2[i]);
+    a4[i] = c + 1;
+    a1[i] = a2[i] - 1;
+  }
+}
+
+/* { dg-final { scan-rtl-dump-times "Swap operands" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+