+2015-11-13 Ajit Agarwal <ajitkum@xilinx.com>
+ Jeff Law <law@redhat.com>
+
+ * Makefile.in (OBJS): Add gimple-ssa-split-paths.o
+ * common.opt (-fsplit-paths): New flag controlling path splitting.
+ * doc/invoke.texi (fsplit-paths): Document.
+ * opts.c (default_options_table): Add -fsplit-paths to -O2.
+ * passes.def: Add split_paths pass.
+ * timevar.def (TV_SPLIT_PATHS): New timevar.
+ * tracer.c: Include "tracer.h"
+ (ignore_bb_p): No longer static.
+ (transform_duplicate): New function, broken out of tail_duplicate.
+ (tail_duplicate): Use transform_duplicate.
+ * tracer.h (ignore_bb_p): Declare
+ (transform_duplicate): Likewise.
+ * tree-pass.h (make_pass_split_paths): Declare.
+ * gimple-ssa-split-paths.c: New file.
+
2015-11-13 Kai Tietz <ktietz70@googlemail.com>
Marek Polacek <polacek@redhat.com>
Jason Merrill <jason@redhat.com>
gimple-pretty-print.o \
gimple-ssa-backprop.o \
gimple-ssa-isolate-paths.o \
+ gimple-ssa-split-paths.o \
gimple-ssa-strength-reduction.o \
gimple-streamer-in.o \
gimple-streamer-out.o \
Common Report Var(flag_tree_vrp) Init(0) Optimization
Perform Value Range Propagation on trees.
+fsplit-paths
+Common Report Var(flag_split_paths) Init(0) Optimization
+Split paths leading to loop backedges.
+
funit-at-a-time
Common Report Var(flag_unit_at_a_time) Init(1)
Compile whole compilation unit at a time.
-fdump-tree-fre@r{[}-@var{n}@r{]} @gol
-fdump-tree-vtable-verify @gol
-fdump-tree-vrp@r{[}-@var{n}@r{]} @gol
+-fdump-tree-split-paths@r{[}-@var{n}@r{]} @gol
-fdump-tree-storeccp@r{[}-@var{n}@r{]} @gol
-fdump-final-insns=@var{file} @gol
-fcompare-debug@r{[}=@var{opts}@r{]} -fcompare-debug-second @gol
-fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
-fsemantic-interposition -fshrink-wrap -fsignaling-nans @gol
-fsingle-precision-constant -fsplit-ivs-in-unroller @gol
+-fsplit-paths @gol
-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
-fstack-protector -fstack-protector-all -fstack-protector-strong @gol
-fstack-protector-explicit -fstdarg-opt -fstrict-aliasing @gol
given for the same pass, then the latter option overrides the earlier
one.
+@item split-paths
+@opindex fdump-tree-split-paths
+Dump each function after splitting paths to loop backedges. The file
+name is made by appending @file{.split-paths} to the source file name.
+
@item all
Turn on all options, except @option{raw}, @option{slim}, @option{verbose}
and @option{lineno}.
-frerun-cse-after-loop @gol
-fsched-interblock -fsched-spec @gol
-fschedule-insns -fschedule-insns2 @gol
+-fsplit-paths @gol
-fstrict-aliasing -fstrict-overflow @gol
-ftree-builtin-call-dce @gol
-ftree-switch-conversion -ftree-tail-merge @gol
@item -ftree-sink
@opindex ftree-sink
-Perform forward store motion on trees. This flag is
+Perform forward store motion on trees. This flag is
enabled by default at @option{-O} and higher.
@item -ftree-bit-ccp
elimination is only done if @option{-fdelete-null-pointer-checks} is
enabled.
+@item -fsplit-paths
+@opindex fsplit-paths
+Split paths leading to loop backedges. This can improve dead code
+elimination and common subexpression elimination. This is enabled by
+default at @option{-O2} and above.
+
@item -fsplit-ivs-in-unroller
@opindex fsplit-ivs-in-unroller
Enables expression of values of induction variables in later iterations
--- /dev/null
+/* Support routines for Splitting Paths to loop backedges
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "cfganal.h"
+#include "cfgloop.h"
+#include "gimple-iterator.h"
+#include "tracer.h"
+
+/* Given LATCH, the latch block in a loop, see if the shape of the
+ path reaching LATCH is suitable for being split by duplication.
+ If so, return the block that will be duplicated into its predecessor
+ paths. Else return NULL. */
+
+static basic_block
+find_block_to_duplicate_for_splitting_paths (basic_block latch)
+{
+ /* We should have simple latches at this point. So the latch should
+ have a single successor. This implies the predecessor of the latch
+ likely has the loop exit. And it's that predecessor we're most
+ interested in. To keep things simple, we're going to require that
+ the latch have a single predecessor too. */
+ if (single_succ_p (latch) && single_pred_p (latch))
+ {
+ basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
+ gcc_assert (single_pred_edge (latch)->src == bb);
+
+ /* If BB has been marked as not to be duplicated, then honor that
+ request. */
+ if (ignore_bb_p (bb))
+ return NULL;
+
+ gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb));
+ /* The immediate dominator of the latch must end in a conditional. */
+ if (!last || gimple_code (last) != GIMPLE_COND)
+ return NULL;
+
+ /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
+ region. Verify that it is.
+
+ First, verify that BB has two predecessors (each arm of the
+ IF-THEN-ELSE) and two successors (the latch and exit). */
+ if (EDGE_COUNT (bb->preds) == 2 && EDGE_COUNT (bb->succs) == 2)
+ {
+ /* Now verify that BB's immediate dominator ends in a
+ conditional as well. */
+ basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
+ gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom));
+ if (!last || gimple_code (last) != GIMPLE_COND)
+ return NULL;
+
+ /* And that BB's immediate dominator's successors are the
+ the predecessors of BB. */
+ if (!find_edge (bb_idom, EDGE_PRED (bb, 0)->src)
+ || !find_edge (bb_idom, EDGE_PRED (bb, 1)->src))
+ return NULL;
+
+ /* So at this point we have a simple diamond for an IF-THEN-ELSE
+ construct starting at BB_IDOM, with a join point at BB. BB
+ pass control outside the loop or to the loop latch.
+
+ We're going to want to create two duplicates of BB, one for
+ each successor of BB_IDOM. */
+ return bb;
+ }
+ }
+ return NULL;
+}
+
+/* Return TRUE if BB is a reasonable block to duplicate by examining
+ its size, false otherwise. BB will always be a loop latch block.
+
+ Should this use the same tests as we do for jump threading? */
+
+static bool
+is_feasible_trace (basic_block bb)
+{
+ int num_stmt = 0;
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (!is_gimple_debug (stmt))
+ num_stmt++;
+ }
+
+ /* We may want to limit how many statements we copy. */
+ if (num_stmt > 1)
+ return true;
+
+ return false;
+}
+
+/* If the immediate dominator of the latch of the loop is
+ block with conditional branch, then the loop latch is
+ duplicated to its predecessors path preserving the SSA
+ semantics.
+
+ CFG before transformation.
+
+ 2
+ |
+ |
+ +---->3
+ | / \
+ | / \
+ | 4 5
+ | \ /
+ | \ /
+ | 6
+ | / \
+ | / \
+ | 8 7
+ | | |
+ ---+ E
+
+
+
+ Block 8 is the latch. We're going to make copies of block 6 (9 & 10)
+ and wire things up so they look like this:
+
+ 2
+ |
+ |
+ +---->3
+ | / \
+ | / \
+ | 4 5
+ | | |
+ | | |
+ | 9 10
+ | |\ /|
+ | | \ / |
+ | | 7 |
+ | | | |
+ | | E |
+ | | |
+ | \ /
+ | \ /
+ +-----8
+
+
+ Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
+ enables CSE, DCE and other optimizations to occur on a larger block
+ of code. */
+
+static bool
+split_paths ()
+{
+ bool changed = false;
+ loop_p loop;
+
+ loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
+ initialize_original_copy_tables ();
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+ {
+ /* See if there is a block that we can duplicate to split the
+ path to the loop latch. */
+ basic_block bb = find_block_to_duplicate_for_splitting_paths (loop->latch);
+
+ /* BB is the merge point for an IF-THEN-ELSE we want to transform.
+
+ Essentially we want to create two duplicates of BB and append
+ a duplicate to the THEN and ELSE clauses. This will split the
+ path leading to the latch. BB will be unreachable and removed. */
+ if (bb && is_feasible_trace (bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Duplicating join block %d into predecessor paths\n",
+ bb->index);
+ basic_block pred0 = EDGE_PRED (bb, 0)->src;
+ basic_block pred1 = EDGE_PRED (bb, 1)->src;
+ transform_duplicate (pred0, bb);
+ transform_duplicate (pred1, bb);
+ changed = true;
+ }
+ }
+
+ loop_optimizer_finalize ();
+ free_original_copy_tables ();
+ return changed;
+}
+
+/* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
+ paths where split, otherwise return zero. */
+
+static unsigned int
+execute_split_paths ()
+{
+ /* If we don't have at least 2 real blocks and backedges in the
+ CFG, then there's no point in trying to perform path splitting. */
+ if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
+ || !mark_dfs_back_edges ())
+ return 0;
+
+ bool changed = split_paths();
+ if (changed)
+ free_dominance_info (CDI_DOMINATORS);
+
+ return changed ? TODO_cleanup_cfg : 0;
+}
+
+static bool
+gate_split_paths ()
+{
+ return flag_split_paths;
+}
+
+namespace {
+
+const pass_data pass_data_split_paths =
+{
+ GIMPLE_PASS, /* type */
+ "split-paths", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_SPLIT_PATHS, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_split_paths : public gimple_opt_pass
+{
+ public:
+ pass_split_paths (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_split_paths, ctxt)
+ {}
+ /* opt_pass methods: */
+ opt_pass * clone () { return new pass_split_paths (m_ctxt); }
+ virtual bool gate (function *) { return gate_split_paths (); }
+ virtual unsigned int execute (function *) { return execute_split_paths (); }
+
+}; // class pass_split_paths
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_split_paths (gcc::context *ctxt)
+{
+ return new pass_split_paths (ctxt);
+}
{ OPT_LEVELS_2_PLUS, OPT_fisolate_erroneous_paths_dereference, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fipa_ra, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_flra_remat, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fsplit_paths, NULL, 1 },
/* -O3 optimizations. */
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
POP_INSERT_PASSES ()
NEXT_PASS (pass_simduid_cleanup);
NEXT_PASS (pass_lower_vector_ssa);
+ NEXT_PASS (pass_split_paths);
NEXT_PASS (pass_cse_reciprocals);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_strength_reduction);
+2015-11-13 Ajit Agarwal <ajitkum@xilinx.com>
+ Jeff Law <law@redhat.com>
+
+ * gcc.dg/tree-ssa/split-path-1.c: New test.
+
2015-11-13 Nathan Sidwell <nathan@codesourcery.com>
* c-c++-common/goacc/loop-auto-1.c: New.
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-split-paths-details " } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define RGBMAX 255
+
+int
+test()
+{
+ int i, Pels;
+ unsigned char sum = 0;
+ unsigned char xr, xg, xb;
+ unsigned char xc, xm, xy, xk;
+ unsigned char *ReadPtr, *EritePtr;
+
+ ReadPtr = ( unsigned char *) malloc (sizeof (unsigned char) * 100);
+ EritePtr = ( unsigned char *) malloc (sizeof (unsigned char) * 100);
+
+ for (i = 0; i < 100;i++)
+ {
+ ReadPtr[i] = 100 - i;
+ }
+
+ for (i = 0; i < 100; i++)
+ {
+ xr = *ReadPtr++;
+ xg = *ReadPtr++;
+ xb = *ReadPtr++;
+
+ xc = (unsigned char) (RGBMAX - xr);
+ xm = (unsigned char) (RGBMAX - xg);
+ xy = (unsigned char) (RGBMAX - xb);
+
+ if (xc < xm)
+ {
+ xk = (unsigned char) (xc < xy ? xc : xy);
+ }
+ else
+ {
+ xk = (unsigned char) (xm < xy ? xm : xy);
+ }
+
+ xc = (unsigned char) (xc - xk);
+ xm = (unsigned char) (xm - xk);
+ xy = (unsigned char) (xy - xk);
+
+ *EritePtr++ = xc;
+ *EritePtr++ = xm;
+ *EritePtr++ = xy;
+ *EritePtr++ = xk;
+ sum += *EritePtr;
+ }
+ return sum;
+}
+
+int
+main()
+{
+ if (test() != 33)
+ abort();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Duplicating join block" "split-paths" } } */
DEFTIMEVAR (TV_REE , "ree")
DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
+DEFTIMEVAR (TV_SPLIT_PATHS , "split paths")
DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2")
DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers")
#include "tree-inline.h"
#include "cfgloop.h"
#include "fibonacci_heap.h"
+#include "tracer.h"
static int count_insns (basic_block);
-static bool ignore_bb_p (const_basic_block);
static bool better_p (const_edge, const_edge);
static edge find_best_successor (basic_block);
static edge find_best_predecessor (basic_block);
}
/* Return true if we should ignore the basic block for purposes of tracing. */
-static bool
+bool
ignore_bb_p (const_basic_block bb)
{
if (bb->index < NUM_FIXED_BLOCKS)
return i;
}
+/* Duplicate block BB2, placing it after BB in the CFG. Return the
+ newly created block. */
+basic_block
+transform_duplicate (basic_block bb, basic_block bb2)
+{
+ edge e;
+ basic_block copy;
+
+ e = find_edge (bb, bb2);
+
+ copy = duplicate_block (bb2, e, bb);
+ flush_pending_stmts (e);
+
+ add_phi_args_after_copy (©, 1, NULL);
+
+ return (copy);
+}
+
/* Look for basic blocks in frequency order, construct traces and tail duplicate
if profitable. */
entries or at least rotate the loop. */
&& bb2->loop_father->header != bb2)
{
- edge e;
- basic_block copy;
-
nduplicated += counts [bb2->index];
-
- e = find_edge (bb, bb2);
-
- copy = duplicate_block (bb2, e, bb);
- flush_pending_stmts (e);
-
- add_phi_args_after_copy (©, 1, NULL);
+ basic_block copy = transform_duplicate (bb, bb2);
/* Reconsider the original copy of block we've duplicated.
Removing the most common predecessor may make it to be
--- /dev/null
+/* Header file for Tracer.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_TRACER_H
+#define GCC_TRACER_H
+
+extern basic_block transform_duplicate (basic_block bb, basic_block bb2);
+extern bool ignore_bb_p (const_basic_block bb);
+
+#endif /* GCC_TRACER_H */
extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_split_paths (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_build_alias (gcc::context *ctxt);