From 8fe17e23b052741c8cbec99c7173c3c07f8e8c64 Mon Sep 17 00:00:00 2001 From: Ajit Agarwal Date: Fri, 13 Nov 2015 23:31:51 +0000 Subject: [PATCH] [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation * Makefile.in (OBJS): Add gimple-ssa-split-paths.o * common.opt (-fsplit-paths): New flag controlling path splitting. * doc/invoke.texi (fsplit-paths): Document. * opts.c (default_options_table): Add -fsplit-paths to -O2. * passes.def: Add split_paths pass. * timevar.def (TV_SPLIT_PATHS): New timevar. * tracer.c: Include "tracer.h" (ignore_bb_p): No longer static. (transform_duplicate): New function, broken out of tail_duplicate. (tail_duplicate): Use transform_duplicate. * tracer.h (ignore_bb_p): Declare (transform_duplicate): Likewise. * tree-pass.h (make_pass_split_paths): Declare. * gimple-ssa-split-paths.c: New file. * gcc.dg/tree-ssa/split-path-1.c: New test. Co-Authored-By: Jeff Law From-SVN: r230364 --- gcc/ChangeLog | 18 ++ gcc/Makefile.in | 1 + gcc/common.opt | 4 + gcc/doc/invoke.texi | 16 +- gcc/gimple-ssa-split-paths.c | 270 +++++++++++++++++++ gcc/opts.c | 1 + gcc/passes.def | 1 + gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.dg/tree-ssa/split-path-1.c | 67 +++++ gcc/timevar.def | 1 + gcc/tracer.c | 33 ++- gcc/tracer.h | 26 ++ gcc/tree-pass.h | 1 + 13 files changed, 431 insertions(+), 13 deletions(-) create mode 100644 gcc/gimple-ssa-split-paths.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/split-path-1.c create mode 100644 gcc/tracer.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dde26959f14..a7abe379db8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2015-11-13 Ajit Agarwal + Jeff Law + + * Makefile.in (OBJS): Add gimple-ssa-split-paths.o + * common.opt (-fsplit-paths): New flag controlling path splitting. + * doc/invoke.texi (fsplit-paths): Document. + * opts.c (default_options_table): Add -fsplit-paths to -O2. + * passes.def: Add split_paths pass. + * timevar.def (TV_SPLIT_PATHS): New timevar. + * tracer.c: Include "tracer.h" + (ignore_bb_p): No longer static. + (transform_duplicate): New function, broken out of tail_duplicate. + (tail_duplicate): Use transform_duplicate. + * tracer.h (ignore_bb_p): Declare + (transform_duplicate): Likewise. + * tree-pass.h (make_pass_split_paths): Declare. + * gimple-ssa-split-paths.c: New file. + 2015-11-13 Kai Tietz Marek Polacek Jason Merrill diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d3fd5e96442..5c294df7b8a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1277,6 +1277,7 @@ OBJS = \ gimple-pretty-print.o \ gimple-ssa-backprop.o \ gimple-ssa-isolate-paths.o \ + gimple-ssa-split-paths.o \ gimple-ssa-strength-reduction.o \ gimple-streamer-in.o \ gimple-streamer-out.o \ diff --git a/gcc/common.opt b/gcc/common.opt index 757ce858735..3eb520eaef2 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2403,6 +2403,10 @@ ftree-vrp Common Report Var(flag_tree_vrp) Init(0) Optimization Perform Value Range Propagation on trees. +fsplit-paths +Common Report Var(flag_split_paths) Init(0) Optimization +Split paths leading to loop backedges. + funit-at-a-time Common Report Var(flag_unit_at_a_time) Init(1) Compile whole compilation unit at a time. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c18df986f05..eeb79e65c0e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -354,6 +354,7 @@ Objective-C and Objective-C++ Dialects}. -fdump-tree-fre@r{[}-@var{n}@r{]} @gol -fdump-tree-vtable-verify @gol -fdump-tree-vrp@r{[}-@var{n}@r{]} @gol +-fdump-tree-split-paths@r{[}-@var{n}@r{]} @gol -fdump-tree-storeccp@r{[}-@var{n}@r{]} @gol -fdump-final-insns=@var{file} @gol -fcompare-debug@r{[}=@var{opts}@r{]} -fcompare-debug-second @gol @@ -448,6 +449,7 @@ Objective-C and Objective-C++ Dialects}. -fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol -fsemantic-interposition -fshrink-wrap -fsignaling-nans @gol -fsingle-precision-constant -fsplit-ivs-in-unroller @gol +-fsplit-paths @gol -fsplit-wide-types -fssa-backprop -fssa-phiopt @gol -fstack-protector -fstack-protector-all -fstack-protector-strong @gol -fstack-protector-explicit -fstdarg-opt -fstrict-aliasing @gol @@ -7171,6 +7173,11 @@ output on to @file{stderr}. If two conflicting dump filenames are given for the same pass, then the latter option overrides the earlier one. +@item split-paths +@opindex fdump-tree-split-paths +Dump each function after splitting paths to loop backedges. The file +name is made by appending @file{.split-paths} to the source file name. + @item all Turn on all options, except @option{raw}, @option{slim}, @option{verbose} and @option{lineno}. @@ -7808,6 +7815,7 @@ also turns on the following optimization flags: -frerun-cse-after-loop @gol -fsched-interblock -fsched-spec @gol -fschedule-insns -fschedule-insns2 @gol +-fsplit-paths @gol -fstrict-aliasing -fstrict-overflow @gol -ftree-builtin-call-dce @gol -ftree-switch-conversion -ftree-tail-merge @gol @@ -8821,7 +8829,7 @@ currently enabled, but may be enabled by @option{-O2} in the future. @item -ftree-sink @opindex ftree-sink -Perform forward store motion on trees. This flag is +Perform forward store motion on trees. This flag is enabled by default at @option{-O} and higher. @item -ftree-bit-ccp @@ -9127,6 +9135,12 @@ enabled by default at @option{-O2} and higher. Null pointer check elimination is only done if @option{-fdelete-null-pointer-checks} is enabled. +@item -fsplit-paths +@opindex fsplit-paths +Split paths leading to loop backedges. This can improve dead code +elimination and common subexpression elimination. This is enabled by +default at @option{-O2} and above. + @item -fsplit-ivs-in-unroller @opindex fsplit-ivs-in-unroller Enables expression of values of induction variables in later iterations diff --git a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c new file mode 100644 index 00000000000..602e916f88e --- /dev/null +++ b/gcc/gimple-ssa-split-paths.c @@ -0,0 +1,270 @@ +/* Support routines for Splitting Paths to loop backedges + Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Ajit Kumar Agarwal . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "cfganal.h" +#include "cfgloop.h" +#include "gimple-iterator.h" +#include "tracer.h" + +/* Given LATCH, the latch block in a loop, see if the shape of the + path reaching LATCH is suitable for being split by duplication. + If so, return the block that will be duplicated into its predecessor + paths. Else return NULL. */ + +static basic_block +find_block_to_duplicate_for_splitting_paths (basic_block latch) +{ + /* We should have simple latches at this point. So the latch should + have a single successor. This implies the predecessor of the latch + likely has the loop exit. And it's that predecessor we're most + interested in. To keep things simple, we're going to require that + the latch have a single predecessor too. */ + if (single_succ_p (latch) && single_pred_p (latch)) + { + basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch); + gcc_assert (single_pred_edge (latch)->src == bb); + + /* If BB has been marked as not to be duplicated, then honor that + request. */ + if (ignore_bb_p (bb)) + return NULL; + + gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb)); + /* The immediate dominator of the latch must end in a conditional. */ + if (!last || gimple_code (last) != GIMPLE_COND) + return NULL; + + /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond + region. Verify that it is. + + First, verify that BB has two predecessors (each arm of the + IF-THEN-ELSE) and two successors (the latch and exit). */ + if (EDGE_COUNT (bb->preds) == 2 && EDGE_COUNT (bb->succs) == 2) + { + /* Now verify that BB's immediate dominator ends in a + conditional as well. */ + basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb); + gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom)); + if (!last || gimple_code (last) != GIMPLE_COND) + return NULL; + + /* And that BB's immediate dominator's successors are the + the predecessors of BB. */ + if (!find_edge (bb_idom, EDGE_PRED (bb, 0)->src) + || !find_edge (bb_idom, EDGE_PRED (bb, 1)->src)) + return NULL; + + /* So at this point we have a simple diamond for an IF-THEN-ELSE + construct starting at BB_IDOM, with a join point at BB. BB + pass control outside the loop or to the loop latch. + + We're going to want to create two duplicates of BB, one for + each successor of BB_IDOM. */ + return bb; + } + } + return NULL; +} + +/* Return TRUE if BB is a reasonable block to duplicate by examining + its size, false otherwise. BB will always be a loop latch block. + + Should this use the same tests as we do for jump threading? */ + +static bool +is_feasible_trace (basic_block bb) +{ + int num_stmt = 0; + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (!is_gimple_debug (stmt)) + num_stmt++; + } + + /* We may want to limit how many statements we copy. */ + if (num_stmt > 1) + return true; + + return false; +} + +/* If the immediate dominator of the latch of the loop is + block with conditional branch, then the loop latch is + duplicated to its predecessors path preserving the SSA + semantics. + + CFG before transformation. + + 2 + | + | + +---->3 + | / \ + | / \ + | 4 5 + | \ / + | \ / + | 6 + | / \ + | / \ + | 8 7 + | | | + ---+ E + + + + Block 8 is the latch. We're going to make copies of block 6 (9 & 10) + and wire things up so they look like this: + + 2 + | + | + +---->3 + | / \ + | / \ + | 4 5 + | | | + | | | + | 9 10 + | |\ /| + | | \ / | + | | 7 | + | | | | + | | E | + | | | + | \ / + | \ / + +-----8 + + + Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which + enables CSE, DCE and other optimizations to occur on a larger block + of code. */ + +static bool +split_paths () +{ + bool changed = false; + loop_p loop; + + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); + initialize_original_copy_tables (); + calculate_dominance_info (CDI_DOMINATORS); + + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + { + /* See if there is a block that we can duplicate to split the + path to the loop latch. */ + basic_block bb = find_block_to_duplicate_for_splitting_paths (loop->latch); + + /* BB is the merge point for an IF-THEN-ELSE we want to transform. + + Essentially we want to create two duplicates of BB and append + a duplicate to the THEN and ELSE clauses. This will split the + path leading to the latch. BB will be unreachable and removed. */ + if (bb && is_feasible_trace (bb)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Duplicating join block %d into predecessor paths\n", + bb->index); + basic_block pred0 = EDGE_PRED (bb, 0)->src; + basic_block pred1 = EDGE_PRED (bb, 1)->src; + transform_duplicate (pred0, bb); + transform_duplicate (pred1, bb); + changed = true; + } + } + + loop_optimizer_finalize (); + free_original_copy_tables (); + return changed; +} + +/* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any + paths where split, otherwise return zero. */ + +static unsigned int +execute_split_paths () +{ + /* If we don't have at least 2 real blocks and backedges in the + CFG, then there's no point in trying to perform path splitting. */ + if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1 + || !mark_dfs_back_edges ()) + return 0; + + bool changed = split_paths(); + if (changed) + free_dominance_info (CDI_DOMINATORS); + + return changed ? TODO_cleanup_cfg : 0; +} + +static bool +gate_split_paths () +{ + return flag_split_paths; +} + +namespace { + +const pass_data pass_data_split_paths = +{ + GIMPLE_PASS, /* type */ + "split-paths", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_SPLIT_PATHS, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_split_paths : public gimple_opt_pass +{ + public: + pass_split_paths (gcc::context *ctxt) + : gimple_opt_pass (pass_data_split_paths, ctxt) + {} + /* opt_pass methods: */ + opt_pass * clone () { return new pass_split_paths (m_ctxt); } + virtual bool gate (function *) { return gate_split_paths (); } + virtual unsigned int execute (function *) { return execute_split_paths (); } + +}; // class pass_split_paths + +} // anon namespace + +gimple_opt_pass * +make_pass_split_paths (gcc::context *ctxt) +{ + return new pass_split_paths (ctxt); +} diff --git a/gcc/opts.c b/gcc/opts.c index 930ae431157..be04cf56412 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -523,6 +523,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_2_PLUS, OPT_fisolate_erroneous_paths_dereference, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fipa_ra, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_flra_remat, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_fsplit_paths, NULL, 1 }, /* -O3 optimizations. */ { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, diff --git a/gcc/passes.def b/gcc/passes.def index c0ab6b98e4b..db822d33597 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -274,6 +274,7 @@ along with GCC; see the file COPYING3. If not see POP_INSERT_PASSES () NEXT_PASS (pass_simduid_cleanup); NEXT_PASS (pass_lower_vector_ssa); + NEXT_PASS (pass_split_paths); NEXT_PASS (pass_cse_reciprocals); NEXT_PASS (pass_reassoc); NEXT_PASS (pass_strength_reduction); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 33011306f35..ee92aaf3275 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-11-13 Ajit Agarwal + Jeff Law + + * gcc.dg/tree-ssa/split-path-1.c: New test. + 2015-11-13 Nathan Sidwell * c-c++-common/goacc/loop-auto-1.c: New. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-1.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-1.c new file mode 100644 index 00000000000..12398924dba --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-1.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-split-paths-details " } */ + +#include +#include + +#define RGBMAX 255 + +int +test() +{ + int i, Pels; + unsigned char sum = 0; + unsigned char xr, xg, xb; + unsigned char xc, xm, xy, xk; + unsigned char *ReadPtr, *EritePtr; + + ReadPtr = ( unsigned char *) malloc (sizeof (unsigned char) * 100); + EritePtr = ( unsigned char *) malloc (sizeof (unsigned char) * 100); + + for (i = 0; i < 100;i++) + { + ReadPtr[i] = 100 - i; + } + + for (i = 0; i < 100; i++) + { + xr = *ReadPtr++; + xg = *ReadPtr++; + xb = *ReadPtr++; + + xc = (unsigned char) (RGBMAX - xr); + xm = (unsigned char) (RGBMAX - xg); + xy = (unsigned char) (RGBMAX - xb); + + if (xc < xm) + { + xk = (unsigned char) (xc < xy ? xc : xy); + } + else + { + xk = (unsigned char) (xm < xy ? xm : xy); + } + + xc = (unsigned char) (xc - xk); + xm = (unsigned char) (xm - xk); + xy = (unsigned char) (xy - xk); + + *EritePtr++ = xc; + *EritePtr++ = xm; + *EritePtr++ = xy; + *EritePtr++ = xk; + sum += *EritePtr; + } + return sum; +} + +int +main() +{ + if (test() != 33) + abort(); + + return 0; +} + +/* { dg-final { scan-tree-dump "Duplicating join block" "split-paths" } } */ diff --git a/gcc/timevar.def b/gcc/timevar.def index b429faf6d55..45e3b709af1 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -252,6 +252,7 @@ DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") DEFTIMEVAR (TV_REE , "ree") DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue") DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2") +DEFTIMEVAR (TV_SPLIT_PATHS , "split paths") DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments") DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2") DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers") diff --git a/gcc/tracer.c b/gcc/tracer.c index 941dc204eee..c2dba4c2503 100644 --- a/gcc/tracer.c +++ b/gcc/tracer.c @@ -51,9 +51,9 @@ #include "tree-inline.h" #include "cfgloop.h" #include "fibonacci_heap.h" +#include "tracer.h" static int count_insns (basic_block); -static bool ignore_bb_p (const_basic_block); static bool better_p (const_edge, const_edge); static edge find_best_successor (basic_block); static edge find_best_predecessor (basic_block); @@ -85,7 +85,7 @@ bb_seen_p (basic_block bb) } /* Return true if we should ignore the basic block for purposes of tracing. */ -static bool +bool ignore_bb_p (const_basic_block bb) { if (bb->index < NUM_FIXED_BLOCKS) @@ -226,6 +226,24 @@ find_trace (basic_block bb, basic_block *trace) return i; } +/* Duplicate block BB2, placing it after BB in the CFG. Return the + newly created block. */ +basic_block +transform_duplicate (basic_block bb, basic_block bb2) +{ + edge e; + basic_block copy; + + e = find_edge (bb, bb2); + + copy = duplicate_block (bb2, e, bb); + flush_pending_stmts (e); + + add_phi_args_after_copy (©, 1, NULL); + + return (copy); +} + /* Look for basic blocks in frequency order, construct traces and tail duplicate if profitable. */ @@ -321,17 +339,8 @@ tail_duplicate (void) entries or at least rotate the loop. */ && bb2->loop_father->header != bb2) { - edge e; - basic_block copy; - nduplicated += counts [bb2->index]; - - e = find_edge (bb, bb2); - - copy = duplicate_block (bb2, e, bb); - flush_pending_stmts (e); - - add_phi_args_after_copy (©, 1, NULL); + basic_block copy = transform_duplicate (bb, bb2); /* Reconsider the original copy of block we've duplicated. Removing the most common predecessor may make it to be diff --git a/gcc/tracer.h b/gcc/tracer.h new file mode 100644 index 00000000000..cd1792a73fb --- /dev/null +++ b/gcc/tracer.h @@ -0,0 +1,26 @@ +/* Header file for Tracer. + Copyright (C) 2015 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_TRACER_H +#define GCC_TRACER_H + +extern basic_block transform_duplicate (basic_block bb, basic_block bb2); +extern bool ignore_bb_p (const_basic_block bb); + +#endif /* GCC_TRACER_H */ diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 49e22a9d091..da677617826 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -390,6 +390,7 @@ extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_split_paths (gcc::context *ctxt); extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_build_alias (gcc::context *ctxt); -- 2.30.2