From 912442c25dfc470ad8fc11d29d9a5b1e8dbcd042 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 18 Nov 2015 18:33:38 +0000 Subject: [PATCH] nvptx.c (bb_pair_t, [...]): New types. gcc/ * config/nvptx/nvptx.c (bb_pair_t, bb_pair_vec_t): New types. (pseudo_node_t, struct bracket, bracket_vec_t): New types. (struct bb_sese): New struct. (bb_sese::~bb_sese, bb_sese::append, bb_sese::remove): New. (BB_GET_SESE, BB_SET_SESE): Define. (nvptx_sese_number, nvptx_sese_pseudo, nvptx_sese_color): New. (nvptx_find_sese): New. (nvptx_neuter_pars): Find SESE regions when optimizing. gcc/testsuite/ * gcc.dg/goacc/nvptx-sese-1.c: New. From-SVN: r230561 --- gcc/ChangeLog | 11 + gcc/config/nvptx/nvptx.c | 657 +++++++++++++++++++++- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/goacc/nvptx-sese-1.c | 35 ++ 4 files changed, 702 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/goacc/nvptx-sese-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e266e105af3..1f566825407 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2015-11-18 Nathan Sidwell + + * config/nvptx/nvptx.c (bb_pair_t, bb_pair_vec_t): New types. + (pseudo_node_t, struct bracket, bracket_vec_t): New types. + (struct bb_sese): New struct. + (bb_sese::~bb_sese, bb_sese::append, bb_sese::remove): New. + (BB_GET_SESE, BB_SET_SESE): Define. + (nvptx_sese_number, nvptx_sese_pseudo, nvptx_sese_color): New. + (nvptx_find_sese): New. + (nvptx_neuter_pars): Find SESE regions when optimizing. + 2015-11-18 Alan Modra * config/rs6000/rs6000.c (use_toc_relative_ref): Ignore diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 4436ac4de94..211f4357428 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2605,6 +2605,631 @@ nvptx_discover_pars (bb_insn_map_t *map) return par; } +/* Analyse a group of BBs within a partitioned region and create N + Single-Entry-Single-Exit regions. Some of those regions will be + trivial ones consisting of a single BB. The blocks of a + partitioned region might form a set of disjoint graphs -- because + the region encloses a differently partitoned sub region. + + We use the linear time algorithm described in 'Finding Regions Fast: + Single Entry Single Exit and control Regions in Linear Time' + Johnson, Pearson & Pingali. That algorithm deals with complete + CFGs, where a back edge is inserted from END to START, and thus the + problem becomes one of finding equivalent loops. + + In this case we have a partial CFG. We complete it by redirecting + any incoming edge to the graph to be from an arbitrary external BB, + and similarly redirecting any outgoing edge to be to that BB. + Thus we end up with a closed graph. + + The algorithm works by building a spanning tree of an undirected + graph and keeping track of back edges from nodes further from the + root in the tree to nodes nearer to the root in the tree. In the + description below, the root is up and the tree grows downwards. + + We avoid having to deal with degenerate back-edges to the same + block, by splitting each BB into 3 -- one for input edges, one for + the node itself and one for the output edges. Such back edges are + referred to as 'Brackets'. Cycle equivalent nodes will have the + same set of brackets. + + Determining bracket equivalency is done by maintaining a list of + brackets in such a manner that the list length and final bracket + uniquely identify the set. + + We use coloring to mark all BBs with cycle equivalency with the + same color. This is the output of the 'Finding Regions Fast' + algorithm. Notice it doesn't actually find the set of nodes within + a particular region, just unorderd sets of nodes that are the + entries and exits of SESE regions. + + After determining cycle equivalency, we need to find the minimal + set of SESE regions. Do this with a DFS coloring walk of the + complete graph. We're either 'looking' or 'coloring'. When + looking, and we're in the subgraph, we start coloring the color of + the current node, and remember that node as the start of the + current color's SESE region. Every time we go to a new node, we + decrement the count of nodes with thet color. If it reaches zero, + we remember that node as the end of the current color's SESE region + and return to 'looking'. Otherwise we color the node the current + color. + + This way we end up with coloring the inside of non-trivial SESE + regions with the color of that region. */ + +/* A pair of BBs. We use this to represent SESE regions. */ +typedef std::pair bb_pair_t; +typedef auto_vec bb_pair_vec_t; + +/* A node in the undirected CFG. The discriminator SECOND indicates just + above or just below the BB idicated by FIRST. */ +typedef std::pair pseudo_node_t; + +/* A bracket indicates an edge towards the root of the spanning tree of the + undirected graph. Each bracket has a color, determined + from the currrent set of brackets. */ +struct bracket +{ + pseudo_node_t back; /* Back target */ + + /* Current color and size of set. */ + unsigned color; + unsigned size; + + bracket (pseudo_node_t back_) + : back (back_), color (~0u), size (~0u) + { + } + + unsigned get_color (auto_vec &color_counts, unsigned length) + { + if (length != size) + { + size = length; + color = color_counts.length (); + color_counts.quick_push (0); + } + color_counts[color]++; + return color; + } +}; + +typedef auto_vec bracket_vec_t; + +/* Basic block info for finding SESE regions. */ + +struct bb_sese +{ + int node; /* Node number in spanning tree. */ + int parent; /* Parent node number. */ + + /* The algorithm splits each node A into Ai, A', Ao. The incoming + edges arrive at pseudo-node Ai and the outgoing edges leave at + pseudo-node Ao. We have to remember which way we arrived at a + particular node when generating the spanning tree. dir > 0 means + we arrived at Ai, dir < 0 means we arrived at Ao. */ + int dir; + + /* Lowest numbered pseudo-node reached via a backedge from thsis + node, or any descendant. */ + pseudo_node_t high; + + int color; /* Cycle-equivalence color */ + + /* Stack of brackets for this node. */ + bracket_vec_t brackets; + + bb_sese (unsigned node_, unsigned p, int dir_) + :node (node_), parent (p), dir (dir_) + { + } + ~bb_sese (); + + /* Push a bracket ending at BACK. */ + void push (const pseudo_node_t &back) + { + if (dump_file) + fprintf (dump_file, "Pushing backedge %d:%+d\n", + back.first ? back.first->index : 0, back.second); + brackets.safe_push (bracket (back)); + } + + void append (bb_sese *child); + void remove (const pseudo_node_t &); + + /* Set node's color. */ + void set_color (auto_vec &color_counts) + { + color = brackets.last ().get_color (color_counts, brackets.length ()); + } +}; + +bb_sese::~bb_sese () +{ +} + +/* Destructively append CHILD's brackets. */ + +void +bb_sese::append (bb_sese *child) +{ + if (int len = child->brackets.length ()) + { + int ix; + + if (dump_file) + { + for (ix = 0; ix < len; ix++) + { + const pseudo_node_t &pseudo = child->brackets[ix].back; + fprintf (dump_file, "Appending (%d)'s backedge %d:%+d\n", + child->node, pseudo.first ? pseudo.first->index : 0, + pseudo.second); + } + } + if (!brackets.length ()) + std::swap (brackets, child->brackets); + else + { + brackets.reserve (len); + for (ix = 0; ix < len; ix++) + brackets.quick_push (child->brackets[ix]); + } + } +} + +/* Remove brackets that terminate at PSEUDO. */ + +void +bb_sese::remove (const pseudo_node_t &pseudo) +{ + unsigned removed = 0; + int len = brackets.length (); + + for (int ix = 0; ix < len; ix++) + { + if (brackets[ix].back == pseudo) + { + if (dump_file) + fprintf (dump_file, "Removing backedge %d:%+d\n", + pseudo.first ? pseudo.first->index : 0, pseudo.second); + removed++; + } + else if (removed) + brackets[ix-removed] = brackets[ix]; + } + while (removed--) + brackets.pop (); +} + +/* Accessors for BB's aux pointer. */ +#define BB_SET_SESE(B, S) ((B)->aux = (S)) +#define BB_GET_SESE(B) ((bb_sese *)(B)->aux) + +/* DFS walk creating SESE data structures. Only cover nodes with + BB_VISITED set. Append discovered blocks to LIST. We number in + increments of 3 so that the above and below pseudo nodes can be + implicitly numbered too. */ + +static int +nvptx_sese_number (int n, int p, int dir, basic_block b, + auto_vec *list) +{ + if (BB_GET_SESE (b)) + return n; + + if (dump_file) + fprintf (dump_file, "Block %d(%d), parent (%d), orientation %+d\n", + b->index, n, p, dir); + + BB_SET_SESE (b, new bb_sese (n, p, dir)); + p = n; + + n += 3; + list->quick_push (b); + + /* First walk the nodes on the 'other side' of this node, then walk + the nodes on the same side. */ + for (unsigned ix = 2; ix; ix--) + { + vec *edges = dir > 0 ? b->succs : b->preds; + size_t offset = (dir > 0 ? offsetof (edge_def, dest) + : offsetof (edge_def, src)); + edge e; + edge_iterator (ei); + + FOR_EACH_EDGE (e, ei, edges) + { + basic_block target = *(basic_block *)((char *)e + offset); + + if (target->flags & BB_VISITED) + n = nvptx_sese_number (n, p, dir, target, list); + } + dir = -dir; + } + return n; +} + +/* Process pseudo node above (DIR < 0) or below (DIR > 0) ME. + EDGES are the outgoing edges and OFFSET is the offset to the src + or dst block on the edges. */ + +static void +nvptx_sese_pseudo (basic_block me, bb_sese *sese, int depth, int dir, + vec *edges, size_t offset) +{ + edge e; + edge_iterator (ei); + int hi_back = depth; + pseudo_node_t node_back (0, depth); + int hi_child = depth; + pseudo_node_t node_child (0, depth); + basic_block child = NULL; + unsigned num_children = 0; + int usd = -dir * sese->dir; + + if (dump_file) + fprintf (dump_file, "\nProcessing %d(%d) %+d\n", + me->index, sese->node, dir); + + if (dir < 0) + { + /* This is the above pseudo-child. It has the BB itself as an + additional child node. */ + node_child = sese->high; + hi_child = node_child.second; + if (node_child.first) + hi_child += BB_GET_SESE (node_child.first)->node; + num_children++; + } + + /* Examine each edge. + - if it is a child (a) append its bracket list and (b) record + whether it is the child with the highest reaching bracket. + - if it is an edge to ancestor, record whether it's the highest + reaching backlink. */ + FOR_EACH_EDGE (e, ei, edges) + { + basic_block target = *(basic_block *)((char *)e + offset); + + if (bb_sese *t_sese = BB_GET_SESE (target)) + { + if (t_sese->parent == sese->node && !(t_sese->dir + usd)) + { + /* Child node. Append its bracket list. */ + num_children++; + sese->append (t_sese); + + /* Compare it's hi value. */ + int t_hi = t_sese->high.second; + + if (basic_block child_hi_block = t_sese->high.first) + t_hi += BB_GET_SESE (child_hi_block)->node; + + if (hi_child > t_hi) + { + hi_child = t_hi; + node_child = t_sese->high; + child = target; + } + } + else if (t_sese->node < sese->node + dir + && !(dir < 0 && sese->parent == t_sese->node)) + { + /* Non-parental ancestor node -- a backlink. */ + int d = usd * t_sese->dir; + int back = t_sese->node + d; + + if (hi_back > back) + { + hi_back = back; + node_back = pseudo_node_t (target, d); + } + } + } + else + { /* Fallen off graph, backlink to entry node. */ + hi_back = 0; + node_back = pseudo_node_t (0, 0); + } + } + + /* Remove any brackets that terminate at this pseudo node. */ + sese->remove (pseudo_node_t (me, dir)); + + /* Now push any backlinks from this pseudo node. */ + FOR_EACH_EDGE (e, ei, edges) + { + basic_block target = *(basic_block *)((char *)e + offset); + if (bb_sese *t_sese = BB_GET_SESE (target)) + { + if (t_sese->node < sese->node + dir + && !(dir < 0 && sese->parent == t_sese->node)) + /* Non-parental ancestor node - backedge from me. */ + sese->push (pseudo_node_t (target, usd * t_sese->dir)); + } + else + { + /* back edge to entry node */ + sese->push (pseudo_node_t (0, 0)); + } + } + + /* If this node leads directly or indirectly to a no-return region of + the graph, then fake a backedge to entry node. */ + if (!sese->brackets.length () || !edges || !edges->length ()) + { + hi_back = 0; + node_back = pseudo_node_t (0, 0); + sese->push (node_back); + } + + /* Record the highest reaching backedge from us or a descendant. */ + sese->high = hi_back < hi_child ? node_back : node_child; + + if (num_children > 1) + { + /* There is more than one child -- this is a Y shaped piece of + spanning tree. We have to insert a fake backedge from this + node to the highest ancestor reached by not-the-highest + reaching child. Note that there may be multiple children + with backedges to the same highest node. That's ok and we + insert the edge to that highest node. */ + hi_child = depth; + if (dir < 0 && child) + { + node_child = sese->high; + hi_child = node_child.second; + if (node_child.first) + hi_child += BB_GET_SESE (node_child.first)->node; + } + + FOR_EACH_EDGE (e, ei, edges) + { + basic_block target = *(basic_block *)((char *)e + offset); + + if (target == child) + /* Ignore the highest child. */ + continue; + + bb_sese *t_sese = BB_GET_SESE (target); + if (!t_sese) + continue; + if (t_sese->parent != sese->node) + /* Not a child. */ + continue; + + /* Compare its hi value. */ + int t_hi = t_sese->high.second; + + if (basic_block child_hi_block = t_sese->high.first) + t_hi += BB_GET_SESE (child_hi_block)->node; + + if (hi_child > t_hi) + { + hi_child = t_hi; + node_child = t_sese->high; + } + } + + sese->push (node_child); + } +} + + +/* DFS walk of BB graph. Color node BLOCK according to COLORING then + proceed to successors. Set SESE entry and exit nodes of + REGIONS. */ + +static void +nvptx_sese_color (auto_vec &color_counts, bb_pair_vec_t ®ions, + basic_block block, int coloring) +{ + bb_sese *sese = BB_GET_SESE (block); + + if (block->flags & BB_VISITED) + { + /* If we've already encountered this block, either we must not + be coloring, or it must have been colored the current color. */ + gcc_assert (coloring < 0 || (sese && coloring == sese->color)); + return; + } + + block->flags |= BB_VISITED; + + if (sese) + { + if (coloring < 0) + { + /* Start coloring a region. */ + regions[sese->color].first = block; + coloring = sese->color; + } + + if (!--color_counts[sese->color] && sese->color == coloring) + { + /* Found final block of SESE region. */ + regions[sese->color].second = block; + coloring = -1; + } + else + /* Color the node, so we can assert on revisiting the node + that the graph is indeed SESE. */ + sese->color = coloring; + } + else + /* Fallen off the subgraph, we cannot be coloring. */ + gcc_assert (coloring < 0); + + /* Walk each successor block. */ + if (block->succs && block->succs->length ()) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, block->succs) + nvptx_sese_color (color_counts, regions, e->dest, coloring); + } + else + gcc_assert (coloring < 0); +} + +/* Find minimal set of SESE regions covering BLOCKS. REGIONS might + end up with NULL entries in it. */ + +static void +nvptx_find_sese (auto_vec &blocks, bb_pair_vec_t ®ions) +{ + basic_block block; + int ix; + + /* First clear each BB of the whole function. */ + FOR_EACH_BB_FN (block, cfun) + { + block->flags &= ~BB_VISITED; + BB_SET_SESE (block, 0); + } + block = EXIT_BLOCK_PTR_FOR_FN (cfun); + block->flags &= ~BB_VISITED; + BB_SET_SESE (block, 0); + block = ENTRY_BLOCK_PTR_FOR_FN (cfun); + block->flags &= ~BB_VISITED; + BB_SET_SESE (block, 0); + + /* Mark blocks in the function that are in this graph. */ + for (ix = 0; blocks.iterate (ix, &block); ix++) + block->flags |= BB_VISITED; + + /* Counts of nodes assigned to each color. There cannot be more + colors than blocks (and hopefully there will be fewer). */ + auto_vec color_counts; + color_counts.reserve (blocks.length ()); + + /* Worklist of nodes in the spanning tree. Again, there cannot be + more nodes in the tree than blocks (there will be fewer if the + CFG of blocks is disjoint). */ + auto_vec spanlist; + spanlist.reserve (blocks.length ()); + + /* Make sure every block has its cycle class determined. */ + for (ix = 0; blocks.iterate (ix, &block); ix++) + { + if (BB_GET_SESE (block)) + /* We already met this block in an earlier graph solve. */ + continue; + + if (dump_file) + fprintf (dump_file, "Searching graph starting at %d\n", block->index); + + /* Number the nodes reachable from block initial DFS order. */ + int depth = nvptx_sese_number (2, 0, +1, block, &spanlist); + + /* Now walk in reverse DFS order to find cycle equivalents. */ + while (spanlist.length ()) + { + block = spanlist.pop (); + bb_sese *sese = BB_GET_SESE (block); + + /* Do the pseudo node below. */ + nvptx_sese_pseudo (block, sese, depth, +1, + sese->dir > 0 ? block->succs : block->preds, + (sese->dir > 0 ? offsetof (edge_def, dest) + : offsetof (edge_def, src))); + sese->set_color (color_counts); + /* Do the pseudo node above. */ + nvptx_sese_pseudo (block, sese, depth, -1, + sese->dir < 0 ? block->succs : block->preds, + (sese->dir < 0 ? offsetof (edge_def, dest) + : offsetof (edge_def, src))); + } + if (dump_file) + fprintf (dump_file, "\n"); + } + + if (dump_file) + { + unsigned count; + const char *comma = ""; + + fprintf (dump_file, "Found %d cycle equivalents\n", + color_counts.length ()); + for (ix = 0; color_counts.iterate (ix, &count); ix++) + { + fprintf (dump_file, "%s%d[%d]={", comma, ix, count); + + comma = ""; + for (unsigned jx = 0; blocks.iterate (jx, &block); jx++) + if (BB_GET_SESE (block)->color == ix) + { + block->flags |= BB_VISITED; + fprintf (dump_file, "%s%d", comma, block->index); + comma=","; + } + fprintf (dump_file, "}"); + comma = ", "; + } + fprintf (dump_file, "\n"); + } + + /* Now we've colored every block in the subgraph. We now need to + determine the minimal set of SESE regions that cover that + subgraph. Do this with a DFS walk of the complete function. + During the walk we're either 'looking' or 'coloring'. When we + reach the last node of a particular color, we stop coloring and + return to looking. */ + + /* There cannot be more SESE regions than colors. */ + regions.reserve (color_counts.length ()); + for (ix = color_counts.length (); ix--;) + regions.quick_push (bb_pair_t (0, 0)); + + for (ix = 0; blocks.iterate (ix, &block); ix++) + block->flags &= ~BB_VISITED; + + nvptx_sese_color (color_counts, regions, ENTRY_BLOCK_PTR_FOR_FN (cfun), -1); + + if (dump_file) + { + const char *comma = ""; + int len = regions.length (); + + fprintf (dump_file, "SESE regions:"); + for (ix = 0; ix != len; ix++) + { + basic_block from = regions[ix].first; + basic_block to = regions[ix].second; + + if (from) + { + fprintf (dump_file, "%s %d{%d", comma, ix, from->index); + if (to != from) + fprintf (dump_file, "->%d", to->index); + + int color = BB_GET_SESE (from)->color; + + /* Print the blocks within the region (excluding ends). */ + FOR_EACH_BB_FN (block, cfun) + { + bb_sese *sese = BB_GET_SESE (block); + + if (sese && sese->color == color + && block != from && block != to) + fprintf (dump_file, ".%d", block->index); + } + fprintf (dump_file, "}"); + } + comma = ","; + } + fprintf (dump_file, "\n\n"); + } + + for (ix = 0; blocks.iterate (ix, &block); ix++) + delete BB_GET_SESE (block); +} + +#undef BB_SET_SESE +#undef BB_GET_SESE + /* Propagate live state at the start of a partitioned region. BLOCK provides the live register information, and might not contain INSN. Propagation is inserted just after INSN. RW indicates whether @@ -3086,14 +3711,36 @@ nvptx_neuter_pars (parallel *par, unsigned modes, unsigned outer) if (neuter_mask) { - int ix; - int len = par->blocks.length (); + int ix, len; - for (ix = 0; ix != len; ix++) + if (nvptx_optimize) + { + /* Neuter whole SESE regions. */ + bb_pair_vec_t regions; + + nvptx_find_sese (par->blocks, regions); + len = regions.length (); + for (ix = 0; ix != len; ix++) + { + basic_block from = regions[ix].first; + basic_block to = regions[ix].second; + + if (from) + nvptx_single (neuter_mask, from, to); + else + gcc_assert (!to); + } + } + else { - basic_block block = par->blocks[ix]; + /* Neuter each BB individually. */ + len = par->blocks.length (); + for (ix = 0; ix != len; ix++) + { + basic_block block = par->blocks[ix]; - nvptx_single (neuter_mask, block, block); + nvptx_single (neuter_mask, block, block); + } } } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b4e2db51fd2..e9a5adf4703 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2015-11-18 Nathan Sidwell + + * gcc.dg/goacc/nvptx-sese-1.c: New. + 2015-11-18 Eric Botcazou * gnat.dg/renaming7.adb: New test. diff --git a/gcc/testsuite/gcc.dg/goacc/nvptx-sese-1.c b/gcc/testsuite/gcc.dg/goacc/nvptx-sese-1.c new file mode 100644 index 00000000000..7e67fe78f06 --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc/nvptx-sese-1.c @@ -0,0 +1,35 @@ +/* { dg-do link } */ +/* { dg-require-effective-target offload_nvptx } */ +/* { dg-options "-fopenacc -O2 -foffload=-fdump-rtl-mach\\ -dumpbase\\ nvptx-sese-1.c\\ -Wa,--no-verify" } */ + +#pragma acc routine seq +int __attribute__((noinline)) foo (int x) +{ + return x & 2; +} + +int main () +{ + int r = 0; + +#pragma acc parallel copy(r) vector_length(32) + { +#pragma acc loop vector reduction (+:r) + for (int i = 00; i < 40; i++) + r += i; + + /* This piece is a multi-block SESE region */ + if (foo (r)) + r *= 2; + + if (r & 1) /* to here. */ +#pragma acc loop vector reduction (+:r) + for (int i = 00; i < 40; i++) + r += i; + } + + return 0; +} + +/* Match {N->N(.N)+} */ +/* { dg-final { scan-rtl-dump "SESE regions:.* \[0-9\]+{\[0-9\]+->\[0-9\]+(\\.\[0-9\]+)+}" "mach" } } */ -- 2.30.2