From eef99cd9567d40f72654faf6f21fdc65c63c9c3d Mon Sep 17 00:00:00 2001 From: Giuliano Belinassi Date: Mon, 18 Nov 2019 20:05:16 +0000 Subject: [PATCH] Refactor tree-loop-distribution.c for thread safety This patch refactors tree-loop-distribution.c for thread safety without use of C11 __thread feature. All global variables were moved to `class loop_distribution` which is initialized at ::execute time. From-SVN: r278421 --- gcc/ChangeLog | 33 ++ gcc/cfgloop.c | 13 + gcc/cfgloop.h | 2 + gcc/tree-loop-distribution.c | 663 +++++++++++++++++++++-------------- 4 files changed, 441 insertions(+), 270 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4e381af329b..0c71cef37ac 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2019-11-18 Giuliano Belinassi + + * cfgloop.c (get_loop_body_in_custom_order): New. + * cfgloop.h (get_loop_body_in_custom_order): New prototype. + * tree-loop-distribution.c (class loop_distribution): New. + (bb_top_order_cmp): Remove. + (bb_top_order_cmp_r): New. + (create_rdg_vertices): Move into class loop_distribution. + (stmts_from_loop): Same as above. + (update_for_merge): Same as above. + (partition_merge_into): Same as above. + (get_data_dependence): Same as above. + (data_dep_in_cycle_p): Same as above. + (update_type_for_merge): Same as above. + (build_rdg_partition_for-vertex): Same as above. + (classify_builtin_ldst): Same as above. + (classify_partition): Same as above. + (share_memory_accesses): Same as above. + (rdg_build_partitions): Same as above. + (pg_add_dependence_edges): Same as above. + (build_partition_graph): Same as above. + (merge_dep_scc_partitions): Same as above. + (break_alias_scc_partitions): Same as above. + (finalize_partitions): Same as above. + (distribute_loop): Same as above. + (bb_top_order_init): New method + (bb_top_order_destroy): New method. + (get_bb_top_order_index_size): New method. + (get_bb_top_order_index_index): New method. + (get_bb_top_order_index_index): New method. + (loop_distribution::execute): New method. + (pass_loop_distribution::execute): Instantiate loop_distribution. + 2019-11-18 Jan Hubicka PR ipa/92508 diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index 4ad1f658708..308ed7d18d0 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -980,6 +980,19 @@ get_loop_body_in_custom_order (const class loop *loop, return bbs; } +/* Same as above, but use gcc_sort_r instead of qsort. */ + +basic_block * +get_loop_body_in_custom_order (const class loop *loop, void *data, + int (*bb_comparator) (const void *, const void *, void *)) +{ + basic_block *bbs = get_loop_body (loop); + + gcc_sort_r (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator, data); + + return bbs; +} + /* Get body of a LOOP in breadth first sort order. */ basic_block * diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 0b0154ffd7b..6256cc01ff4 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -376,6 +376,8 @@ extern basic_block *get_loop_body_in_dom_order (const class loop *); extern basic_block *get_loop_body_in_bfs_order (const class loop *); extern basic_block *get_loop_body_in_custom_order (const class loop *, int (*) (const void *, const void *)); +extern basic_block *get_loop_body_in_custom_order (const class loop *, void *, + int (*) (const void *, const void *, void *)); extern vec get_loop_exit_edges (const class loop *); extern edge single_exit (const class loop *); diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 28eeeb93174..839abb733ae 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -154,21 +154,10 @@ ddr_hasher::equal (const data_dependence_relation *ddr1, return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2)); } -/* The loop (nest) to be distributed. */ -static vec loop_nest; -/* Vector of data references in the loop to be distributed. */ -static vec datarefs_vec; -/* If there is nonaddressable data reference in above vector. */ -static bool has_nonaddressable_dataref_p; - -/* Store index of data reference in aux field. */ #define DR_INDEX(dr) ((uintptr_t) (dr)->aux) -/* Hash table for data dependence relation in the loop to be distributed. */ -static hash_table *ddrs_table; - /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ struct rdg_vertex { @@ -215,6 +204,83 @@ struct rdg_edge #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type +/* Kind of distributed loop. */ +enum partition_kind { + PKIND_NORMAL, + /* Partial memset stands for a paritition can be distributed into a loop + of memset calls, rather than a single memset call. It's handled just + like a normal parition, i.e, distributed as separate loop, no memset + call is generated. + + Note: This is a hacking fix trying to distribute ZERO-ing stmt in a + loop nest as deep as possible. As a result, parloop achieves better + parallelization by parallelizing deeper loop nest. This hack should + be unnecessary and removed once distributed memset can be understood + and analyzed in data reference analysis. See PR82604 for more. */ + PKIND_PARTIAL_MEMSET, + PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE +}; + +/* Type of distributed loop. */ +enum partition_type { + /* The distributed loop can be executed parallelly. */ + PTYPE_PARALLEL = 0, + /* The distributed loop has to be executed sequentially. */ + PTYPE_SEQUENTIAL +}; + +/* Builtin info for loop distribution. */ +struct builtin_info +{ + /* data-references a kind != PKIND_NORMAL partition is about. */ + data_reference_p dst_dr; + data_reference_p src_dr; + /* Base address and size of memory objects operated by the builtin. Note + both dest and source memory objects must have the same size. */ + tree dst_base; + tree src_base; + tree size; + /* Base and offset part of dst_base after stripping constant offset. This + is only used in memset builtin distribution for now. */ + tree dst_base_base; + unsigned HOST_WIDE_INT dst_base_offset; +}; + +/* Partition for loop distribution. */ +struct partition +{ + /* Statements of the partition. */ + bitmap stmts; + /* True if the partition defines variable which is used outside of loop. */ + bool reduction_p; + location_t loc; + enum partition_kind kind; + enum partition_type type; + /* Data references in the partition. */ + bitmap datarefs; + /* Information of builtin parition. */ + struct builtin_info *builtin; +}; + +/* Partitions are fused because of different reasons. */ +enum fuse_type +{ + FUSE_NON_BUILTIN = 0, + FUSE_REDUCTION = 1, + FUSE_SHARE_REF = 2, + FUSE_SAME_SCC = 3, + FUSE_FINALIZE = 4 +}; + +/* Description on different fusing reason. */ +static const char *fuse_message[] = { + "they are non-builtins", + "they have reductions", + "they have shared memory refs", + "they are in the same dependence scc", + "there is no point to distribute loop"}; + + /* Dump vertex I in RDG to FILE. */ static void @@ -435,11 +501,205 @@ create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop) } } -/* Build the vertices of the reduced dependence graph RDG. Return false - if that failed. */ -static bool -create_rdg_vertices (struct graph *rdg, vec stmts, loop_p loop) +class loop_distribution +{ + private: + /* The loop (nest) to be distributed. */ + vec loop_nest; + + /* Vector of data references in the loop to be distributed. */ + vec datarefs_vec; + + /* If there is nonaddressable data reference in above vector. */ + bool has_nonaddressable_dataref_p; + + /* Store index of data reference in aux field. */ + + /* Hash table for data dependence relation in the loop to be distributed. */ + hash_table *ddrs_table; + + /* Array mapping basic block's index to its topological order. */ + int *bb_top_order_index; + /* And size of the array. */ + int bb_top_order_index_size; + + /* Build the vertices of the reduced dependence graph RDG. Return false + if that failed. */ + bool create_rdg_vertices (struct graph *rdg, vec stmts, loop_p loop); + + /* Initialize STMTS with all the statements of LOOP. We use topological + order to discover all statements. The order is important because + generate_loops_for_partition is using the same traversal for identifying + statements in loop copies. */ + void stmts_from_loop (class loop *loop, vec *stmts); + + + /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of + LOOP, and one edge per flow dependence or control dependence from control + dependence CD. During visiting each statement, data references are also + collected and recorded in global data DATAREFS_VEC. */ + struct graph * build_rdg (class loop *loop, control_dependences *cd); + +/* Merge PARTITION into the partition DEST. RDG is the reduced dependence + graph and we update type for result partition if it is non-NULL. */ + void partition_merge_into (struct graph *rdg, + partition *dest, partition *partition, + enum fuse_type ft); + + + /* Return data dependence relation for data references A and B. The two + data references must be in lexicographic order wrto reduced dependence + graph RDG. We firstly try to find ddr from global ddr hash table. If + it doesn't exist, compute the ddr and cache it. */ + data_dependence_relation * get_data_dependence (struct graph *rdg, + data_reference_p a, + data_reference_p b); + + + /* In reduced dependence graph RDG for loop distribution, return true if + dependence between references DR1 and DR2 leads to a dependence cycle + and such dependence cycle can't be resolved by runtime alias check. */ + bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1, + data_reference_p dr2); + + + /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update + PARTITION1's type after merging PARTITION2 into PARTITION1. */ + void update_type_for_merge (struct graph *rdg, + partition *partition1, partition *partition2); + + + /* Returns a partition with all the statements needed for computing + the vertex V of the RDG, also including the loop exit conditions. */ + partition *build_rdg_partition_for_vertex (struct graph *rdg, int v); + + /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify + if it forms builtin memcpy or memmove call. */ + void classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition, + data_reference_p dst_dr, data_reference_p src_dr); + + /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. + For the moment we detect memset, memcpy and memmove patterns. Bitmap + STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. + Returns true if there is a reduction in all partitions and we + possibly did not mark PARTITION as having one for this reason. */ + + bool + classify_partition (loop_p loop, + struct graph *rdg, partition *partition, + bitmap stmt_in_all_partitions); + + + /* Returns true when PARTITION1 and PARTITION2 access the same memory + object in RDG. */ + bool share_memory_accesses (struct graph *rdg, + partition *partition1, partition *partition2); + + /* For each seed statement in STARTING_STMTS, this function builds + partition for it by adding depended statements according to RDG. + All partitions are recorded in PARTITIONS. */ + void rdg_build_partitions (struct graph *rdg, + vec starting_stmts, + vec *partitions); + + /* Compute partition dependence created by the data references in DRS1 + and DRS2, modify and return DIR according to that. IF ALIAS_DDR is + not NULL, we record dependence introduced by possible alias between + two data references in ALIAS_DDRS; otherwise, we simply ignore such + dependence as if it doesn't exist at all. */ + int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1, + bitmap drs2, vec *alias_ddrs); + + + /* Build and return partition dependence graph for PARTITIONS. RDG is + reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P + is true, data dependence caused by possible alias between references + is ignored, as if it doesn't exist at all; otherwise all depdendences + are considered. */ + struct graph *build_partition_graph (struct graph *rdg, + vec *partitions, + bool ignore_alias_p); + + /* Given reduced dependence graph RDG merge strong connected components + of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by + possible alias between references is ignored, as if it doesn't exist + at all; otherwise all depdendences are considered. */ + void merge_dep_scc_partitions (struct graph *rdg, vec + *partitions, bool ignore_alias_p); + +/* This is the main function breaking strong conected components in + PARTITIONS giving reduced depdendence graph RDG. Store data dependence + relations for runtime alias check in ALIAS_DDRS. */ + void break_alias_scc_partitions (struct graph *rdg, vec + *partitions, vec *alias_ddrs); + + + /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. + ALIAS_DDRS contains ddrs which need runtime alias check. */ + void finalize_partitions (class loop *loop, vec + *partitions, vec *alias_ddrs); + + /* Distributes the code from LOOP in such a way that producer statements + are placed before consumer statements. Tries to separate only the + statements from STMTS into separate loops. Returns the number of + distributed loops. Set NB_CALLS to number of generated builtin calls. + Set *DESTROY_P to whether LOOP needs to be destroyed. */ + int distribute_loop (class loop *loop, vec stmts, + control_dependences *cd, int *nb_calls, bool *destroy_p, + bool only_patterns_p); + + /* Compute topological order for basic blocks. Topological order is + needed because data dependence is computed for data references in + lexicographical order. */ + void bb_top_order_init (void); + + void bb_top_order_destroy (void); + + public: + + /* Getter for bb_top_order. */ + + inline int get_bb_top_order_index_size (void) + { + return bb_top_order_index_size; + } + + inline int get_bb_top_order_index (int i) + { + return bb_top_order_index[i]; + } + + unsigned int execute (function *fun); +}; + + +/* If X has a smaller topological sort number than Y, returns -1; + if greater, returns 1. */ +static int +bb_top_order_cmp_r (const void *x, const void *y, void *loop) +{ + loop_distribution *_loop = + (loop_distribution *) loop; + + basic_block bb1 = *(const basic_block *) x; + basic_block bb2 = *(const basic_block *) y; + + int bb_top_order_index_size = _loop->get_bb_top_order_index_size (); + + gcc_assert (bb1->index < bb_top_order_index_size + && bb2->index < bb_top_order_index_size); + gcc_assert (bb1 == bb2 + || _loop->get_bb_top_order_index(bb1->index) + != _loop->get_bb_top_order_index(bb2->index)); + + return (_loop->get_bb_top_order_index(bb1->index) - + _loop->get_bb_top_order_index(bb2->index)); +} + +bool +loop_distribution::create_rdg_vertices (struct graph *rdg, vec stmts, + loop_p loop) { int i; gimple *stmt; @@ -476,39 +736,11 @@ create_rdg_vertices (struct graph *rdg, vec stmts, loop_p loop) return true; } -/* Array mapping basic block's index to its topological order. */ -static int *bb_top_order_index; -/* And size of the array. */ -static int bb_top_order_index_size; - -/* If X has a smaller topological sort number than Y, returns -1; - if greater, returns 1. */ - -static int -bb_top_order_cmp (const void *x, const void *y) -{ - basic_block bb1 = *(const basic_block *) x; - basic_block bb2 = *(const basic_block *) y; - - gcc_assert (bb1->index < bb_top_order_index_size - && bb2->index < bb_top_order_index_size); - gcc_assert (bb1 == bb2 - || bb_top_order_index[bb1->index] - != bb_top_order_index[bb2->index]); - - return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]); -} - -/* Initialize STMTS with all the statements of LOOP. We use topological - order to discover all statements. The order is important because - generate_loops_for_partition is using the same traversal for identifying - statements in loop copies. */ - -static void -stmts_from_loop (class loop *loop, vec *stmts) +void +loop_distribution::stmts_from_loop (class loop *loop, vec *stmts) { unsigned int i; - basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp); + basic_block *bbs = get_loop_body_in_custom_order (loop, this, bb_top_order_cmp_r); for (i = 0; i < loop->num_nodes; i++) { @@ -557,13 +789,8 @@ free_rdg (struct graph *rdg) free_graph (rdg); } -/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of - LOOP, and one edge per flow dependence or control dependence from control - dependence CD. During visiting each statement, data references are also - collected and recorded in global data DATAREFS_VEC. */ - -static struct graph * -build_rdg (class loop *loop, control_dependences *cd) +struct graph * +loop_distribution::build_rdg (class loop *loop, control_dependences *cd) { struct graph *rdg; @@ -586,65 +813,6 @@ build_rdg (class loop *loop, control_dependences *cd) } -/* Kind of distributed loop. */ -enum partition_kind { - PKIND_NORMAL, - /* Partial memset stands for a paritition can be distributed into a loop - of memset calls, rather than a single memset call. It's handled just - like a normal parition, i.e, distributed as separate loop, no memset - call is generated. - - Note: This is a hacking fix trying to distribute ZERO-ing stmt in a - loop nest as deep as possible. As a result, parloop achieves better - parallelization by parallelizing deeper loop nest. This hack should - be unnecessary and removed once distributed memset can be understood - and analyzed in data reference analysis. See PR82604 for more. */ - PKIND_PARTIAL_MEMSET, - PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE -}; - -/* Type of distributed loop. */ -enum partition_type { - /* The distributed loop can be executed parallelly. */ - PTYPE_PARALLEL = 0, - /* The distributed loop has to be executed sequentially. */ - PTYPE_SEQUENTIAL -}; - -/* Builtin info for loop distribution. */ -struct builtin_info -{ - /* data-references a kind != PKIND_NORMAL partition is about. */ - data_reference_p dst_dr; - data_reference_p src_dr; - /* Base address and size of memory objects operated by the builtin. Note - both dest and source memory objects must have the same size. */ - tree dst_base; - tree src_base; - tree size; - /* Base and offset part of dst_base after stripping constant offset. This - is only used in memset builtin distribution for now. */ - tree dst_base_base; - unsigned HOST_WIDE_INT dst_base_offset; -}; - -/* Partition for loop distribution. */ -struct partition -{ - /* Statements of the partition. */ - bitmap stmts; - /* True if the partition defines variable which is used outside of loop. */ - bool reduction_p; - location_t loc; - enum partition_kind kind; - enum partition_type type; - /* Data references in the partition. */ - bitmap datarefs; - /* Information of builtin parition. */ - struct builtin_info *builtin; -}; - - /* Allocate and initialize a partition from BITMAP. */ static partition * @@ -689,33 +857,9 @@ partition_reduction_p (partition *partition) return partition->reduction_p; } -/* Partitions are fused because of different reasons. */ -enum fuse_type -{ - FUSE_NON_BUILTIN = 0, - FUSE_REDUCTION = 1, - FUSE_SHARE_REF = 2, - FUSE_SAME_SCC = 3, - FUSE_FINALIZE = 4 -}; - -/* Description on different fusing reason. */ -static const char *fuse_message[] = { - "they are non-builtins", - "they have reductions", - "they have shared memory refs", - "they are in the same dependence scc", - "there is no point to distribute loop"}; - -static void -update_type_for_merge (struct graph *, partition *, partition *); - -/* Merge PARTITION into the partition DEST. RDG is the reduced dependence - graph and we update type for result partition if it is non-NULL. */ - -static void -partition_merge_into (struct graph *rdg, partition *dest, - partition *partition, enum fuse_type ft) +void +loop_distribution::partition_merge_into (struct graph *rdg, + partition *dest, partition *partition, enum fuse_type ft) { if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1201,13 +1345,9 @@ generate_code_for_partition (class loop *loop, return false; } -/* Return data dependence relation for data references A and B. The two - data references must be in lexicographic order wrto reduced dependence - graph RDG. We firstly try to find ddr from global ddr hash table. If - it doesn't exist, compute the ddr and cache it. */ - -static data_dependence_relation * -get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b) +data_dependence_relation * +loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a, + data_reference_p b) { struct data_dependence_relation ent, **slot; struct data_dependence_relation *ddr; @@ -1228,13 +1368,10 @@ get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b) return *slot; } -/* In reduced dependence graph RDG for loop distribution, return true if - dependence between references DR1 and DR2 leads to a dependence cycle - and such dependence cycle can't be resolved by runtime alias check. */ - -static bool -data_dep_in_cycle_p (struct graph *rdg, - data_reference_p dr1, data_reference_p dr2) +bool +loop_distribution::data_dep_in_cycle_p (struct graph *rdg, + data_reference_p dr1, + data_reference_p dr2) { struct data_dependence_relation *ddr; @@ -1264,12 +1401,10 @@ data_dep_in_cycle_p (struct graph *rdg, return true; } -/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update - PARTITION1's type after merging PARTITION2 into PARTITION1. */ - -static void -update_type_for_merge (struct graph *rdg, - partition *partition1, partition *partition2) +void +loop_distribution::update_type_for_merge (struct graph *rdg, + partition *partition1, + partition *partition2) { unsigned i, j; bitmap_iterator bi, bj; @@ -1297,11 +1432,8 @@ update_type_for_merge (struct graph *rdg, } } -/* Returns a partition with all the statements needed for computing - the vertex V of the RDG, also including the loop exit conditions. */ - -static partition * -build_rdg_partition_for_vertex (struct graph *rdg, int v) +partition * +loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v) { partition *partition = partition_alloc (); auto_vec nodes; @@ -1595,9 +1727,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr) /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify if it forms builtin memcpy or memmove call. */ -static void -classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition, - data_reference_p dst_dr, data_reference_p src_dr) +void +loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg, + partition *partition, + data_reference_p dst_dr, + data_reference_p src_dr) { tree base, size, src_base, src_size; auto_vec dst_steps, src_steps; @@ -1655,15 +1789,10 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition, return; } -/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. - For the moment we detect memset, memcpy and memmove patterns. Bitmap - STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. - Returns true if there is a reduction in all partitions and we - possibly did not mark PARTITION as having one for this reason. */ - -static bool -classify_partition (loop_p loop, struct graph *rdg, partition *partition, - bitmap stmt_in_all_partitions) +bool +loop_distribution::classify_partition (loop_p loop, + struct graph *rdg, partition *partition, + bitmap stmt_in_all_partitions) { bitmap_iterator bi; unsigned i; @@ -1721,11 +1850,8 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition, return has_reduction; } -/* Returns true when PARTITION1 and PARTITION2 access the same memory - object in RDG. */ - -static bool -share_memory_accesses (struct graph *rdg, +bool +loop_distribution::share_memory_accesses (struct graph *rdg, partition *partition1, partition *partition2) { unsigned i, j; @@ -1772,10 +1898,10 @@ share_memory_accesses (struct graph *rdg, partition for it by adding depended statements according to RDG. All partitions are recorded in PARTITIONS. */ -static void -rdg_build_partitions (struct graph *rdg, - vec starting_stmts, - vec *partitions) +void +loop_distribution::rdg_build_partitions (struct graph *rdg, + vec starting_stmts, + vec *partitions) { auto_bitmap processed; int i; @@ -1891,14 +2017,8 @@ partition_contains_all_rw (struct graph *rdg, return false; } -/* Compute partition dependence created by the data references in DRS1 - and DRS2, modify and return DIR according to that. IF ALIAS_DDR is - not NULL, we record dependence introduced by possible alias between - two data references in ALIAS_DDRS; otherwise, we simply ignore such - dependence as if it doesn't exist at all. */ - -static int -pg_add_dependence_edges (struct graph *rdg, int dir, +int +loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1, bitmap drs2, vec *alias_ddrs) { unsigned i, j; @@ -2114,10 +2234,10 @@ free_partition_graph_vdata (struct graph *pg) is ignored, as if it doesn't exist at all; otherwise all depdendences are considered. */ -static struct graph * -build_partition_graph (struct graph *rdg, - vec *partitions, - bool ignore_alias_p) +struct graph * +loop_distribution::build_partition_graph (struct graph *rdg, + vec *partitions, + bool ignore_alias_p) { int i, j; struct partition *partition1, *partition2; @@ -2199,15 +2319,10 @@ sort_partitions_by_post_order (struct graph *pg, } } -/* Given reduced dependence graph RDG merge strong connected components - of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by - possible alias between references is ignored, as if it doesn't exist - at all; otherwise all depdendences are considered. */ - -static void -merge_dep_scc_partitions (struct graph *rdg, - vec *partitions, - bool ignore_alias_p) +void +loop_distribution::merge_dep_scc_partitions (struct graph *rdg, + vec *partitions, + bool ignore_alias_p) { struct partition *partition1, *partition2; struct pg_vdata *data; @@ -2280,11 +2395,10 @@ pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data) /* This is the main function breaking strong conected components in PARTITIONS giving reduced depdendence graph RDG. Store data dependence relations for runtime alias check in ALIAS_DDRS. */ - -static void -break_alias_scc_partitions (struct graph *rdg, - vec *partitions, - vec *alias_ddrs) +void +loop_distribution::break_alias_scc_partitions (struct graph *rdg, + vec *partitions, + vec *alias_ddrs) { int i, j, k, num_sccs, num_sccs_no_alias; /* Build partition dependence graph. */ @@ -2710,12 +2824,10 @@ fuse_memset_builtins (vec *partitions) } } -/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. - ALIAS_DDRS contains ddrs which need runtime alias check. */ - -static void -finalize_partitions (class loop *loop, vec *partitions, - vec *alias_ddrs) +void +loop_distribution::finalize_partitions (class loop *loop, + vec *partitions, + vec *alias_ddrs) { unsigned i; struct partition *partition, *a; @@ -2770,8 +2882,8 @@ finalize_partitions (class loop *loop, vec *partitions, distributed loops. Set NB_CALLS to number of generated builtin calls. Set *DESTROY_P to whether LOOP needs to be destroyed. */ -static int -distribute_loop (class loop *loop, vec stmts, +int +loop_distribution::distribute_loop (class loop *loop, vec stmts, control_dependences *cd, int *nb_calls, bool *destroy_p, bool only_patterns_p) { @@ -3011,40 +3123,27 @@ distribute_loop (class loop *loop, vec stmts, return nbp - *nb_calls; } -/* Distribute all loops in the current function. */ - -namespace { -const pass_data pass_data_loop_distribution = +void loop_distribution::bb_top_order_init (void) { - GIMPLE_PASS, /* type */ - "ldist", /* name */ - OPTGROUP_LOOP, /* optinfo_flags */ - TV_TREE_LOOP_DISTRIBUTION, /* tv_id */ - ( PROP_cfg | PROP_ssa ), /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; + int rpo_num; + int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun)); -class pass_loop_distribution : public gimple_opt_pass -{ -public: - pass_loop_distribution (gcc::context *ctxt) - : gimple_opt_pass (pass_data_loop_distribution, ctxt) - {} + bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun)); + bb_top_order_index_size = last_basic_block_for_fn (cfun); + rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true); + for (int i = 0; i < rpo_num; i++) + bb_top_order_index[rpo[i]] = i; - /* opt_pass methods: */ - virtual bool gate (function *) - { - return flag_tree_loop_distribution - || flag_tree_loop_distribute_patterns; - } - - virtual unsigned int execute (function *); + free (rpo); +} -}; // class pass_loop_distribution +void loop_distribution::bb_top_order_destroy () +{ + free (bb_top_order_index); + bb_top_order_index = NULL; + bb_top_order_index_size = 0; +} /* Given LOOP, this function records seed statements for distribution in @@ -3131,8 +3230,9 @@ prepare_perfect_loop_nest (class loop *loop) return loop; } + unsigned int -pass_loop_distribution::execute (function *fun) +loop_distribution::execute (function *fun) { class loop *loop; bool changed = false; @@ -3143,22 +3243,7 @@ pass_loop_distribution::execute (function *fun) if (number_of_loops (fun) <= 1) return 0; - /* Compute topological order for basic blocks. Topological order is - needed because data dependence is computed for data references in - lexicographical order. */ - if (bb_top_order_index == NULL) - { - int rpo_num; - int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun)); - - bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun)); - bb_top_order_index_size = last_basic_block_for_fn (cfun); - rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true); - for (int i = 0; i < rpo_num; i++) - bb_top_order_index[rpo[i]] = i; - - free (rpo); - } + bb_top_order_init (); FOR_ALL_BB_FN (bb, fun) { @@ -3233,11 +3318,7 @@ pass_loop_distribution::execute (function *fun) delete cd; if (bb_top_order_index != NULL) - { - free (bb_top_order_index); - bb_top_order_index = NULL; - bb_top_order_index_size = 0; - } + bb_top_order_destroy (); if (changed) { @@ -3259,6 +3340,48 @@ pass_loop_distribution::execute (function *fun) return changed ? TODO_cleanup_cfg : 0; } + +/* Distribute all loops in the current function. */ + +namespace { + +const pass_data pass_data_loop_distribution = +{ + GIMPLE_PASS, /* type */ + "ldist", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_TREE_LOOP_DISTRIBUTION, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_loop_distribution : public gimple_opt_pass +{ +public: + pass_loop_distribution (gcc::context *ctxt) + : gimple_opt_pass (pass_data_loop_distribution, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_tree_loop_distribution + || flag_tree_loop_distribute_patterns; + } + + virtual unsigned int execute (function *); + +}; // class pass_loop_distribution + +unsigned int +pass_loop_distribution::execute (function *fun) +{ + return loop_distribution ().execute (fun); +} + } // anon namespace gimple_opt_pass * -- 2.30.2