From ddb0b8247df6b9192a849508fab2aecc49d8312c Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sun, 29 Apr 2018 18:22:35 +0200 Subject: [PATCH] lto-partition.c: Include sreal.h * lto-partition.c: Include sreal.h (add_symbol_to_partition_1): Use size instead of self_size for size estimate. (account_reference_p): New. (lto_balanced_map): Use 64bit arithmetics for size calculatoins; cleanup; fix accounting errors in boundary size; add debug output; combine cost as cost/size instead of cost/internal; reduce the partitioning error to +- 1/8 of the parttion size. From-SVN: r259749 --- gcc/lto/ChangeLog | 11 ++++ gcc/lto/lto-partition.c | 129 +++++++++++++++++++++++++--------------- 2 files changed, 91 insertions(+), 49 deletions(-) diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index 821a462fb87..6d00e4ebdfb 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,14 @@ +2018-04-19 Jan Hubicka + + * lto-partition.c: Include sreal.h + (add_symbol_to_partition_1): Use size instead of self_size + for size estimate. + (account_reference_p): New. + (lto_balanced_map): Use 64bit arithmetics for size calculatoins; cleanup; + fix accounting errors in boundary size; add debug output; combine cost + as cost/size instead of cost/internal; reduce the partitioning error to + +- 1/8 of the parttion size. + 2018-04-19 Martin Liska * lto-symtab.c (lto_symtab_resolve_symbols): Do not bail out diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c index 60daafdf375..d83eba297ec 100644 --- a/gcc/lto/lto-partition.c +++ b/gcc/lto/lto-partition.c @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-prop.h" #include "ipa-fnsummary.h" #include "lto-partition.h" +#include "sreal.h" vec ltrans_partitions; @@ -152,8 +153,8 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node) if (cgraph_node *cnode = dyn_cast (node)) { struct cgraph_edge *e; - if (!node->alias) - part->insns += ipa_fn_summaries->get (cnode)->self_size; + if (!node->alias && c == SYMBOL_PARTITION) + part->insns += ipa_fn_summaries->get (cnode)->size; /* Add all inline clones and callees that are duplicated. */ for (e = cnode->callees; e; e = e->next_callee) @@ -276,8 +277,9 @@ undo_partition (ltrans_partition partition, unsigned int n_nodes) delete partition->initializers_visited; partition->initializers_visited = NULL; - if (!node->alias && (cnode = dyn_cast (node))) - partition->insns -= ipa_fn_summaries->get (cnode)->self_size; + if (!node->alias && (cnode = dyn_cast (node)) + && node->get_partitioning_class () == SYMBOL_PARTITION) + partition->insns -= ipa_fn_summaries->get (cnode)->size; lto_symtab_encoder_delete_node (partition->encoder, node); node->aux = (void *)((size_t)node->aux - 1); } @@ -408,6 +410,24 @@ add_sorted_nodes (vec &next_nodes, ltrans_partition partition) add_symbol_to_partition (partition, node); } +/* Return true if we should account reference from N1 to N2 in cost + of partition boundary. */ + +bool +account_reference_p (symtab_node *n1, symtab_node *n2) +{ + if (cgraph_node *cnode = dyn_cast (n1)) + n1 = cnode; + /* Do not account recursion - the code below will handle it incorrectly + otherwise. Also do not account references to external symbols. + They will never become local. */ + if (n1 == n2 + || DECL_EXTERNAL (n2->decl) + || !n2->definition) + return false; + return true; +} + /* Group cgraph nodes into equally-sized partitions. @@ -457,14 +477,14 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) auto_vec varpool_order; int i; struct cgraph_node *node; - int original_total_size, total_size = 0, best_total_size = 0; - int partition_size; + int64_t original_total_size, total_size = 0; + int64_t partition_size; ltrans_partition partition; int last_visited_node = 0; varpool_node *vnode; - int cost = 0, internal = 0; - int best_n_nodes = 0, best_i = 0, best_cost = - INT_MAX, best_internal = 0; + int64_t cost = 0, internal = 0; + int best_n_nodes = 0, best_i = 0; + int64_t best_cost = -1, best_internal = 0, best_size = 0; int npartitions; int current_order = -1; int noreorder_pos = 0; @@ -513,7 +533,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) /* Compute partition size and create the first partition. */ if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size) - fatal_error (input_location, "min partition size cannot be greater than max partition size"); + fatal_error (input_location, "min partition size cannot be greater " + "than max partition size"); partition_size = total_size / n_lto_partitions; if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) @@ -521,7 +542,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) npartitions = 1; partition = new_partition (""); if (symtab->dump_file) - fprintf (symtab->dump_file, "Total unit size: %i, partition size: %i\n", + fprintf (symtab->dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n", total_size, partition_size); auto_vec next_nodes; @@ -540,17 +561,11 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) next_nodes.safe_push (varpool_order[varpool_pos++]); while (noreorder_pos < (int)noreorder.length () && noreorder[noreorder_pos]->order < current_order) - { - if (!noreorder[noreorder_pos]->alias) - total_size -= ipa_fn_summaries->get (noreorder[noreorder_pos])->size; - next_nodes.safe_push (noreorder[noreorder_pos++]); - } + next_nodes.safe_push (noreorder[noreorder_pos++]); add_sorted_nodes (next_nodes, partition); if (!symbol_partitioned_p (order[i])) add_symbol_to_partition (partition, order[i]); - if (!order[i]->alias) - total_size -= ipa_fn_summaries->get (order[i])->size; /* Once we added a new node to the partition, we also want to add @@ -567,7 +582,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) it and thus we need to subtract it from COST. */ while (last_visited_node < lto_symtab_encoder_size (partition->encoder)) { - symtab_node *refs_node; int j; struct ipa_ref *ref = NULL; symtab_node *snode = lto_symtab_encoder_deref (partition->encoder, @@ -577,7 +591,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) { struct cgraph_edge *edge; - refs_node = node; last_visited_node++; @@ -585,7 +598,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) /* Compute boundary cost of callgraph edges. */ for (edge = node->callees; edge; edge = edge->next_callee) - if (edge->callee->definition) + /* Inline edges will always end up local. */ + if (edge->inline_failed + && account_reference_p (node, edge->callee)) { int edge_cost = edge->frequency (); int index; @@ -602,6 +617,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) cost += edge_cost; } for (edge = node->callers; edge; edge = edge->next_caller) + if (edge->inline_failed + && account_reference_p (edge->caller, node)) { int edge_cost = edge->frequency (); int index; @@ -614,27 +631,24 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) edge->caller); if (index != LCC_NOT_FOUND && index < last_visited_node - 1) - cost -= edge_cost; + cost -= edge_cost, internal += edge_cost; else cost += edge_cost; } } else - { - refs_node = snode; - last_visited_node++; - } + last_visited_node++; /* Compute boundary cost of IPA REF edges and at the same time look into variables referenced from current partition and try to add them. */ - for (j = 0; refs_node->iterate_reference (j, ref); j++) - if (is_a (ref->referred)) + for (j = 0; snode->iterate_reference (j, ref); j++) + if (!account_reference_p (snode, ref->referred)) + ; + else if (is_a (ref->referred)) { int index; vnode = dyn_cast (ref->referred); - if (!vnode->definition) - continue; if (!symbol_partitioned_p (vnode) && !vnode->no_reorder && vnode->get_partitioning_class () == SYMBOL_PARTITION) @@ -652,8 +666,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) int index; node = dyn_cast (ref->referred); - if (!node->definition) - continue; index = lto_symtab_encoder_lookup (partition->encoder, node); if (index != LCC_NOT_FOUND @@ -662,8 +674,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) else cost++; } - for (j = 0; refs_node->iterate_referring (j, ref); j++) - if (is_a (ref->referring)) + for (j = 0; snode->iterate_referring (j, ref); j++) + if (!account_reference_p (ref->referring, snode)) + ; + else if (is_a (ref->referring)) { int index; @@ -682,7 +696,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) vnode); if (index != LCC_NOT_FOUND && index < last_visited_node - 1) - cost--; + cost--, internal++; else cost++; } @@ -696,36 +710,41 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) node); if (index != LCC_NOT_FOUND && index < last_visited_node - 1) - cost--; + cost--, internal++; else cost++; } } - /* If the partition is large enough, start looking for smallest boundary cost. */ - if (partition->insns < partition_size * 3 / 4 - || best_cost == INT_MAX - || ((!cost - || (best_internal * (HOST_WIDE_INT) cost - > (internal * (HOST_WIDE_INT)best_cost))) - && partition->insns < partition_size * 5 / 4)) + gcc_assert (cost >= 0 && internal >= 0); + + /* If the partition is large enough, start looking for smallest boundary cost. + If partition still seems too small (less than 7/8 of target weight) accept + any cost. If partition has right size, optimize for highest internal/cost. + Later we stop building partition if its size is 9/8 of the target wight. */ + if (partition->insns < partition_size * 7 / 8 + || best_cost == -1 + || (!cost + || ((sreal)best_internal * (sreal) cost + < ((sreal) internal * (sreal)best_cost)))) { best_cost = cost; best_internal = internal; + best_size = partition->insns; best_i = i; best_n_nodes = lto_symtab_encoder_size (partition->encoder); - best_total_size = total_size; best_varpool_pos = varpool_pos; } if (symtab->dump_file) - fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, cost %i/%i " - "best %i/%i, step %i\n", i, + fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, " + "cost %" PRId64 "/%" PRId64 " " + "best %" PRId64 "/%" PRId64", step %i\n", i, order[i]->name (), order[i]->order, partition->insns, cost, internal, best_cost, best_internal, best_i); /* Partition is too large, unwind into step when best cost was reached and start new partition. */ - if (partition->insns > 2 * partition_size + if (partition->insns > 9 * partition_size / 8 || partition->insns > max_partition_size) { if (best_i != i) @@ -736,21 +755,26 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) undo_partition (partition, best_n_nodes); varpool_pos = best_varpool_pos; } + gcc_assert (best_size == partition->insns); i = best_i; + if (symtab->dump_file) + fprintf (symtab->dump_file, + "Partition insns: %i (want %" PRId64 ")\n", + partition->insns, partition_size); /* When we are finished, avoid creating empty partition. */ while (i < n_nodes - 1 && symbol_partitioned_p (order[i + 1])) i++; if (i == n_nodes - 1) break; + total_size -= partition->insns; partition = new_partition (""); last_visited_node = 0; - total_size = best_total_size; cost = 0; if (symtab->dump_file) fprintf (symtab->dump_file, "New partition\n"); best_n_nodes = 0; - best_cost = INT_MAX; + best_cost = -1; /* Since the size of partitions is just approximate, update the size after we finished current one. */ @@ -760,6 +784,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) /* Watch for overflow. */ partition_size = INT_MAX / 16; + if (symtab->dump_file) + fprintf (symtab->dump_file, + "Total size: %" PRId64 " partition_size: %" PRId64 "\n", + total_size, partition_size); if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) partition_size = PARAM_VALUE (MIN_PARTITION_SIZE); npartitions ++; @@ -779,6 +807,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) next_nodes.safe_push (varpool_order[varpool_pos++]); while (noreorder_pos < (int)noreorder.length ()) next_nodes.safe_push (noreorder[noreorder_pos++]); + /* For one partition the cost of boundary should be 0 unless we added final + symbols here (these are not accounted) or we have accounting bug. */ + gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost); add_sorted_nodes (next_nodes, partition); free (order); -- 2.30.2