Make profile estimation more precise
authorJan Hubicka <jh@suse.cz>
Thu, 16 Jan 2020 22:55:44 +0000 (23:55 +0100)
committerJan Hubicka <jh@suse.cz>
Thu, 16 Jan 2020 22:55:44 +0000 (23:55 +0100)
While analyzing code size regression in SPEC2k GCC binary I noticed that we
perform some inline decisions because we think that number of executions are
very high.
In particular there was inline decision inlining gen_rtx_fmt_ee to find_reloads
believing that it is called 4 billion times.  This turned out to be cummulation
of roundoff errors in propagate_freq which was bit mechanically updated from
original sreals to C++ sreals and later to new probabilities.

This led us to estimate that a loopback edge is reached with probability 2.3
which was capped to 1-1/10000 and since this happened in nested loop it quickly
escalated to large values.

Originally capping to REG_BR_PROB_BASE avoided such problems but now we have
much higher range.

This patch avoids going from probabilites to REG_BR_PROB_BASE so precision is
kept.  In addition it makes the propagation to not estimate more than
param-max-predicted-loop-iterations.  The first change makes the cap to not
be triggered on the gcc build, but it is still better to be safe than sorry.

* ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
dump.
* params.opt: (max-predicted-iterations): Set bounds.
* predict.c (real_almost_one, real_br_prob_base,
real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
(propagate_freq): Add max_cyclic_prob parameter; cap cyclic
probabilities; do not truncate to reg_br_prob_bases.
(estimate_loops_at_level): Pass max_cyclic_prob.
(estimate_loops): Compute max_cyclic_prob.
(estimate_bb_frequencies): Do not initialize real_*; update calculation
of back edge prob.
* profile-count.c (profile_probability::to_sreal): New.
* profile-count.h (class sreal): Move up in file.
(profile_probability::to_sreal): Declare.

gcc/ChangeLog
gcc/ipa-fnsummary.c
gcc/params.opt
gcc/predict.c
gcc/profile-count.c
gcc/profile-count.h

index b98ead86f43d56817a8a4e2a9095a56a4870b846..907d0f751abeae655d120b1ddf3cb9741b8c2505 100644 (file)
@@ -1,3 +1,20 @@
+2020-01-16  Jan Hubicka  <hubicka@ucw.cz>
+
+       * ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
+       dump.
+       * params.opt: (max-predicted-iterations): Set bounds.
+       * predict.c (real_almost_one, real_br_prob_base,
+       real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
+       (propagate_freq): Add max_cyclic_prob parameter; cap cyclic
+       probabilities; do not truncate to reg_br_prob_bases.
+       (estimate_loops_at_level): Pass max_cyclic_prob.
+       (estimate_loops): Compute max_cyclic_prob.
+       (estimate_bb_frequencies): Do not initialize real_*; update calculation
+       of back edge prob.
+       * profile-count.c (profile_probability::to_sreal): New.
+       * profile-count.h (class sreal): Move up in file.
+       (profile_probability::to_sreal): Declare.
+
 2020-01-16  Stam Markianos-Wright  <stam.markianos-wright@arm.com>
 
        * config/arm/arm.c
index 4e1c587b10178b363a957593deeb3eafb2c20ecc..dbd53f12a405cf1c8a045a2e979311f8411c46b2 100644 (file)
@@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
          gcc_assert (*size == old_size);
          if (time && (*time - old_time > 1 || *time - old_time < -1)
              && dump_file)
-           fprintf (dump_file, "Time mismatch in call summary %f!=%f",
+           fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
                     old_time.to_double (),
                     time->to_double ());
        }
index 31cc20031b13331c13e255ec3020cfc1708ef5f2..f02c769d0e3541aa1965622081d64445bb451cf1 100644 (file)
@@ -555,7 +555,7 @@ Common Joined UInteger Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32)
 Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant.
 
 -param=max-predicted-iterations=
-Common Joined UInteger Var(param_max_predicted_iterations) Init(100) Param Optimization
+Common Joined UInteger Var(param_max_predicted_iterations) Init(100) IntegerRange(1, 65536) Param Optimization
 The maximum number of loop iterations we predict statically.
 
 -param=max-reload-search-insns=
index 02c5fe0667dd24d2c9cbec9ac7cb37be5125eb4b..c3aed9ed8543ca353580057cd8b734d59d3166a2 100644 (file)
@@ -76,10 +76,6 @@ enum predictor_reason
 static const char *reason_messages[] = {"", " (ignored)",
     " (single edge duplicate)", " (edge pair duplicate)"};
 
-/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
-                  1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
-static sreal real_almost_one, real_br_prob_base,
-            real_inv_br_prob_base, real_one_half, real_bb_freq_max;
 
 static void combine_predictions_for_insn (rtx_insn *, basic_block);
 static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
@@ -3266,7 +3262,8 @@ public:
    TOVISIT, starting in HEAD.  */
 
 static void
-propagate_freq (basic_block head, bitmap tovisit)
+propagate_freq (basic_block head, bitmap tovisit,
+               sreal max_cyclic_prob)
 {
   basic_block bb;
   basic_block last;
@@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit)
 
          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_INFO (e)->back_edge)
-             {
-               cyclic_probability += EDGE_INFO (e)->back_edge_prob;
-             }
+             cyclic_probability += EDGE_INFO (e)->back_edge_prob;
            else if (!(e->flags & EDGE_DFS_BACK))
              {
-               /*  frequency += (e->probability
-                                 * BLOCK_INFO (e->src)->frequency /
-                                 REG_BR_PROB_BASE);  */
-
                /* FIXME: Graphite is producing edges with no profile. Once
                   this is fixed, drop this.  */
                sreal tmp = e->probability.initialized_p () ?
-                           e->probability.to_reg_br_prob_base () : 0;
-               tmp *= BLOCK_INFO (e->src)->frequency;
-               tmp *= real_inv_br_prob_base;
-               frequency += tmp;
+                           e->probability.to_sreal () : 0;
+               frequency += tmp * BLOCK_INFO (e->src)->frequency;
              }
 
          if (cyclic_probability == 0)
@@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit)
            }
          else
            {
-             if (cyclic_probability > real_almost_one)
-               cyclic_probability = real_almost_one;
-
-             /* BLOCK_INFO (bb)->frequency = frequency
-                                             / (1 - cyclic_probability) */
-
-             cyclic_probability = sreal (1) - cyclic_probability;
-             BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
+             if (cyclic_probability > max_cyclic_prob)
+               {
+                 if (dump_file)
+                   fprintf (dump_file,
+                            "cyclic probability of bb %i is %f (capped to %f)"
+                            "; turning freq %f",
+                            bb->index, cyclic_probability.to_double (),
+                            max_cyclic_prob.to_double (),
+                            frequency.to_double ());
+                       
+                 cyclic_probability = max_cyclic_prob;
+               }
+             else if (dump_file)
+               fprintf (dump_file,
+                        "cyclic probability of bb %i is %f; turning freq %f",
+                        bb->index, cyclic_probability.to_double (),
+                        frequency.to_double ());
+
+             BLOCK_INFO (bb)->frequency = frequency
+                                / (sreal (1) - cyclic_probability);
+             if (dump_file)
+               fprintf (dump_file, " to %f\n",
+                        BLOCK_INFO (bb)->frequency.to_double ());
            }
        }
 
@@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit)
       e = find_edge (bb, head);
       if (e)
        {
-         /* EDGE_INFO (e)->back_edge_prob
-            = ((e->probability * BLOCK_INFO (bb)->frequency)
-            / REG_BR_PROB_BASE); */
-
          /* FIXME: Graphite is producing edges with no profile. Once
             this is fixed, drop this.  */
          sreal tmp = e->probability.initialized_p () ?
-                     e->probability.to_reg_br_prob_base () : 0;
-         tmp *= BLOCK_INFO (bb)->frequency;
-         EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
+                     e->probability.to_sreal () : 0;
+         EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
        }
 
       /* Propagate to successor blocks.  */
@@ -3396,7 +3395,7 @@ propagate_freq (basic_block head, bitmap tovisit)
 /* Estimate frequencies in loops at same nest level.  */
 
 static void
-estimate_loops_at_level (class loop *first_loop)
+estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
 {
   class loop *loop;
 
@@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop)
       unsigned i;
       auto_bitmap tovisit;
 
-      estimate_loops_at_level (loop->inner);
+      estimate_loops_at_level (loop->inner, max_cyclic_prob);
 
       /* Find current loop back edge and mark it.  */
       e = loop_latch_edge (loop);
@@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop)
       for (i = 0; i < loop->num_nodes; i++)
        bitmap_set_bit (tovisit, bbs[i]->index);
       free (bbs);
-      propagate_freq (loop->header, tovisit);
+      propagate_freq (loop->header, tovisit, max_cyclic_prob);
     }
 }
 
@@ -3428,17 +3427,18 @@ estimate_loops (void)
 {
   auto_bitmap tovisit;
   basic_block bb;
+  sreal max_cyclic_prob = (sreal)1 - (sreal)1 / param_max_predicted_iterations;
 
   /* Start by estimating the frequencies in the loops.  */
   if (number_of_loops (cfun) > 1)
-    estimate_loops_at_level (current_loops->tree_root->inner);
+    estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
 
   /* Now propagate the frequencies through all the blocks.  */
   FOR_ALL_BB_FN (bb, cfun)
     {
       bitmap_set_bit (tovisit, bb->index);
     }
-  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
+  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
 }
 
 /* Drop the profile for NODE to guessed, and update its frequency based on
@@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force)
   if (force || profile_status_for_fn (cfun) != PROFILE_READ
       || !update_max_bb_count ())
     {
-      static int real_values_initialized = 0;
-
-      if (!real_values_initialized)
-        {
-         real_values_initialized = 1;
-         real_br_prob_base = REG_BR_PROB_BASE;
-         /* Scaling frequencies up to maximal profile count may result in
-            frequent overflows especially when inlining loops.
-            Small scalling results in unnecesary precision loss.  Stay in
-            the half of the (exponential) range.  */
-         real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
-         real_one_half = sreal (1, -1);
-         real_inv_br_prob_base = sreal (1) / real_br_prob_base;
-         real_almost_one = sreal (1) - real_inv_br_prob_base;
-       }
 
       mark_dfs_back_edges ();
 
@@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force)
                 this is fixed, drop this.  */
              if (e->probability.initialized_p ())
                EDGE_INFO (e)->back_edge_prob
-                  = e->probability.to_reg_br_prob_base ();
+                  = e->probability.to_sreal ();
              else
-               EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
-             EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
+               /* back_edge_prob = 0.5 */
+               EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
            }
        }
 
@@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force)
        if (freq_max < BLOCK_INFO (bb)->frequency)
          freq_max = BLOCK_INFO (bb)->frequency;
 
-      freq_max = real_bb_freq_max / freq_max;
+      /* Scaling frequencies up to maximal profile count may result in
+        frequent overflows especially when inlining loops.
+        Small scalling results in unnecesary precision loss.  Stay in
+        the half of the (exponential) range.  */
+      freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
       if (freq_max < 16)
        freq_max = 16;
       profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
       cfun->cfg->count_max = profile_count::uninitialized ();
       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
        {
-         sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
+         sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + sreal (1, -1);
          profile_count count = profile_count::from_gcov_type (tmp.to_int ());  
 
          /* If we have profile feedback in which this function was never
index d463ddd5cce723ee75aadbc17f3ab344c60b1f6b..0c7922978260ad5842965c890921d6e307babacb 100644 (file)
@@ -446,3 +446,12 @@ profile_probability::combine_with_count (profile_count count1,
   else
     return *this * even () + other * even ();
 }
+
+/* Return probability as sreal in range [0, 1].  */
+
+sreal
+profile_probability::to_sreal () const
+{
+  gcc_checking_assert (initialized_p ());
+  return ((sreal)m_val) >> (n_bits - 2);
+}
index 987d3ce9a49722b4f1d2651ab9b1eeb78ea34e33..09217a8699b03bc491ed36976ae458d24bab5e04 100644 (file)
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 
 struct function;
 struct profile_count;
+class sreal;
 
 /* Quality of the profile count.  Because gengtype does not support enums
    inside of classes, this is in global namespace.  */
@@ -614,6 +615,8 @@ public:
                                          profile_probability other,
                                          profile_count count2) const;
 
+  /* Return probability as sreal.  */
+  sreal to_sreal () const;
   /* LTO streaming support.  */
   static profile_probability stream_in (class lto_input_block *);
   void stream_out (struct output_block *);
@@ -674,8 +677,6 @@ public:
 
  */
 
-class sreal;
-
 struct GTY(()) profile_count
 {
 public: