predict.c (determine_unlikely_bbs): Set cgraph node count to 0 when entry block was promoted unlikely.
author: Jan Hubicka <hubicka@ucw.cz>
Fri, 17 Nov 2017 17:47:36 +0000 (18:47 +0100)
committer: Jan Hubicka <hubicka@gcc.gnu.org>
Fri, 17 Nov 2017 17:47:36 +0000 (17:47 +0000)
* predict.c (determine_unlikely_bbs): Set cgraph node count to 0
when entry block was promoted unlikely.
(estimate_bb_frequencies): Increase frequency scale.
* profile-count.h (profile_count): Export precision info.
* gcc.dg/tree-ssa/dump-2.c: Fixup template for profile precision
changes.
* gcc.dg/tree-ssa/pr77445-2.c: Fixup template for profile precision
changes.

From-SVN: r254888

gcc/ChangeLog
gcc/predict.c
gcc/profile-count.h
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/dump-2.c
gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c

index 5ab422c917ab7b11df27045a5fa99ec440800a87..3864ff2fa05afdffc60358ae756a1f4830deab0d 100644 (file)
@@ -1,3 +1,10 @@
+2017-11-17  Jan Hubicka  <hubicka@ucw.cz>
+
+       * predict.c (determine_unlikely_bbs): Set cgraph node count to 0
+       when entry block was promoted unlikely.
+       (estimate_bb_frequencies): Increase frequency scale.
+       * profile-count.h (profile_count): Export precision info.
+
 2017-11-17  Jan Hubicka  <hubicka@ucw.cz>
 
        * tree-tailcall.c (eliminate_tail_call): Be more careful about not
index 7404f1af1fa17859d66dbe473c6c556ca841f252..7e40f7773f198225a5c691270823619f82058b0c 100644 (file)
@@ -3542,6 +3542,8 @@ determine_unlikely_bbs ()
                        bb->index, e->dest->index);
             e->probability = profile_probability::never ();
           }
+  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
+    cgraph_node::get (current_function_decl)->count = profile_count::zero ();
 }
 
 /* Estimate and propagate basic block frequencies using the given branch
@@ -3565,7 +3567,11 @@ estimate_bb_frequencies (bool force)
         {
          real_values_initialized = 1;
          real_br_prob_base = REG_BR_PROB_BASE;
-         real_bb_freq_max = BB_FREQ_MAX;
+         /* Scaling frequencies up to maximal profile count may result in
+            frequent overflows especially when inlining loops.
+            Small scaling results in unnecessary precision loss.  Stay in
+            the half of the (exponential) range.  */
+         real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
          real_one_half = sreal (1, -1);
          real_inv_br_prob_base = sreal (1) / real_br_prob_base;
          real_almost_one = sreal (1) - real_inv_br_prob_base;
@@ -3610,6 +3616,8 @@ estimate_bb_frequencies (bool force)
          freq_max = BLOCK_INFO (bb)->frequency;
 
       freq_max = real_bb_freq_max / freq_max;
+      if (freq_max < 16)
+       freq_max = 16;
       cfun->cfg->count_max = profile_count::uninitialized ();
       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
        {
index 90d1bc747eeaccda030326fac87525efc8c4353a..66a217d39fdca76bdd1c42382d59a023a628179a 100644 (file)
@@ -605,11 +605,13 @@ class sreal;
 
 class GTY(()) profile_count
 {
+public:
   /* Use 62bit to hold basic block counters.  Should be at least
      64bit.  Although a counter cannot be negative, we use a signed
      type to hold various extra stages.  */
 
   static const int n_bits = 61;
+private:
   static const uint64_t max_count = ((uint64_t) 1 << n_bits) - 2;
   static const uint64_t uninitialized_count = ((uint64_t) 1 << n_bits) - 1;
 
index 9592080cea8cd8741d37a81bc651be96c43d3c40..149ed11a502c2ae2d2d16ebe5b6740d5b22c6a5d 100644 (file)
@@ -1,3 +1,10 @@
+2017-11-17  Jan Hubicka  <hubicka@ucw.cz>
+
+       * gcc.dg/tree-ssa/dump-2.c: Fixup template for profile precision
+       changes.
+       * gcc.dg/tree-ssa/pr77445-2.c: Fixup template for profile precision
+       changes.
+
 2017-11-17  Nathan Sidwell  <nathan@acm.org>
 
        * g++.dg/pr82836.C: Fix for c++17.
index 20f99c2df12282bdd9ea6bd92c0d8d4934d5ad26..a1ab635906b9d80d40b2a6c23b06ccb0f6b283be 100644 (file)
@@ -6,4 +6,4 @@ int f(void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: 10000\\\]:" "optimized" } } */
+/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: " "optimized" } } */
index 88e3b946fe4e4bf8852c34483935ba0059300446..eecfc4b195a9b9698e60a24aa66983d360da6f7a 100644 (file)
@@ -120,7 +120,7 @@ enum STATES FMS( u8 **in , u32 *transitions) {
    profile estimation stage. But the number of inconsistencies should not
    increase much.  */
 /* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum" 2 "thread1" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 3 "thread1" } } */
 /* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */
 /* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */
 /* { dg-final { scan-tree-dump-not "not considered" "thread3" } } */