pr44576 Avoid un-necessary prefetch analysis by distributing the cost models

author Changpeng Fang <changpeng.fang@amd.com>

Fri, 9 Jul 2010 23:08:55 +0000 (23:08 +0000)

committer Sebastian Pop <spop@gcc.gnu.org>

Fri, 9 Jul 2010 23:08:55 +0000 (23:08 +0000)
author Changpeng Fang <changpeng.fang@amd.com>
Fri, 9 Jul 2010 23:08:55 +0000 (23:08 +0000)
committer Sebastian Pop <spop@gcc.gnu.org>
Fri, 9 Jul 2010 23:08:55 +0000 (23:08 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 3aed4b505a74d10dc18f59a2bb20f55c2a966152..afbbb31e5b62e96cd6085cbe74bc5dd15d2ff464 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2010-07-09  Changpeng Fang  <changpeng.fang@amd.com>
+
+       PR tree-optimization/44576
+       * tree-ssa-loop-prefetch.c (trip_count_to_ahead_ratio_too_small_p):
+       New.  Pull out from is_loop_prefetching_profitable to implement
+       the trip count to ahead ratio heuristic.
+       (mem_ref_count_reasonable_p): New.  Pull out from
+       is_loop_prefetching_profitable to implement the instruction to
+       memory reference ratio heuristic.  Also consider not reasonable if
+       the memory reference count is above a threshold (to avoid
+       explosive compilation time.
+       (insn_to_prefetch_ratio_too_small_p): New.  Pull out from
+       is_loop_prefetching_profitable to implement the instruction to
+       prefetch ratio heuristic.
+       (is_loop_prefetching_profitable): Removed.
+       (loop_prefetch_arrays): Distribute the cost analysis across the
+       function to allow early exit of the prefetch analysis.
+       is_loop_prefetching_profitable is splitted into three functions,
+       with each one called as early as possible.
+       (PREFETCH_MAX_MEM_REFS_PER_LOOP): New.  Threshold above which the
+       number of memory references in a loop is considered too many.
+
  2010-07-09  Bernd Schmidt  <bernds@codesourcery.com>
  
         * reload.c (find_reloads): Don't clear badop if we have a
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c

index 58abfd34e90cd4e8e68d017fe2a8f827e6f6be6b..008f2ce4b7b8d442a20641d25cb71d07985172d8 100644 (file)
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -110,19 +110,29 @@ along with GCC; see the file COPYING3.  If not see
        prefetch instructions with guards in cases where 5) was not sufficient
        to satisfy the constraints?
  
-   The function is_loop_prefetching_profitable() implements a cost model
-   to determine if prefetching is profitable for a given loop. The cost
-   model has two heuristcs:
-   1. A heuristic that determines whether the given loop has enough CPU
-      ops that can be overlapped with cache missing memory ops.
-      If not, the loop won't benefit from prefetching. This is implemented
-      by requirung the ratio between the instruction count and the mem ref
-      count to be above a certain minimum.
-   2. A heuristic that disables prefetching in a loop with an unknown trip
-      count if the prefetching cost is above a certain limit. The relative
-      prefetching cost is estimated by taking the ratio between the
-      prefetch count and the total intruction count (this models the I-cache
-      cost).
+   A cost model is implemented to determine whether or not prefetching is
+   profitable for a given loop.  The cost model has three heuristics:
+
+   1. Function trip_count_to_ahead_ratio_too_small_p implements a
+      heuristic that determines whether or not the loop has too few
+      iterations (compared to ahead).  Prefetching is not likely to be
+      beneficial if the trip count to ahead ratio is below a certain
+      minimum.
+
+   2. Function mem_ref_count_reasonable_p implements a heuristic that
+      determines whether the given loop has enough CPU ops that can be
+      overlapped with cache missing memory ops.  If not, the loop
+      won't benefit from prefetching.  In the implementation,
+      prefetching is not considered beneficial if the ratio between
+      the instruction count and the mem ref count is below a certain
+      minimum.
+
+   3. Function insn_to_prefetch_ratio_too_small_p implements a
+      heuristic that disables prefetching in a loop if the prefetching
+      cost is above a certain limit.  The relative prefetching cost is
+      estimated by taking the ratio between the prefetch count and the
+      total intruction count (this models the I-cache cost).
+
     The limits used in these heuristics are defined as parameters with
     reasonable default values. Machine-specific default values will be
     added later.
@@ -238,6 +248,14 @@ struct mem_ref_group
  #define PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO 4
  #endif
  
+/* Some of the prefetch computations have quadratic complexity.  We want to
+   avoid huge compile times and, therefore, want to limit the amount of
+   memory references per loop where we consider prefetching.  */
+
+#ifndef PREFETCH_MAX_MEM_REFS_PER_LOOP
+#define PREFETCH_MAX_MEM_REFS_PER_LOOP 200
+#endif
+
  /* The memory reference.  */
  
  struct mem_ref
@@ -1640,24 +1658,51 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
      }
  }
  
-/* Do a cost-benefit analysis to determine if prefetching is profitable
-   for the current loop given the following parameters:
+/* Determine whether or not the trip count to ahead ratio is too small based
+   on prefitablility consideration.
     AHEAD: the iteration ahead distance,
-   EST_NITER: the estimated trip count,
+   EST_NITER: the estimated trip count.  */
+
+static bool
+trip_count_to_ahead_ratio_too_small_p (unsigned ahead, HOST_WIDE_INT est_niter)
+{
+  /* Assume trip count to ahead ratio is big enough if the trip count could not
+     be estimated at compile time.  */
+  if (est_niter < 0)
+    return false;
+
+  if (est_niter < (HOST_WIDE_INT) (TRIP_COUNT_TO_AHEAD_RATIO * ahead))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file,
+                "Not prefetching -- loop estimated to roll only %d times\n",
+                (int) est_niter);
+      return true;
+    }
+
+  return false;
+}
+
+/* Determine whether or not the number of memory references in the loop is
+   reasonable based on the profitablity and compilation time considerations.
     NINSNS: estimated number of instructions in the loop,
-   PREFETCH_COUNT: an estimate of the number of prefetches
     MEM_REF_COUNT: total number of memory references in the loop.  */
  
  static bool
-is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter,
-                               unsigned ninsns, unsigned prefetch_count,
-                               unsigned mem_ref_count, unsigned unroll_factor)
+mem_ref_count_reasonable_p (unsigned ninsns, unsigned mem_ref_count)
  {
-  int insn_to_mem_ratio, insn_to_prefetch_ratio;
+  int insn_to_mem_ratio;
  
    if (mem_ref_count == 0)
      return false;
  
+  /* Miss rate computation (is_miss_rate_acceptable) and dependence analysis
+     (compute_all_dependences) have high costs based on quadratic complexity.
+     To avoid huge compilation time, we give up prefetching if mem_ref_count
+     is too large.  */
+  if (mem_ref_count > PREFETCH_MAX_MEM_REFS_PER_LOOP)
+    return false;
+
    /* Prefetching improves performance by overlapping cache missing
       memory accesses with CPU operations.  If the loop does not have
       enough CPU operations to overlap with memory operations, prefetching
@@ -1678,6 +1723,21 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter,
        return false;
      }
  
+  return true;
+}
+
+/* Determine whether or not the instruction to prefetch ratio in the loop is
+   too small based on the profitablity consideration.
+   NINSNS: estimated number of instructions in the loop,
+   PREFETCH_COUNT: an estimate of the number of prefetches,
+   UNROLL_FACTOR:  the factor to unroll the loop if prefetching.  */
+
+static bool
+insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count,
+                                     unsigned unroll_factor)
+{
+  int insn_to_prefetch_ratio;
+
    /* Prefetching most likely causes performance degradation when the instruction
       to prefetch ratio is too small.  Too many prefetch instructions in a loop
       may reduce the I-cache performance.
@@ -1697,23 +1757,10 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter,
          fprintf (dump_file,
                  "Not prefetching -- instruction to prefetch ratio (%d) too small\n",
                  insn_to_prefetch_ratio);
-      return false;
+      return true;
      }
  
-  /* Could not do further estimation if the trip count is unknown.  Just assume
-     prefetching is profitable. Too aggressive???  */
-  if (est_niter < 0)
-    return true;
-
-  if (est_niter < (HOST_WIDE_INT) (TRIP_COUNT_TO_AHEAD_RATIO * ahead))
-    {
-      if (dump_file && (dump_flags & TDF_DETAILS))
-       fprintf (dump_file,
-                "Not prefetching -- loop estimated to roll only %d times\n",
-                (int) est_niter);
-      return false;
-    }
-  return true;
+  return false;
  }
  
  
@@ -1738,9 +1785,31 @@ loop_prefetch_arrays (struct loop *loop)
        return false;
      }
  
+  /* FIXME: the time should be weighted by the probabilities of the blocks in
+     the loop body.  */
+  time = tree_num_loop_insns (loop, &eni_time_weights);
+  if (time == 0)
+    return false;
+
+  ahead = (PREFETCH_LATENCY + time - 1) / time;
+  est_niter = estimated_loop_iterations_int (loop, false);
+
+  /* Prefetching is not likely to be profitable if the trip count to ahead
+     ratio is too small.  */
+  if (trip_count_to_ahead_ratio_too_small_p (ahead, est_niter))
+    return false;
+
+  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+
    /* Step 1: gather the memory references.  */
    refs = gather_memory_references (loop, &no_other_refs, &mem_ref_count);
  
+  /* Give up prefetching if the number of memory references in the
+     loop is not reasonable based on profitablity and compilation time
+     considerations.  */
+  if (!mem_ref_count_reasonable_p (ninsns, mem_ref_count))
+    goto fail;
+
    /* Step 2: estimate the reuse effects.  */
    prune_by_reuse (refs);
  
@@ -1749,15 +1818,7 @@ loop_prefetch_arrays (struct loop *loop)
  
    determine_loop_nest_reuse (loop, refs, no_other_refs);
  
-  /* Step 3: determine the ahead and unroll factor.  */
-
-  /* FIXME: the time should be weighted by the probabilities of the blocks in
-     the loop body.  */
-  time = tree_num_loop_insns (loop, &eni_time_weights);
-  ahead = (PREFETCH_LATENCY + time - 1) / time;
-  est_niter = estimated_loop_iterations_int (loop, false);
-
-  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+  /* Step 3: determine unroll factor.  */
    unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
                                            est_niter);
  
@@ -1773,8 +1834,10 @@ loop_prefetch_arrays (struct loop *loop)
              ahead, unroll_factor, est_niter,
              ninsns, mem_ref_count, prefetch_count);
  
-  if (!is_loop_prefetching_profitable (ahead, est_niter, ninsns, prefetch_count,
-                                      mem_ref_count, unroll_factor))
+  /* Prefetching is not likely to be profitable if the instruction to prefetch
+     ratio is too small.  */
+  if (insn_to_prefetch_ratio_too_small_p (ninsns, prefetch_count,
+                                         unroll_factor))
      goto fail;
  
    mark_nontemporal_stores (loop, refs);
author	Changpeng Fang <changpeng.fang@amd.com>
	Fri, 9 Jul 2010 23:08:55 +0000 (23:08 +0000)
committer	Sebastian Pop <spop@gcc.gnu.org>
	Fri, 9 Jul 2010 23:08:55 +0000 (23:08 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/tree-ssa-loop-prefetch.c		patch \| blob \| history