re PR tree-optimization/90078 (ICE with deep templates caused by overflow)

author Bin Cheng <bin.cheng@linux.alibaba.com>

Wed, 8 May 2019 11:37:45 +0000 (11:37 +0000)

committer Bin Cheng <amker@gcc.gnu.org>

Wed, 8 May 2019 11:37:45 +0000 (11:37 +0000)
author Bin Cheng <bin.cheng@linux.alibaba.com>
Wed, 8 May 2019 11:37:45 +0000 (11:37 +0000)
committer Bin Cheng <amker@gcc.gnu.org>
Wed, 8 May 2019 11:37:45 +0000 (11:37 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index be91d031277a5340e8249d1ee9e8c4291aba80aa..08b37f27d45b0173219a7b24102b9adb4340c755 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2019-05-08  Bin Cheng  <bin.cheng@linux.alibaba.com>
+
+       PR tree-optimization/90078
+       * tree-ssa-loop-ivopts.c (inttypes.h): Include new header file.
+       (INFTY): Increase the value for infinite cost.
+       (struct comp_cost): Promote type of members to int64_t.
+       (infinite_cost): Don't set complexity in initialization.
+       (comp_cost::operator +,-,+=,-+,/=,*=): Assert when cost computation
+       overflows to infinite_cost.
+       (adjust_setup_cost): Promote type of parameter and cost computation
+       to int64_t.
+       (struct ainc_cost_data, struct iv_ca): Promote type of member to
+       int64_t.
+       (get_scaled_computation_cost_at, determine_iv_cost): Promote type of
+       cost computation to int64_t.
+       (determine_group_iv_costs, iv_ca_dump, find_optimal_iv_set): Use
+       int64_t's format specifier in dump.
+
  2019-05-08  Bin Cheng  <bin.cheng@linux.alibaba.com>
  
         PR tree-optimization/90240
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 897a46a2cb3a9927565a13f00ca3f59af3595b0d..3042f825ff81825015a164db5dd6ca96e3c938f1 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-05-08  Bin Cheng  <bin.cheng@linux.alibaba.com>
+
+       PR tree-optimization/90078
+       * g++.dg/tree-ssa/pr90078.C: New test.
+
  2018-05-08  Bin Cheng  <bin.cheng@linux.alibaba.com>
  
         PR tree-optimization/90240
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr90078.C b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C

new file mode 100644 (file)

index 0000000..e36f50e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C
@@ -0,0 +1,199 @@
+// { dg-do compile }
+// { dg-options "-std=c++14 -O2 -ftemplate-depth=1000000" }
+
+template <class T, int Dim0, int Dim1, int Dim2> struct Tensor3;
+template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j,
+          char k>
+struct Tensor3_Expr;
+
+template <class T, int Dim0, int Dim1, int Dim2, int Dim3> struct Tensor4;
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
+          char j, char k, char l>
+struct Tensor4_Expr;
+
+template <char i, int Dim> struct Index
+{};
+template <const int N> struct Number
+{
+  Number(){};
+  operator int() const { return N; }
+};
+
+template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2>
+struct Tensor3
+{
+  T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2];
+
+  T operator()(const int N1, const int N2, const int N3) const
+  {
+    return data[N1][N2][N3];
+  }
+
+  template <char i, char j, char k, int Dim0, int Dim1, int Dim2>
+  Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T,
+               Dim0, Dim1, Dim2, i, j, k>
+  operator()(const Index<i, Dim0>, const Index<j, Dim1>,
+             const Index<k, Dim2>) const
+  {
+    return Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>,
+                        T, Dim0, Dim1, Dim2, i, j, k>(*this);
+  }
+};
+
+template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j,
+          char k>
+struct Tensor3_Expr
+{
+  A iter;
+
+  Tensor3_Expr(const A &a) : iter(a) {}
+  T operator()(const int N1, const int N2, const int N3) const
+  {
+    return iter(N1, N2, N3);
+  }
+};
+
+template <class A, class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2,
+          int Dim0, int Dim1, int Dim2, char i, char j, char k>
+struct Tensor3_Expr<Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T, Dim0,
+                   Dim1, Dim2, i, j, k>
+{
+  Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2> &iter;
+
+  Tensor3_Expr(Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2> &a) : iter(a)
+  {}
+  T operator()(const int N1, const int N2, const int N3) const
+  {
+    return iter(N1, N2, N3);
+  }
+};
+
+template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23,
+          int Dim4, int Dim5, char i, char j, char k, char l, char m>
+struct Tensor3_times_Tensor3_21
+{
+  Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> iterA;
+  Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> iterB;
+
+  template <int CurrentDim>
+  T eval(const int N1, const int N2, const int N3, const int N4,
+         const Number<CurrentDim> &) const
+  {
+    return iterA(N1, N2, CurrentDim - 1) * iterB(CurrentDim - 1, N3, N4)
+           + eval(N1, N2, N3, N4, Number<CurrentDim - 1>());
+  }
+  T eval(const int N1, const int N2, const int N3, const int N4,
+         const Number<1> &) const
+  {
+    return iterA(N1, N2, 0) * iterB(0, N3, N4);
+  }
+
+  Tensor3_times_Tensor3_21(
+    const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a,
+    const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b)
+      : iterA(a), iterB(b)
+  {}
+  T operator()(const int &N1, const int &N2, const int &N3,
+               const int &N4) const
+  {
+    return eval(N1, N2, N3, N4, Number<Dim23>());
+  }
+};
+
+template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23,
+          int Dim4, int Dim5, char i, char j, char k, char l, char m>
+Tensor4_Expr<Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23, Dim4,
+                                      Dim5, i, j, k, l, m>,
+             T, Dim0, Dim1, Dim4, Dim5, i, j, l, m>
+operator*(const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a,
+          const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b)
+{
+  using TensorExpr = Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23,
+                                              Dim4, Dim5, i, j, k, l, m>;
+  return Tensor4_Expr<TensorExpr, T, Dim0, Dim1, Dim4, Dim5, i, j, l, m>(
+    TensorExpr(a, b));
+};
+
+template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2,
+          int Tensor_Dim3>
+struct Tensor4
+{
+  T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2][Tensor_Dim3];
+
+  Tensor4() {}
+  T &operator()(const int N1, const int N2, const int N3, const int N4)
+  {
+    return data[N1][N2][N3][N4];
+  }
+
+  template <char i, char j, char k, char l, int Dim0, int Dim1, int Dim2,
+            int Dim3>
+  Tensor4_Expr<Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>,
+               T, Dim0, Dim1, Dim2, Dim3, i, j, k, l>
+  operator()(const Index<i, Dim0>, const Index<j, Dim1>, const Index<k, Dim2>,
+             const Index<l, Dim3>)
+  {
+    return Tensor4_Expr<
+      Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>, T, Dim0,
+      Dim1, Dim2, Dim3, i, j, k, l>(*this);
+  };
+};
+
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
+          char j, char k, char l>
+struct Tensor4_Expr
+{
+  A iter;
+
+  Tensor4_Expr(const A &a) : iter(a) {}
+  T operator()(const int N1, const int N2, const int N3, const int N4) const
+  {
+    return iter(N1, N2, N3, N4);
+  }
+};
+
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
+          char j, char k, char l>
+struct Tensor4_Expr<Tensor4<A, Dim0, Dim1, Dim2, Dim3>, T, Dim0, Dim1, Dim2,
+                   Dim3, i, j, k, l>
+{
+  Tensor4<A, Dim0, Dim1, Dim2, Dim3> &iter;
+
+  Tensor4_Expr(Tensor4<A, Dim0, Dim1, Dim2, Dim3> &a) : iter(a) {}
+  T operator()(const int N1, const int N2, const int N3, const int N4) const
+  {
+    return iter(N1, N2, N3, N4);
+  }
+
+  template <class B, class U, int Dim1_0, int Dim1_1, int Dim1_2, int Dim1_3,
+            char i_1, char j_1, char k_1, char l_1>
+  auto &operator=(const Tensor4_Expr<B, U, Dim1_0, Dim1_1, Dim1_2, Dim1_3, i_1,
+                                     j_1, k_1, l_1> &rhs)
+  {
+    for(int ii = 0; ii < Dim0; ++ii)
+      for(int jj = 0; jj < Dim1; ++jj)
+        for(int kk = 0; kk < Dim2; ++kk)
+          for(int ll = 0; ll < Dim3; ++ll)
+            {
+              iter(ii, jj, kk, ll) = rhs(ii, jj, kk, ll);
+            }
+    return *this;
+  }
+};
+
+int main()
+{
+  Tensor3<float, 100, 100, 1000> t1;
+  Tensor3<float, 1000, 100, 100> t2;
+
+  Index<'l', 100> l;
+  Index<'m', 100> m;
+  Index<'k', 1000> k;
+  Index<'n', 100> n;
+  Index<'o', 100> o;
+
+  Tensor4<float, 100, 100, 100, 100> res;
+  res(l, m, n, o) = t1(l, m, k) * t2(k, n, o);
+  return 0;
+}
+
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

index 534e14638077a582b63b03696e0be3828626b1dd..9864b59ccfcf17fddd232719a43349c085a3aaf2 100644 (file)
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -114,7 +114,7 @@ along with GCC; see the file COPYING3.  If not see
     interface between the GIMPLE and RTL worlds.  */
  
  /* The infinite cost.  */
-#define INFTY 10000000
+#define INFTY 1000000000
  
  /* Returns the expected number of loop iterations for LOOP.
     The average trip count is computed from profile data if it
@@ -180,7 +180,7 @@ struct comp_cost
    comp_cost (): cost (0), complexity (0), scratch (0)
    {}
  
-  comp_cost (int cost, unsigned complexity, int scratch = 0)
+  comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
      : cost (cost), complexity (complexity), scratch (scratch)
    {}
  
@@ -220,16 +220,16 @@ struct comp_cost
    /* Returns true if COST1 is smaller or equal than COST2.  */
    friend bool operator<= (comp_cost cost1, comp_cost cost2);
  
-  int cost;            /* The runtime cost.  */
+  int64_t cost;                /* The runtime cost.  */
    unsigned complexity;  /* The estimate of the complexity of the code for
                            the computation (in no concrete units --
                            complexity field should be larger for more
                            complex expressions and addressing modes).  */
-  int scratch;         /* Scratch used during cost computation.  */
+  int64_t scratch;     /* Scratch used during cost computation.  */
  };
  
  static const comp_cost no_cost;
-static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
+static const comp_cost infinite_cost (INFTY, 0, INFTY);
  
  bool
  comp_cost::infinite_cost_p ()
@@ -243,6 +243,7 @@ operator+ (comp_cost cost1, comp_cost cost2)
    if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
      return infinite_cost;
  
+  gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
    cost1.cost += cost2.cost;
    cost1.complexity += cost2.complexity;
  
@@ -256,6 +257,7 @@ operator- (comp_cost cost1, comp_cost cost2)
      return infinite_cost;
  
    gcc_assert (!cost2.infinite_cost_p ());
+  gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
  
    cost1.cost -= cost2.cost;
    cost1.complexity -= cost2.complexity;
@@ -276,6 +278,7 @@ comp_cost::operator+= (HOST_WIDE_INT c)
    if (infinite_cost_p ())
      return *this;
  
+  gcc_assert (this->cost + c < infinite_cost.cost);
    this->cost += c;
  
    return *this;
@@ -287,6 +290,7 @@ comp_cost::operator-= (HOST_WIDE_INT c)
    if (infinite_cost_p ())
      return *this;
  
+  gcc_assert (this->cost - c < infinite_cost.cost);
    this->cost -= c;
  
    return *this;
@@ -295,6 +299,7 @@ comp_cost::operator-= (HOST_WIDE_INT c)
  comp_cost
  comp_cost::operator/= (HOST_WIDE_INT c)
  {
+  gcc_assert (c != 0);
    if (infinite_cost_p ())
      return *this;
  
@@ -309,6 +314,7 @@ comp_cost::operator*= (HOST_WIDE_INT c)
    if (infinite_cost_p ())
      return *this;
  
+  gcc_assert (this->cost * c < infinite_cost.cost);
    this->cost *= c;
  
    return *this;
@@ -638,7 +644,7 @@ struct iv_ca
    comp_cost cand_use_cost;
  
    /* Total cost of candidates.  */
-  unsigned cand_cost;
+  int64_t cand_cost;
  
    /* Number of times each invariant variable is used.  */
    unsigned *n_inv_var_uses;
@@ -4025,16 +4031,16 @@ get_computation_at (struct loop *loop, gimple *at,
     if we're optimizing for speed, amortize it over the per-iteration cost.
     If ROUND_UP_P is true, the result is round up rather than to zero when
     optimizing for speed.  */
-static unsigned
-adjust_setup_cost (struct ivopts_data *data, unsigned cost,
+static int64_t
+adjust_setup_cost (struct ivopts_data *data, int64_t cost,
                    bool round_up_p = false)
  {
    if (cost == INFTY)
      return cost;
    else if (optimize_loop_for_speed_p (data->current_loop))
      {
-      HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
-      return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
+      int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
+      return (cost + (round_up_p ? niters - 1 : 0)) / niters;
      }
    else
      return cost;
@@ -4305,7 +4311,7 @@ enum ainc_type
  
  struct ainc_cost_data
  {
-  unsigned costs[AINC_NONE];
+  int64_t costs[AINC_NONE];
  };
  
  static comp_cost
@@ -4566,12 +4572,12 @@ get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
        if (scale_factor == 1)
         return cost;
  
-      int scaled_cost
+      int64_t scaled_cost
         = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
  
        if (dump_file && (dump_flags & TDF_DETAILS))
-       fprintf (dump_file, "Scaling cost based on bb prob "
-                "by %2.2f: %d (scratch: %d) -> %d\n",
+       fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
+                "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
                  1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
  
        cost.cost = scaled_cost;
@@ -5539,7 +5545,7 @@ determine_group_iv_costs (struct ivopts_data *data)
                   || group->cost_map[j].cost.infinite_cost_p ())
                 continue;
  
-             fprintf (dump_file, "  %d\t%d\t%d\t",
+             fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
                        group->cost_map[j].cand->id,
                        group->cost_map[j].cost.cost,
                        group->cost_map[j].cost.complexity);
@@ -5569,7 +5575,7 @@ static void
  determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
  {
    comp_cost cost_base;
-  unsigned cost, cost_step;
+  int64_t cost, cost_step;
    tree base;
  
    gcc_assert (cand->iv != NULL);
@@ -6139,11 +6145,11 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
    unsigned i;
    comp_cost cost = iv_ca_cost (ivs);
  
-  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
+  fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
            cost.complexity);
-  fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
-          ivs->cand_cost, ivs->cand_use_cost.cost,
-          ivs->cand_use_cost.complexity);
+  fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
+          "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
+          ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
    bitmap_print (file, ivs->cands, "  candidates: ","\n");
  
    for (i = 0; i < ivs->upto; i++)
@@ -6151,9 +6157,9 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
        struct iv_group *group = data->vgroups[i];
        struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
        if (cp)
-        fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
-                group->id, cp->cand->id, cp->cost.cost,
-                cp->cost.complexity);
+        fprintf (file, "   group:%d --> iv_cand:%d, cost=("
+                "%" PRId64 ",%d)\n", group->id, cp->cand->id,
+                cp->cost.cost, cp->cost.complexity);
        else
         fprintf (file, "   group:%d --> ??\n", group->id);
      }
@@ -6751,9 +6757,9 @@ find_optimal_iv_set (struct ivopts_data *data)
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
-      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
+      fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
                origcost.cost, origcost.complexity);
-      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
+      fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
                cost.cost, cost.complexity);
      }
author	Bin Cheng <bin.cheng@linux.alibaba.com>
	Wed, 8 May 2019 11:37:45 +0000 (11:37 +0000)
committer	Bin Cheng <amker@gcc.gnu.org>
	Wed, 8 May 2019 11:37:45 +0000 (11:37 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/g++.dg/tree-ssa/pr90078.C	[new file with mode: 0644]	patch \| blob
gcc/tree-ssa-loop-ivopts.c		patch \| blob \| history