gcc/graphite-interchange.c

   1 /* Interchange heuristics and transform for loop interchange on
   2    polyhedral representation.
   3
   4    Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
   5    Contributed by Sebastian Pop <sebastian.pop@amd.com> and
   6    Harsha Jagasia <harsha.jagasia@amd.com>.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 3, or (at your option)
  13 any later version.
  14
  15 GCC is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24 #include "config.h"
  25
  26 #ifdef HAVE_cloog
  27 #include <isl/aff.h>
  28 #include <isl/set.h>
  29 #include <isl/map.h>
  30 #include <isl/union_map.h>
  31 #include <isl/ilp.h>
  32 #include <cloog/cloog.h>
  33 #include <cloog/isl/domain.h>
  34 #endif
  35
  36 #include "system.h"
  37 #include "coretypes.h"
  38 #include "tree-flow.h"
  39 #include "dumpfile.h"
  40 #include "cfgloop.h"
  41 #include "tree-chrec.h"
  42 #include "tree-data-ref.h"
  43 #include "tree-scalar-evolution.h"
  44 #include "sese.h"
  45
  46 #ifdef HAVE_cloog
  47 #include "graphite-poly.h"
  48
  49 /* XXX isl rewrite following comment */
  50 /* Builds a linear expression, of dimension DIM, representing PDR's
  51    memory access:
  52
  53    L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
  54
  55    For an array A[10][20] with two subscript locations s0 and s1, the
  56    linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
  57    corresponds to a memory stride of 20.
  58
  59    OFFSET is a number of dimensions to prepend before the
  60    subscript dimensions: s_0, s_1, ..., s_n.
  61
  62    Thus, the final linear expression has the following format:
  63    0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
  64    where the expression itself is:
  65    c_0 * s_0 + c_1 * s_1 + ... c_n * s_n.  */
  66
  67 static isl_constraint *
  68 build_linearized_memory_access (isl_map *map, poly_dr_p pdr)
  69 {
  70   isl_constraint *res;
  71   isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map));
  72   unsigned offset, nsubs;
  73   int i;
  74   isl_int size, subsize;
  75
  76   res = isl_equality_alloc (ls);
  77   isl_int_init (size);
  78   isl_int_set_ui (size, 1);
  79   isl_int_init (subsize);
  80   isl_int_set_ui (subsize, 1);
  81
  82   nsubs = isl_set_dim (pdr->extent, isl_dim_set);
  83   /* -1 for the already included L dimension.  */
  84   offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs;
  85   res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1);
  86   /* Go through all subscripts from last to first.  First dimension
  87      is the alias set, ignore it.  */
  88   for (i = nsubs - 1; i >= 1; i--)
  89     {
  90       isl_space *dc;
  91       isl_aff *aff;
  92
  93       res = isl_constraint_set_coefficient (res, isl_dim_out, offset + i, size);
  94
  95       dc = isl_set_get_space (pdr->extent);
  96       aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
  97       aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1);
  98       isl_set_max (pdr->extent, aff, &subsize);
  99       isl_aff_free (aff);
 100       isl_int_mul (size, size, subsize);
 101     }
 102
 103   isl_int_clear (subsize);
 104   isl_int_clear (size);
 105
 106   return res;
 107 }
 108
 109 /* Set STRIDE to the stride of PDR in memory by advancing by one in
 110    the loop at DEPTH.  */
 111
 112 static void
 113 pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
 114 {
 115   poly_bb_p pbb = PDR_PBB (pdr);
 116   isl_map *map;
 117   isl_set *set;
 118   isl_aff *aff;
 119   isl_space *dc;
 120   isl_constraint *lma, *c;
 121   isl_int islstride;
 122   graphite_dim_t time_depth;
 123   unsigned offset, nt;
 124   unsigned i;
 125   /* XXX isl rewrite following comments.  */
 126   /* Builds a partial difference equations and inserts them
 127      into pointset powerset polyhedron P.  Polyhedron is assumed
 128      to have the format: T|I|T'|I'|G|S|S'|l1|l2.
 129
 130      TIME_DEPTH is the time dimension w.r.t. which we are
 131      differentiating.
 132      OFFSET represents the number of dimensions between
 133      columns t_{time_depth} and t'_{time_depth}.
 134      DIM_SCTR is the number of scattering dimensions.  It is
 135      essentially the dimensionality of the T vector.
 136
 137      The following equations are inserted into the polyhedron P:
 138      | t_1 = t_1'
 139      | ...
 140      | t_{time_depth-1} = t'_{time_depth-1}
 141      | t_{time_depth} = t'_{time_depth} + 1
 142      | t_{time_depth+1} = t'_{time_depth + 1}
 143      | ...
 144      | t_{dim_sctr} = t'_{dim_sctr}.  */
 145
 146   /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
 147      This is the core part of this alogrithm, since this
 148      constraint asks for the memory access stride (difference)
 149      between two consecutive points in time dimensions.  */
 150
 151   /* Add equalities:
 152      | t1 = t1'
 153      | ...
 154      | t_{time_depth-1} = t'_{time_depth-1}
 155      | t_{time_depth+1} = t'_{time_depth+1}
 156      | ...
 157      | t_{dim_sctr} = t'_{dim_sctr}
 158
 159      This means that all the time dimensions are equal except for
 160      time_depth, where the constraint is t_{depth} = t'_{depth} + 1
 161      step.  More to this: we should be careful not to add equalities
 162      to the 'coupled' dimensions, which happens when the one dimension
 163      is stripmined dimension, and the other dimension corresponds
 164      to the point loop inside stripmined dimension.  */
 165
 166   /* pdr->accesses:    [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript]
 167           ??? [P] not used for PDRs?
 168      pdr->extent:      [a,S1..nb_subscript]
 169      pbb->domain:      [P1..nb_param,I1..nb_domain]
 170      pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr]
 171           [T] includes local vars (currently unused)
 172
 173      First we create [P,I] -> [T,a,S].  */
 174
 175   map = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
 176                                     isl_map_copy (pdr->accesses));
 177   /* Add a dimension for L: [P,I] -> [T,a,S,L].*/
 178   map = isl_map_add_dims (map, isl_dim_out, 1);
 179   /* Build a constraint for "lma[S] - L == 0", effectively calculating
 180      L in terms of subscripts.  */
 181   lma = build_linearized_memory_access (map, pdr);
 182   /* And add it to the map, so we now have:
 183      [P,I] -> [T,a,S,L] : lma([S]) == L.  */
 184   map = isl_map_add_constraint (map, lma);
 185
 186   /* Then we create  [P,I,P',I'] -> [T,a,S,L,T',a',S',L'].  */
 187   map = isl_map_flat_product (map, isl_map_copy (map));
 188
 189   /* Now add the equality T[time_depth] == T'[time_depth]+1.  This will
 190      force L' to be the linear address at T[time_depth] + 1. */
 191   time_depth = psct_dynamic_dim (pbb, depth);
 192   /* Length of [a,S] plus [L] ...  */
 193   offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out);
 194   /* ... plus [T].  */
 195   offset += isl_map_dim (pbb->transformed, isl_dim_out);
 196
 197   c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map)));
 198   c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1);
 199   c = isl_constraint_set_coefficient_si (c, isl_dim_out,
 200                                          offset + time_depth, -1);
 201   c = isl_constraint_set_constant_si (c, 1);
 202   map = isl_map_add_constraint (map, c);
 203
 204   /* Now we equate most of the T/T' elements (making PITaSL nearly
 205      the same is (PITaSL)', except for one dimension, namely for 'depth'
 206      (an index into [I]), after translating to index into [T].  Take care
 207      to not produce an empty map, which indicates we wanted to equate
 208      two dimensions that are already coupled via the above time_depth
 209      dimension.  Happens with strip mining where several scatter dimension
 210      are interdependend.  */
 211   /* Length of [T].  */
 212   nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
 213   for (i = 0; i < nt; i++)
 214     if (i != time_depth)
 215       {
 216         isl_map *temp = isl_map_equate (isl_map_copy (map),
 217                                         isl_dim_out, i,
 218                                         isl_dim_out, offset + i);
 219         if (isl_map_is_empty (temp))
 220           isl_map_free (temp);
 221         else
 222           {
 223             isl_map_free (map);
 224             map = temp;
 225           }
 226       }
 227
 228   /* Now maximize the expression L' - L.  */
 229   set = isl_map_range (map);
 230   dc = isl_set_get_space (set);
 231   aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
 232   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1);
 233   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1);
 234   isl_int_init (islstride);
 235   isl_set_max (set, aff, &islstride);
 236   isl_int_get_gmp (islstride, stride);
 237   isl_int_clear (islstride);
 238   isl_aff_free (aff);
 239   isl_set_free (set);
 240
 241   if (dump_file && (dump_flags & TDF_DETAILS))
 242     {
 243       char *str;
 244       void (*gmp_free) (void *, size_t);
 245
 246       fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:",
 247                pbb_index (pbb), PDR_ID (pdr), (int) depth);
 248       str = mpz_get_str (0, 10, stride);
 249       fprintf (dump_file, "  %s ", str);
 250       mp_get_memory_functions (NULL, NULL, &gmp_free);
 251       (*gmp_free) (str, strlen (str) + 1);
 252     }
 253 }
 254
 255 /* Sets STRIDES to the sum of all the strides of the data references
 256    accessed in LOOP at DEPTH.  */
 257
 258 static void
 259 memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
 260 {
 261   int i, j;
 262   lst_p l;
 263   poly_dr_p pdr;
 264   mpz_t s, n;
 265
 266   mpz_init (s);
 267   mpz_init (n);
 268
 269   FOR_EACH_VEC_ELT (lst_p, LST_SEQ (loop), j, l)
 270     if (LST_LOOP_P (l))
 271       memory_strides_in_loop_1 (l, depth, strides);
 272     else
 273       FOR_EACH_VEC_ELT (poly_dr_p, PBB_DRS (LST_PBB (l)), i, pdr)
 274         {
 275           pdr_stride_in_loop (s, depth, pdr);
 276           mpz_set_si (n, PDR_NB_REFS (pdr));
 277           mpz_mul (s, s, n);
 278           mpz_add (strides, strides, s);
 279         }
 280
 281   mpz_clear (s);
 282   mpz_clear (n);
 283 }
 284
 285 /* Sets STRIDES to the sum of all the strides of the data references
 286    accessed in LOOP at DEPTH.  */
 287
 288 static void
 289 memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
 290 {
 291   if (mpz_cmp_si (loop->memory_strides, -1) == 0)
 292     {
 293       mpz_set_si (strides, 0);
 294       memory_strides_in_loop_1 (loop, depth, strides);
 295     }
 296   else
 297     mpz_set (strides, loop->memory_strides);
 298 }
 299
 300 /* Return true when the interchange of loops LOOP1 and LOOP2 is
 301    profitable.
 302
 303    Example:
 304
 305    | int a[100][100];
 306    |
 307    | int
 308    | foo (int N)
 309    | {
 310    |   int j;
 311    |   int i;
 312    |
 313    |   for (i = 0; i < N; i++)
 314    |     for (j = 0; j < N; j++)
 315    |       a[j][2 * i] += 1;
 316    |
 317    |   return a[N][12];
 318    | }
 319
  320    The data access a[j][2 * i] is described like this:
 321
 322    | i   j   N   a  s0  s1   1
 323    | 0   0   0   1   0   0  -5    = 0
 324    | 0  -1   0   0   1   0   0    = 0
 325    |-2   0   0   0   0   1   0    = 0
 326    | 0   0   0   0   1   0   0   >= 0
 327    | 0   0   0   0   0   1   0   >= 0
 328    | 0   0   0   0  -1   0 100   >= 0
 329    | 0   0   0   0   0  -1 100   >= 0
 330
 331    The linearized memory access L to A[100][100] is:
 332
 333    | i   j   N   a  s0  s1   1
 334    | 0   0   0   0 100   1   0
 335
 336    TODO: the shown format is not valid as it does not show the fact
 337    that the iteration domain "i j" is transformed using the scattering.
 338
 339    Next, to measure the impact of iterating once in loop "i", we build
 340    a maximization problem: first, we add to DR accesses the dimensions
 341    k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
 342    L1 and L2 are the linearized memory access functions.
 343
 344    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 345    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 346    | 0  -1   0   0   1   0   0   0   0   0   0   0   0    = 0  s0 = j
 347    |-2   0   0   0   0   1   0   0   0   0   0   0   0    = 0  s1 = 2 * i
 348    | 0   0   0   0   1   0   0   0   0   0   0   0   0   >= 0
 349    | 0   0   0   0   0   1   0   0   0   0   0   0   0   >= 0
 350    | 0   0   0   0  -1   0   0   0   0   0   0   0 100   >= 0
 351    | 0   0   0   0   0  -1   0   0   0   0   0   0 100   >= 0
 352    | 0   0   0   0 100   1   0   0   0  -1   0   0   0    = 0  L1 = 100 * s0 + s1
 353
 354    Then, we generate the polyhedron P2 by interchanging the dimensions
 355    (s0, s2), (s1, s3), (L1, L2), (k, i)
 356
 357    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 358    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 359    | 0  -1   0   0   0   0   0   1   0   0   0   0   0    = 0  s2 = j
 360    | 0   0   0   0   0   0  -2   0   1   0   0   0   0    = 0  s3 = 2 * k
 361    | 0   0   0   0   0   0   0   1   0   0   0   0   0   >= 0
 362    | 0   0   0   0   0   0   0   0   1   0   0   0   0   >= 0
 363    | 0   0   0   0   0   0   0  -1   0   0   0   0 100   >= 0
 364    | 0   0   0   0   0   0   0   0  -1   0   0   0 100   >= 0
 365    | 0   0   0   0   0   0   0 100   1   0  -1   0   0    = 0  L2 = 100 * s2 + s3
 366
 367    then we add to P2 the equality k = i + 1:
 368
 369    |-1   0   0   0   0   0   1   0   0   0   0   0  -1    = 0  k = i + 1
 370
 371    and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
 372
 373    Similarly, to determine the impact of one iteration on loop "j", we
 374    interchange (k, j), we add "k = j + 1", and we compute D2 the
 375    maximal value of the difference.
 376
 377    Finally, the profitability test is D1 < D2: if in the outer loop
 378    the strides are smaller than in the inner loop, then it is
 379    profitable to interchange the loops at DEPTH1 and DEPTH2.  */
 380
 381 static bool
 382 lst_interchange_profitable_p (lst_p nest, int depth1, int depth2)
 383 {
 384   mpz_t d1, d2;
 385   bool res;
 386
 387   gcc_assert (depth1 < depth2);
 388
 389   mpz_init (d1);
 390   mpz_init (d2);
 391
 392   memory_strides_in_loop (nest, depth1, d1);
 393   memory_strides_in_loop (nest, depth2, d2);
 394
 395   res = mpz_cmp (d1, d2) < 0;
 396
 397   mpz_clear (d1);
 398   mpz_clear (d2);
 399
 400   return res;
 401 }
 402
 403 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
 404    scattering and assigns the resulting polyhedron to the transformed
 405    scattering.  */
 406
 407 static void
 408 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
 409                              poly_bb_p pbb)
 410 {
 411   unsigned i;
 412   unsigned dim1 = psct_dynamic_dim (pbb, depth1);
 413   unsigned dim2 = psct_dynamic_dim (pbb, depth2);
 414   isl_space *d = isl_map_get_space (pbb->transformed);
 415   isl_space *d1 = isl_space_range (d);
 416   unsigned n = isl_space_dim (d1, isl_dim_out);
 417   isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n);
 418   isl_map *x = isl_map_universe (d2);
 419
 420   x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2);
 421   x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1);
 422
 423   for (i = 0; i < n; i++)
 424     if (i != dim1 && i != dim2)
 425       x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i);
 426
 427   pbb->transformed = isl_map_apply_range (pbb->transformed, x);
 428 }
 429
 430 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
 431    the statements below LST.  */
 432
 433 static void
 434 lst_apply_interchange (lst_p lst, int depth1, int depth2)
 435 {
 436   if (!lst)
 437     return;
 438
 439   if (LST_LOOP_P (lst))
 440     {
 441       int i;
 442       lst_p l;
 443
 444       FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
 445         lst_apply_interchange (l, depth1, depth2);
 446     }
 447   else
 448     pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
 449 }
 450
 451 /* Return true when the nest starting at LOOP1 and ending on LOOP2 is
 452    perfect: i.e. there are no sequence of statements.  */
 453
 454 static bool
 455 lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
 456 {
 457   if (loop1 == loop2)
 458     return true;
 459
 460   if (!LST_LOOP_P (loop1))
 461     return false;
 462
 463   return VEC_length (lst_p, LST_SEQ (loop1)) == 1
 464     && lst_perfectly_nested_p (VEC_index (lst_p, LST_SEQ (loop1), 0), loop2);
 465 }
 466
 467 /* Transform the loop nest between LOOP1 and LOOP2 into a perfect
 468    nest.  To continue the naming tradition, this function is called
 469    after perfect_nestify.  NEST is set to the perfectly nested loop
 470    that is created.  BEFORE/AFTER are set to the loops distributed
 471    before/after the loop NEST.  */
 472
 473 static void
 474 lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
 475                      lst_p *nest, lst_p *after)
 476 {
 477   poly_bb_p first, last;
 478
 479   gcc_assert (loop1 && loop2
 480               && loop1 != loop2
 481               && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
 482
 483   first = LST_PBB (lst_find_first_pbb (loop2));
 484   last = LST_PBB (lst_find_last_pbb (loop2));
 485
 486   *before = copy_lst (loop1);
 487   *nest = copy_lst (loop1);
 488   *after = copy_lst (loop1);
 489
 490   lst_remove_all_before_including_pbb (*before, first, false);
 491   lst_remove_all_before_including_pbb (*after, last, true);
 492
 493   lst_remove_all_before_excluding_pbb (*nest, first, true);
 494   lst_remove_all_before_excluding_pbb (*nest, last, false);
 495
 496   if (lst_empty_p (*before))
 497     {
 498       free_lst (*before);
 499       *before = NULL;
 500     }
 501   if (lst_empty_p (*after))
 502     {
 503       free_lst (*after);
 504       *after = NULL;
 505     }
 506   if (lst_empty_p (*nest))
 507     {
 508       free_lst (*nest);
 509       *nest = NULL;
 510     }
 511 }
 512
 513 /* Try to interchange LOOP1 with LOOP2 for all the statements of the
 514    body of LOOP2.  LOOP1 contains LOOP2.  Return true if it did the
 515    interchange.  */
 516
 517 static bool
 518 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
 519 {
 520   int depth1 = lst_depth (loop1);
 521   int depth2 = lst_depth (loop2);
 522   lst_p transformed;
 523
 524   lst_p before = NULL, nest = NULL, after = NULL;
 525
 526   if (!lst_perfectly_nested_p (loop1, loop2))
 527     lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
 528
 529   if (!lst_interchange_profitable_p (loop2, depth1, depth2))
 530     return false;
 531
 532   lst_apply_interchange (loop2, depth1, depth2);
 533
 534   /* Sync the transformed LST information and the PBB scatterings
 535      before using the scatterings in the data dependence analysis.  */
 536   if (before || nest || after)
 537     {
 538       transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
 539                                       before, nest, after);
 540       lst_update_scattering (transformed);
 541       free_lst (transformed);
 542     }
 543
 544   if (graphite_legal_transform (scop))
 545     {
 546       if (dump_file && (dump_flags & TDF_DETAILS))
 547         fprintf (dump_file,
 548                  "Loops at depths %d and %d will be interchanged.\n",
 549                  depth1, depth2);
 550
 551       /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP.  */
 552       lst_insert_in_sequence (before, loop1, true);
 553       lst_insert_in_sequence (after, loop1, false);
 554
 555       if (nest)
 556         {
 557           lst_replace (loop1, nest);
 558           free_lst (loop1);
 559         }
 560
 561       return true;
 562     }
 563
 564   /* Undo the transform.  */
 565   free_lst (before);
 566   free_lst (nest);
 567   free_lst (after);
 568   lst_apply_interchange (loop2, depth2, depth1);
 569   return false;
 570 }
 571
 572 /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
 573    with the loop OUTER in LST_SEQ (OUTER_FATHER).  */
 574
 575 static bool
 576 lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
 577                               lst_p inner_father)
 578 {
 579   int inner;
 580   lst_p loop1, loop2;
 581
 582   gcc_assert (outer_father
 583               && LST_LOOP_P (outer_father)
 584               && LST_LOOP_P (VEC_index (lst_p, LST_SEQ (outer_father), outer))
 585               && inner_father
 586               && LST_LOOP_P (inner_father));
 587
 588   loop1 = VEC_index (lst_p, LST_SEQ (outer_father), outer);
 589
 590   FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner_father), inner, loop2)
 591     if (LST_LOOP_P (loop2)
 592         && (lst_try_interchange_loops (scop, loop1, loop2)
 593             || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
 594       return true;
 595
 596   return false;
 597 }
 598
 599 /* Interchanges all the loops of LOOP and the loops of its body that
 600    are considered profitable to interchange.  Return the number of
 601    interchanged loops.  OUTER is the index in LST_SEQ (LOOP) that
 602    points to the next outer loop to be considered for interchange.  */
 603
 604 static int
 605 lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
 606 {
 607   lst_p l;
 608   int res = 0;
 609   int i = 0;
 610   lst_p father;
 611
 612   if (!loop || !LST_LOOP_P (loop))
 613     return 0;
 614
 615   father = LST_LOOP_FATHER (loop);
 616   if (father)
 617     {
 618       while (lst_interchange_select_inner (scop, father, outer, loop))
 619         {
 620           res++;
 621           loop = VEC_index (lst_p, LST_SEQ (father), outer);
 622         }
 623     }
 624
 625   if (LST_LOOP_P (loop))
 626     FOR_EACH_VEC_ELT (lst_p, LST_SEQ (loop), i, l)
 627       if (LST_LOOP_P (l))
 628         res += lst_interchange_select_outer (scop, l, i);
 629
 630   return res;
 631 }
 632
 633 /* Interchanges all the loop depths that are considered profitable for
 634    SCOP.  Return the number of interchanged loops.  */
 635
 636 int
 637 scop_do_interchange (scop_p scop)
 638 {
 639   int res = lst_interchange_select_outer
 640     (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
 641
 642   lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
 643
 644   return res;
 645 }
 646
 647
 648 #endif
 649