gcc/graphite-interchange.c

   1 /* Interchange heuristics and transform for loop interchange on
   2    polyhedral representation.
   3
   4    Copyright (C) 2009-2014 Free Software Foundation, Inc.
   5    Contributed by Sebastian Pop <sebastian.pop@amd.com> and
   6    Harsha Jagasia <harsha.jagasia@amd.com>.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 3, or (at your option)
  13 any later version.
  14
  15 GCC is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24 #include "config.h"
  25
  26 #ifdef HAVE_isl
  27 #include <isl/aff.h>
  28 #include <isl/set.h>
  29 #include <isl/map.h>
  30 #include <isl/union_map.h>
  31 #include <isl/ilp.h>
  32 #include <isl/val.h>
  33 #if defined(__cplusplus)
  34 extern "C" {
  35 #endif
  36 #include <isl/val_gmp.h>
  37 #if defined(__cplusplus)
  38 }
  39 #endif
  40 #ifdef HAVE_cloog
  41 #include <cloog/cloog.h>
  42 #include <cloog/isl/domain.h>
  43 #endif
  44 #endif
  45
  46 #include "system.h"
  47 #include "coretypes.h"
  48 #include "tree.h"
  49 #include "basic-block.h"
  50 #include "tree-ssa-alias.h"
  51 #include "internal-fn.h"
  52 #include "gimple-expr.h"
  53 #include "is-a.h"
  54 #include "gimple.h"
  55 #include "gimple-iterator.h"
  56 #include "tree-ssa-loop.h"
  57 #include "dumpfile.h"
  58 #include "cfgloop.h"
  59 #include "tree-chrec.h"
  60 #include "tree-data-ref.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "sese.h"
  63
  64 #ifdef HAVE_isl
  65 #include "graphite-poly.h"
  66
  67 /* XXX isl rewrite following comment */
  68 /* Builds a linear expression, of dimension DIM, representing PDR's
  69    memory access:
  70
  71    L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
  72
  73    For an array A[10][20] with two subscript locations s0 and s1, the
  74    linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
  75    corresponds to a memory stride of 20.
  76
  77    OFFSET is a number of dimensions to prepend before the
  78    subscript dimensions: s_0, s_1, ..., s_n.
  79
  80    Thus, the final linear expression has the following format:
  81    0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
  82    where the expression itself is:
  83    c_0 * s_0 + c_1 * s_1 + ... c_n * s_n.  */
  84
  85 static isl_constraint *
  86 build_linearized_memory_access (isl_map *map, poly_dr_p pdr)
  87 {
  88   isl_constraint *res;
  89   isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map));
  90   unsigned offset, nsubs;
  91   int i;
  92   isl_ctx *ctx;
  93
  94   isl_val *size, *subsize, *size1;
  95
  96   res = isl_equality_alloc (ls);
  97   ctx = isl_local_space_get_ctx (ls);
  98   size = isl_val_int_from_ui (ctx, 1);
  99
 100   nsubs = isl_set_dim (pdr->extent, isl_dim_set);
 101   /* -1 for the already included L dimension.  */
 102   offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs;
 103   res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1);
 104   /* Go through all subscripts from last to first.  First dimension
 105      is the alias set, ignore it.  */
 106   for (i = nsubs - 1; i >= 1; i--)
 107     {
 108       isl_space *dc;
 109       isl_aff *aff;
 110
 111       size1 = isl_val_copy (size);
 112       res = isl_constraint_set_coefficient_val (res, isl_dim_out, offset + i, size);
 113       dc = isl_set_get_space (pdr->extent);
 114       aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
 115       aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1);
 116       subsize = isl_set_max_val (pdr->extent, aff);
 117       isl_aff_free (aff);
 118       size = isl_val_mul (size1, subsize);
 119     }
 120
 121   isl_val_free (size);
 122
 123   return res;
 124 }
 125
 126 /* Set STRIDE to the stride of PDR in memory by advancing by one in
 127    the loop at DEPTH.  */
 128
 129 static void
 130 pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
 131 {
 132   poly_bb_p pbb = PDR_PBB (pdr);
 133   isl_map *map;
 134   isl_set *set;
 135   isl_aff *aff;
 136   isl_space *dc;
 137   isl_constraint *lma, *c;
 138   isl_val *islstride;
 139   graphite_dim_t time_depth;
 140   unsigned offset, nt;
 141   unsigned i;
 142   /* XXX isl rewrite following comments.  */
 143   /* Builds a partial difference equations and inserts them
 144      into pointset powerset polyhedron P.  Polyhedron is assumed
 145      to have the format: T|I|T'|I'|G|S|S'|l1|l2.
 146
 147      TIME_DEPTH is the time dimension w.r.t. which we are
 148      differentiating.
 149      OFFSET represents the number of dimensions between
 150      columns t_{time_depth} and t'_{time_depth}.
 151      DIM_SCTR is the number of scattering dimensions.  It is
 152      essentially the dimensionality of the T vector.
 153
 154      The following equations are inserted into the polyhedron P:
 155      | t_1 = t_1'
 156      | ...
 157      | t_{time_depth-1} = t'_{time_depth-1}
 158      | t_{time_depth} = t'_{time_depth} + 1
 159      | t_{time_depth+1} = t'_{time_depth + 1}
 160      | ...
 161      | t_{dim_sctr} = t'_{dim_sctr}.  */
 162
 163   /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
 164      This is the core part of this alogrithm, since this
 165      constraint asks for the memory access stride (difference)
 166      between two consecutive points in time dimensions.  */
 167
 168   /* Add equalities:
 169      | t1 = t1'
 170      | ...
 171      | t_{time_depth-1} = t'_{time_depth-1}
 172      | t_{time_depth+1} = t'_{time_depth+1}
 173      | ...
 174      | t_{dim_sctr} = t'_{dim_sctr}
 175
 176      This means that all the time dimensions are equal except for
 177      time_depth, where the constraint is t_{depth} = t'_{depth} + 1
 178      step.  More to this: we should be careful not to add equalities
 179      to the 'coupled' dimensions, which happens when the one dimension
 180      is stripmined dimension, and the other dimension corresponds
 181      to the point loop inside stripmined dimension.  */
 182
 183   /* pdr->accesses:    [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript]
 184           ??? [P] not used for PDRs?
 185      pdr->extent:      [a,S1..nb_subscript]
 186      pbb->domain:      [P1..nb_param,I1..nb_domain]
 187      pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr]
 188           [T] includes local vars (currently unused)
 189
 190      First we create [P,I] -> [T,a,S].  */
 191
 192   map = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
 193                                     isl_map_copy (pdr->accesses));
 194   /* Add a dimension for L: [P,I] -> [T,a,S,L].*/
 195   map = isl_map_add_dims (map, isl_dim_out, 1);
 196   /* Build a constraint for "lma[S] - L == 0", effectively calculating
 197      L in terms of subscripts.  */
 198   lma = build_linearized_memory_access (map, pdr);
 199   /* And add it to the map, so we now have:
 200      [P,I] -> [T,a,S,L] : lma([S]) == L.  */
 201   map = isl_map_add_constraint (map, lma);
 202
 203   /* Then we create  [P,I,P',I'] -> [T,a,S,L,T',a',S',L'].  */
 204   map = isl_map_flat_product (map, isl_map_copy (map));
 205
 206   /* Now add the equality T[time_depth] == T'[time_depth]+1.  This will
 207      force L' to be the linear address at T[time_depth] + 1. */
 208   time_depth = psct_dynamic_dim (pbb, depth);
 209   /* Length of [a,S] plus [L] ...  */
 210   offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out);
 211   /* ... plus [T].  */
 212   offset += isl_map_dim (pbb->transformed, isl_dim_out);
 213
 214   c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map)));
 215   c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1);
 216   c = isl_constraint_set_coefficient_si (c, isl_dim_out,
 217                                          offset + time_depth, -1);
 218   c = isl_constraint_set_constant_si (c, 1);
 219   map = isl_map_add_constraint (map, c);
 220
 221   /* Now we equate most of the T/T' elements (making PITaSL nearly
 222      the same is (PITaSL)', except for one dimension, namely for 'depth'
 223      (an index into [I]), after translating to index into [T].  Take care
 224      to not produce an empty map, which indicates we wanted to equate
 225      two dimensions that are already coupled via the above time_depth
 226      dimension.  Happens with strip mining where several scatter dimension
 227      are interdependend.  */
 228   /* Length of [T].  */
 229   nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
 230   for (i = 0; i < nt; i++)
 231     if (i != time_depth)
 232       {
 233         isl_map *temp = isl_map_equate (isl_map_copy (map),
 234                                         isl_dim_out, i,
 235                                         isl_dim_out, offset + i);
 236         if (isl_map_is_empty (temp))
 237           isl_map_free (temp);
 238         else
 239           {
 240             isl_map_free (map);
 241             map = temp;
 242           }
 243       }
 244
 245   /* Now maximize the expression L' - L.  */
 246   set = isl_map_range (map);
 247   dc = isl_set_get_space (set);
 248   aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
 249   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1);
 250   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1);
 251   islstride = isl_set_max_val (set, aff);
 252   isl_val_get_num_gmp (islstride, stride);
 253   isl_val_free (islstride);
 254   isl_aff_free (aff);
 255   isl_set_free (set);
 256
 257   if (dump_file && (dump_flags & TDF_DETAILS))
 258     {
 259       gmp_fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:  %Zd ",
 260                    pbb_index (pbb), PDR_ID (pdr), (int) depth, stride);
 261     }
 262 }
 263
 264 /* Sets STRIDES to the sum of all the strides of the data references
 265    accessed in LOOP at DEPTH.  */
 266
 267 static void
 268 memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
 269 {
 270   int i, j;
 271   lst_p l;
 272   poly_dr_p pdr;
 273   mpz_t s, n;
 274
 275   mpz_init (s);
 276   mpz_init (n);
 277
 278   FOR_EACH_VEC_ELT (LST_SEQ (loop), j, l)
 279     if (LST_LOOP_P (l))
 280       memory_strides_in_loop_1 (l, depth, strides);
 281     else
 282       FOR_EACH_VEC_ELT (PBB_DRS (LST_PBB (l)), i, pdr)
 283         {
 284           pdr_stride_in_loop (s, depth, pdr);
 285           mpz_set_si (n, PDR_NB_REFS (pdr));
 286           mpz_mul (s, s, n);
 287           mpz_add (strides, strides, s);
 288         }
 289
 290   mpz_clear (s);
 291   mpz_clear (n);
 292 }
 293
 294 /* Sets STRIDES to the sum of all the strides of the data references
 295    accessed in LOOP at DEPTH.  */
 296
 297 static void
 298 memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
 299 {
 300   if (mpz_cmp_si (loop->memory_strides, -1) == 0)
 301     {
 302       mpz_set_si (strides, 0);
 303       memory_strides_in_loop_1 (loop, depth, strides);
 304     }
 305   else
 306     mpz_set (strides, loop->memory_strides);
 307 }
 308
 309 /* Return true when the interchange of loops LOOP1 and LOOP2 is
 310    profitable.
 311
 312    Example:
 313
 314    | int a[100][100];
 315    |
 316    | int
 317    | foo (int N)
 318    | {
 319    |   int j;
 320    |   int i;
 321    |
 322    |   for (i = 0; i < N; i++)
 323    |     for (j = 0; j < N; j++)
 324    |       a[j][2 * i] += 1;
 325    |
 326    |   return a[N][12];
 327    | }
 328
 329    The data access A[j][i] is described like this:
 330
 331    | i   j   N   a  s0  s1   1
 332    | 0   0   0   1   0   0  -5    = 0
 333    | 0  -1   0   0   1   0   0    = 0
 334    |-2   0   0   0   0   1   0    = 0
 335    | 0   0   0   0   1   0   0   >= 0
 336    | 0   0   0   0   0   1   0   >= 0
 337    | 0   0   0   0  -1   0 100   >= 0
 338    | 0   0   0   0   0  -1 100   >= 0
 339
 340    The linearized memory access L to A[100][100] is:
 341
 342    | i   j   N   a  s0  s1   1
 343    | 0   0   0   0 100   1   0
 344
 345    TODO: the shown format is not valid as it does not show the fact
 346    that the iteration domain "i j" is transformed using the scattering.
 347
 348    Next, to measure the impact of iterating once in loop "i", we build
 349    a maximization problem: first, we add to DR accesses the dimensions
 350    k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
 351    L1 and L2 are the linearized memory access functions.
 352
 353    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 354    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 355    | 0  -1   0   0   1   0   0   0   0   0   0   0   0    = 0  s0 = j
 356    |-2   0   0   0   0   1   0   0   0   0   0   0   0    = 0  s1 = 2 * i
 357    | 0   0   0   0   1   0   0   0   0   0   0   0   0   >= 0
 358    | 0   0   0   0   0   1   0   0   0   0   0   0   0   >= 0
 359    | 0   0   0   0  -1   0   0   0   0   0   0   0 100   >= 0
 360    | 0   0   0   0   0  -1   0   0   0   0   0   0 100   >= 0
 361    | 0   0   0   0 100   1   0   0   0  -1   0   0   0    = 0  L1 = 100 * s0 + s1
 362
 363    Then, we generate the polyhedron P2 by interchanging the dimensions
 364    (s0, s2), (s1, s3), (L1, L2), (k, i)
 365
 366    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 367    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 368    | 0  -1   0   0   0   0   0   1   0   0   0   0   0    = 0  s2 = j
 369    | 0   0   0   0   0   0  -2   0   1   0   0   0   0    = 0  s3 = 2 * k
 370    | 0   0   0   0   0   0   0   1   0   0   0   0   0   >= 0
 371    | 0   0   0   0   0   0   0   0   1   0   0   0   0   >= 0
 372    | 0   0   0   0   0   0   0  -1   0   0   0   0 100   >= 0
 373    | 0   0   0   0   0   0   0   0  -1   0   0   0 100   >= 0
 374    | 0   0   0   0   0   0   0 100   1   0  -1   0   0    = 0  L2 = 100 * s2 + s3
 375
 376    then we add to P2 the equality k = i + 1:
 377
 378    |-1   0   0   0   0   0   1   0   0   0   0   0  -1    = 0  k = i + 1
 379
 380    and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
 381
 382    Similarly, to determine the impact of one iteration on loop "j", we
 383    interchange (k, j), we add "k = j + 1", and we compute D2 the
 384    maximal value of the difference.
 385
 386    Finally, the profitability test is D1 < D2: if in the outer loop
 387    the strides are smaller than in the inner loop, then it is
 388    profitable to interchange the loops at DEPTH1 and DEPTH2.  */
 389
 390 static bool
 391 lst_interchange_profitable_p (lst_p nest, int depth1, int depth2)
 392 {
 393   mpz_t d1, d2;
 394   bool res;
 395
 396   gcc_assert (depth1 < depth2);
 397
 398   mpz_init (d1);
 399   mpz_init (d2);
 400
 401   memory_strides_in_loop (nest, depth1, d1);
 402   memory_strides_in_loop (nest, depth2, d2);
 403
 404   res = mpz_cmp (d1, d2) < 0;
 405
 406   mpz_clear (d1);
 407   mpz_clear (d2);
 408
 409   return res;
 410 }
 411
 412 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
 413    scattering and assigns the resulting polyhedron to the transformed
 414    scattering.  */
 415
 416 static void
 417 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
 418                              poly_bb_p pbb)
 419 {
 420   unsigned i;
 421   unsigned dim1 = psct_dynamic_dim (pbb, depth1);
 422   unsigned dim2 = psct_dynamic_dim (pbb, depth2);
 423   isl_space *d = isl_map_get_space (pbb->transformed);
 424   isl_space *d1 = isl_space_range (d);
 425   unsigned n = isl_space_dim (d1, isl_dim_out);
 426   isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n);
 427   isl_map *x = isl_map_universe (d2);
 428
 429   x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2);
 430   x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1);
 431
 432   for (i = 0; i < n; i++)
 433     if (i != dim1 && i != dim2)
 434       x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i);
 435
 436   pbb->transformed = isl_map_apply_range (pbb->transformed, x);
 437 }
 438
 439 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
 440    the statements below LST.  */
 441
 442 static void
 443 lst_apply_interchange (lst_p lst, int depth1, int depth2)
 444 {
 445   if (!lst)
 446     return;
 447
 448   if (LST_LOOP_P (lst))
 449     {
 450       int i;
 451       lst_p l;
 452
 453       FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
 454         lst_apply_interchange (l, depth1, depth2);
 455     }
 456   else
 457     pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
 458 }
 459
 460 /* Return true when the nest starting at LOOP1 and ending on LOOP2 is
 461    perfect: i.e. there are no sequence of statements.  */
 462
 463 static bool
 464 lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
 465 {
 466   if (loop1 == loop2)
 467     return true;
 468
 469   if (!LST_LOOP_P (loop1))
 470     return false;
 471
 472   return LST_SEQ (loop1).length () == 1
 473          && lst_perfectly_nested_p (LST_SEQ (loop1)[0], loop2);
 474 }
 475
 476 /* Transform the loop nest between LOOP1 and LOOP2 into a perfect
 477    nest.  To continue the naming tradition, this function is called
 478    after perfect_nestify.  NEST is set to the perfectly nested loop
 479    that is created.  BEFORE/AFTER are set to the loops distributed
 480    before/after the loop NEST.  */
 481
 482 static void
 483 lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
 484                      lst_p *nest, lst_p *after)
 485 {
 486   poly_bb_p first, last;
 487
 488   gcc_assert (loop1 && loop2
 489               && loop1 != loop2
 490               && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
 491
 492   first = LST_PBB (lst_find_first_pbb (loop2));
 493   last = LST_PBB (lst_find_last_pbb (loop2));
 494
 495   *before = copy_lst (loop1);
 496   *nest = copy_lst (loop1);
 497   *after = copy_lst (loop1);
 498
 499   lst_remove_all_before_including_pbb (*before, first, false);
 500   lst_remove_all_before_including_pbb (*after, last, true);
 501
 502   lst_remove_all_before_excluding_pbb (*nest, first, true);
 503   lst_remove_all_before_excluding_pbb (*nest, last, false);
 504
 505   if (lst_empty_p (*before))
 506     {
 507       free_lst (*before);
 508       *before = NULL;
 509     }
 510   if (lst_empty_p (*after))
 511     {
 512       free_lst (*after);
 513       *after = NULL;
 514     }
 515   if (lst_empty_p (*nest))
 516     {
 517       free_lst (*nest);
 518       *nest = NULL;
 519     }
 520 }
 521
 522 /* Try to interchange LOOP1 with LOOP2 for all the statements of the
 523    body of LOOP2.  LOOP1 contains LOOP2.  Return true if it did the
 524    interchange.  */
 525
 526 static bool
 527 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
 528 {
 529   int depth1 = lst_depth (loop1);
 530   int depth2 = lst_depth (loop2);
 531   lst_p transformed;
 532
 533   lst_p before = NULL, nest = NULL, after = NULL;
 534
 535   if (!lst_perfectly_nested_p (loop1, loop2))
 536     lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
 537
 538   if (!lst_interchange_profitable_p (loop2, depth1, depth2))
 539     return false;
 540
 541   lst_apply_interchange (loop2, depth1, depth2);
 542
 543   /* Sync the transformed LST information and the PBB scatterings
 544      before using the scatterings in the data dependence analysis.  */
 545   if (before || nest || after)
 546     {
 547       transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
 548                                       before, nest, after);
 549       lst_update_scattering (transformed);
 550       free_lst (transformed);
 551     }
 552
 553   if (graphite_legal_transform (scop))
 554     {
 555       if (dump_file && (dump_flags & TDF_DETAILS))
 556         fprintf (dump_file,
 557                  "Loops at depths %d and %d will be interchanged.\n",
 558                  depth1, depth2);
 559
 560       /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP.  */
 561       lst_insert_in_sequence (before, loop1, true);
 562       lst_insert_in_sequence (after, loop1, false);
 563
 564       if (nest)
 565         {
 566           lst_replace (loop1, nest);
 567           free_lst (loop1);
 568         }
 569
 570       return true;
 571     }
 572
 573   /* Undo the transform.  */
 574   free_lst (before);
 575   free_lst (nest);
 576   free_lst (after);
 577   lst_apply_interchange (loop2, depth2, depth1);
 578   return false;
 579 }
 580
 581 /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
 582    with the loop OUTER in LST_SEQ (OUTER_FATHER).  */
 583
 584 static bool
 585 lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
 586                               lst_p inner_father)
 587 {
 588   int inner;
 589   lst_p loop1, loop2;
 590
 591   gcc_assert (outer_father
 592               && LST_LOOP_P (outer_father)
 593               && LST_LOOP_P (LST_SEQ (outer_father)[outer])
 594               && inner_father
 595               && LST_LOOP_P (inner_father));
 596
 597   loop1 = LST_SEQ (outer_father)[outer];
 598
 599   FOR_EACH_VEC_ELT (LST_SEQ (inner_father), inner, loop2)
 600     if (LST_LOOP_P (loop2)
 601         && (lst_try_interchange_loops (scop, loop1, loop2)
 602             || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
 603       return true;
 604
 605   return false;
 606 }
 607
 608 /* Interchanges all the loops of LOOP and the loops of its body that
 609    are considered profitable to interchange.  Return the number of
 610    interchanged loops.  OUTER is the index in LST_SEQ (LOOP) that
 611    points to the next outer loop to be considered for interchange.  */
 612
 613 static int
 614 lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
 615 {
 616   lst_p l;
 617   int res = 0;
 618   int i = 0;
 619   lst_p father;
 620
 621   if (!loop || !LST_LOOP_P (loop))
 622     return 0;
 623
 624   father = LST_LOOP_FATHER (loop);
 625   if (father)
 626     {
 627       while (lst_interchange_select_inner (scop, father, outer, loop))
 628         {
 629           res++;
 630           loop = LST_SEQ (father)[outer];
 631         }
 632     }
 633
 634   if (LST_LOOP_P (loop))
 635     FOR_EACH_VEC_ELT (LST_SEQ (loop), i, l)
 636       if (LST_LOOP_P (l))
 637         res += lst_interchange_select_outer (scop, l, i);
 638
 639   return res;
 640 }
 641
 642 /* Interchanges all the loop depths that are considered profitable for
 643    SCOP.  Return the number of interchanged loops.  */
 644
 645 int
 646 scop_do_interchange (scop_p scop)
 647 {
 648   int res = lst_interchange_select_outer
 649     (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
 650
 651   lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
 652
 653   return res;
 654 }
 655
 656
 657 #endif
 658