graphite-interchange.c (build_partial_difference): New.
[gcc.git] / gcc / graphite-interchange.c
1 /* Interchange heuristics and transform for loop interchange on
2 polyhedral representation.
3
4 Copyright (C) 2009 Free Software Foundation, Inc.
5 Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6 Harsha Jagasia <harsha.jagasia@amd.com>.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "rtl.h"
30 #include "output.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "toplev.h"
35 #include "tree-dump.h"
36 #include "timevar.h"
37 #include "cfgloop.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-pass.h"
42 #include "domwalk.h"
43 #include "value-prof.h"
44 #include "pointer-set.h"
45 #include "gimple.h"
46 #include "params.h"
47
48 #ifdef HAVE_cloog
49 #include "cloog/cloog.h"
50 #include "ppl_c.h"
51 #include "sese.h"
52 #include "graphite-ppl.h"
53 #include "graphite.h"
54 #include "graphite-poly.h"
55
56 /* Builds a linear expression, of dimension DIM, representing PDR's
57 memory access:
58
59 L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
60
61 For an array A[10][20] with two subscript locations s0 and s1, the
62 linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
63 corresponds to a memory stride of 20.
64
65 OFFSET is a number of dimensions to prepend before the
66 subscript dimensions: s_0, s_1, ..., s_n.
67
68 Thus, the final linear expression has the following format:
69 0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
70 where the expression itself is:
71 c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */
72
73 static ppl_Linear_Expression_t
74 build_linearized_memory_access (ppl_dimension_type offset, poly_dr_p pdr)
75 {
76 ppl_Linear_Expression_t res;
77 ppl_Linear_Expression_t le;
78 ppl_dimension_type i;
79 ppl_dimension_type first = pdr_subscript_dim (pdr, 0);
80 ppl_dimension_type last = pdr_subscript_dim (pdr, PDR_NB_SUBSCRIPTS (pdr));
81 Value size, sub_size;
82 graphite_dim_t dim = offset + pdr_dim (pdr);
83
84 ppl_new_Linear_Expression_with_dimension (&res, dim);
85
86 value_init (size);
87 value_set_si (size, 1);
88 value_init (sub_size);
89 value_set_si (sub_size, 1);
90
91 for (i = last - 1; i >= first; i--)
92 {
93 ppl_set_coef_gmp (res, i + offset, size);
94
95 ppl_new_Linear_Expression_with_dimension (&le, dim - offset);
96 ppl_set_coef (le, i, 1);
97 ppl_max_for_le_pointset (PDR_ACCESSES (pdr), le, sub_size);
98 value_multiply (size, size, sub_size);
99 ppl_delete_Linear_Expression (le);
100 }
101
102 value_clear (sub_size);
103 value_clear (size);
104 return res;
105 }
106
107 /* Builds a partial difference equations and inserts them
108 into pointset powerset polyhedron P. Polyhedron is assumed
109 to have the format: T|I|T'|I'|G|S|S'|l1|l2.
110
111 TIME_DEPTH is the time dimension w.r.t. which we are
112 differentiating.
113 OFFSET represents the number of dimensions between
114 columns t_{time_depth} and t'_{time_depth}.
115 DIM_SCTR is the number of scattering dimensions. It is
116 essentially the dimensionality of the T vector.
117
118 The following equations are inserted into the polyhedron P:
119 | t_1 = t_1'
120 | ...
121 | t_{time_depth-1} = t'_{time_depth-1}
122 | t_{time_depth} = t'_{time_depth} + 1
123 | t_{time_depth+1} = t'_{time_depth + 1}
124 | ...
125 | t_{dim_sctr} = t'_{dim_sctr}. */
126
127 static void
128 build_partial_difference (ppl_Pointset_Powerset_C_Polyhedron_t *p,
129 ppl_dimension_type time_depth,
130 ppl_dimension_type offset,
131 ppl_dimension_type dim_sctr)
132 {
133 ppl_Constraint_t new_cstr;
134 ppl_Linear_Expression_t le;
135 ppl_dimension_type i;
136 ppl_dimension_type dim;
137 ppl_Pointset_Powerset_C_Polyhedron_t temp;
138
139 /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
140 This is the core part of this alogrithm, since this
141 constraint asks for the memory access stride (difference)
142 between two consecutive points in time dimensions. */
143
144 ppl_Pointset_Powerset_C_Polyhedron_space_dimension (*p, &dim);
145 ppl_new_Linear_Expression_with_dimension (&le, dim);
146 ppl_set_coef (le, time_depth, 1);
147 ppl_set_coef (le, time_depth + offset, -1);
148 ppl_set_inhomogeneous (le, 1);
149 ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
150 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr);
151 ppl_delete_Linear_Expression (le);
152 ppl_delete_Constraint (new_cstr);
153
154 /* Add equalities:
155 | t1 = t1'
156 | ...
157 | t_{time_depth-1} = t'_{time_depth-1}
158 | t_{time_depth+1} = t'_{time_depth+1}
159 | ...
160 | t_{dim_sctr} = t'_{dim_sctr}
161
162 This means that all the time dimensions are equal except for
163 time_depth, where the constraint is t_{depth} = t'_{depth} + 1
164 step. More to this: we should be carefull not to add equalities
165 to the 'coupled' dimensions, which happens when the one dimension
166 is stripmined dimension, and the other dimension corresponds
167 to the point loop inside stripmined dimension. */
168
169 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p);
170
171 for (i = 0; i < dim_sctr; i++)
172 if (i != time_depth)
173 {
174 ppl_new_Linear_Expression_with_dimension (&le, dim);
175 ppl_set_coef (le, i, 1);
176 ppl_set_coef (le, i + offset, -1);
177 ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
178 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (temp, new_cstr);
179
180 if (ppl_Pointset_Powerset_C_Polyhedron_is_empty (temp))
181 {
182 ppl_delete_Pointset_Powerset_C_Polyhedron (temp);
183 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p);
184 }
185 else
186 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr);
187 ppl_delete_Linear_Expression (le);
188 ppl_delete_Constraint (new_cstr);
189 }
190
191 ppl_delete_Pointset_Powerset_C_Polyhedron (temp);
192 }
193
194
195 /* Set STRIDE to the stride of PDR in memory by advancing by one in
196 time dimension DEPTH. */
197
198 static void
199 memory_stride_in_loop (Value stride, graphite_dim_t depth, poly_dr_p pdr)
200 {
201 ppl_dimension_type time_depth;
202 ppl_Linear_Expression_t le, lma;
203 ppl_Constraint_t new_cstr;
204 ppl_dimension_type i, *map;
205 ppl_Pointset_Powerset_C_Polyhedron_t p1, p2, sctr;
206 graphite_dim_t nb_subscripts = PDR_NB_SUBSCRIPTS (pdr) + 1;
207 poly_bb_p pbb = PDR_PBB (pdr);
208 ppl_dimension_type offset = pbb_nb_scattering_transform (pbb)
209 + pbb_nb_local_vars (pbb)
210 + pbb_dim_iter_domain (pbb);
211 ppl_dimension_type offsetg = offset + pbb_nb_params (pbb);
212 ppl_dimension_type dim_sctr = pbb_nb_scattering_transform (pbb)
213 + pbb_nb_local_vars (pbb);
214 ppl_dimension_type dim_L1 = offset + offsetg + 2 * nb_subscripts;
215 ppl_dimension_type dim_L2 = offset + offsetg + 2 * nb_subscripts + 1;
216 ppl_dimension_type new_dim = offset + offsetg + 2 * nb_subscripts + 2;
217
218 /* The resulting polyhedron should have the following format:
219 T|I|T'|I'|G|S|S'|l1|l2
220 where:
221 | T = t_1..t_{dim_sctr}
222 | I = i_1..i_{dim_iter_domain}
223 | T'= t'_1..t'_{dim_sctr}
224 | I'= i'_1..i'_{dim_iter_domain}
225 | G = g_1..g_{nb_params}
226 | S = s_1..s_{nb_subscripts}
227 | S'= s'_1..s'_{nb_subscripts}
228 | l1 and l2 are scalars.
229
230 Some invariants:
231 offset = dim_sctr + dim_iter_domain + nb_local_vars
232 offsetg = dim_sctr + dim_iter_domain + nb_local_vars + nb_params. */
233
234 /* Construct the T|I|0|0|G|0|0|0|0 part. */
235 {
236 ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
237 (&sctr, PBB_TRANSFORMED_SCATTERING (pbb));
238 ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
239 (sctr, 2 * nb_subscripts + 2);
240 ppl_insert_dimensions_pointset (sctr, offset, offset);
241 }
242
243 /* Construct the 0|I|0|0|G|S|0|0|0 part. */
244 {
245 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
246 (&p1, PDR_ACCESSES (pdr));
247 ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
248 (p1, nb_subscripts + 2);
249 ppl_insert_dimensions_pointset (p1, 0, dim_sctr);
250 ppl_insert_dimensions_pointset (p1, offset, offset);
251 }
252
253 /* Construct the 0|0|0|0|0|S|0|l1|0 part. */
254 {
255 lma = build_linearized_memory_access (offset + dim_sctr, pdr);
256 ppl_set_coef (lma, dim_L1, -1);
257 ppl_new_Constraint (&new_cstr, lma, PPL_CONSTRAINT_TYPE_EQUAL);
258 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p1, new_cstr);
259 ppl_delete_Linear_Expression (lma);
260 ppl_delete_Constraint (new_cstr);
261 }
262
263 /* Now intersect all the parts to get the polyhedron P1:
264 T|I|0|0|G|0|0|0 |0
265 0|I|0|0|G|S|0|0 |0
266 0|0|0|0|0|S|0|l1|0
267 ------------------
268 T|I|0|0|G|S|0|l1|0. */
269
270 ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, sctr);
271 ppl_delete_Pointset_Powerset_C_Polyhedron (sctr);
272
273 /* Build P2, which would have the following form:
274 0|0|T'|I'|G|0|S'|0|l2
275
276 P2 is built, by remapping the P1 polyhedron:
277 T|I|0|0|G|S|0|l1|0
278
279 using the following mapping:
280 T->T'
281 I->I'
282 S->S'
283 l1->l2. */
284 {
285 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
286 (&p2, p1);
287
288 map = ppl_new_id_map (new_dim);
289
290 /* TI -> T'I'. */
291 for (i = 0; i < offset; i++)
292 ppl_interchange (map, i, i + offset);
293
294 /* l1 -> l2. */
295 ppl_interchange (map, dim_L1, dim_L2);
296
297 /* S -> S'. */
298 for (i = 0; i < nb_subscripts; i++)
299 ppl_interchange (map, offset + offsetg + i,
300 offset + offsetg + nb_subscripts + i);
301
302 ppl_Pointset_Powerset_C_Polyhedron_map_space_dimensions (p2, map, new_dim);
303 free (map);
304 }
305
306 time_depth = psct_dynamic_dim (pbb, depth);
307
308 /* P1 = P1 inter P2. */
309 ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, p2);
310 build_partial_difference (&p1, time_depth, offset, dim_sctr);
311
312 /* Maximise the expression L2 - L1. */
313 {
314 ppl_new_Linear_Expression_with_dimension (&le, new_dim);
315 ppl_set_coef (le, dim_L2, 1);
316 ppl_set_coef (le, dim_L1, -1);
317 ppl_max_for_le_pointset (p1, le, stride);
318 }
319
320 if (dump_file && (dump_flags & TDF_DETAILS))
321 {
322 fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:",
323 pbb_index (pbb), PDR_ID (pdr), (int) depth);
324 value_print (dump_file, " %s ", stride);
325 }
326
327 ppl_delete_Pointset_Powerset_C_Polyhedron (p1);
328 ppl_delete_Pointset_Powerset_C_Polyhedron (p2);
329 ppl_delete_Linear_Expression (le);
330 }
331
332 /* Returns true when it is profitable to interchange time dimensions DEPTH1
333 and DEPTH2 with DEPTH1 < DEPTH2 for PBB.
334
335 Example:
336
337 | int a[100][100];
338 |
339 | int
340 | foo (int N)
341 | {
342 | int j;
343 | int i;
344 |
345 | for (i = 0; i < N; i++)
346 | for (j = 0; j < N; j++)
347 | a[j][2 * i] += 1;
348 |
349 | return a[N][12];
350 | }
351
352 The data access A[j][i] is described like this:
353
354 | i j N a s0 s1 1
355 | 0 0 0 1 0 0 -5 = 0
356 | 0 -1 0 0 1 0 0 = 0
357 |-2 0 0 0 0 1 0 = 0
358 | 0 0 0 0 1 0 0 >= 0
359 | 0 0 0 0 0 1 0 >= 0
360 | 0 0 0 0 -1 0 100 >= 0
361 | 0 0 0 0 0 -1 100 >= 0
362
363 The linearized memory access L to A[100][100] is:
364
365 | i j N a s0 s1 1
366 | 0 0 0 0 100 1 0
367
368 TODO: the shown format is not valid as it does not show the fact
369 that the iteration domain "i j" is transformed using the scattering.
370
371 Next, to measure the impact of iterating once in loop "i", we build
372 a maximization problem: first, we add to DR accesses the dimensions
373 k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
374 L1 and L2 are the linearized memory access functions.
375
376 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
377 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
378 | 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j
379 |-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i
380 | 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0
381 | 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0
382 | 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0
383 | 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0
384 | 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1
385
386 Then, we generate the polyhedron P2 by interchanging the dimensions
387 (s0, s2), (s1, s3), (L1, L2), (k, i)
388
389 | i j N a s0 s1 k s2 s3 L1 L2 D1 1
390 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5
391 | 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j
392 | 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k
393 | 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0
394 | 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0
395 | 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0
396 | 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0
397 | 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3
398
399 then we add to P2 the equality k = i + 1:
400
401 |-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1
402
403 and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
404
405 Similarly, to determine the impact of one iteration on loop "j", we
406 interchange (k, j), we add "k = j + 1", and we compute D2 the
407 maximal value of the difference.
408
409 Finally, the profitability test is D1 < D2: if in the outer loop
410 the strides are smaller than in the inner loop, then it is
411 profitable to interchange the loops at DEPTH1 and DEPTH2. */
412
413 static bool
414 pbb_interchange_profitable_p (graphite_dim_t depth1, graphite_dim_t depth2,
415 poly_bb_p pbb)
416 {
417 int i;
418 poly_dr_p pdr;
419 Value d1, d2, s, n;
420 bool res;
421
422 gcc_assert (depth1 < depth2);
423
424 value_init (d1);
425 value_set_si (d1, 0);
426 value_init (d2);
427 value_set_si (d2, 0);
428 value_init (s);
429 value_init (n);
430
431 for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb), i, pdr); i++)
432 {
433 value_set_si (n, PDR_NB_REFS (pdr));
434
435 memory_stride_in_loop (s, depth1, pdr);
436 value_multiply (s, s, n);
437 value_addto (d1, d1, s);
438
439 memory_stride_in_loop (s, depth2, pdr);
440 value_multiply (s, s, n);
441 value_addto (d2, d2, s);
442 }
443
444 res = value_lt (d1, d2);
445
446 value_clear (d1);
447 value_clear (d2);
448 value_clear (s);
449 value_clear (n);
450
451 return res;
452 }
453
454 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
455 scattering and assigns the resulting polyhedron to the transformed
456 scattering. */
457
458 static void
459 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
460 poly_bb_p pbb)
461 {
462 ppl_dimension_type i, dim;
463 ppl_dimension_type *map;
464 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
465 ppl_dimension_type dim1 = psct_dynamic_dim (pbb, depth1);
466 ppl_dimension_type dim2 = psct_dynamic_dim (pbb, depth2);
467
468 ppl_Polyhedron_space_dimension (poly, &dim);
469 map = (ppl_dimension_type *) XNEWVEC (ppl_dimension_type, dim);
470
471 for (i = 0; i < dim; i++)
472 map[i] = i;
473
474 map[dim1] = dim2;
475 map[dim2] = dim1;
476
477 ppl_Polyhedron_map_space_dimensions (poly, map, dim);
478 free (map);
479 }
480
481 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
482 the statements below LST. */
483
484 static void
485 lst_apply_interchange (lst_p lst, int depth1, int depth2)
486 {
487 if (!lst)
488 return;
489
490 if (LST_LOOP_P (lst))
491 {
492 int i;
493 lst_p l;
494
495 for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
496 lst_apply_interchange (l, depth1, depth2);
497 }
498 else
499 pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
500 }
501
502 /* Return true when the interchange of loops at depths DEPTH1 and
503 DEPTH2 to all the statements below LST is profitable. */
504
505 static bool
506 lst_interchange_profitable_p (lst_p lst, int depth1, int depth2)
507 {
508 if (!lst)
509 return false;
510
511 if (LST_LOOP_P (lst))
512 {
513 int i;
514 lst_p l;
515 bool res = false;
516
517 for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
518 {
519 bool profitable = lst_interchange_profitable_p (l, depth1, depth2);
520
521 if (profitable && !LST_LOOP_P (lst)
522 && dump_file && (dump_flags & TDF_DETAILS))
523 fprintf (dump_file,
524 "Interchanging loops at depths %d and %d is profitable for stmt_%d.\n",
525 depth1, depth2, pbb_index (LST_PBB (lst)));
526
527 res |= profitable;
528 }
529
530 return res;
531 }
532 else
533 return pbb_interchange_profitable_p (depth1, depth2, LST_PBB (lst));
534 }
535
536 /* Return true when the nest starting at LOOP1 and ending on LOOP2 is
537 perfect: i.e. there are no sequence of statements. */
538
539 static bool
540 lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
541 {
542 if (loop1 == loop2)
543 return true;
544
545 if (!LST_LOOP_P (loop1))
546 return false;
547
548 return VEC_length (lst_p, LST_SEQ (loop1)) == 1
549 && lst_perfectly_nested_p (VEC_index (lst_p, LST_SEQ (loop1), 0), loop2);
550 }
551
552 /* Transform the loop nest between LOOP1 and LOOP2 into a perfect
553 nest. To continue the naming tradition, this function is called
554 after perfect_nestify. NEST is set to the perfectly nested loop
555 that is created. BEFORE/AFTER are set to the loops distributed
556 before/after the loop NEST. */
557
558 static void
559 lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
560 lst_p *nest, lst_p *after)
561 {
562 poly_bb_p first, last;
563
564 gcc_assert (loop1 && loop2
565 && loop1 != loop2
566 && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
567
568 first = LST_PBB (lst_find_first_pbb (loop2));
569 last = LST_PBB (lst_find_last_pbb (loop2));
570
571 *before = copy_lst (loop1);
572 *nest = copy_lst (loop1);
573 *after = copy_lst (loop1);
574
575 lst_remove_all_before_including_pbb (*before, first, false);
576 lst_remove_all_before_including_pbb (*after, last, true);
577
578 lst_remove_all_before_excluding_pbb (*nest, first, true);
579 lst_remove_all_before_excluding_pbb (*nest, last, false);
580 }
581
582 /* Try to interchange LOOP1 with LOOP2 for all the statements of the
583 body of LOOP2. LOOP1 contains LOOP2. Return true if it did the
584 interchange. CREATED_LOOP_BEFORE/CREATED_LOOP_AFTER are set to
585 true if the loop distribution created a loop before/after LOOP1. */
586
587 static bool
588 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2,
589 lst_p *before, lst_p *nest, lst_p *after)
590 {
591 int depth1 = lst_depth (loop1);
592 int depth2 = lst_depth (loop2);
593 lst_p transformed;
594
595 *before = NULL;
596 *after = NULL;
597 *nest = NULL;
598
599 if (!lst_interchange_profitable_p (loop2, depth1, depth2))
600 return false;
601
602 if (!lst_perfectly_nested_p (loop1, loop2))
603 lst_perfect_nestify (loop1, loop2, before, nest, after);
604
605 lst_apply_interchange (loop2, depth1, depth2);
606
607 /* Sync the transformed LST information and the PBB scatterings
608 before using the scatterings in the data dependence analysis. */
609 if (*before || *nest || *after)
610 {
611 transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
612 *before, *nest, *after);
613 lst_update_scattering (transformed);
614 free_lst (transformed);
615 }
616
617 if (graphite_legal_transform (scop))
618 {
619 if (dump_file && (dump_flags & TDF_DETAILS))
620 fprintf (dump_file,
621 "Loops at depths %d and %d will be interchanged.\n",
622 depth1, depth2);
623
624 /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */
625 lst_insert_in_sequence (*before, loop1, true);
626 lst_insert_in_sequence (*after, loop1, false);
627
628 if (*nest)
629 {
630 lst_replace (loop1, *nest);
631 free_lst (loop1);
632 }
633
634 return true;
635 }
636
637 /* Undo the transform. */
638 lst_apply_interchange (loop2, depth2, depth1);
639 *before = NULL;
640 *after = NULL;
641 *nest = NULL;
642 return false;
643 }
644
645 static bool lst_do_interchange_1 (scop_p, lst_p, int *);
646
647 /* Try to interchange LOOP with all the loops contained in the body of
648 LST. Return true if it did interchanged some loops. INDEX points
649 to the next element to be processed by lst_do_interchange. */
650
651 static bool
652 lst_try_interchange (scop_p scop, lst_p loop, lst_p lst, int *index)
653 {
654 int i;
655 lst_p l;
656 lst_p before, nest, after;
657 bool res;
658
659 if (!lst || !LST_LOOP_P (lst))
660 return false;
661
662 res = lst_try_interchange_loops (scop, loop, lst, &before, &nest, &after);
663
664 if (before)
665 {
666 res |= lst_do_interchange_1 (scop, before, index);
667 (*index)++;
668 }
669
670 if (nest)
671 res |= lst_do_interchange_1 (scop, nest, index);
672 else
673 for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
674 res |= lst_try_interchange (scop, loop, l, index);
675
676 if (after)
677 {
678 res |= lst_do_interchange_1 (scop, after, index);
679 (*index)++;
680 }
681
682 (*index)++;
683 return res;
684 }
685
686 /* Interchanges all the loops of LOOP that are considered profitable
687 to interchange. Return true if it did interchanged some loops.
688 INDEX points to the next element to be processed by
689 lst_do_interchange. */
690
691 static bool
692 lst_do_interchange_1 (scop_p scop, lst_p loop, int *index)
693 {
694 int i;
695 lst_p l;
696 bool res = false;
697
698 if (!loop || !LST_LOOP_P (loop))
699 return false;
700
701 for (i = 0; VEC_iterate (lst_p, LST_SEQ (loop), i, l); i++)
702 res |= lst_try_interchange (scop, loop, l, index);
703
704 return res;
705 }
706
707 /* Interchanges all the loops of LOOP and the loops of its body that
708 are considered profitable to interchange. Return true if it did
709 interchanged some loops. INDEX points to the next element to be
710 processed in the LST_SEQ (LOOP) vector. */
711
712 static bool
713 lst_do_interchange (scop_p scop, lst_p loop, int *index)
714 {
715 lst_p l;
716 bool res = false;
717
718 if (!loop || !LST_LOOP_P (loop))
719 return false;
720
721 if (lst_depth (loop) >= 0)
722 res = lst_do_interchange_1 (scop, loop, index);
723
724 while (VEC_iterate (lst_p, LST_SEQ (loop), *index, l))
725 if (LST_LOOP_P (l))
726 res |= lst_do_interchange (scop, l, index);
727 else
728 (*index)++;
729
730 (*index)++;
731 return res;
732 }
733
734 /* Interchanges all the loop depths that are considered profitable for SCOP. */
735
736 bool
737 scop_do_interchange (scop_p scop)
738 {
739 int i = 0;
740 bool res = lst_do_interchange (scop, SCOP_TRANSFORMED_SCHEDULE (scop), &i);
741
742 lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
743
744 return res;
745 }
746
747
748 #endif
749